from typing import Type, Union

import numpy as np
import pandas as pd
import joblib
from sklearn import metrics
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
from sklearn.utils import check_X_y
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor

from normal_use import *  # provides average_R2 and other shared helpers

# Base regressors used for stacking, their constructor kwargs, and initial weights.
sumRegressors = [LGBMRegressor, RandomForestRegressor, XGBRegressor, CatBoostRegressor]
sumRegressor = Union[Type[LGBMRegressor], Type[RandomForestRegressor],
                     Type[XGBRegressor], Type[CatBoostRegressor]]
sumParams = [{}, {}, {}, {"silent": True}]
weight = [0.1, 0.2, 0.3, 0.4]

Sums = {}
train_test_data = None
out_weights = []
oof_train = {}   # per-target base-model predictions on the full training set
oof_test = {}    # per-target base-model predictions on the held-out test set

# Candidate meta-regressors:
# LinearRegression, Ridge, XGBRegressor, CatBoostRegressor, LGBMRegressor
# deepforest.CascadeForestRegressor
REG_TOTAL = Ridge


def get_random_small_train(X, y, percentage=0.8, seed=0):
    """Return a random subsample (X_train, y_train) covering `percentage` of the data."""
    data_keys = ['X_train', 'X_test', 'y_train', 'y_test']
    data_vals = train_test_split(X, y, random_state=seed, test_size=(1 - percentage))
    split = dict(zip(data_keys, data_vals))
    return split['X_train'], split['y_train']


def train_one_regressor(idx, regType: sumRegressor, use_RFsample=False, seed=0):
    """Fit one base regressor per target column and record its predictions."""
    full_X, full_ys = train_test_data['X_train'], train_test_data['y_train']
    tX, tys = train_test_data['X_test'], train_test_data['y_test']

    X, ys = full_X, full_ys
    if use_RFsample:
        # Bagging-style subsample of the training data (random-forest flavour).
        X, ys = get_random_small_train(X, ys, seed=seed)
        # which xxx_moon?
        # make_moons(n_samples=100, shuffle=True, noise=None, random_state=None)

    check_X_y(X, ys, multi_output=True)

    models = {}
    evals = []
    for target_col in ys.columns:
        y = ys[target_col]
        reg = regType(**sumParams[idx])
        reg.fit(X, y)
        models[target_col] = reg

        # Predict on the full training set: these columns become the meta-features.
        y_hat = reg.predict(full_X)
        oof_train[target_col].append(y_hat.reshape(-1, 1))

        # Predict on the held-out test set.
        ty_hat = reg.predict(tX)
        oof_test[target_col].append(ty_hat.reshape(-1, 1))
        ty = tys[target_col]

        # Per-target evaluation (squared=False gives RMSE; newer scikit-learn
        # exposes this as metrics.root_mean_squared_error).
        rmse = metrics.mean_squared_error(ty, ty_hat, squared=False)
        r2 = metrics.r2_score(ty, ty_hat)
        evals.append({'Error': target_col, 'RMSE': rmse, 'R^2': r2})

    print(regType.__name__)
    print(pd.DataFrame(evals))
    print("Average R2: ", average_R2(evals))
    joblib.dump(models, f"linear/{regType.__name__}_study_{idx}.model")


def train_linear_sumer():
    """Fit the meta-regressor (REG_TOTAL) on the base regressors' predictions."""
    ys = train_test_data['y_train']
    tys = train_test_data['y_test']

    evals = []
    for target_col in oof_train:
        # Stack each base regressor's predictions column-wise as meta-features.
        X = np.hstack(oof_train[target_col])
        tX = np.hstack(oof_test[target_col])
        print(ys.shape, X.shape, tys.shape, tX.shape)

        y = ys[target_col]
        ty = tys[target_col]

        clf = REG_TOTAL()
        clf.fit(X, y)
        ty_hat = clf.predict(tX)

        rmse = metrics.mean_squared_error(ty, ty_hat, squared=False)
        r2 = metrics.r2_score(ty, ty_hat)
        evals.append({'Error': target_col, 'RMSE': rmse, 'R^2': r2})

    print("linear *study* for {} regressors!".format(len(sumRegressors)))
    print(pd.DataFrame(evals))
    print("Average R2: ", average_R2(evals))


def study_linear(trainset):
    """
    Description
    -----------
    Train every base regressor, then fit a linear combination (meta-regressor)
    on top of their predictions. Change `sumRegressors`, `weight`, and
    `REG_TOTAL` here to study other combinations.

    Parameters
    ----------
    trainset : dict
        Train/test split with keys 'X_train', 'X_test', 'y_train', 'y_test'.

    Returns
    -------
    None
        Results are printed to screen.
    """
    global train_test_data
    train_test_data = trainset

    for target_col in train_test_data['y_train'].columns:
        oof_train[target_col] = []
        oof_test[target_col] = []

    for i, reg in enumerate(sumRegressors):
        train_one_regressor(i, reg, use_RFsample=True, seed=1024)
    train_linear_sumer()
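

# Usage sketch (illustration only, not part of the original pipeline): builds a
# synthetic multi-target regression problem and feeds the expected
# {'X_train', 'X_test', 'y_train', 'y_test'} dict into study_linear(). The
# feature/target column names below are placeholders; the "linear/" directory
# must exist for joblib.dump() to succeed, and normal_use must provide
# average_R2.
if __name__ == "__main__":
    from sklearn.datasets import make_regression

    X_raw, y_raw = make_regression(n_samples=500, n_features=10, n_targets=3,
                                   noise=0.1, random_state=0)
    X_df = pd.DataFrame(X_raw, columns=[f"f{i}" for i in range(X_raw.shape[1])])
    y_df = pd.DataFrame(y_raw, columns=[f"target_{i}" for i in range(y_raw.shape[1])])

    keys = ['X_train', 'X_test', 'y_train', 'y_test']
    trainset = dict(zip(keys, train_test_split(X_df, y_df, test_size=0.2,
                                               random_state=0)))
    study_linear(trainset)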