import math
import os

from normal_use import *

# Regressor classes combined into the weighted sum. The three lists below are
# index-aligned: sumParams[i] and weight[i] belong to sumRegressors[i].
sumRegressors = [LGBMRegressor, RandomForestRegressor, XGBRegressor, CatBoostRegressor]
# NOTE(review): type(sumRegressors) is `list`, so this alias evaluates to
# `list`, not to a union of the regressor classes. It is only used as an
# annotation and is kept unchanged for compatibility with existing importers.
sumRegressor = Union[type(sumRegressors)]
sumParams = [{}, {}, {}, {"silent": True}]
weight = [0.1, 0.2, 0.3, 0.4]

# Per-run state, filled by eval_linear() and reset by only_linear().
Sums = {}                # target column -> weighted sum of predictions
train_test_data = None   # dict with 'X_train', 'X_test', 'y_train', 'y_test'
out_weights = []         # one {'Regressor', 'Weight'} row per evaluated model


def get_random_small_train(X, y, Percentage=0.8):
    """Return a random training subset of ``X`` and ``y``.

    The data is split with ``train_test_split`` using
    ``test_size=(1 - Percentage)``; only the training part is returned and the
    held-out part is discarded.

    Parameters
    ----------
    X, y
        Features and targets, passed straight to ``train_test_split``.
    Percentage : float
        Fraction of the rows to keep.

    Returns
    -------
    tuple
        ``(X_train, y_train)``.
    """
    # train_test_split returns (X_train, X_test, y_train, y_test).
    X_train, _, y_train, _ = train_test_split(X, y, test_size=(1 - Percentage))
    return X_train, y_train


def _model_path(id, regType):
    """Return the file path used to persist the models of one regressor."""
    return f"linear/{regType.__name__}_{id}.model"


def _score_target(target_col, y_true, y_pred):
    """Return the report row (RMSE and R^2) for one target column."""
    # sqrt(MSE) instead of `squared=False`: that keyword was deprecated and
    # later removed from scikit-learn's mean_squared_error (assuming `metrics`
    # is sklearn.metrics -- it comes from the star import).
    rmse = math.sqrt(metrics.mean_squared_error(y_true, y_pred))
    r2 = metrics.r2_score(y_true, y_pred)
    return {'Error': target_col, 'RMSE': rmse, 'R^2': r2}


def train_linear(id, regType: sumRegressor, use_RFsample=False):
    """Fit one ``regType`` model per target column and save them to disk.

    Reads the training split from the module-level ``train_test_data`` and
    writes a ``{target_col: fitted_model}`` dict with ``joblib.dump``.

    Parameters
    ----------
    id : int
        Index into ``sumParams``; also part of the output file name.
    regType
        Regressor class, instantiated as ``regType(**sumParams[id])``.
    use_RFsample : bool
        Currently has no effect (see the TODO below).
    """
    X, ys = train_test_data['X_train'], train_test_data['y_train']
    if use_RFsample:
        # TODO: sub-sampling (e.g. via get_random_small_train) is not enabled;
        # the flag is accepted but ignored.
        pass
    # Called for validation only; the returned arrays are not used.
    check_X_y(X, ys, multi_output=True)

    models = {}
    for target_col in ys.columns:
        reg = regType(**sumParams[id])
        reg.fit(X, ys[target_col])
        models[target_col] = reg

    path = _model_path(id, regType)
    # joblib.dump does not create missing directories.
    os.makedirs(os.path.dirname(path), exist_ok=True)
    joblib.dump(models, path)


def eval_linear(id, regType: sumRegressor):
    """Evaluate the saved models of one regressor and add them to ``Sums``.

    Loads the models written by ``train_linear``, predicts on the test split
    of the module-level ``train_test_data``, prints RMSE / R^2 per target
    column, appends the regressor's weight to ``out_weights`` and adds
    ``weight[id] * prediction`` to ``Sums[target_col]``.

    Parameters
    ----------
    id : int
        Index into ``weight``; also part of the model file name.
    regType
        Regressor class whose ``__name__`` is part of the model file name.
    """
    models = joblib.load(_model_path(id, regType))
    X, ys = train_test_data['X_test'], train_test_data['y_test']

    out_weights.append({'Regressor': regType.__name__, 'Weight': weight[id]})

    evals = []
    for target_col, reg in models.items():
        y_hat = reg.predict(X)   # predicted
        y = ys[target_col]       # ground truth
        evals.append(_score_target(target_col, y, y_hat))

        weighted = weight[id] * y_hat
        if target_col in Sums:
            Sums[target_col] += weighted
        else:
            Sums[target_col] = weighted

    print(f"{regType.__name__}_{id}")
    print(pd.DataFrame(evals))


def only_linear(trainset):
    """
    Description
    -----------
    Train every regressor in ``sumRegressors``, evaluate each one, then
    evaluate their weighted linear combination. Change ``sumRegressors``,
    ``sumParams`` and ``weight`` at module level to alter the combination.

    Parameters
    ----------
    trainset : dict
        Data set with the keys 'X_train', 'X_test', 'y_train', 'y_test'.

    Returns
    -------
    None
        Results are printed on screen.
    """
    global train_test_data
    train_test_data = trainset

    # Reset the accumulators in place; otherwise a second call would add the
    # new predictions to the previous run's sums and duplicate weight rows.
    Sums.clear()
    out_weights.clear()

    for i, reg in enumerate(sumRegressors):
        train_linear(i, reg)
    for i, reg in enumerate(sumRegressors):
        eval_linear(i, reg)

    ys = train_test_data['y_test']
    evals = [
        _score_target(target_col, ys[target_col], y_hat)
        for target_col, y_hat in Sums.items()
    ]

    print("linear sum for {} regressors!".format(len(sumRegressors)))
    print(pd.DataFrame(out_weights))
    print(pd.DataFrame(evals))
    print("Average R2: ", average_R2(evals))