You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

91 lines
3.0 KiB
Python

import math
import os
from typing import Type, Union

from normal_use import *
# Regressor classes blended by only_linear(). The position of a class in this
# list is the `id` used to index `sumParams` and `weight` below.
sumRegressors = [LGBMRegressor, RandomForestRegressor, XGBRegressor, CatBoostRegressor]

# Annotation alias for "one of the regressor classes above". The functions in
# this module receive the class itself (they call `regType(**params)`), hence
# Type[...]. The previous `Union[type(sumRegressors)]` evaluated to plain `list`.
sumRegressor = Type[
    Union[LGBMRegressor, RandomForestRegressor, XGBRegressor, CatBoostRegressor]
]

# Constructor keyword arguments, one dict per entry of `sumRegressors`.
sumParams = [{}, {}, {}, {"silent": True}]

# Blend weight applied to each regressor's predictions, one per entry of
# `sumRegressors`.
weight = [0.1, 0.2, 0.3, 0.4]

# Running weighted sum of predictions per target column; filled by eval_linear().
Sums = {}

# Dict with keys 'X_train', 'X_test', 'y_train', 'y_test'; set by only_linear().
train_test_data = None

# One {'Regressor': ..., 'Weight': ...} row per evaluated regressor; filled by
# eval_linear() and printed by only_linear().
out_weights = []
def get_random_small_train(X, y, Percentage = 0.8):
    """Return a random subset of (X, y) holding about `Percentage` of the rows.

    Parameters
    ----------
    X, y :
        Features and targets, forwarded unchanged to ``train_test_split``.
    Percentage : float
        Fraction of the samples to keep in the returned training subset.

    Returns
    -------
    tuple
        ``(X_train, y_train)``; the complementary part of the split is discarded.
    """
    # Pass the fraction as train_size instead of test_size=(1-Percentage):
    # the subtraction has float round-off (1 - 0.7 == 0.30000000000000004) and
    # the test size is rounded up, which can move one extra sample out of the
    # training subset.
    X_train, _X_rest, y_train, _y_rest = train_test_split(
        X, y, train_size=Percentage
    )
    return X_train, y_train
def train_linear(id, regType: sumRegressor, use_RFsample = False):
    """Fit one `regType` model per target column and save them to disk.

    Parameters
    ----------
    id : int
        Index into `sumParams`, selecting the constructor arguments; also part
        of the output file name.
    regType :
        Regressor class to instantiate (one of `sumRegressors`).
    use_RFsample : bool
        Currently has no effect: the random sub-sampling it was meant to
        enable is disabled, so the full training split is always used.

    Returns
    -------
    None
        The fitted models are written to ``linear/<ClassName>_<id>.model`` as a
        dict mapping target column name to fitted regressor.
    """
    X, ys = train_test_data['X_train'], train_test_data['y_train']
    if use_RFsample:
        # Sub-sampling is intentionally disabled for now; re-enable with
        # X, ys = get_random_small_train(X, ys)
        pass

    # Called only for validation (return value unused); presumably raises on
    # inconsistent or invalid input -- see scikit-learn's check_X_y.
    check_X_y(X, ys, multi_output=True)

    # Make sure the output directory exists *before* the training work, so a
    # missing directory cannot make joblib.dump fail after every model is fit.
    os.makedirs("linear", exist_ok=True)

    params = sumParams[id]
    models = {}
    for target_col in ys.columns:
        reg = regType(**params)
        reg.fit(X, ys[target_col])
        models[target_col] = reg

    joblib.dump(models, f"linear/{regType.__name__}_{id}.model")
def eval_linear(id, regType: sumRegressor):
    """Score the saved `regType` models on the test split and add them to the blend.

    Loads the per-target models written by ``train_linear``, prints RMSE and
    R^2 per target column, and accumulates ``weight[id] * prediction`` into
    the module-level `Sums`. Also appends this regressor's name and weight to
    `out_weights`.

    Parameters
    ----------
    id : int
        Index into `weight`; also part of the model file name.
    regType :
        Regressor class whose saved models are evaluated.

    Returns
    -------
    None
    """
    # NOTE: joblib.load unpickles the file; only load model files produced by
    # train_linear in this module.
    models = joblib.load(f"linear/{regType.__name__}_{id}.model")
    X, ys = train_test_data['X_test'], train_test_data['y_test']

    model_weight = weight[id]
    out_weights.append({'Regressor': regType.__name__, 'Weight': model_weight})

    evals = []
    for target_col, reg in models.items():
        y_hat = reg.predict(X)  # predicted values
        y = ys[target_col]      # ground truth
        # RMSE as sqrt(MSE): the `squared=False` keyword is deprecated in
        # recent scikit-learn releases, and for a single target column the
        # two are the same quantity.
        rmse = math.sqrt(metrics.mean_squared_error(y, y_hat))
        r2 = metrics.r2_score(y, y_hat)
        evals.append({'Error': target_col, 'RMSE': rmse, 'R^2': r2})

        # Accumulate this model's weighted contribution to the blended prediction.
        if Sums.get(target_col) is None:
            Sums[target_col] = model_weight * y_hat
        else:
            Sums[target_col] += model_weight * y_hat

    print(f"{regType.__name__}_{id}")
    print(pd.DataFrame(evals))
def only_linear(trainset):
    """
    Description
    -----------
    Train every regressor in `sumRegressors`, evaluate each one, then evaluate
    their weighted linear combination. Edit the module-level `sumRegressors`,
    `sumParams` and `weight` lists to change the blend.

    Parameters
    ----------
    trainset : dict
        Data set with keys 'X_train', 'X_test', 'y_train' and 'y_test'.

    Returns
    -------
    None
        Per-regressor and blended metrics are printed to stdout.
    """
    global train_test_data
    train_test_data = trainset

    # Start from a clean slate: these module-level accumulators are filled by
    # eval_linear(), and without a reset a second call would add its
    # predictions on top of the previous run and duplicate the weight rows.
    # Cleared in place so eval_linear keeps seeing the same objects.
    Sums.clear()
    out_weights.clear()

    # Train everything first, then evaluate, matching the original ordering.
    for i, reg in enumerate(sumRegressors):
        train_linear(i, reg)
    for i, reg in enumerate(sumRegressors):
        eval_linear(i, reg)

    ys = train_test_data['y_test']
    evals = []
    for target_col, y_hat in Sums.items():
        y = ys[target_col]  # ground truth; y_hat is the blended prediction
        # RMSE as sqrt(MSE): avoids the `squared=False` keyword, which is
        # deprecated in recent scikit-learn releases.
        rmse = math.sqrt(metrics.mean_squared_error(y, y_hat))
        r2 = metrics.r2_score(y, y_hat)
        evals.append({'Error': target_col, 'RMSE': rmse, 'R^2': r2})

    print("linear sum for {} regressors!".format(len(sumRegressors)))
    print(pd.DataFrame(out_weights))
    print(pd.DataFrame(evals))
    print("Average R2: ", average_R2(evals))