Sat, 13 Nov 2021 20:35:45 GMT

master
大蒟蒻 4 years ago
parent d9d447438e
commit c7538f3a61

BIN
1.xlsx

Binary file not shown.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

Binary file not shown.

@@ -54,12 +54,9 @@ from sklearn.utils.validation import check_X_y
import joblib
from catboost import CatBoostRegressor
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor
from deepforest import CascadeForestRegressor
Regressor = Union[CatBoostRegressor, LGBMRegressor, XGBRegressor]
Regressor = Union[CatBoostRegressor, RandomForestRegressor]
def train_model(regType: Regressor):
@@ -79,7 +76,7 @@ def train_model(regType: Regressor):
for reg in [
#CatBoostRegressor, LGBMRegressor, XGBRegressor, RandomForestRegressor,
CascadeForestRegressor
CatBoostRegressor
]:
train_model(reg)
@@ -90,23 +87,28 @@ from sklearn import metrics
def eval_model(regType: Regressor):
models = joblib.load(f"models/{regType.__name__}.model")
X, ys = train_test_data['X_test'], train_test_data['y_test']
if isinstance(X, CascadeForestRegressor):
X = X.values
evals = []
for target_col, reg in models.items():
y_hat = reg.predict(X) # fake
y = ys[target_col] # real
dy = (y - y_hat).abs()
rmse = metrics.mean_squared_error(y, y_hat, squared=False)
r2 = metrics.r2_score(y, y_hat)
eval_dict = {'Error': target_col, 'RMSE': rmse, 'R^2': r2}
eval_dict = {
'Error': target_col,
'RMSE': rmse,
'R^2': r2,
"err_max": dy.max(),
"err_min": dy.min(),
"err_mean": dy.mean(),
}
evals.append(eval_dict)
print(regType.__name__)
print(pd.DataFrame(evals))
for reg in [
CatBoostRegressor, LGBMRegressor, XGBRegressor, RandomForestRegressor,
CascadeForestRegressor
CatBoostRegressor,
]:
eval_model(reg)
# %%

@@ -20,11 +20,7 @@ from sklearn.model_selection import train_test_split
feature_cols = ['elapsed_seconds'
] + get_state_vect_cols('pred') + get_state_vect_cols('start')
# The target values are the errors between the physical model predictions
# and the ground truth observations
target_cols = get_state_vect_cols('err')
print(target_cols)
# Create feature and target matrices
X = df[feature_cols]
y = df[target_cols]
data_keys = ['X_train', 'X_test', 'y_train', 'y_test']
@@ -64,12 +60,17 @@ def eval_model():
for target_col, reg in models.items():
y_hat = reg.predict(X) # fake
y = ys[target_col] # real
print(y_hat[:10])
print(y[:10])
print("-----")
dy = (y - y_hat).abs()
rmse = metrics.mean_squared_error(y, y_hat, squared=False)
r2 = metrics.r2_score(y, y_hat)
eval_dict = {'Error': target_col, 'RMSE': rmse, 'R^2': r2}
eval_dict = {
'Error': target_col,
'RMSE': rmse,
'R^2': r2,
"err_max": dy.max(),
"err_min": dy.min(),
"err_mean": dy.mean(),
}
evals.append(eval_dict)
print(pd.DataFrame(evals))

Loading…
Cancel
Save