Sat, 13 Nov 2021 20:35:45 GMT

4 years ago · c7538f3a61
parent d9d447438e
commit c7538f3a61
9 changed files with 3020 additions and 3017 deletions
--- a/1.xlsx
+++ b/1.xlsx
--- a/catboost_info/catboost_training.json
+++ b/catboost_info/catboost_training.json
--- a/catboost_info/learn/events.out.tfevents
+++ b/catboost_info/learn/events.out.tfevents
--- a/catboost_info/learn_error.tsv
+++ b/catboost_info/learn_error.tsv
--- a/catboost_info/time_left.tsv
+++ b/catboost_info/time_left.tsv
--- a/catboost_info/tmp/cat_feature_index.704a9020-9e036d5d-7e7f044c-be502316.tmp
+++ b/catboost_info/tmp/cat_feature_index.704a9020-9e036d5d-7e7f044c-be502316.tmp
--- a/models/CatBoostRegressor.model
+++ b/models/CatBoostRegressor.model
--- a/train_model.py
+++ b/train_model.py
@@ -54,12 +54,9 @@ from sklearn.utils.validation import check_X_y
 import joblib

 from catboost import CatBoostRegressor
-from lightgbm import LGBMRegressor
-from xgboost import XGBRegressor
 from sklearn.ensemble import RandomForestRegressor
-from deepforest import CascadeForestRegressor

-Regressor = Union[CatBoostRegressor, LGBMRegressor, XGBRegressor]
+Regressor = Union[CatBoostRegressor, RandomForestRegressor]


 def train_model(regType: Regressor):
@@ -79,7 +76,7 @@ def train_model(regType: Regressor):

 for reg in [
        #CatBoostRegressor, LGBMRegressor, XGBRegressor, RandomForestRegressor,
-        CascadeForestRegressor
+        CatBoostRegressor
 ]:
    train_model(reg)

@@ -90,23 +87,28 @@ from sklearn import metrics
 def eval_model(regType: Regressor):
    models = joblib.load(f"models/{regType.__name__}.model")
    X, ys = train_test_data['X_test'], train_test_data['y_test']
-    if isinstance(X, CascadeForestRegressor):
-        X = X.values
    evals = []
    for target_col, reg in models.items():
        y_hat = reg.predict(X)  # fake
        y = ys[target_col]  # real
+        dy = (y - y_hat).abs()
        rmse = metrics.mean_squared_error(y, y_hat, squared=False)
        r2 = metrics.r2_score(y, y_hat)
-        eval_dict = {'Error': target_col, 'RMSE': rmse, 'R^2': r2}
+        eval_dict = {
+            'Error': target_col,
+            'RMSE': rmse,
+            'R^2': r2,
+            "err_max": dy.max(),
+            "err_min": dy.min(),
+            "err_mean": dy.mean(),
+        }
        evals.append(eval_dict)
    print(regType.__name__)
    print(pd.DataFrame(evals))


 for reg in [
-        CatBoostRegressor, LGBMRegressor, XGBRegressor, RandomForestRegressor,
-        CascadeForestRegressor
+        CatBoostRegressor,
 ]:
    eval_model(reg)
 # %%
--- a/train_model_copy.py
+++ b/train_model_copy.py
@@ -20,11 +20,7 @@ from sklearn.model_selection import train_test_split

 feature_cols = ['elapsed_seconds'
                ] + get_state_vect_cols('pred') + get_state_vect_cols('start')
-# The target values are the errors between the physical model predictions
-# and the ground truth observations
 target_cols = get_state_vect_cols('err')
-print(target_cols)
-# Create feature and target matrices
 X = df[feature_cols]
 y = df[target_cols]
 data_keys = ['X_train', 'X_test', 'y_train', 'y_test']
@@ -64,12 +60,17 @@ def eval_model():
    for target_col, reg in models.items():
        y_hat = reg.predict(X)  # fake
        y = ys[target_col]  # real
-        print(y_hat[:10])
-        print(y[:10])
-        print("-----")
+        dy = (y - y_hat).abs()
        rmse = metrics.mean_squared_error(y, y_hat, squared=False)
        r2 = metrics.r2_score(y, y_hat)
-        eval_dict = {'Error': target_col, 'RMSE': rmse, 'R^2': r2}
+        eval_dict = {
+            'Error': target_col,
+            'RMSE': rmse,
+            'R^2': r2,
+            "err_max": dy.max(),
+            "err_min": dy.min(),
+            "err_mean": dy.mean(),
+        }
        evals.append(eval_dict)
    print(pd.DataFrame(evals))