You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
ssa/train_model_copy.py

80 lines
2.0 KiB
Python

# %%
import pandas as pd
from itertools import product
def get_state_vect_cols(prefix: str = '') -> list:
    """Return the six state-vector column names, optionally prefixed.

    The names are built as ``<vector>_<component>`` for every combination of
    the vectors ``r``, ``v`` and the components ``x``, ``y``, ``z``, in the
    order ``r_x, r_y, r_z, v_x, v_y, v_z``.

    Args:
        prefix: Optional label put in front of every name, separated by an
            underscore (e.g. ``'pred'`` -> ``'pred_r_x'``). An empty prefix
            yields the bare names.

    Returns:
        A list of six column-name strings.
    """
    # Only add the separating underscore when there is a prefix to separate.
    lead = f'{prefix}_' if prefix else ''
    vectors = ['r', 'v']
    components = ['x', 'y', 'z']
    return [f'{lead}{v}_{c}' for v, c in product(vectors, components)]
# %%
# Load the full dataset from the parquet file "data.pq"; the relative path is
# resolved against the current working directory.
df = pd.read_parquet("data.pq")
# %%
from sklearn.model_selection import train_test_split
# Model inputs: the elapsed time plus the 'pred' and 'start' state-vector columns.
feature_cols = (
    ['elapsed_seconds']
    + get_state_vect_cols('pred')
    + get_state_vect_cols('start')
)
# Model outputs: the 'err' state-vector columns (one regressor is fit per column).
target_cols = get_state_vect_cols('err')
X = df[feature_cols]
y = df[target_cols]
# Key order matches the order in which train_test_split returns its pieces.
data_keys = ['X_train', 'X_test', 'y_train', 'y_test']
# A fixed seed keeps the split identical across runs. The models are trained
# in one run and loaded from disk for evaluation in another; with an unseeded
# split the "test" rows of a later run could include rows the saved models
# were trained on, which would inflate the evaluation metrics.
data_vals = train_test_split(X, y, test_size=0.2, random_state=42)
train_test_data = dict(zip(data_keys, data_vals))
# %%
from sklearn.utils.validation import check_X_y
import joblib
from catboost import CatBoostRegressor
def train_model():
    """Fit one CatBoostRegressor per target column and save them to disk.

    Reads the training split from the module-level ``train_test_data`` dict
    (keys ``'X_train'`` and ``'y_train'``), fits an independent regressor for
    each column of the target frame, and dumps the resulting
    ``{target column name: fitted regressor}`` dict with joblib to
    ``models/CatBoostRegressor.model``. The ``models`` directory is created
    if it does not exist yet.
    """
    import os  # local import: only needed to create the output directory

    X, ys = train_test_data['X_train'], train_test_data['y_train']
    # Validate inputs before any fitting starts; the target frame has several
    # columns, hence multi_output=True.
    check_X_y(X, ys, multi_output=True)

    models = {}
    for target_col in ys.columns:
        y1 = ys[target_col]
        print(X.shape, y1.shape)
        reg = CatBoostRegressor()
        reg.fit(X, y1)
        models[target_col] = reg
        print(target_col)

    model_path = f"models/{CatBoostRegressor.__name__}.model"
    # joblib.dump does not create missing parent directories.
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    joblib.dump(models, model_path)
# train_model()
# %%
from sklearn import metrics
def eval_model():
    """Evaluate the saved per-target models on the test split and print a table.

    Loads the ``{target column name: regressor}`` dict from
    ``models/CatBoostRegressor.model``, predicts each target column from the
    module-level ``train_test_data['X_test']``, and prints a DataFrame with
    one row per target holding RMSE, R^2 and the max / min / mean absolute
    error.
    """
    # NOTE: joblib.load unpickles the file; only load model files you trust.
    models = joblib.load(f"models/{CatBoostRegressor.__name__}.model")
    X, ys = train_test_data['X_test'], train_test_data['y_test']

    evals = []
    for target_col, reg in models.items():
        y_hat = reg.predict(X)  # model prediction
        y_true = ys[target_col]  # observed value from the test split
        abs_err = (y_true - y_hat).abs()
        # Take the square root of the MSE instead of passing `squared=False`:
        # that keyword was deprecated in scikit-learn 1.4 and removed in 1.6,
        # whereas this form works on every version.
        rmse = metrics.mean_squared_error(y_true, y_hat) ** 0.5
        r2 = metrics.r2_score(y_true, y_hat)
        evals.append({
            'Error': target_col,
            'RMSE': rmse,
            'R^2': r2,
            "err_max": abs_err.max(),
            "err_min": abs_err.min(),
            "err_mean": abs_err.mean(),
        })

    print(pd.DataFrame(evals))
# Runs whenever this cell/script executes. eval_model() loads the models from
# the file written by train_model(), so that file must already exist.
eval_model()
# %%