Sat, 16 Oct 2021 20:49:55 GMT
parent
5b65d871a5
commit
1135697a8a
@ -0,0 +1,3 @@
|
||||
{
|
||||
"python.pythonPath": "D:\\PortableApps\\Python\\3.9\\Scripts\\python.exe"
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,95 @@
|
||||
# To add a new cell, type '# %%'
|
||||
# To add a new markdown cell, type '# %% [markdown]'
|
||||
# %%
|
||||
import pandas as pd
|
||||
|
||||
# %%
|
||||
from itertools import product
|
||||
|
||||
|
||||
def get_state_vect_cols(prefix=''):
    """Return the six state-vector column names (r/v crossed with x/y/z).

    If *prefix* is non-empty it is prepended with an underscore, e.g.
    prefix='physics_pred' -> ['physics_pred_r_x', ..., 'physics_pred_v_z'].
    """
    label = f'{prefix}_' if prefix else ''
    return [
        f'{label}{vect}_{axis}'
        for vect in ('r', 'v')          # position, then velocity
        for axis in ('x', 'y', 'z')     # cartesian components
    ]
|
||||
|
||||
|
||||
# %%
# Load the physics-model predictions and carve out train/test sets.
df = pd.read_parquet("traindata/physics_preds.parquet")

# Hold out every observation of a single ASO as the test set.
test_set = df[df['aso_id'] == "05277"]

# Per-ASO alternative train split: all but the last 3 rows of each object.
# Use len(x) rather than x.count()[0]: DataFrame.count() skips NaNs in the
# first column and would under-count rows when values are missing.
train_set = df.groupby('aso_id').apply(lambda x: x.head(len(x) - 3))

print(len(df), len(train_set), len(test_set))
test_set
|
||||
|
||||
# %%
from sklearn.model_selection import train_test_split

# Features: elapsed time plus the physics-model predicted state vector and
# the starting state vector.
feature_cols = [
    'elapsed_seconds'
] + get_state_vect_cols('physics_pred') + get_state_vect_cols('start')
print(feature_cols)

# The target values are the errors between the physical model predictions
# and the ground truth observations
target_cols = get_state_vect_cols('physics_err')
print(target_cols)

# Exclude the held-out ASO before splitting: the previous version split the
# full frame, so rows of the test object leaked into the training set.
train_df = df[df['aso_id'] != "05277"]

# Create feature and target matrices
X = train_df[feature_cols]
y = train_df[target_cols]

data_keys = ['X_train', 'X_test', 'y_train', 'y_test']
data_vals = train_test_split(X, y, test_size=0.2)
train_test_data = dict(zip(data_keys, data_vals))

# Evaluation always uses the held-out ASO, not the random split's test fold.
train_test_data['X_test'] = test_set[feature_cols]
train_test_data['y_test'] = test_set[target_cols]
|
||||
|
||||
# %%
from sklearn.utils.validation import check_X_y
import pickle

from catboost import CatBoostRegressor
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor


def train_model(regType):
    """Fit one regressor per target column and pickle the dict of models.

    regType: a scikit-learn-style regressor class, instantiated with no
    arguments. One independent single-output model is trained per error
    component, then the {column_name: fitted_model} dict is written to
    models/model_<ClassName>.pickle.
    """
    X, ys = train_test_data['X_train'], train_test_data['y_train']
    # Validate shapes/dtypes up front (multi_output: ys has six columns).
    check_X_y(X, ys, multi_output=True)
    models = {}
    for target_col in ys.columns:
        y = ys[target_col]
        reg = regType()
        # NOTE(review): verbose=False is accepted by CatBoost/XGBoost fit;
        # confirm the installed LightGBM version still takes it.
        reg.fit(X, y, verbose=False)
        models[target_col] = reg
        print(target_col)
    # (removed a stray no-op expression statement `CatBoostRegressor.__name__`
    # that evaluated the class name and discarded it)
    with open(f"models/model_{regType.__name__}.pickle", "wb") as f:
        pickle.dump(models, f)


for reg in [CatBoostRegressor, LGBMRegressor, XGBRegressor]:
    train_model(reg)
|
||||
|
||||
# %%
from sklearn import metrics

# Load the CatBoost models saved by train_model above. The previous path
# "cat.pickle" could never match the f"models/model_{...}.pickle" naming
# used when the models were written.
with open("models/model_CatBoostRegressor.pickle", "rb") as f:
    models = pickle.load(f)

X, ys = train_test_data['X_test'], train_test_data['y_test']
evals = []
for target_col, reg in models.items():
    y_hat = reg.predict(X)  # model-predicted physics error
    y = ys[target_col]      # ground-truth physics error
    # squared=False makes mean_squared_error return the RMSE directly.
    rmse = metrics.mean_squared_error(y, y_hat, squared=False)
    r2 = metrics.r2_score(y, y_hat)
    evals.append({'Error': target_col, 'RMSE': rmse, 'R^2': r2})

pd.DataFrame(evals)
|
||||
|
||||
# %%
|
||||
Loading…
Reference in new issue