Sat, 16 Oct 2021 20:49:55 GMT

master
大蒟蒻 4 years ago
parent 5b65d871a5
commit 1135697a8a

@ -0,0 +1,3 @@
{
"python.pythonPath": "D:\\PortableApps\\Python\\3.9\\Scripts\\python.exe"
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

Binary file not shown.

Binary file not shown.

File diff suppressed because it is too large Load Diff

@ -0,0 +1,95 @@
# 要添加一个新单元,输入 '# %%'
# 要添加一个新的标记单元,输入 '# %% [markdown]'
# %%
import pandas as pd
# %%
from itertools import product
def get_state_vect_cols(prefix=''):
    """Return the six state-vector column names, optionally prefixed.

    The names combine the vectors ``r`` and ``v`` with the components
    ``x``, ``y`` and ``z`` in that order (``r_x``, ``r_y``, ..., ``v_z``).
    A non-empty ``prefix`` is joined to each name with an underscore.
    """
    # Only add the separator when there is something to separate.
    lead = prefix + '_' if prefix else prefix
    return [f'{lead}{vec}_{axis}' for vec in 'rv' for axis in 'xyz']
# %%
# Load the physics-model predictions.
df = pd.read_parquet("traindata/physics_preds.parquet")
# Every row of one ASO is set aside as the evaluation set.
test_set = df[df['aso_id'] == "05277"]
# Per ASO, keep every row except the last three. head(-3) yields an empty
# frame for groups of three rows or fewer, instead of the inconsistent
# results that head(count - 3) gives once the argument reaches zero or below.
train_set = df.groupby('aso_id').apply(lambda x: x.head(-3))
# len() counts rows directly; count()[0] relied on deprecated positional
# indexing of a label-indexed Series and only counted non-null values of
# the first column.
print(len(df), len(train_set), len(test_set))
test_set
# %%
from sklearn.model_selection import train_test_split
feature_cols = [
    'elapsed_seconds'
] + get_state_vect_cols('physics_pred') + get_state_vect_cols('start')
print(feature_cols)
# The target values are the errors between the physical model predictions
# and the ground truth observations
target_cols = get_state_vect_cols('physics_err')
print(target_cols)
# Exclude every ASO that appears in the held-out test set before splitting;
# otherwise most of the evaluation rows would also be training rows and the
# reported metrics would be inflated by leakage.
held_out_ids = test_set['aso_id'].unique()
train_pool = df[~df['aso_id'].isin(held_out_ids)]
# Create feature and target matrices
X = train_pool[feature_cols]
y = train_pool[target_cols]
data_keys = ['X_train', 'X_test', 'y_train', 'y_test']
data_vals = train_test_split(X, y, test_size=0.2)
train_test_data = dict(zip(data_keys, data_vals))
# Evaluate on the held-out ASO rather than on the random 20% split.
train_test_data['X_test'] = test_set[feature_cols]
train_test_data['y_test'] = test_set[target_cols]
# Alternative split, kept for reference: train on all but the last three
# rows of each ASO.
# train_test_data = {
#     'X_train': train_set[feature_cols],
#     'y_train': train_set[target_cols],
#     'X_test': test_set[feature_cols],
#     'y_test': test_set[target_cols],
# }
# %%
from sklearn.utils.validation import check_X_y
import pickle
from catboost import CatBoostRegressor
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
def train_model(regType):
    """Fit one ``regType`` regressor per target column and pickle the set.

    Reads the training matrices from the module-level ``train_test_data``
    dict (keys ``'X_train'`` and ``'y_train'``), fits a freshly constructed
    ``regType()`` on each column of the target frame, and writes the
    resulting ``{target_column: fitted_model}`` dict to
    ``models/model_<regType.__name__>.pickle``.

    Args:
        regType: Regressor class; called with no arguments to build each
            model, and its ``__name__`` is used in the output file name.
    """
    import os  # local import so this cell does not depend on another edit

    X, ys = train_test_data['X_train'], train_test_data['y_train']
    # Validate the inputs up front; multi_output allows a 2-D target.
    check_X_y(X, ys, multi_output=True)
    models = {}
    for target_col in ys.columns:
        reg = regType()
        # NOTE(review): LightGBM >= 4.0 appears to have dropped the
        # fit(verbose=...) argument — confirm against the installed version.
        reg.fit(X, ys[target_col], verbose=False)
        models[target_col] = reg
        print(target_col)
    # open() does not create missing directories.
    os.makedirs("models", exist_ok=True)
    with open(f"models/model_{regType.__name__}.pickle", "wb") as f:
        pickle.dump(models, f)
# Train and persist one model set per gradient-boosting library.
for regressor_cls in (CatBoostRegressor, LGBMRegressor, XGBRegressor):
    train_model(regressor_cls)
# %%
from sklearn import metrics
# NOTE(review): train_model writes "models/model_<Class>.pickle", not
# "cat.pickle" — confirm this path points at the intended model file.
# Only unpickle files you produced yourself; pickle can execute code on load.
with open("cat.pickle", "rb") as f:
    models = pickle.load(f)
X, ys = train_test_data['X_test'], train_test_data['y_test']
evals = []
for target_col, reg in models.items():
    y_hat = reg.predict(X)  # model prediction
    y = ys[target_col]  # value from the test targets
    # Take the square root of the MSE explicitly: the squared=False
    # argument is deprecated and removed in recent scikit-learn releases.
    rmse = metrics.mean_squared_error(y, y_hat) ** 0.5
    r2 = metrics.r2_score(y, y_hat)
    eval_dict = {'Error': target_col, 'RMSE': rmse, 'R^2': r2}
    evals.append(eval_dict)
pd.DataFrame(evals)
# %%
Loading…
Cancel
Save