You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
ssa/train_model_nn.py

97 lines
2.6 KiB
Python

# %%
from typing import Union
import pandas as pd
# %%
from itertools import product
from torch.nn.modules.activation import ReLU
from torch.nn.modules.linear import Linear
def get_state_vect_cols(prefix: str = ''):
    """Return the six state-vector column names, optionally prefixed.

    Names are formed as ``<vector>_<component>`` for every combination of
    the vectors ``r`` and ``v`` (presumably position and velocity) with the
    components ``x``, ``y`` and ``z``, in the order
    ``r_x, r_y, r_z, v_x, v_y, v_z``.

    Args:
        prefix: Optional column-name prefix. When non-empty it is joined to
            each name with an underscore (``'start'`` -> ``'start_r_x'``).

    Returns:
        A list of six column-name strings.
    """
    if prefix:
        # Only add the separator when there is something to separate.
        prefix += '_'
    vectors = ['r', 'v']
    components = ['x', 'y', 'z']
    return [f'{prefix}{v}_{c}' for v, c in product(vectors, components)]
# %%
# Load the physics-model predictions table.
df = pd.read_parquet("traindata/physics_preds.parquet")

# Every row belonging to the single object with aso_id "05277".
test_set = df[df['aso_id'] == "05277"]

# For each aso_id group keep all rows except the last three.
# ``iloc[:-3]`` slices by row position, so it does not depend on how many
# non-null values the first column has, and groups with three or fewer rows
# simply contribute nothing.
train_set = df.groupby('aso_id').apply(lambda group: group.iloc[:-3])

# Row counts of the full table and of the two subsets. ``len`` counts rows
# directly and avoids positional ``[0]`` indexing on a label-indexed Series.
print(len(df), len(train_set), len(test_set))

# NOTE(review): the later cells visible in this file split the data with
# ``train_test_split`` and do not appear to use ``train_set``/``test_set``.
test_set  # notebook cell output: display the held-out rows
# %%
from sklearn.model_selection import train_test_split

# Model inputs: the elapsed time followed by the physics-predicted state
# vector columns and the starting state vector columns.
feature_cols = [
    'elapsed_seconds',
    *get_state_vect_cols('physics_pred'),
    *get_state_vect_cols('start'),
]
print(feature_cols)

# Model outputs: the errors between the physical model predictions and the
# ground truth observations.
target_cols = get_state_vect_cols('physics_err')
print(target_cols)

# Feature and target matrices taken from the full table.
X, y = df[feature_cols], df[target_cols]

# Hold out 20% of the rows for testing; train_test_split returns the parts
# in exactly the order named by data_keys.
data_keys = ['X_train', 'X_test', 'y_train', 'y_test']
data_vals = train_test_split(X, y, test_size=0.2)
train_test_data = {key: part for key, part in zip(data_keys, data_vals)}
# %%
import torch
import torch.nn as nn


def TestNet2():
    """Build a fresh, untrained single-output regression network.

    Layers: Linear(13, 32) -> LeakyReLU -> Linear(32, 16) -> Sigmoid ->
    Linear(16, 1). The input width of 13 matches the feature list built
    earlier in this file ('elapsed_seconds' plus two six-column state
    vectors).

    Returns:
        A new ``nn.Sequential`` instance with freshly initialised weights.
    """
    return nn.Sequential(
        nn.Linear(13, 32),
        nn.LeakyReLU(),
        nn.Linear(32, 16),
        nn.Sigmoid(),
        nn.Linear(16, 1),
    )


# Trained networks keyed by the target column each one predicts.
nets = {}
X_train = torch.tensor(train_test_data["X_train"].values)
y_train = train_test_data["y_train"]
for target_col in y_train.columns:
    # Column vector of shape (n_samples, 1) so it lines up with the
    # network's single output.
    y1 = torch.tensor(y_train[target_col].values).reshape(-1, 1)
    # Report the shapes actually used for training (not the pre-split data).
    print(X_train.shape, y1.shape)
    # .double() casts the parameters to float64.
    # NOTE(review): assumes the feature values are float64 - confirm.
    net = TestNet2().double()
    opti = torch.optim.SGD(net.parameters(), lr=0.04)
    loss_func = nn.MSELoss()
    # Every step runs the whole training set through the network.
    for t in range(10000):
        pred = net(X_train)
        loss = loss_func(pred, y1)
        if t % 1000 == 0:
            print(f'Epoch {t}, loss {loss}')
        opti.zero_grad()
        loss.backward()
        opti.step()
    nets[target_col] = net
    print(target_col)
    # NOTE(review): this break stops after the first target column, so only
    # one network is trained. Remove it to train one network per target.
    break
# %%
# Evaluate each trained network on the held-out split.
# NOTE(review): this rebinds the module-level name ``X`` (previously the full
# feature matrix) to the test features; re-running earlier cells afterwards
# will see the test split under that name.
X, ys = train_test_data['X_test'], train_test_data['y_test']
evals = []
from sklearn import metrics

# The test features are the same for every network, so convert them once.
X_test_tensor = torch.tensor(X.values)

# Gradients are not needed for evaluation.
with torch.no_grad():
    for target_col, net in nets.items():
        y_hat = net(X_test_tensor)  # model prediction
        y_hat = y_hat.detach().numpy()
        y = ys[target_col]  # ground-truth values for this target
        print(y)
        print(y_hat)
        # RMSE computed as sqrt(MSE): the ``squared=False`` keyword of
        # mean_squared_error is deprecated in recent scikit-learn releases.
        rmse = metrics.mean_squared_error(y, y_hat) ** 0.5
        r2 = metrics.r2_score(y, y_hat)
        eval_dict = {'Error': target_col, 'RMSE': rmse, 'R^2': r2}
        evals.append(eval_dict)

# One row per evaluated target column.
print(pd.DataFrame(evals))
# %%