# %%
import pandas as pd
from itertools import product


def get_state_vect_cols(prefix=''):
    """Return the six state-vector column names, optionally prefixed.

    Names are the product of the vectors ``r``/``v`` and the components
    ``x``/``y``/``z`` (``r_x``, ``r_y``, ..., ``v_z``). A non-empty
    ``prefix`` is joined to each name with an underscore.
    """
    if prefix:
        prefix += '_'
    vectors = ['r', 'v']
    components = ['x', 'y', 'z']
    col_names = [f'{prefix}{v}_{c}' for v, c in product(vectors, components)]
    return col_names


# %%
df = pd.read_parquet("traindata/physics_preds.parquet")
test_set = df[df['aso_id'] == "05277"]
# `.iloc[0]` replaces the deprecated positional `Series[0]` lookup on a
# label-indexed Series; the value (non-null count of the first column) is
# unchanged.
train_set = df.groupby('aso_id').apply(
    lambda x: x.head(x.count().iloc[0] - 3)
)
print(
    df.count().iloc[0],
    train_set.count().iloc[0],
    test_set.count().iloc[0],
)

# %%
from sklearn.model_selection import train_test_split

feature_cols = [
    'elapsed_seconds'
] + get_state_vect_cols('physics_pred') + get_state_vect_cols('start')
print(feature_cols)
# The target values are the errors between the physical model predictions
# and the ground truth observations
target_cols = get_state_vect_cols('physics_err')
print(target_cols)
# Create feature and target matrices
X = df[feature_cols]
y = df[target_cols]
data_keys = ['X_train', 'X_test', 'y_train', 'y_test']
data_vals = train_test_split(X, y, test_size=0.2)
train_test_data = dict(zip(data_keys, data_vals))

# %%
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn import metrics

# Use the GPU when one is present, otherwise fall back to the CPU so the
# script still runs on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def TestNet2(in_features=13):
    """Build a small MLP mapping ``in_features`` inputs to one output.

    The default of 13 matches the original hard-coded input width.
    """
    return nn.Sequential(
        nn.Linear(in_features, 64),
        nn.ReLU6(),
        nn.Linear(64, 64),
        nn.LeakyReLU(),
        nn.Linear(64, 1),
    )


nets = {}
X_train = torch.tensor(
    train_test_data["X_train"].values, dtype=torch.float32
).to(device)
y_train = train_test_data["y_train"]
X_test = torch.tensor(
    train_test_data['X_test'].values, dtype=torch.float32
).to(device)
y_test = train_test_data['y_test']

r2s = []
for target_col in y_train.columns:
    y1 = torch.tensor(
        y_train[target_col].values, dtype=torch.float32
    ).reshape(-1, 1).to(device)
    # Report the shapes actually fed to the DataLoader (the original
    # printed the full, un-split feature matrix here).
    print(X_train.shape, y1.shape)
    net = TestNet2(X_train.shape[1]).to(device)
    opti = torch.optim.SGD(net.parameters(), lr=0.02)
    loss_func = nn.MSELoss()
    train_dataloader = DataLoader(TensorDataset(X_train, y1), batch_size=320)
    # Ground truth for this target is loop-invariant; look it up once.
    y_true = y_test[target_col]
    for t in range(10000):
        for batch_x, batch_y in train_dataloader:
            pred = net(batch_x)
            loss = loss_func(pred, batch_y)
            opti.zero_grad()
            # Back-propagate through sqrt(MSE), i.e. optimise the RMSE.
            torch.sqrt(loss).backward()
            opti.step()
        # NOTE(review): original indentation was lost; the per-epoch
        # evaluation is assumed to run after the batch loop -- confirm.
        with torch.no_grad():
            y_hat = net(X_test).cpu().numpy()
        # `squared=False` is deprecated/removed in recent scikit-learn;
        # take the square root of the MSE explicitly instead.
        rmse = metrics.mean_squared_error(y_true, y_hat) ** 0.5
        r2 = metrics.r2_score(y_true, y_hat)
        r2s.append(r2)
        print(f"Epoch {t}: r2={r2}, rmse={rmse}")
    nets[target_col] = net
    print(target_col)
    # NOTE(review): original indentation was lost; this `break` is assumed
    # to sit at the target-column level, so only the first target column
    # is trained -- confirm.
    break

import matplotlib.pyplot as plt

plt.plot(r2s)
plt.show()

# %%
# NOTE: this cell rebinds `X` and `y` to the test split, as the original did.
X, ys = train_test_data['X_test'], train_test_data['y_test']
evals = []
# Match the dtype (float32) and device of the trained networks; feeding the
# raw float64 CPU array to a float32/GPU model raises at call time.
X_eval = torch.tensor(X.values, dtype=torch.float32).to(device)
with torch.no_grad():
    for target_col, net in nets.items():
        y_hat = net(X_eval)  # fake
        # Move to host memory before converting to a NumPy array.
        y_hat = y_hat.detach().cpu().numpy()
        y = ys[target_col]  # real
        print(y)
        print(y_hat)
        rmse = metrics.mean_squared_error(y, y_hat) ** 0.5
        r2 = metrics.r2_score(y, y_hat)
        eval_dict = {'Error': target_col, 'RMSE': rmse, 'R^2': r2}
        evals.append(eval_dict)
print(pd.DataFrame(evals))
# %%