|
|
|
|
@ -1,13 +1,8 @@
|
|
|
|
|
# %%
|
|
|
|
|
from typing import Union
|
|
|
|
|
import pandas as pd
|
|
|
|
|
|
|
|
|
|
# %%
|
|
|
|
|
from itertools import product
|
|
|
|
|
|
|
|
|
|
from torch.nn.modules.activation import ReLU
|
|
|
|
|
from torch.nn.modules.linear import Linear
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_state_vect_cols(prefix=''):
|
|
|
|
|
if prefix:
|
|
|
|
|
@ -24,7 +19,6 @@ test_set = df[df['aso_id'] == "05277"]
|
|
|
|
|
|
|
|
|
|
train_set = df.groupby('aso_id').apply(lambda x: x.head(x.count()[0] - 3))
|
|
|
|
|
print(df.count()[0], train_set.count()[0], test_set.count()[0])
|
|
|
|
|
test_set
|
|
|
|
|
|
|
|
|
|
# %%
|
|
|
|
|
from sklearn.model_selection import train_test_split
|
|
|
|
|
@ -47,40 +41,60 @@ train_test_data = dict(zip(data_keys, data_vals))
|
|
|
|
|
# %%
|
|
|
|
|
import torch
|
|
|
|
|
import torch.nn as nn
|
|
|
|
|
from torch.utils.data import TensorDataset, DataLoader
|
|
|
|
|
from sklearn import metrics
|
|
|
|
|
|
|
|
|
|
TestNet2 = lambda: nn.Sequential(
|
|
|
|
|
nn.Linear(13, 32),
|
|
|
|
|
nn.Linear(13, 64),
|
|
|
|
|
nn.ReLU6(),
|
|
|
|
|
nn.Linear(64, 64),
|
|
|
|
|
nn.LeakyReLU(),
|
|
|
|
|
nn.Linear(32, 16),
|
|
|
|
|
nn.Sigmoid(),
|
|
|
|
|
nn.Linear(16, 1),
|
|
|
|
|
nn.Linear(64, 1),
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
nets = {}
|
|
|
|
|
|
|
|
|
|
X_train = torch.tensor(train_test_data["X_train"].values)
|
|
|
|
|
X_train = torch.tensor(train_test_data["X_train"].values,
|
|
|
|
|
dtype=torch.float32).cuda()
|
|
|
|
|
y_train = train_test_data["y_train"]
|
|
|
|
|
X_test = torch.tensor(train_test_data['X_test'].values,
|
|
|
|
|
dtype=torch.float32).cuda()
|
|
|
|
|
y_test = train_test_data['y_test']
|
|
|
|
|
r2s = []
|
|
|
|
|
for target_col in y_train.columns:
|
|
|
|
|
y1 = torch.tensor(y_train[target_col].values).reshape(-1, 1)
|
|
|
|
|
y1 = torch.tensor(y_train[target_col].values,
|
|
|
|
|
dtype=torch.float32).reshape(-1, 1).cuda()
|
|
|
|
|
print(X.shape, y1.shape)
|
|
|
|
|
net = TestNet2().double()
|
|
|
|
|
opti = torch.optim.SGD(net.parameters(), lr=0.04)
|
|
|
|
|
net = TestNet2().cuda()
|
|
|
|
|
opti = torch.optim.SGD(net.parameters(), lr=0.02)
|
|
|
|
|
loss_func = nn.MSELoss()
|
|
|
|
|
train_dataloader = DataLoader(TensorDataset(X_train, y1), batch_size=320)
|
|
|
|
|
for t in range(10000):
|
|
|
|
|
pred = net(X_train)
|
|
|
|
|
loss = loss_func(pred, y1)
|
|
|
|
|
if t % 1000 == 0:
|
|
|
|
|
print(f'Epoch {t}, loss {loss}')
|
|
|
|
|
opti.zero_grad()
|
|
|
|
|
loss.backward()
|
|
|
|
|
opti.step()
|
|
|
|
|
for batch, (x, y) in enumerate(train_dataloader):
|
|
|
|
|
pred = net(x)
|
|
|
|
|
loss = loss_func(pred, y)
|
|
|
|
|
opti.zero_grad()
|
|
|
|
|
torch.sqrt(loss).backward()
|
|
|
|
|
opti.step()
|
|
|
|
|
with torch.no_grad():
|
|
|
|
|
y = y_test[target_col]
|
|
|
|
|
y_hat = net(X_test).cpu().numpy()
|
|
|
|
|
rmse = metrics.mean_squared_error(y, y_hat, squared=False)
|
|
|
|
|
r2 = metrics.r2_score(y, y_hat)
|
|
|
|
|
r2s.append(r2)
|
|
|
|
|
print(f"Epoch {t}: r2={r2}, rmse={rmse}")
|
|
|
|
|
nets[target_col] = net
|
|
|
|
|
print(target_col)
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
import matplotlib.pyplot as plt
|
|
|
|
|
|
|
|
|
|
plt.plot(r2s)
|
|
|
|
|
plt.show()
|
|
|
|
|
|
|
|
|
|
# %%
|
|
|
|
|
X, ys = train_test_data['X_test'], train_test_data['y_test']
|
|
|
|
|
evals = []
|
|
|
|
|
from sklearn import metrics
|
|
|
|
|
with torch.no_grad():
|
|
|
|
|
for target_col, net in nets.items():
|
|
|
|
|
y_hat = net(torch.tensor(X.values)) # fake
|
|
|
|
|
|