# ssa_everyone/nn_TR/FNN.py

# Data preprocessing
import pandas as pd
from itertools import product
from torch.utils.data import DataLoader
from  sklearn.metrics import r2_score
from sklearn.utils import shuffle

def get_state_vect_cols(prefix=''):
    """Build the six state-vector column names, optionally prefixed.

    The names combine the vectors ``r`` and ``v`` with the components
    ``x``, ``y`` and ``z`` in that order (``r_x``, ``r_y``, ``r_z``,
    ``v_x``, ``v_y``, ``v_z``).

    Args:
        prefix: Text placed in front of every name, joined with an
            underscore. A falsy prefix is inserted as-is with no
            underscore.

    Returns:
        A list of six column-name strings.
    """
    # Only a non-empty prefix gets the separating underscore.
    lead = prefix + '_' if prefix else prefix
    return [
        f'{lead}{vec}_{axis}'
        for vec in ('r', 'v')
        for axis in ('x', 'y', 'z')
    ]

# Print frames in full: no limit on displayed rows or columns, and a
# 1000-character display width.
for _option, _value in (
    ('display.max_rows', None),
    ('display.max_columns', None),
    ('display.width', 1000),
):
    pd.set_option(_option, _value)

# %%
# Load the physics-model predictions (path is relative to the working
# directory).
df = pd.read_parquet("physics_preds.parquet")
# Rows whose aso_id is "05277".
test_set = df[df['aso_id'] == "05277"]

# Per aso_id, keep the leading rows and leave out the last three.
# len() is used for row counts: the earlier ``count()[0]`` form indexed a
# label-indexed Series with a positional integer, which recent pandas
# deprecates, and it counted only the non-null values of the first column.
train_set = df.groupby('aso_id').apply(lambda x: x.head(len(x) - 3))
print(len(df), len(train_set), len(test_set))
# Total number of rows; used further down to derive the 80/20 split index.
data_count = len(df)

# %%
# Model inputs: elapsed time plus the predicted and the starting state
# vectors.
feature_cols = [
    'elapsed_seconds'
] + get_state_vect_cols('physics_pred') + get_state_vect_cols('start')
print(feature_cols)
# The target values are the errors between the physical model predictions
# and the ground truth observations
target_cols = get_state_vect_cols('physics_err')
print(target_cols)

print("样本统一归一化处理")
all_cols = feature_cols + target_cols
df = df[all_cols]
# Randomise the row order so that a positional split mixes all rows.
df = shuffle(df)
# Z-score every column. A constant column has zero standard deviation and
# would turn into NaN (0 / 0), which poisons every loss computed from it,
# so a zero deviation is replaced by 1 and such a column becomes all zeros.
# NOTE(review): mean/std are computed over all rows, including the rows
# later sliced off for evaluation -- confirm this is intended.
col_mean = df.mean(axis=0)
col_std = df.std(axis=0).replace(0, 1.0)
df = (df - col_mean) / col_std
# Create feature and target matrices
X = df[feature_cols]
y_all = df[target_cols]
# y = df['physics_err_v_x']
# print(y)
# %%


# FNN (feed-forward neural network)

import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets

# Use the first CUDA device when PyTorch reports CUDA as available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

class Net(nn.Module):
    """Fully connected regressor with four ReLU hidden layers.

    Layer widths are ``features -> 128 -> 256 -> 256 -> 256 -> 1``; the
    final layer is linear (no activation).
    """

    def __init__(self, features):
        """Create the layers.

        Args:
            features: Size of the last dimension of the input tensor.
        """
        super().__init__()

        # Hidden layers (each followed by ReLU in ``forward``).
        self.linear_relu1 = nn.Linear(features, 128)
        self.linear_relu2 = nn.Linear(128, 256)
        self.linear_relu3 = nn.Linear(256, 256)
        self.linear_relu4 = nn.Linear(256, 256)
        # Output layer producing a single value per sample.
        self.linear5 = nn.Linear(256, 1)

    def forward(self, x):
        """Return the prediction for ``x`` with a trailing dimension of 1."""
        hidden_layers = (
            self.linear_relu1,
            self.linear_relu2,
            self.linear_relu3,
            self.linear_relu4,
        )
        activation = x
        for layer in hidden_layers:
            activation = nn.functional.relu(layer(activation))
        return self.linear5(activation)

# Train one independent network per target column and save its weights.

# The first 80% of the rows form the training split. The split index, the
# feature tensor and the loss module do not depend on the target column, so
# they are built once instead of once per target.
train_size = int(data_count * 0.80)
# Training features
train_features = torch.tensor(X[:train_size].values, dtype=torch.float)
train_features = train_features.to(device)

criterion = nn.MSELoss(reduction='mean')
criterion = criterion.to(device)

for target_col in y_all.columns:
    y = y_all[target_col]
    # Training targets as a column vector to match the network output shape.
    train_labels = torch.tensor(y[:train_size].values, dtype=torch.float).view(-1, 1)
    train_labels = train_labels.to(device)

    model = Net(features=train_features.shape[1])
    model = model.to(device)
    model.train()

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

    losses = []
    epoch = 5000
    epoch_real = 0  # number of optimisation steps actually completed
    for t in range(epoch):
        # Full-batch step: the whole training split goes through at once.
        # The output is already on the model's device, so no extra transfer
        # is needed (Tensor.to returns a new tensor rather than moving in
        # place, so a bare call would have no effect anyway).
        y_pred = model(train_features)

        loss = criterion(y_pred, train_labels)
        losses.append(loss.item())

        # Stop as soon as the loss diverges; the NaN stays in ``losses`` so
        # the summary line below reports it.
        if torch.isnan(loss):
            break

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epoch_real = epoch_real + 1

    print(f"epoch:{epoch_real} loss:{losses[-1]} target:{target_col}")
    torch.save(model.state_dict(), './{}_FNN.pth'.format(target_col))

# Score every saved per-target model on the rows after the 80% split index.

# Recompute the split index and build a dedicated network here so this
# section does not rely on variables left behind by the training loop.
train_size = int(data_count * 0.80)
# Test features (identical for every target, so built once)
test_features = torch.tensor(X[train_size:].values, dtype=torch.float)
test_features = test_features.to(device)

model = Net(features=test_features.shape[1])
model = model.to(device)
model.eval()

evals = []
# Inference only: skip autograd bookkeeping.
with torch.no_grad():
    for target_col in y_all.columns:
        y = y_all[target_col]
        # Load the weights that were saved for this target column.
        model.load_state_dict(
            torch.load('./{}_FNN.pth'.format(target_col), map_location=device)
        )
        pred_labels = model(test_features)
        pred_labels = pred_labels.cpu().numpy()
        real_labels = y[train_size:].values
        r2 = r2_score(real_labels, pred_labels)
        eval_dict = {'Error': target_col, 'R^2': r2}
        evals.append(eval_dict)
print(pd.DataFrame(evals))


# model_sequential = nn.Sequential(
#     nn.Linear(test_features.shape[1], 128),
#     nn.ReLU(),
#     nn.Linear(128, 256),
#     nn.ReLU(),
#     nn.Linear(256, 256),
#     nn.ReLU(),
#     nn.Linear(256, 256),
#     nn.ReLU(),
#     nn.Linear(256, 1)
# )


#
# '''
# STEP 1: LOADING DATASET
# '''
#
# train_dataset = dsets.MNIST(root='./data',
#                             train=True,
#                             transform=transforms.ToTensor(),
#                             download=True)
#
# test_dataset = dsets.MNIST(root='./data',
#                            train=False,
#                            transform=transforms.ToTensor())
#
# '''
# STEP 2: MAKING DATASET ITERABLE
# '''
#
# batch_size = 100
# n_iters = 3000
# num_epochs = n_iters / (len(train_dataset) / batch_size)
# num_epochs = int(num_epochs)
#
# train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
#                                            batch_size=batch_size,
#                                            shuffle=True)
#
# test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
#                                           batch_size=batch_size,
#                                           shuffle=False)
#
# '''
# STEP 3: CREATE MODEL CLASS
# '''
# class FeedforwardNeuralNetModel(nn.Module):
#     def __init__(self, input_dim, hidden_dim, output_dim):
#         super(FeedforwardNeuralNetModel, self).__init__()
#         # Linear function 1: 784 --> 100
#         self.fc1 = nn.Linear(input_dim, hidden_dim)
#         # Non-linearity 1
#         self.relu1 = nn.ReLU()
#
#         # Linear function 2: 100 --> 100
#         self.fc2 = nn.Linear(hidden_dim, hidden_dim)
#         # Non-linearity 2
#         self.relu2 = nn.ReLU()
#
#         # Linear function 3: 100 --> 100
#         self.fc3 = nn.Linear(hidden_dim, hidden_dim)
#         # Non-linearity 3
#         self.relu3 = nn.ReLU()
#
#         # Linear function 4 (readout): 100 --> 10
#         self.fc4 = nn.Linear(hidden_dim, output_dim)
#
#     def forward(self, x):
#         # Linear function 1
#         out = self.fc1(x)
#         # Non-linearity 1
#         out = self.relu1(out)
#
#         # Linear function 2
#         out = self.fc2(out)
#         # Non-linearity 2
#         out = self.relu2(out)
#
#         # Linear function 2
#         out = self.fc3(out)
#         # Non-linearity 2
#         out = self.relu3(out)
#
#         # Linear function 4 (readout)
#         out = self.fc4(out)
#         return out
# '''
# STEP 4: INSTANTIATE MODEL CLASS
# '''
# input_dim = 28*28
# hidden_dim = 100
# output_dim = 10
#
# model = FeedforwardNeuralNetModel(input_dim, hidden_dim, output_dim)
#
# #######################
# #  USE GPU FOR MODEL  #
# #######################
#
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# model.to(device)
#
# '''
# STEP 5: INSTANTIATE LOSS CLASS
# '''
# criterion = nn.CrossEntropyLoss()
#
#
# '''
# STEP 6: INSTANTIATE OPTIMIZER CLASS
# '''
# learning_rate = 0.1
#
# optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
#
# '''
# STEP 7: TRAIN THE MODEL
# '''
# iter = 0
# for epoch in range(num_epochs):
#     for i, (images, labels) in enumerate(train_loader):
#
#         #######################
#         #  USE GPU FOR MODEL  #
#         #######################
#         images = images.view(-1, 28*28).requires_grad_().to(device)
#         labels = labels.to(device)
#
#         # Clear gradients w.r.t. parameters
#         optimizer.zero_grad()
#
#         # Forward pass to get output/logits
#         outputs = model(images)
#
#         # Calculate Loss: softmax --> cross entropy loss
#         loss = criterion(outputs, labels)
#
#         # Getting gradients w.r.t. parameters
#         loss.backward()
#
#         # Updating parameters
#         optimizer.step()
#
#         iter += 1
#
#         if iter % 500 == 0:
#             # Calculate Accuracy
#             correct = 0
#             total = 0
#             # Iterate through test dataset
#             for images, labels in test_loader:
#                 #######################
#                 #  USE GPU FOR MODEL  #
#                 #######################
#                 images = images.view(-1, 28*28).requires_grad_().to(device)
#
#                 # Forward pass only to get logits/output
#                 outputs = model(images)
#
#                 # Get predictions from the maximum value
#                 _, predicted = torch.max(outputs.data, 1)
#
#                 # Total number of labels
#                 total += labels.size(0)
#
#                 #######################
#                 #  USE GPU FOR MODEL  #
#                 #######################
#                 # Total correct predictions
#                 if torch.cuda.is_available():
#                     correct += (predicted.cpu() == labels.cpu()).sum()
#                 else:
#                     correct += (predicted == labels).sum()
#
#             accuracy = 100 * correct / total
#
#             # Print Loss
#             print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter, loss.item(), accuracy))