# Data preprocessing
import pandas as pd
from itertools import product
from sklearn.metrics import r2_score
from sklearn.utils import shuffle


def get_state_vect_cols(prefix=''):
    """Return the six state-vector column names (position r and velocity v,
    each with x/y/z components), optionally prefixed, e.g. 'physics_pred_r_x'."""
    if prefix:
        prefix += '_'
    vectors = ['r', 'v']
    components = ['x', 'y', 'z']
    return [f'{prefix}{v}_{c}' for v, c in product(vectors, components)]


pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)

# %%
df = pd.read_parquet("physics_preds.parquet")
# Report the sizes of a per-object split (all but the last 3 observations of
# each ASO for training, one held-out object for testing). Note that the
# pipeline below actually uses a simple 80/20 cut over the shuffled rows.
test_set = df[df['aso_id'] == "05277"]
train_set = df.groupby('aso_id').apply(lambda x: x.head(len(x) - 3))
print(len(df), len(train_set), len(test_set))
data_count = len(df)

# %%
feature_cols = [
    'elapsed_seconds'
] + get_state_vect_cols('physics_pred') + get_state_vect_cols('start')
print(feature_cols)
# The target values are the errors between the physical model predictions
# and the ground truth observations
target_cols = get_state_vect_cols('physics_err')
print(target_cols)
print("Normalizing all samples together")
all_cols = feature_cols + target_cols
df = df[all_cols]
df = shuffle(df)
# Z-score standardization: zero mean, unit variance per column
df = (df - df.mean(axis=0)) / df.std(axis=0)
# Create feature and target matrices
X = df[feature_cols]
y_all = df[target_cols]

# %%
# Feedforward neural network (FNN)
import torch
import torch.nn as nn

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


class Net(nn.Module):
    """Fully connected regression network: four hidden ReLU layers and a
    single linear output for one error component."""

    def __init__(self, features):
        super(Net, self).__init__()
        self.linear_relu1 = nn.Linear(features, 128)
        self.linear_relu2 = nn.Linear(128, 256)
        self.linear_relu3 = nn.Linear(256, 256)
        self.linear_relu4 = nn.Linear(256, 256)
        self.linear5 = nn.Linear(256, 1)

    def forward(self, x):
        y_pred = nn.functional.relu(self.linear_relu1(x))
        y_pred = nn.functional.relu(self.linear_relu2(y_pred))
        y_pred = nn.functional.relu(self.linear_relu3(y_pred))
        y_pred = nn.functional.relu(self.linear_relu4(y_pred))
        return self.linear5(y_pred)


# 80/20 train/test split over the shuffled rows; the cut point is shared by
# the training and evaluation loops
train_size = int(data_count * 0.80)

# Train one network per error component
for target_col in y_all.columns:
    y = y_all[target_col]
    # Training set features
    train_features = torch.tensor(X.iloc[:train_size].values, dtype=torch.float)
    train_features = train_features.to(device)
    # Training set targets
    train_labels = torch.tensor(y.iloc[:train_size].values, dtype=torch.float).view(-1, 1)
    train_labels = train_labels.to(device)

    model = Net(features=train_features.shape[1]).to(device)
    model.train()
    criterion = nn.MSELoss(reduction='mean')
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

    losses = []
    epochs = 5000
    epochs_run = 0
    for t in range(epochs):
        y_pred = model(train_features)
        loss = criterion(y_pred, train_labels)
        losses.append(loss.item())
        # Stop early if training diverges
        if torch.isnan(loss):
            break
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epochs_run += 1
    print(f"epoch:{epochs_run} loss:{losses[-1]} target:{target_col}")
    torch.save(model.state_dict(), './{}_FNN.pth'.format(target_col))

# %%
# Evaluate each per-component model on the held-out 20%
evals = []
# Test set features (identical for every target column)
test_features = torch.tensor(X.iloc[train_size:].values, dtype=torch.float).to(device)
for target_col in y_all.columns:
    y = y_all[target_col]
    model = Net(features=test_features.shape[1]).to(device)
    model.load_state_dict(torch.load('./{}_FNN.pth'.format(target_col), map_location=device))
    model.eval()
    with torch.no_grad():
        pred_labels = model(test_features).cpu().numpy().ravel()
    real_labels = y.iloc[train_size:].values
    r2 = r2_score(real_labels, pred_labels)
    evals.append({'Error': target_col, 'R^2': r2})
print(pd.DataFrame(evals))
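# %%
# Usage sketch, not part of the original pipeline: applying the learned error
# models to a new physics prediction. Both features and targets were z-score
# standardized above, so the per-column means and stds must be captured
# *before* the standardization step (the script overwrites df in place);
# `col_means` and `col_stds` below are hypothetical names for those saved
# statistics. Each component's network predicts a standardized error, which
# is then mapped back to physical units.

def correct_physics_pred(features_raw, col_means, col_stds):
    """Return {error column: predicted error in original units} for one sample.

    features_raw: pd.Series of raw (unstandardized) feature values.
    col_means/col_stds: per-column statistics saved before standardization.
    """
    # Standardize the features exactly as the training data was standardized
    z = (features_raw[feature_cols] - col_means[feature_cols]) / col_stds[feature_cols]
    x = torch.tensor(z.values, dtype=torch.float, device=device).view(1, -1)
    predicted_errors = {}
    for target_col in target_cols:
        net = Net(features=x.shape[1]).to(device)
        net.load_state_dict(torch.load('./{}_FNN.pth'.format(target_col), map_location=device))
        net.eval()
        with torch.no_grad():
            err_z = net(x).item()
        # Undo the target standardization to recover the error in physical units
        predicted_errors[target_col] = err_z * col_stds[target_col] + col_means[target_col]
    return predicted_errors

# The corrected state vector would then be physics_pred minus (or plus,
# depending on the sign convention of physics_err) the predicted error for
# each component.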
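# %%
# Caveat, offered as a possible variant rather than the original method: the
# standardization above computes mean/std over *all* rows, so held-out-set
# statistics leak into the training features. A leakage-free alternative fits
# the statistics on the training rows only; a minimal sketch:

def standardize_by_train_stats(frame, split):
    """Standardize a DataFrame using statistics from the first `split` rows
    only, applying the same statistics to the held-out rows."""
    mu = frame.iloc[:split].mean(axis=0)
    sigma = frame.iloc[:split].std(axis=0)
    return (frame.iloc[:split] - mu) / sigma, (frame.iloc[split:] - mu) / sigma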