# Data preprocessing
import pandas as pd
from itertools import product
from sklearn.metrics import r2_score
from sklearn.utils import shuffle


def get_state_vect_cols(prefix=''):
    """Return the six state-vector column names (position r and velocity v,
    each with x/y/z components), optionally prefixed."""
    if prefix:
        prefix += '_'
    vectors = ['r', 'v']
    components = ['x', 'y', 'z']
    return [f'{prefix}{v}_{c}' for v, c in product(vectors, components)]
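# For example, matching the parquet schema used below:
#   get_state_vect_cols('physics_pred')
#   -> ['physics_pred_r_x', 'physics_pred_r_y', 'physics_pred_r_z',
#       'physics_pred_v_x', 'physics_pred_v_y', 'physics_pred_v_z']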
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)
# %%
df = pd.read_parquet("physics_preds.parquet")
# Per-ASO splits computed for reference only; the training below uses a
# random 80/20 split of the shuffled data instead.
test_set = df[df['aso_id'] == "05277"]
train_set = df.groupby('aso_id').apply(lambda x: x.head(len(x) - 3))
print(len(df), len(train_set), len(test_set))
data_count = len(df)
# %%
feature_cols = [
    'elapsed_seconds',
] + get_state_vect_cols('physics_pred') + get_state_vect_cols('start')
print(feature_cols)
# The target values are the errors between the physical model predictions
# and the ground truth observations
target_cols = get_state_vect_cols('physics_err')
print(target_cols)
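# Since the targets are residuals, a corrected state estimate could be
# recovered (an illustrative sketch, not computed in this script) as
#   corrected_state = physics_pred + predicted_physics_err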
print("样本统一归一化处理")
all_cols = feature_cols + target_cols
df = df[all_cols]
df = shuffle(df)
df = (df - df.mean(axis=0)) / df.std(axis=0)
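# Note (an assumption, not part of the original run): to report errors back in
# physical units, the column statistics would have to be captured before the
# standardization above, e.g. with hypothetical names:
#   col_means, col_stds = df.mean(axis=0), df.std(axis=0)
#   physical_pred = standardized_pred * col_stds[col] + col_means[col]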
# Create feature and target matrices
X = df[feature_cols]
y_all = df[target_cols]
# %%
# Feed-forward neural network (FNN)
import torch
import torch.nn as nn

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
class Net(nn.Module):
    """MLP with four hidden ReLU layers regressing one error component."""

    def __init__(self, features):
        super().__init__()
        self.linear_relu1 = nn.Linear(features, 128)
        self.linear_relu2 = nn.Linear(128, 256)
        self.linear_relu3 = nn.Linear(256, 256)
        self.linear_relu4 = nn.Linear(256, 256)
        self.linear5 = nn.Linear(256, 1)

    def forward(self, x):
        y_pred = nn.functional.relu(self.linear_relu1(x))
        y_pred = nn.functional.relu(self.linear_relu2(y_pred))
        y_pred = nn.functional.relu(self.linear_relu3(y_pred))
        y_pred = nn.functional.relu(self.linear_relu4(y_pred))
        return self.linear5(y_pred)
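# Quick shape check (illustrative; feature_cols above has 13 entries):
#   net = Net(features=13)
#   net(torch.randn(8, 13)).shape  # -> torch.Size([8, 1])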
train_size = int(data_count * 0.80)

# Train one model per error component
for target_col in y_all.columns:
    y = y_all[target_col]
    # Training set features
    train_features = torch.tensor(X[:train_size].values, dtype=torch.float).to(device)
    # Training set targets
    train_labels = torch.tensor(y[:train_size].values, dtype=torch.float).view(-1, 1).to(device)

    model = Net(features=train_features.shape[1]).to(device)
    model.train()
    criterion = nn.MSELoss(reduction='mean')
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

    losses = []
    epochs = 5000
    epochs_run = 0
    for t in range(epochs):
        y_pred = model(train_features)
        loss = criterion(y_pred, train_labels)
        losses.append(loss.item())
        if torch.isnan(loss):  # stop early if training diverges
            break
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epochs_run += 1
    print(f"epoch:{epochs_run} loss:{losses[-1]} target:{target_col}")
    torch.save(model.state_dict(), './{}_FNN.pth'.format(target_col))
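# The loop above trains full-batch. A mini-batch variant is a possible sketch
# (not part of the original run), using torch.utils.data:
#   from torch.utils.data import TensorDataset, DataLoader
#   loader = DataLoader(TensorDataset(train_features, train_labels),
#                       batch_size=256, shuffle=True)
#   for xb, yb in loader:
#       loss = criterion(model(xb), yb)
#       ...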
# Evaluate each saved model on the held-out 20%
evals = []
for target_col in y_all.columns:
    y = y_all[target_col]
    # Test set features
    test_features = torch.tensor(X[train_size:].values, dtype=torch.float).to(device)
    model = Net(features=test_features.shape[1])
    model.load_state_dict(torch.load('./{}_FNN.pth'.format(target_col),
                                     map_location=lambda storage, loc: storage))
    model.to(device)
    model.eval()
    with torch.no_grad():
        pred_labels = model(test_features).cpu().numpy()
    real_labels = y[train_size:].values
    r2 = r2_score(real_labels, pred_labels)
    evals.append({'Error': target_col, 'R^2': r2})
print(pd.DataFrame(evals))
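# R^2 = 1.0 would mean the residual is predicted perfectly; R^2 = 0.0 is no
# better than always predicting the mean (standardized) error.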