You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
343 lines
9.6 KiB
Python
343 lines
9.6 KiB
Python
# Data preprocessing
|
|
import pandas as pd
|
|
from itertools import product
|
|
from torch.utils.data import DataLoader
|
|
from sklearn.metrics import r2_score
|
|
from sklearn.utils import shuffle
|
|
|
|
def get_state_vect_cols(prefix: str = '') -> list[str]:
    """Return the six state-vector column names, optionally prefixed.

    The names are built as ``{vector}_{component}`` for every combination
    of the vectors ``'r'``, ``'v'`` and the components ``'x'``, ``'y'``,
    ``'z'``, in that order (``r_x, r_y, r_z, v_x, v_y, v_z``).

    Args:
        prefix: Optional column prefix. When non-empty it is joined to each
            name with an underscore (``'start'`` -> ``'start_r_x'``).

    Returns:
        A list of six column names.
    """
    # Only add the separating underscore when a prefix was actually given.
    lead = f'{prefix}_' if prefix else ''
    vectors = ['r', 'v']
    components = ['x', 'y', 'z']
    return [f'{lead}{v}_{c}' for v, c in product(vectors, components)]
|
|
|
|
# Print DataFrames in full: no row/column truncation and a wide console.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)

# %%
df = pd.read_parquet("physics_preds.parquet")
# Rows whose aso_id equals "05277".
test_set = df[df['aso_id'] == "05277"]

# Per aso_id group, keep every row except the last three.  ``iloc[:-3]``
# yields an empty frame for groups with three rows or fewer, whereas
# ``head(n_rows - 3)`` would receive a negative count for groups shorter
# than three rows and keep rows it should drop.
# NOTE(review): train_set and test_set are only printed below; the visible
# part of this file trains on a positional 80/20 split of ``df`` instead.
train_set = df.groupby('aso_id').apply(lambda x: x.iloc[:-3])
# Use len() for row counts: ``count()[0]`` counts non-null values of the
# first column only and relies on positional indexing of a label-indexed
# Series.
print(len(df), len(train_set), len(test_set))
data_count = len(df)
|
|
|
|
# %%
|
|
# Model inputs: the elapsed time followed by the 'physics_pred' and 'start'
# state-vector columns.
feature_cols = [
    'elapsed_seconds',
    *get_state_vect_cols('physics_pred'),
    *get_state_vect_cols('start'),
]
print(feature_cols)
# The target values are the errors between the physical model predictions
# and the ground truth observations
target_cols = get_state_vect_cols('physics_err')
print(target_cols)

print("样本统一归一化处理")
all_cols = feature_cols + target_cols
# Keep only the modelled columns and shuffle the rows.
df = shuffle(df[all_cols])
# Standardise every column (features and targets alike) using the mean and
# standard deviation computed over all rows.
df = df.sub(df.mean(axis=0), axis='columns').div(df.std(axis=0), axis='columns')
# Create feature and target matrices
X, y_all = df[feature_cols], df[target_cols]
|
|
# y = df['physics_err_v_x']
|
|
# print(y)
|
|
# %%
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# FNN (feed-forward neural network)
|
|
|
|
import torch
|
|
import torch.nn as nn
|
|
import torchvision.transforms as transforms
|
|
import torchvision.datasets as dsets
|
|
|
|
# Run on the first CUDA GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
|
|
|
|
class Net(nn.Module):
    """Fully connected regression network with a single output.

    Layer widths are ``features -> 128 -> 256 -> 256 -> 256 -> 1`` with a
    ReLU after every layer except the last.  The attribute names of the
    layers are part of the ``state_dict`` keys used by saved checkpoints,
    so they must not be renamed.
    """

    def __init__(self, features):
        """Create the layers.

        Args:
            features: Number of input features per sample (size of the
                last dimension of the tensors passed to ``forward``).
        """
        super().__init__()

        self.linear_relu1 = nn.Linear(features, 128)
        self.linear_relu2 = nn.Linear(128, 256)
        self.linear_relu3 = nn.Linear(256, 256)
        self.linear_relu4 = nn.Linear(256, 256)
        self.linear5 = nn.Linear(256, 1)

    def forward(self, x):
        """Map ``x`` with last dimension ``features`` to a last dimension of 1."""
        y_pred = nn.functional.relu(self.linear_relu1(x))
        y_pred = nn.functional.relu(self.linear_relu2(y_pred))
        y_pred = nn.functional.relu(self.linear_relu3(y_pred))
        y_pred = nn.functional.relu(self.linear_relu4(y_pred))
        # No activation on the output layer: this is a regression head.
        y_pred = self.linear5(y_pred)
        return y_pred
|
|
|
|
# Train one independent single-output network per target column and save
# its weights to './<target_col>_FNN.pth'.
# NOTE(review): the indentation of this section was lost in the source; the
# summary print and the save are assumed to run once per target, after the
# epoch loop -- confirm against the original file.

# The first 80% of the (already shuffled) rows form the training split.
train_size = int(data_count * 0.80)
# Training features are identical for every target, so build the tensor and
# move it to the device once instead of once per target.
train_features = torch.tensor(X.iloc[:train_size].values, dtype=torch.float)
train_features = train_features.to(device)

for target_col in y_all.columns:
    y = y_all[target_col]
    # Training targets as a column vector so they match the model output.
    train_labels = torch.tensor(y.iloc[:train_size].values, dtype=torch.float).view(-1, 1)
    train_labels = train_labels.to(device)

    model = Net(features=train_features.shape[1])
    model = model.to(device)
    model.train()

    criterion = nn.MSELoss(reduction='mean')
    criterion = criterion.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

    losses = []
    epoch = 5000
    epoch_real = 0  # number of optimisation steps actually performed
    for t in range(epoch):
        # Full-batch forward pass; the output is already on ``device``.
        y_pred = model(train_features)

        loss = criterion(y_pred, train_labels)
        losses.append(loss.item())

        # Stop before the update once the loss has diverged to NaN.
        if torch.isnan(loss):
            break

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epoch_real = epoch_real + 1

    print(f"epoch:{epoch_real} loss:{losses[-1]} target:{target_col}")
    torch.save(model.state_dict(), './{}_FNN.pth'.format(target_col))
|
|
|
|
# Score every saved per-target model on the held-out rows (everything after
# the first ``train_size`` rows) and print one R^2 value per target.
# NOTE(review): the indentation of this section was lost in the source; the
# final print is assumed to run once, after the loop.
evals = []
# Held-out features are the same for every target: build them once.
test_features = torch.tensor(X.iloc[train_size:].values, dtype=torch.float)
test_features = test_features.to(device)

# ``model`` is the network object left over from training; its weights are
# overwritten from the checkpoint of each target before predicting.
model.to(device)
model.eval()
# Inference only: skip autograd bookkeeping.
with torch.no_grad():
    for target_col in y_all.columns:
        y = y_all[target_col]
        model.load_state_dict(torch.load('./{}_FNN.pth'.format(target_col), map_location=device))
        pred_labels = model(test_features)
        pred_labels = pred_labels.detach().cpu().numpy()
        real_labels = y.iloc[train_size:].values
        r2 = r2_score(real_labels, pred_labels)
        eval_dict = {'Error': target_col, 'R^2': r2}
        evals.append(eval_dict)

print(pd.DataFrame(evals))
|
|
|
|
|
|
|
|
# model_sequential = nn.Sequential(
|
|
# nn.Linear(test_features.shape[1], 128),
|
|
# nn.ReLU(),
|
|
# nn.Linear(128, 256),
|
|
# nn.ReLU(),
|
|
# nn.Linear(256, 256),
|
|
# nn.ReLU(),
|
|
# nn.Linear(256, 256),
|
|
# nn.ReLU(),
|
|
# nn.Linear(256, 1)
|
|
# )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#
|
|
# '''
|
|
# STEP 1: LOADING DATASET
|
|
# '''
|
|
#
|
|
# train_dataset = dsets.MNIST(root='./data',
|
|
# train=True,
|
|
# transform=transforms.ToTensor(),
|
|
# download=True)
|
|
#
|
|
# test_dataset = dsets.MNIST(root='./data',
|
|
# train=False,
|
|
# transform=transforms.ToTensor())
|
|
#
|
|
# '''
|
|
# STEP 2: MAKING DATASET ITERABLE
|
|
# '''
|
|
#
|
|
# batch_size = 100
|
|
# n_iters = 3000
|
|
# num_epochs = n_iters / (len(train_dataset) / batch_size)
|
|
# num_epochs = int(num_epochs)
|
|
#
|
|
# train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
|
|
# batch_size=batch_size,
|
|
# shuffle=True)
|
|
#
|
|
# test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
|
|
# batch_size=batch_size,
|
|
# shuffle=False)
|
|
#
|
|
# '''
|
|
# STEP 3: CREATE MODEL CLASS
|
|
# '''
|
|
# class FeedforwardNeuralNetModel(nn.Module):
|
|
# def __init__(self, input_dim, hidden_dim, output_dim):
|
|
# super(FeedforwardNeuralNetModel, self).__init__()
|
|
# # Linear function 1: 784 --> 100
|
|
# self.fc1 = nn.Linear(input_dim, hidden_dim)
|
|
# # Non-linearity 1
|
|
# self.relu1 = nn.ReLU()
|
|
#
|
|
# # Linear function 2: 100 --> 100
|
|
# self.fc2 = nn.Linear(hidden_dim, hidden_dim)
|
|
# # Non-linearity 2
|
|
# self.relu2 = nn.ReLU()
|
|
#
|
|
# # Linear function 3: 100 --> 100
|
|
# self.fc3 = nn.Linear(hidden_dim, hidden_dim)
|
|
# # Non-linearity 3
|
|
# self.relu3 = nn.ReLU()
|
|
#
|
|
# # Linear function 4 (readout): 100 --> 10
|
|
# self.fc4 = nn.Linear(hidden_dim, output_dim)
|
|
#
|
|
# def forward(self, x):
|
|
# # Linear function 1
|
|
# out = self.fc1(x)
|
|
# # Non-linearity 1
|
|
# out = self.relu1(out)
|
|
#
|
|
# # Linear function 2
|
|
# out = self.fc2(out)
|
|
# # Non-linearity 2
|
|
# out = self.relu2(out)
|
|
#
|
|
# # Linear function 2
|
|
# out = self.fc3(out)
|
|
# # Non-linearity 2
|
|
# out = self.relu3(out)
|
|
#
|
|
# # Linear function 4 (readout)
|
|
# out = self.fc4(out)
|
|
# return out
|
|
# '''
|
|
# STEP 4: INSTANTIATE MODEL CLASS
|
|
# '''
|
|
# input_dim = 28*28
|
|
# hidden_dim = 100
|
|
# output_dim = 10
|
|
#
|
|
# model = FeedforwardNeuralNetModel(input_dim, hidden_dim, output_dim)
|
|
#
|
|
# #######################
|
|
# # USE GPU FOR MODEL #
|
|
# #######################
|
|
#
|
|
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
|
# model.to(device)
|
|
#
|
|
# '''
|
|
# STEP 5: INSTANTIATE LOSS CLASS
|
|
# '''
|
|
# criterion = nn.CrossEntropyLoss()
|
|
#
|
|
#
|
|
# '''
|
|
# STEP 6: INSTANTIATE OPTIMIZER CLASS
|
|
# '''
|
|
# learning_rate = 0.1
|
|
#
|
|
# optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
|
|
#
|
|
# '''
|
|
# STEP 7: TRAIN THE MODEL
|
|
# '''
|
|
# iter = 0
|
|
# for epoch in range(num_epochs):
|
|
# for i, (images, labels) in enumerate(train_loader):
|
|
#
|
|
# #######################
|
|
# # USE GPU FOR MODEL #
|
|
# #######################
|
|
# images = images.view(-1, 28*28).requires_grad_().to(device)
|
|
# labels = labels.to(device)
|
|
#
|
|
# # Clear gradients w.r.t. parameters
|
|
# optimizer.zero_grad()
|
|
#
|
|
# # Forward pass to get output/logits
|
|
# outputs = model(images)
|
|
#
|
|
# # Calculate Loss: softmax --> cross entropy loss
|
|
# loss = criterion(outputs, labels)
|
|
#
|
|
# # Getting gradients w.r.t. parameters
|
|
# loss.backward()
|
|
#
|
|
# # Updating parameters
|
|
# optimizer.step()
|
|
#
|
|
# iter += 1
|
|
#
|
|
# if iter % 500 == 0:
|
|
# # Calculate Accuracy
|
|
# correct = 0
|
|
# total = 0
|
|
# # Iterate through test dataset
|
|
# for images, labels in test_loader:
|
|
# #######################
|
|
# # USE GPU FOR MODEL #
|
|
# #######################
|
|
# images = images.view(-1, 28*28).requires_grad_().to(device)
|
|
#
|
|
# # Forward pass only to get logits/output
|
|
# outputs = model(images)
|
|
#
|
|
# # Get predictions from the maximum value
|
|
# _, predicted = torch.max(outputs.data, 1)
|
|
#
|
|
# # Total number of labels
|
|
# total += labels.size(0)
|
|
#
|
|
# #######################
|
|
# # USE GPU FOR MODEL #
|
|
# #######################
|
|
# # Total correct predictions
|
|
# if torch.cuda.is_available():
|
|
# correct += (predicted.cpu() == labels.cpu()).sum()
|
|
# else:
|
|
# correct += (predicted == labels).sum()
|
|
#
|
|
# accuracy = 100 * correct / total
|
|
#
|
|
# # Print Loss
|
|
# print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter, loss.item(), accuracy)) |