add many things

master
lj020 4 years ago
parent 386efc5523
commit d85a93668b

@ -0,0 +1,152 @@
# Copyright 2020 IBM Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Standard libraries
import os
import logging
import itertools
import datetime as dt
# Data processing libraries
import numpy as np
import pandas as pd
from tqdm import tqdm
from joblib import Parallel, delayed
# Physics model
from orbit_prediction import get_state_vect_cols
from orbit_prediction.physics_model import PhysicsModel
logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO"))
logger = logging.getLogger(__name__)
def predict_orbit(window):
"""Predict the state vectors of each future timestep in the given `window`
using a physics astrodynamics model.
:param window: The window of timesteps to predict the orbit of the ASO for
:type window: pandas.DataFrame
:return: The original timestep rows with the predicted state vectors added
:rtype: pandas.DataFrame
"""
# The `window` DataFrame is reverse sorted by time so the starting position
# is the last row
start_row = window.iloc[-1]
start_epoch = start_row.name
# Get the column names of the state vector components
state_vect_comps = get_state_vect_cols()
# Extract the position and velocity vectors as a numpy array
start_state_vect = start_row[state_vect_comps].to_numpy()
start_state = np.concatenate((np.array([start_epoch]),
start_state_vect))
# Build an orbit model
orbit_model = PhysicsModel()
orbit_model.fit([start_state])
future_rows = window.iloc[:-1].reset_index()
# We add the epoch and the state vector components of the starting row
# to the rows we will use the physics model to make predictions for
future_rows['start_epoch'] = start_epoch
for svc in state_vect_comps:
future_rows[f'start_{svc}'] = start_row[svc]
# Calculate the elapsed time from the starting epoch to the
# epoch of each row we make predictions for
time_deltas = future_rows.epoch - future_rows.start_epoch
elapsed_seconds = time_deltas.dt.total_seconds()
future_rows['elapsed_seconds'] = elapsed_seconds
physics_cols = [f'physics_pred_{svc}' for svc in state_vect_comps]
# Predict the state vectors for each of the rows in the "future"
predicted_orbits = orbit_model.predict([elapsed_seconds.to_numpy()])
try:
future_rows[physics_cols] = predicted_orbits[0]
except Exception as ex:
logger.error(f"Failed to assign physics predictions: {ex}\n"
f"window:\n{window}\nfuture_rows:\n{future_rows}")
return future_rows
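# Usage sketch (hypothetical toy data, not part of the pipeline): the window
# must be indexed by `epoch` and reverse-sorted in time, so its last row is
# the starting observation, e.g.
#   epochs = pd.to_datetime(['2020-01-03', '2020-01-02', '2020-01-01'])
#   window = pd.DataFrame({c: np.random.randn(3) for c in get_state_vect_cols()},
#                         index=pd.Index(epochs, name='epoch'))
#   preds = predict_orbit(window)  # rows for the two newer epochs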
def predict_orbits(df, last_n_days, n_pred_days):
"""Use a physics astrodynamics model to predict the orbits of the ASOs
in the provided DataFrame.
:param df: The DataFrame containing the observed orbital state vectors
to use to make predictions from
:type df: pandas.DataFrame
:param last_n_days: Filter the DataFrame to use rows from only the last
`n` days. Use all the rows if `None` is passed, but this may take a
very long time to run
:type last_n_days: int
:param n_pred_days: The number of days in the rolling prediction window
:type n_pred_days: int
"""
if last_n_days:
time_cutoff = df.epoch.max() - dt.timedelta(days=last_n_days)
df = df[df.epoch >= time_cutoff]
epoch_df = df.sort_values('epoch', ascending=False).set_index('epoch')
pred_window_length = f'{n_pred_days}d'
# For each row in `df` we create a window of all of the observations for
# that ASO that are within `n_pred_days` of the given row
window_cols = ['aso_id', pd.Grouper(freq=pred_window_length)]
windows = [w[1] for w in epoch_df.groupby(window_cols)]
# Predict the orbits in each window in parallel
window_dfs = Parallel(n_jobs=-1)(delayed(predict_orbit)(w)
for w in tqdm(windows))
# Join all of the window prediction DataFrames into a single DataFrame
physics_pred_df = pd.concat(window_dfs).reset_index(drop=True)
return physics_pred_df
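# Windowing sketch: with n_pred_days=5, pd.Grouper(freq='5d') on the epoch
# index buckets each ASO's observations into 5-day bins, so
#   epoch_df.groupby(['aso_id', pd.Grouper(freq='5d')])
# yields one DataFrame per (aso_id, 5-day window) pair, which is exactly what
# predict_orbit consumes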
def calc_physics_error(df):
"""Calculates the error in the state vector components between the ground truth
observations and the physics model predictions.
:param df: The DataFrame containing the ground truth observations and the
physics model predictions
:type df: pandas.DataFrame
:return: The input DataFrame with the physical model error column added
:rtype: pandas.DataFrame
"""
comps = ['x', 'y', 'z']
vects = ['r', 'v']
for vect, comp in itertools.product(vects, comps):
comp_col = f'{vect}_{comp}'
err_col = f'physics_err_{comp_col}'
err_val = df[f'physics_pred_{comp_col}'] - df[comp_col]
df[err_col] = err_val
return df
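# Worked example (hypothetical numbers): if an observed component is
# r_x = 7.00e6 m and the physics model predicted physics_pred_r_x = 7.01e6 m,
# then physics_err_r_x = 7.01e6 - 7.00e6 = 1.0e4 m; these signed residuals are
# the regression targets for the ML models trained later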
def run():
"""Builds a training data set of physics model errors based on the
parameters supplied by the CLI.
:param args: The command line arguments
:type args: argparse.Namespace
"""
logger.info('Loading input DataFrame...')
input_df = pd.read_parquet("/home/lj020/Downloads/data6.parquet")
logger.info('Predicting orbits...')
physics_pred_df = predict_orbits(input_df,
last_n_days=None,
n_pred_days=5)
logger.info('Calculating physical model error...')
physics_pred_df = calc_physics_error(physics_pred_df)
logger.info('Serializing results...')
physics_pred_df.to_parquet("/home/lj020/Downloads/train_result.parquet")
with open("/home/lj020/Downloads/train_result.txt","w") as f:
f.write(physics_pred_df.to_string())
run()

@ -0,0 +1,72 @@
from tletools import TLE
import pandas as pd
from astropy import units as u
def get_tles(raw_tle_str):
"""Parses the raw TLE string and converts it to TLE objects.
:param raw_tle_str: The raw string form of the TLEs
:type raw_tle_str: str
:return: The parsed object representations of the TLEs
:rtype: [tletools.TLE]
"""
cutnum = 2
all_tle_lines = raw_tle_str.strip().splitlines()
tles = []
for i in range(len(all_tle_lines)//cutnum):
# Offset of the first line of the i-th two-line element set
j = i*cutnum
# The raw file holds bare two-line element sets, so the object name is
# prepended by hand
tle_lines = ["noaa-06"] + all_tle_lines[j:j+cutnum]
print(tle_lines)
# Strip line number from object name line
# tle_lines[0] = tle_lines[0][2:]
tle = TLE.from_lines(*tle_lines)
tles.append(tle)
# print(tles)
return tles
def get_aso_data(tles):
"""Extracts the necessary data from the TLE objects for doing orbital
prediction.
:param tles: The list of TLE objects to extract orbit information from
:type tles: [tletools.TLE]
:return: A DataFrame of the extracted TLE data
:rtype: pandas.DataFrame
"""
tles_data = []
for tle in tles:
aso_data = {}
aso_data['aso_name'] = tle.name
aso_data['aso_id'] = tle.norad
aso_data['epoch'] = tle.epoch.to_datetime()
# Convert the TLE object to a poliastro.twobody.Orbit instance
orbit = tle.to_orbit()
# Calculate the position and velocity vectors
r, v = orbit.rv()
# Convert position vector from kilometers to meters
r_m = r.to(u.m).to_value()
# Convert the velocity vector from km/s to m/s
v_ms = v.to(u.m/u.s).to_value()
# Extract the components of the state vectors
aso_data['r_x'], aso_data['r_y'], aso_data['r_z'] = r_m
aso_data['v_x'], aso_data['v_y'], aso_data['v_z'] = v_ms
tles_data.append(aso_data)
return pd.DataFrame(tles_data)
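# Unit-conversion sketch (standalone, runnable) showing the astropy pattern
# used above:
#   from astropy import units as u
#   r = [7000.0, 0.0, 0.0] * u.km
#   r.to(u.m).to_value()  # -> array([7000000., 0., 0.])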
full_tle = []
with open("/home/lj020/Downloads/noaa.txt","r") as f:
raw_tle = f.read()
l = get_tles(raw_tle_str=raw_tle)
full_tle += l
w = get_aso_data(full_tle)
w.to_parquet("/home/lj020/Downloads/data6.parquet")
with open("/home/lj020/Downloads/data6.txt","w") as f:
f.write(w.to_string())
# print(w)

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

@ -0,0 +1,110 @@
# %%
import pandas as pd
from itertools import product
def get_state_vect_cols(prefix=''):
if prefix:
prefix += '_'
vectors = ['r', 'v']
components = ['x', 'y', 'z']
col_names = [f'{prefix}{v}_{c}' for v, c in product(vectors, components)]
return col_names
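# e.g. get_state_vect_cols() -> ['r_x', 'r_y', 'r_z', 'v_x', 'v_y', 'v_z']
# and get_state_vect_cols('start') -> ['start_r_x', ..., 'start_v_z']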
# %%
df = pd.read_parquet("traindata/physics_preds.parquet")
test_set = df[df['aso_id'] == "05277"]
train_set = df.groupby('aso_id').apply(lambda x: x.head(x.count()[0] - 3))
print(df.count()[0], train_set.count()[0], test_set.count()[0])
# %%
from sklearn.model_selection import train_test_split
feature_cols = [
'elapsed_seconds'
] + get_state_vect_cols('physics_pred') + get_state_vect_cols('start')
print(feature_cols)
# The target values are the errors between the physical model predictions
# and the ground truth observations
target_cols = get_state_vect_cols('physics_err')
print(target_cols)
# Create feature and target matrices
X = df[feature_cols]
y = df[target_cols]
data_keys = ['X_train', 'X_test', 'y_train', 'y_test']
data_vals = train_test_split(X, y, test_size=0.2)
train_test_data = dict(zip(data_keys, data_vals))
# %%
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn import metrics
TestNet2 = lambda: nn.Sequential(
nn.Linear(13, 64),
nn.ReLU6(),
nn.Linear(64, 64),
nn.LeakyReLU(),
nn.Linear(64, 1),
)
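# The input width of 13 matches the feature vector: 1 (elapsed_seconds)
# + 6 physics_pred components + 6 start state vector components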
nets = {}
X_train = torch.tensor(train_test_data["X_train"].values,
dtype=torch.float32).cuda()
y_train = train_test_data["y_train"]
X_test = torch.tensor(train_test_data['X_test'].values,
dtype=torch.float32).cuda()
y_test = train_test_data['y_test']
r2s = []
for target_col in y_train.columns:
y1 = torch.tensor(y_train[target_col].values,
dtype=torch.float32).reshape(-1, 1).cuda()
print(X_train.shape, y1.shape)
net = TestNet2().cuda()
opti = torch.optim.SGD(net.parameters(), lr=0.02)
loss_func = nn.MSELoss()
train_dataloader = DataLoader(TensorDataset(X_train, y1), batch_size=320)
for t in range(10000):
for batch, (x, y) in enumerate(train_dataloader):
pred = net(x)
loss = loss_func(pred, y)
opti.zero_grad()
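# Backpropagating sqrt(MSE) means the network is trained on the batch RMSE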
torch.sqrt(loss).backward()
opti.step()
with torch.no_grad():
y = y_test[target_col]
y_hat = net(X_test).cpu().numpy()
rmse = metrics.mean_squared_error(y, y_hat, squared=False)
r2 = metrics.r2_score(y, y_hat)
r2s.append(r2)
print(f"Epoch {t}: r2={r2}, rmse={rmse}")
nets[target_col] = net
print(target_col)
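# NOTE: this break means only the first target column is trained here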
break
import matplotlib.pyplot as plt
plt.plot(r2s)
plt.show()
# %%
X, ys = train_test_data['X_test'], train_test_data['y_test']
evals = []
with torch.no_grad():
for target_col, net in nets.items():
y_hat = net(torch.tensor(X.values, dtype=torch.float32).cuda())  # predicted
y_hat = y_hat.cpu().numpy()
y = ys[target_col]  # ground truth
print(y)
print(y_hat)
rmse = metrics.mean_squared_error(y, y_hat, squared=False)
r2 = metrics.r2_score(y, y_hat)
eval_dict = {'Error': target_col, 'RMSE': rmse, 'R^2': r2}
evals.append(eval_dict)
print(pd.DataFrame(evals))
# %%

@ -0,0 +1,343 @@
# Data preprocessing
import pandas as pd
from itertools import product
from torch.utils.data import DataLoader
from sklearn.metrics import r2_score
from sklearn.utils import shuffle
def get_state_vect_cols(prefix=''):
if prefix:
prefix += '_'
vectors = ['r', 'v']
components = ['x', 'y', 'z']
col_names = [f'{prefix}{v}_{c}' for v, c in product(vectors, components)]
return col_names
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)
# %%
df = pd.read_parquet("physics_preds.parquet")
test_set = df[df['aso_id'] == "05277"]
train_set = df.groupby('aso_id').apply(lambda x: x.head(x.count()[0] - 3))
print(df.count()[0], train_set.count()[0], test_set.count()[0])
data_count = df.count()[0]
# %%
feature_cols = [
'elapsed_seconds'
] + get_state_vect_cols('physics_pred') + get_state_vect_cols('start')
print(feature_cols)
# The target values are the errors between the physical model predictions
# and the ground truth observations
target_cols = get_state_vect_cols('physics_err')
print(target_cols)
print("样本统一归一化处理")
all_cols = feature_cols + target_cols
df = df[all_cols]
df = shuffle(df)
df = (df - df.mean(axis=0)) / df.std(axis=0)
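# Column-wise z-score normalization, applied to features and targets alike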
# Create feature and target matrices
X = df[feature_cols]
y_all = df[target_cols]
# y = df['physics_err_v_x']
# print(y)
# %%
# FNN (feed-forward neural network)
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
class Net(nn.Module):
def __init__(self, features):
super(Net, self).__init__()
self.linear_relu1 = nn.Linear(features, 128)
self.linear_relu2 = nn.Linear(128, 256)
self.linear_relu3 = nn.Linear(256, 256)
self.linear_relu4 = nn.Linear(256, 256)
self.linear5 = nn.Linear(256, 1)
def forward(self, x):
y_pred = self.linear_relu1(x)
y_pred = nn.functional.relu(y_pred)
y_pred = self.linear_relu2(y_pred)
y_pred = nn.functional.relu(y_pred)
y_pred = self.linear_relu3(y_pred)
y_pred = nn.functional.relu(y_pred)
y_pred = self.linear_relu4(y_pred)
y_pred = nn.functional.relu(y_pred)
y_pred = self.linear5(y_pred)
return y_pred
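# A plain feed-forward regressor: features -> 128 -> 256 -> 256 -> 256 -> 1,
# with ReLU after each hidden layer and a linear output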
for target_col in y_all.columns:
y = y_all[target_col]
train_size = int(data_count * 0.80)
# Training set features
train_features = torch.tensor(X[:train_size].values, dtype=torch.float)
train_features = train_features.to(device)
# Training set targets
train_labels = torch.tensor(y[:train_size].values, dtype=torch.float).view(-1, 1)
train_labels = train_labels.to(device)
# print("train data size: ", train_features.shape)
# print("label data size: ", train_labels.shape)
# print("test data size: ", test_features.shape)
model = Net(features=train_features.shape[1])
model = model.to(device)
model.train()
criterion = nn.MSELoss(reduction='mean')
criterion = criterion.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
losses = []
epoch = 5000
epoch_real = 0
for t in range(epoch):
y_pred = model(train_features)
loss = criterion(y_pred, train_labels)
losses.append(loss.item())
if torch.isnan(loss):
break
optimizer.zero_grad()
loss.backward()
optimizer.step()
epoch_real = epoch_real + 1
print(f"epoch:{epoch_real} loss:{losses[-1]} target:{target_col}")
torch.save(model.state_dict(), './{}_FNN.pth'.format(target_col))
evals = []
for target_col in y_all.columns:
y = y_all[target_col]
# Test set features
test_features = torch.tensor(X[train_size:].values, dtype=torch.float)
test_features = test_features.to(device)
model.load_state_dict(torch.load('./{}_FNN.pth'.format(target_col), map_location=lambda storage, loc: storage))
model.to(device)
pred_labels = model(test_features)
pred_labels = pred_labels.cpu().data.numpy()
real_labels = y[train_size:].values
r2 = r2_score(real_labels, pred_labels)
eval_dict = {'Error': target_col, 'R^2': r2}
evals.append(eval_dict)
# print(r2)
print(pd.DataFrame(evals))
# model_sequential = nn.Sequential(
# nn.Linear(test_features.shape[1], 128),
# nn.ReLU(),
# nn.Linear(128, 256),
# nn.ReLU(),
# nn.Linear(256, 256),
# nn.ReLU(),
# nn.Linear(256, 256),
# nn.ReLU(),
# nn.Linear(256, 1)
# )
#
# '''
# STEP 1: LOADING DATASET
# '''
#
# train_dataset = dsets.MNIST(root='./data',
# train=True,
# transform=transforms.ToTensor(),
# download=True)
#
# test_dataset = dsets.MNIST(root='./data',
# train=False,
# transform=transforms.ToTensor())
#
# '''
# STEP 2: MAKING DATASET ITERABLE
# '''
#
# batch_size = 100
# n_iters = 3000
# num_epochs = n_iters / (len(train_dataset) / batch_size)
# num_epochs = int(num_epochs)
#
# train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
# batch_size=batch_size,
# shuffle=True)
#
# test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
# batch_size=batch_size,
# shuffle=False)
#
# '''
# STEP 3: CREATE MODEL CLASS
# '''
# class FeedforwardNeuralNetModel(nn.Module):
# def __init__(self, input_dim, hidden_dim, output_dim):
# super(FeedforwardNeuralNetModel, self).__init__()
# # Linear function 1: 784 --> 100
# self.fc1 = nn.Linear(input_dim, hidden_dim)
# # Non-linearity 1
# self.relu1 = nn.ReLU()
#
# # Linear function 2: 100 --> 100
# self.fc2 = nn.Linear(hidden_dim, hidden_dim)
# # Non-linearity 2
# self.relu2 = nn.ReLU()
#
# # Linear function 3: 100 --> 100
# self.fc3 = nn.Linear(hidden_dim, hidden_dim)
# # Non-linearity 3
# self.relu3 = nn.ReLU()
#
# # Linear function 4 (readout): 100 --> 10
# self.fc4 = nn.Linear(hidden_dim, output_dim)
#
# def forward(self, x):
# # Linear function 1
# out = self.fc1(x)
# # Non-linearity 1
# out = self.relu1(out)
#
# # Linear function 2
# out = self.fc2(out)
# # Non-linearity 2
# out = self.relu2(out)
#
# # Linear function 2
# out = self.fc3(out)
# # Non-linearity 2
# out = self.relu3(out)
#
# # Linear function 4 (readout)
# out = self.fc4(out)
# return out
# '''
# STEP 4: INSTANTIATE MODEL CLASS
# '''
# input_dim = 28*28
# hidden_dim = 100
# output_dim = 10
#
# model = FeedforwardNeuralNetModel(input_dim, hidden_dim, output_dim)
#
# #######################
# # USE GPU FOR MODEL #
# #######################
#
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# model.to(device)
#
# '''
# STEP 5: INSTANTIATE LOSS CLASS
# '''
# criterion = nn.CrossEntropyLoss()
#
#
# '''
# STEP 6: INSTANTIATE OPTIMIZER CLASS
# '''
# learning_rate = 0.1
#
# optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
#
# '''
# STEP 7: TRAIN THE MODEL
# '''
# iter = 0
# for epoch in range(num_epochs):
# for i, (images, labels) in enumerate(train_loader):
#
# #######################
# # USE GPU FOR MODEL #
# #######################
# images = images.view(-1, 28*28).requires_grad_().to(device)
# labels = labels.to(device)
#
# # Clear gradients w.r.t. parameters
# optimizer.zero_grad()
#
# # Forward pass to get output/logits
# outputs = model(images)
#
# # Calculate Loss: softmax --> cross entropy loss
# loss = criterion(outputs, labels)
#
# # Getting gradients w.r.t. parameters
# loss.backward()
#
# # Updating parameters
# optimizer.step()
#
# iter += 1
#
# if iter % 500 == 0:
# # Calculate Accuracy
# correct = 0
# total = 0
# # Iterate through test dataset
# for images, labels in test_loader:
# #######################
# # USE GPU FOR MODEL #
# #######################
# images = images.view(-1, 28*28).requires_grad_().to(device)
#
# # Forward pass only to get logits/output
# outputs = model(images)
#
# # Get predictions from the maximum value
# _, predicted = torch.max(outputs.data, 1)
#
# # Total number of labels
# total += labels.size(0)
#
# #######################
# # USE GPU FOR MODEL #
# #######################
# # Total correct predictions
# if torch.cuda.is_available():
# correct += (predicted.cpu() == labels.cpu()).sum()
# else:
# correct += (predicted == labels).sum()
#
# accuracy = 100 * correct / total
#
# # Print Loss
# print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter, loss.item(), accuracy))

@ -0,0 +1,425 @@
#https://blog.csdn.net/weixin_35757704/article/details/115910672
#https://www.bilibili.com/video/BV1hE411t7RN?p=27
#https://www.zhihu.com/question/39792141
import numpy as np
from typing import Union
import pandas as pd
from itertools import product
import torch
from torch import nn
from torch.utils.data import DataLoader
import torchvision
from sklearn.model_selection import train_test_split
from sklearn.utils.validation import check_X_y
import joblib
from catboost import CatBoostRegressor
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from sklearn.metrics import r2_score
from CNN_architecture import CNN_architecture
from sklearn.utils import shuffle
def get_state_vect_cols(prefix=''):
if prefix:
prefix += '_'
vectors = ['r', 'v']
components = ['x', 'y', 'z']
col_names = [f'{prefix}{v}_{c}' for v, c in product(vectors, components)]
return col_names
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)
# %%
df = pd.read_parquet("traindata/physics_preds.parquet")
test_set = df[df['aso_id'] == "05277"]
train_set = df.groupby('aso_id').apply(lambda x: x.head(x.count()[0] - 3))
print(df.count()[0], train_set.count()[0], test_set.count()[0])
data_count = df.count()[0]
# print(train_set)
# %%
feature_cols = [
'elapsed_seconds'
] + get_state_vect_cols('physics_pred') + get_state_vect_cols('start')
print(feature_cols)
# The target values are the errors between the physical model predictions
# and the ground truth observations
target_cols = get_state_vect_cols('physics_err')
print(target_cols)
print("样本统一归一化处理")
all_cols = feature_cols + target_cols
df = df[all_cols]
df = shuffle(df)
df = (df - df.mean(axis=0)) / df.std(axis=0)
# Create feature and target matrices; prepend the target series so that
# column 0 of the LSTM input is the value predicted autoregressively below
feature_cols = ['physics_err_v_x'] + feature_cols
X = df[feature_cols]
y = df[target_cols]
# data_keys = ['X_train', 'X_test', 'y_train', 'y_test']
# data_vals = train_test_split(X, y, test_size=0.2)
# train_test_data = dict(zip(data_keys, data_vals))
# train_test_data['X_test'] = test_set[feature_cols]
# train_test_data['y_test'] = test_set[target_cols]
# train_test_data = {
# 'X_train': train_set[feature_cols],
# 'y_train': train_set[target_cols],
# 'X_test': test_set[feature_cols],
# 'y_test': test_set[target_cols],
# }
# %%
import numpy as np
import torch
from torch import nn
import matplotlib.pyplot as plt
"""
Github: Yonv1943 Zen4 Jia1 hao2
https://github.com/Yonv1943/DL_RL_Zoo/blob/master/RNN
The source of training data
https://github.com/L1aoXingyu/
code-of-learn-deep-learning-with-pytorch/blob/master/
chapter5_RNN/time-series/lstm-time-series.ipynb
"""
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
def run_train_lstm(X):
inp_dim = 14
out_dim = 1
mid_dim = 15
mid_layers = 2
batch_size = 12 * 4
mod_dir = '.'
'''load data'''
data = X.values
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)
# np.set_printoptions(threshold='nan')
# print(data)
data_x = data[:-1, :]
data_y = data[+1:, 0]
# print(data_y)
assert data_x.shape[1] == inp_dim
train_size = int(data_count * 0.80)
train_x = data_x[:train_size]
train_y = data_y[:train_size]
train_x = train_x.reshape((train_size, inp_dim))
train_y = train_y.reshape((train_size, out_dim))
# print(train_y)
# X, ys = train_test_data['X_train'], train_test_data['y_train']
#
# check_X_y(X, ys, multi_output=True)
#
# train_x = X.values
# train_y = ys['physics_err_r_x'].values
# train_y = ys.reshape(ys.shape[0], 1)
'''build model'''
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device("cuda")
net = RegLSTM(inp_dim, out_dim, mid_dim, mid_layers).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(net.parameters(), lr=1e-2)
'''train'''
var_x = torch.tensor(train_x, dtype=torch.float32, device=device)
var_y = torch.tensor(train_y, dtype=torch.float32, device=device)
batch_var_x = list()
batch_var_y = list()
for i in range(batch_size):
j = batch_size - i
batch_var_x.append(var_x[j:])
batch_var_y.append(var_y[j:])
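# Each "batch" element is the same series shifted by one extra step, so the
# network sees batch_size staggered suffixes of one sequence; pad_sequence
# below aligns them into a single (seq_len, batch, feature) tensor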
from torch.nn.utils.rnn import pad_sequence
batch_var_x = pad_sequence(batch_var_x)
batch_var_y = pad_sequence(batch_var_y)
with torch.no_grad():
weights = np.tanh(np.arange(len(train_y)) * (np.e / len(train_y)))
weights = torch.tensor(weights, dtype=torch.float32, device=device)
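# tanh ramp weights: early timesteps get weight near 0, later ones near 1,
# so the loss emphasizes the most recent observations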
print("Training Start")
for e in range(10):
out = net(batch_var_x)
# loss = criterion(out, batch_var_y)
loss = (out - batch_var_y) ** 2 * weights
loss = loss.mean()
optimizer.zero_grad()
loss.backward()
optimizer.step()
if e % 64 == 0:
print('Epoch: {:4}, Loss: {:.5f}'.format(e, loss.item()))
torch.save(net.state_dict(), '{}/net.pth'.format(mod_dir))
print("Save in:", '{}/net.pth'.format(mod_dir))
'''eval'''
net.load_state_dict(torch.load('{}/net.pth'.format(mod_dir), map_location=lambda storage, loc: storage))
net = net.eval()
# X1, ys1 = train_test_data['X_test'], train_test_data['y_test']
# check_X_y(X1, ys1, multi_output=True)
# test_x = X1.values
# test_y = ys1['physics_err_r_x'].values
# test_y = ys1.reshape(ys.shape[0], 1)
# test_x =
test_x = data_x.copy()
test_x[train_size:, 0] = 0
# print(test_x)
test_x = test_x[:, np.newaxis, :]
test_x = torch.tensor(test_x, dtype=torch.float32, device=device)
'''simple but not elegant way'''
for i in range(train_size, len(data) - 2):
test_y = net(test_x[:i])
test_x[i, 0, 0] = test_y[-1]
'''elegant way but slightly complicated'''
# eval_size = 1
# zero_ten = torch.zeros((mid_layers, eval_size, mid_dim), dtype=torch.float32, device=device)
# test_y, hc = net.output_y_hc(test_x[:train_size], (zero_ten, zero_ten))
# test_x[train_size + 1, 0, 0] = test_y[-1]
# for i in range(train_size + 1, len(data) - 2):
# test_y, hc = net.output_y_hc(test_x[i:i + 1], hc)
# test_x[i + 1, 0, 0] = test_y[-1]
pred_y = test_x[1:, 0, 0]
pred_y = pred_y.cpu().data.numpy()
print("`````````````````````````")
print(pred_y.shape)
diff_y = pred_y[train_size:] - data_y[train_size:-1]
print("------")
# print(pred_y[train_size:])
print("------")
# print(data_y[train_size:-1])
r2 = r2_score(data_y[train_size:-1], pred_y[train_size:], multioutput= 'uniform_average')
evals = []
eval_dict = {'Error': 'physics_err_v_x', 'R^2': r2}
evals.append(eval_dict)
print(pd.DataFrame(evals))
l1_loss = np.mean(np.abs(diff_y))
l2_loss = np.mean(diff_y ** 2)
print("L1: {:.3f} L2: {:.3f}".format(l1_loss, l2_loss))
plt.plot(pred_y, 'r', label='pred')
plt.plot(data_y, 'b', label='real', alpha=0.3)
plt.plot([train_size, train_size], [-1, 2], color='k', label='train | pred')
plt.legend(loc='best')
plt.savefig('lstm_reg.png')
plt.pause(4)
def run_origin():
inp_dim = 2
out_dim = 1
mod_dir = '.'
'''load data'''
data = load_data() # axis1: number, year, month
data_x = np.concatenate((data[:-2, 0:1], data[+1:-1, 0:1]), axis=1)
data_y = data[2:, 0]
train_size = int(len(data_x) * 0.75)
train_x = data_x[:train_size]
train_y = data_y[:train_size]
train_x = train_x.reshape((-1, 1, inp_dim))
train_y = train_y.reshape((-1, 1, out_dim))
'''build model'''
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net = RegLSTM(inp_dim, out_dim, mid_dim=4, mid_layers=2).to(device)
criterion = nn.SmoothL1Loss()
optimizer = torch.optim.Adam(net.parameters(), lr=1e-2)
'''train'''
var_x = torch.tensor(train_x, dtype=torch.float32, device=device)
var_y = torch.tensor(train_y, dtype=torch.float32, device=device)
print('var_x.size():', var_x.size())
print('var_y.size():', var_y.size())
for e in range(512):
out = net(var_x)
loss = criterion(out, var_y)
optimizer.zero_grad()
loss.backward()
optimizer.step()
if (e + 1) % 100 == 0: # print the loss every 100 epochs
print('Epoch: {}, Loss: {:.5f}'.format(e + 1, loss.item()))
torch.save(net.state_dict(), '{}/net.pth'.format(mod_dir))
'''eval'''
# net.load_state_dict(torch.load('{}/net.pth'.format(mod_dir), map_location=lambda storage, loc: storage))
net = net.eval() # switch to evaluation mode
"""
inappropriate way of seq prediction:
use all real data to predict the number of next month
"""
test_x = data_x.reshape((-1, 1, inp_dim))
var_data = torch.tensor(test_x, dtype=torch.float32, device=device)
eval_y = net(var_data) # predictions on the test set
pred_y = eval_y.view(-1).cpu().data.numpy()
plt.plot(pred_y[1:], 'r', label='pred inappr', alpha=0.3)
plt.plot(data_y, 'b', label='real', alpha=0.3)
plt.plot([train_size, train_size], [-1, 2], label='train | pred')
"""
appropriate way of seq prediction:
use real+pred data to predict the number of next 3 years.
"""
test_x = data_x.reshape((-1, 1, inp_dim))
test_x[train_size:] = 0 # delete the data of next 3 years.
test_x = torch.tensor(test_x, dtype=torch.float32, device=device)
for i in range(train_size, len(data) - 2):
test_y = net(test_x[:i])
test_x[i, 0, 0] = test_x[i - 1, 0, 1]
test_x[i, 0, 1] = test_y[-1, 0]
pred_y = test_x.cpu().data.numpy()
pred_y = pred_y[:, 0, 0]
plt.plot(pred_y[2:], 'g', label='pred appr')
plt.legend(loc='best')
plt.savefig('lstm_origin.png')
plt.pause(4)
class RegLSTM(nn.Module):
def __init__(self, inp_dim, out_dim, mid_dim, mid_layers):
super(RegLSTM, self).__init__()
self.rnn = nn.LSTM(inp_dim, mid_dim, mid_layers) # rnn
self.reg = nn.Sequential(
nn.Linear(mid_dim, mid_dim),
nn.Tanh(),
nn.Linear(mid_dim, out_dim),
) # regression
def forward(self, x):
y = self.rnn(x)[0] # y, (h, c) = self.rnn(x)
seq_len, batch_size, hid_dim = y.shape
y = y.view(-1, hid_dim)
y = self.reg(y)
y = y.view(seq_len, batch_size, -1)
return y
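# nn.LSTM emits (seq_len, batch, hid_dim); flattening to 2-D lets the same
# regression head run at every timestep before reshaping back to
# (seq_len, batch, out_dim)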
"""
PyCharm Ctrl+click on nn.LSTM() jumps to the PyTorch source:
Examples::
>>> rnn = nn.LSTM(10, 20, 2)
>>> input = torch.randn(5, 3, 10)
>>> h0 = torch.randn(2, 3, 20)
>>> c0 = torch.randn(2, 3, 20)
>>> output, (hn, cn) = rnn(input, (h0, c0))
"""
def output_y_hc(self, x, hc):
y, hc = self.rnn(x, hc) # y, (h, c) = self.rnn(x)
seq_len, batch_size, hid_dim = y.size()
y = y.view(-1, hid_dim)
y = self.reg(y)
y = y.view(seq_len, batch_size, -1)
return y, hc
class RegGRU(nn.Module):
def __init__(self, inp_dim, out_dim, mod_dim, mid_layers):
super(RegGRU, self).__init__()
self.rnn = nn.GRU(inp_dim, mod_dim, mid_layers)
self.reg = nn.Linear(mod_dim, out_dim)
def forward(self, x):
x, h = self.rnn(x) # (seq, batch, hidden)
seq_len, batch_size, hid_dim = x.shape
x = x.view(-1, hid_dim)
x = self.reg(x)
x = x.view(seq_len, batch_size, -1)
return x
def output_y_h(self, x, h):
y, h = self.rnn(x, h)
seq_len, batch_size, hid_dim = y.size()
y = y.view(-1, hid_dim)
y = self.reg(y)
y = y.view(seq_len, batch_size, -1)
return y, h
def load_data():
# monthly passenger counts of an international airline, 1949-01 ~ 1960-12
seq_number = np.array(
[112., 118., 132., 129., 121., 135., 148., 148., 136., 119., 104.,
118., 115., 126., 141., 135., 125., 149., 170., 170., 158., 133.,
114., 140., 145., 150., 178., 163., 172., 178., 199., 199., 184.,
162., 146., 166., 171., 180., 193., 181., 183., 218., 230., 242.,
209., 191., 172., 194., 196., 196., 236., 235., 229., 243., 264.,
272., 237., 211., 180., 201., 204., 188., 235., 227., 234., 264.,
302., 293., 259., 229., 203., 229., 242., 233., 267., 269., 270.,
315., 364., 347., 312., 274., 237., 278., 284., 277., 317., 313.,
318., 374., 413., 405., 355., 306., 271., 306., 315., 301., 356.,
348., 355., 422., 465., 467., 404., 347., 305., 336., 340., 318.,
362., 348., 363., 435., 491., 505., 404., 359., 310., 337., 360.,
342., 406., 396., 420., 472., 548., 559., 463., 407., 362., 405.,
417., 391., 419., 461., 472., 535., 622., 606., 508., 461., 390.,
432.], dtype=np.float32)
# assert seq_number.shape == (144, )
# plt.plot(seq_number)
# plt.ion()
# plt.pause(1)
seq_number = seq_number[:, np.newaxis]
# print(repr(seq))
# 1949~1960, 12 years, 12*12 == 144 months
seq_year = np.arange(12)
seq_month = np.arange(12)
seq_year_month = np.transpose(
[np.repeat(seq_year, len(seq_month)),
np.tile(seq_month, len(seq_year))],
) # Cartesian Product
seq = np.concatenate((seq_number, seq_year_month), axis=1)
# normalization
seq = (seq - seq.mean(axis=0)) / seq.std(axis=0)
return seq
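# load_data() therefore returns a z-scored (144, 3) array whose columns are
# [passenger_count, year_index, month_index]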
if __name__ == '__main__':
run_train_lstm(X)
# run_train_gru()
# run_origin()

@ -0,0 +1,117 @@
import itertools
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from keras import layers, models
def get_state_vect_cols(prefix=''):
if prefix:
prefix += '_'
vectors = ['r', 'v']
components = ['x', 'y', 'z']
col_names = [f'{prefix}{v}_{c}'
for v, c
in itertools.product(vectors, components)]
return col_names
def build_train_test_sets(df, test_size=0.2):
# Features are the physics predicted state vectors and the amount of
# time in seconds into the future the prediction was made
feature_cols = ['elapsed_seconds'] + get_state_vect_cols('physics_pred') \
+ get_state_vect_cols('start')
# The target values are the errors between the physical model predictions
# and the ground truth observations
target_cols = get_state_vect_cols('physics_err')
# Create feature and target matrices
X = df[feature_cols]
y = df[target_cols]
# Split feature and target data into training and test sets
data_keys = ['X_train', 'X_test', 'y_train', 'y_test']
data_vals = train_test_split(X, y, test_size=test_size)
train_test_data = dict(zip(data_keys, data_vals))
return train_test_data
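# X has 13 feature columns (elapsed_seconds plus the physics_pred and start
# state vectors) and y has the 6 physics_err targets; train_test_split holds
# out a random test_size fraction of the rows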
def get_data(file_path):
print('Loading physical model orbit prediction training data...')
physics_pred_df = pd.read_parquet(file_path)
print('Building training and test sets...')
train_test_data = build_train_test_sets(physics_pred_df)
x_train = train_test_data['X_train']
x_test = train_test_data['X_test']
y_train = train_test_data['y_train']
y_test = train_test_data['y_test']
return x_train, y_train, x_test, y_test
# Model definition
def build_model():
network = models.Sequential()
network.add(layers.Dense(64, activation='relu', input_shape=(13, )))
network.add(layers.Dense(64, activation='relu'))
network.add(layers.Dense(1)) # final layer outputs the prediction (identity activation)
# Loss is MSE (mean squared error); the monitored metric is MAE (mean absolute error)
network.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
return network
file_path = r"ssa/traindata/physics_preds.parquet"
train_data, train_labels, test_data, test_labels = get_data(file_path)
# # Standardize the data: subtract the mean, then divide by the std (test data also uses the training std)
# mean = train_data.mean(axis=0)
# train_data -= mean
# std = train_data.std(axis=0)
# train_data /= std
# test_data -= mean
# test_data /= std
x_train = np.array(train_data)
print(x_train.shape)
x_test = np.array(test_data)
print(x_test.shape)
features = ['physics_err_r_x', 'physics_err_r_y', 'physics_err_r_z',
'physics_err_v_x', 'physics_err_v_y', 'physics_err_v_z']
ave_r2 = 0.
for i in range(6):
y_train = np.array(train_labels[features[i]])
print(y_train.shape)
y_test = np.array(test_labels[features[i]])
print(y_test.shape)
network = build_model()
network.summary()
History = network.fit(x_train, y_train, epochs=100, batch_size=1)
network.save('models/DNN{0}.h5'.format(i+1))
# Evaluate accuracy on the test data with the trained model
mse, mae = network.evaluate(x_test, y_test)
rmse = mse**0.5
r2 = 1-mse / np.var(y_test)
print(features[i], ": mse:", mse, " rmse:",
rmse, " mae:", mae, " r2:", r2)
ave_r2 += r2
# # Predict with the trained network
# y_p = network.predict(x_test)
# Plot the training curve
history_dict = History.history
print(history_dict.keys())
metric_list = history_dict['mae']
x = range(1, len(metric_list) + 1)
plt.figure(i)
plt.plot(x, metric_list)
plt.title('Training_mae')
plt.xlabel('Epochs')
plt.ylabel('mean abs error')
plt.legend()
plt.show()
print('ave_r2: ', ave_r2/6)

@ -1 +1,113 @@
# wait for tomorrow!
from normal_use import *
sumRegressors = [LGBMRegressor, RandomForestRegressor, XGBRegressor, CatBoostRegressor]
sumRegressor = Union[LGBMRegressor, RandomForestRegressor, XGBRegressor, CatBoostRegressor]
sumParams = [{},{},{},{"silent": True}]
weight = [0.1, 0.2, 0.3, 0.4]
Sums = {}
train_test_data = None
out_weights = []
oof_train = {}
oof_test = {}
# Some changes
# LinearRegression, Ridge, XGBRegressor, CatBoostRegressor, LGBMRegressor
# deepforest.CascadeForestRegressor
REG_TOTAL = Ridge
def get_random_small_train(X, y, Percentage = 0.8, seed = 0):
# return X_train & y_train
data_keys = ['X_train', 'X_test', 'y_train', 'y_test']
data_vals = train_test_split(X, y, random_state=seed,test_size=(1-Percentage))
train_test_data = dict(zip(data_keys, data_vals))
return train_test_data['X_train'], train_test_data['y_train']
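# Bagging-style subsampling: with use_RFsample a base regressor is fit on a
# random Percentage of the training rows chosen with the given seed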
def train_one_regressor(id, regType: sumRegressor, use_RFsample = False, seed = 0):
full_X, full_ys = train_test_data['X_train'], train_test_data['y_train']
tX, tys = train_test_data['X_test'], train_test_data['y_test']
X, ys = full_X, full_ys
if use_RFsample:
X, ys = get_random_small_train(X, ys, seed=seed)
# which xxx_moon?
# make_moons(n_samples=100, shuffle=True, noise=None, random_state=None)
# pass
check_X_y(X, ys, multi_output=True)
models = {}
evals = []
for target_col in ys.columns:
y = ys[target_col]
reg = regType(**sumParams[id])
reg.fit(X, y)
models[target_col] = reg
# test in full train_test
y_hat = reg.predict(full_X)
oof_train[target_col].append(y_hat.reshape(-1, 1))
# predict test
ty_hat = reg.predict(tX)
oof_test[target_col].append(ty_hat.reshape(-1, 1))
ty = tys[target_col]
# one evals
rmse = metrics.mean_squared_error(ty, ty_hat, squared=False)
r2 = metrics.r2_score(ty, ty_hat)
eval_dict = {'Error': target_col, 'RMSE': rmse, 'R^2': r2}
evals.append(eval_dict)
print(regType.__name__)
print(pd.DataFrame(evals))
print("Average R2: ", average_R2(evals))
joblib.dump(models, f"linear/{regType.__name__}_study_{id}.model")
def train_linear_sumer():
ys = train_test_data['y_train']
tys = train_test_data['y_test'] # ground truth
evals = []
for target_col in oof_train:
X = np.hstack(oof_train[target_col])
tX = np.hstack(oof_test[target_col])
print(ys.shape,X.shape,tys.shape,tX.shape)
y = ys[target_col]
ty = tys[target_col]
clf = REG_TOTAL()
clf.fit(X, y)
ty_hat = clf.predict(tX) # predicted
rmse = metrics.mean_squared_error(ty, ty_hat, squared=False)
r2 = metrics.r2_score(ty, ty_hat)
eval_dict = {'Error': target_col, 'RMSE': rmse, 'R^2': r2}
evals.append(eval_dict)
print("linear *study* for {} regressors!".format(len(sumRegressors)))
print(pd.DataFrame(evals))
print("Average R2: ", average_R2(evals))
def study_linear(trainset):
"""
Description
-----------
Create a linear combination of the base regressors; change the weights and
regressors above to adjust the mix.
Parameters
----------
trainset : dict
train_data_set
Returns
-------
Prints the evaluation results to the screen.
"""
global train_test_data
train_test_data = trainset
for target_col in train_test_data['y_train'].columns:
oof_train[target_col] = []
oof_test[target_col] = []
for i, reg in enumerate(sumRegressors):
train_one_regressor(i, reg, use_RFsample=True, seed=1024)
train_linear_sumer()
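# Overall flow, a stacking/blending scheme: each base regressor's predictions
# on the training set are stacked column-wise in oof_train, a Ridge model
# (REG_TOTAL) learns the per-target linear combination, and the stacked test
# predictions in oof_test are scored against the held-out targets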

@ -20,7 +20,7 @@ def train_linear(id, regType: sumRegressor, use_RFsample = False):
X, ys = train_test_data['X_train'], train_test_data['y_train']
if use_RFsample:
# X, ys = get_random_small_train(X, ys)
# which xxx_moon?
# make_moons(n_samples=100, shuffle=True, noise=None, random_state=None)
pass
check_X_y(X, ys, multi_output=True)
models = {}

@ -0,0 +1,203 @@
# FNN
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
import pandas as pd
from normal_use import *
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
class FNN_Net(nn.Module):
def __init__(self):
super(FNN_Net, self).__init__()
self.features = 0
self.linear_relu1 = nn.Linear(self.features, 128)
self.linear_relu2 = nn.Linear(128, 256)
self.linear_relu3 = nn.Linear(256, 256)
self.linear_relu4 = nn.Linear(256, 256)
self.linear5 = nn.Linear(256, 1)
def forward(self, x):
y_pred = self.linear_relu1(x)
y_pred = nn.functional.relu(y_pred)
y_pred = self.linear_relu2(y_pred)
y_pred = nn.functional.relu(y_pred)
y_pred = self.linear_relu3(y_pred)
y_pred = nn.functional.relu(y_pred)
y_pred = self.linear_relu4(y_pred)
y_pred = nn.functional.relu(y_pred)
y_pred = self.linear5(y_pred)
return y_pred
def fit(self, X, y_all):
"""
Description
-----------
Train the network on the given feature matrix and single target column.
Parameters
----------
X - train data X
y_all - train data ys
"""
y = y_all
# Training set features
train_features = torch.tensor(X.values, dtype=torch.float)
train_features = train_features.to(device)
# Training set targets
train_labels = torch.tensor(y.values, dtype=torch.float).view(-1, 1)
train_labels = train_labels.to(device)
self.features=train_features.shape[1]
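# Rebuild the first layer now that the real input width is known; the
# constructor used a placeholder of 0 features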
self.linear_relu1 = nn.Linear(self.features, 128)
self.to(device)
self.train()
criterion = nn.MSELoss(reduction='mean')
criterion = criterion.to(device)
optimizer = torch.optim.Adam(self.parameters(), lr=1e-4)
losses = []
epoch = 5000
epoch_real = 0
for t in range(epoch):
y_pred = self(train_features)
loss = criterion(y_pred, train_labels)
losses.append(loss.item())
if torch.isnan(loss):
break
optimizer.zero_grad()
loss.backward()
optimizer.step()
epoch_real = epoch_real + 1
print(f"epoch:{epoch_real} loss:{losses[-1]}")
def predict(self, X):
self.to(device)
test_features = torch.tensor(X.values, dtype=torch.float)
test_features = test_features.to(device)
pred_labels = self(test_features)
pred_labels = pred_labels.cpu().data.numpy()
return pred_labels
# # DNN equal next NN
# import itertools
# from sklearn.model_selection import train_test_split
# import pandas as pd
# import numpy as np
# from keras import layers, models
# # Model definition
# class DNN_Net():
# def build_model(self):
# network = models.Sequential()
# network.add(layers.Dense(64, activation='relu', input_shape=(13, )))
# network.add(layers.Dense(64, activation='relu'))
# network.add(layers.Dense(1)) # final layer outputs the prediction (identity activation)
# # Loss is MSE (mean squared error); the monitored metric is MAE (mean absolute error)
# network.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
# return network
# def __init__(self):
# self.network = self.build_model()
# def fit(self, X, ys):
# self.network.summary()
# self.network.fit(X, ys, epochs=1, batch_size=1)
# def predict(self, X):
# res = self.network.predict(X)
# print(res)
# return res
# NN
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn import metrics
class NN_Net(nn.Module):
def __init__(self):
super(NN_Net, self).__init__()
self.linear_relu1 = nn.Linear(13, 64)
self.relu6 = nn.ReLU6()
# self.relu6 = nn.ReLU()
self.linear_relu2 = nn.Linear(64, 64)
self.leaky = nn.LeakyReLU()
# self.leaky = nn.ReLU()
self.linear3 = nn.Linear(64, 1)
def forward(self, x):
y_pred = self.linear_relu1(x)
y_pred = self.relu6(y_pred)
y_pred = self.linear_relu2(y_pred)
y_pred = self.leaky(y_pred)
y_pred = self.linear3(y_pred)
return y_pred
def fit(self, X, y_all):
"""
Description
-----------
Train the network on the given feature matrix and single target column.
Parameters
----------
X - train data X
y_all - train data ys
"""
# Training set features
train_features = torch.tensor(X.values, dtype=torch.float)
train_features = train_features.to(device)
# Training set targets
train_labels = torch.tensor(y_all.values, dtype=torch.float).view(-1, 1)
train_labels = train_labels.to(device)
self.to(device)
# self.train()
opti = torch.optim.SGD(self.parameters(), lr=0.02)
loss_func = nn.MSELoss()
train_dataloader = DataLoader(TensorDataset(train_features, train_labels), batch_size=320)
for t in range(1000):
for batch, (x, y) in enumerate(train_dataloader):
pred = self(x)
loss = loss_func(pred, y)
opti.zero_grad()
torch.sqrt(loss).backward()
opti.step()
print(t)
def predict(self, X):
self.to(device)
test_features = torch.tensor(X.values, dtype=torch.float)
test_features = test_features.to(device)
pred_labels = self(test_features)
pred_labels = pred_labels.cpu().data.numpy()
return pred_labels

@ -13,8 +13,11 @@ import pandas as pd
from typing import Union
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_moons
from sklearn.utils.validation import check_X_y
import joblib
from sklearn.metrics import r2_score
def average_R2(evals):
sum = 0

@ -0,0 +1,68 @@
import keras
from keras.engine.data_adapter import KerasSequenceAdapter
from normal_use import *
from nn_use import FNN_Net, NN_Net
Regressors = [NN_Net]
# Params = ['','']
Regressor = Union[type(Regressors)]
train_test_data = None
def train_model(id, regType: Regressor):
X, ys = train_test_data['X_train'], train_test_data['y_train']
check_X_y(X, ys, multi_output=True)
models = {}
for target_col in ys.columns:
y = ys[target_col]
reg = regType()
reg.fit(X, y)
models[target_col] = reg
print(regType.__name__, target_col)
joblib.dump(models, f"nn_models/{regType.__name__}.model")
# keras.models.save_model(models, f"nn_models/{regType.__name__}.model")
def eval_model(regType: Regressor):
models = joblib.load(f"nn_models/{regType.__name__}.model")
X, ys = train_test_data['X_test'], train_test_data['y_test']
evals = []
for target_col, reg in models.items():
y_hat = reg.predict(X) # predicted
y = ys[target_col] # ground truth
rmse = metrics.mean_squared_error(y, y_hat, squared=False)
r2 = metrics.r2_score(y, y_hat)
eval_dict = {'Error': target_col, 'RMSE': rmse, 'R^2': r2}
evals.append(eval_dict)
print(regType.__name__)
print(pd.DataFrame(evals))
print("Average R2: ", average_R2(evals))
def train_one_models(trainsets):
"""
Description
-----------
Call this to start training each regressor.
Parameters
----------
trainsets : dict
use joblib to load the target dataset (from create_datas) and pass it in here.
Returns
-------
No return value; saves the models in the "nn_models" folder and prints R^2 on screen
"""
global train_test_data
train_test_data = trainsets
for i, reg in enumerate(Regressors):
train_model(i, reg)
for reg in Regressors:
eval_model(reg)

@ -7,11 +7,17 @@ import create_traindata
import one_regressor
import stacking_regressor
import linear_sum_regressor
import linear_study
import one_nn
# %%
# create train data
seed = 514
create_traindata.create_train_data(seed=seed)
# %%
# load data
seed = 514
train_test_data = joblib.load(f"create_datas/seed_{seed}.td")
# %%
@ -24,4 +30,14 @@ stacking_regressor.stacking_train(train_test_data)
# %%
# test linear combination
linear_sum_regressor.only_linear(train_test_data)
# %%
# test linear study method
linear_study.study_linear(train_test_data)
# %%
# seed = 514
# train_test_data = joblib.load(f"create_datas/seed_{seed}.td")
one_nn.train_one_models(train_test_data)
# %%
