add many things
parent 386efc5523
commit d85a93668b
@@ -0,0 +1,152 @@
# Copyright 2020 IBM Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Standard libraries
import os
import logging
import itertools
import datetime as dt

# Data processing libraries
import numpy as np
import pandas as pd
from tqdm import tqdm
from joblib import Parallel, delayed

# Physics model
from orbit_prediction import get_state_vect_cols
from orbit_prediction.physics_model import PhysicsModel

logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO"))
logger = logging.getLogger(__name__)


def predict_orbit(window):
    """Predict the state vectors of each future timestep in the given `window`
    using a physics astrodynamics model.

    :param window: The window of timesteps to predict the orbit of the ASO for
    :type window: pandas.DataFrame

    :return: The original timestep rows with the predicted state vectors added
    :rtype: pandas.DataFrame
    """
    # The `window` DataFrame is reverse sorted by time so the starting position
    # is the last row
    start_row = window.iloc[-1]
    start_epoch = start_row.name
    # Get the column names of the state vector components
    state_vect_comps = get_state_vect_cols()
    # Extract the position and velocity vectors as a numpy array
    start_state_vect = start_row[state_vect_comps].to_numpy()
    start_state = np.concatenate((np.array([start_epoch]),
                                  start_state_vect))
    # Build an orbit model
    orbit_model = PhysicsModel()
    orbit_model.fit([start_state])
    future_rows = window.iloc[:-1].reset_index()
    # We add the epoch and the state vector components of the starting row
    # to the rows we will use the physics model to make predictions for
    future_rows['start_epoch'] = start_epoch
    for svc in state_vect_comps:
        future_rows[f'start_{svc}'] = start_row[svc]
    # Calculate the elapsed time from the starting epoch to the
    # epoch of all the rows to make predictions for
    time_deltas = future_rows.epoch - future_rows.start_epoch
    elapsed_seconds = time_deltas.dt.total_seconds()
    future_rows['elapsed_seconds'] = elapsed_seconds
    physics_cols = [f'physics_pred_{svc}' for svc in state_vect_comps]
    # Predict the state vectors for each of the rows in the "future"
    predicted_orbits = orbit_model.predict([elapsed_seconds.to_numpy()])
    try:
        future_rows[physics_cols] = predicted_orbits[0]
    except Exception as ex:
        # Log the failing window instead of crashing the parallel worker
        logger.error("Failed to assign predicted orbits: %s\n%s\n%s",
                     ex, window, future_rows)
    return future_rows


def predict_orbits(df, last_n_days, n_pred_days):
    """Use a physics astrodynamics model to predict the orbits of the ASOs
    in the provided DataFrame.

    :param df: The DataFrame containing the observed orbital state vectors
        to use to make predictions from
    :type df: pandas.DataFrame

    :param last_n_days: Filter the DataFrame to use rows from only the last
        `n` days. Use all the rows if `None` is passed, but this may take a
        very long time to run
    :type last_n_days: int

    :param n_pred_days: The number of days in the rolling prediction window
    :type n_pred_days: int
    """
    if last_n_days:
        time_cutoff = df.epoch.max() - dt.timedelta(days=last_n_days)
        df = df[df.epoch >= time_cutoff]
    epoch_df = df.sort_values('epoch', ascending=False).set_index('epoch')
    pred_window_length = f'{n_pred_days}d'
    # For each row in `df` we create a window of all of the observations for
    # that ASO that are within `n_pred_days` of the given row
    window_cols = ['aso_id', pd.Grouper(freq=pred_window_length)]
    windows = [w[1] for w in epoch_df.groupby(window_cols)]
    # Predict the orbits in each window in parallel
    window_dfs = Parallel(n_jobs=-1)(delayed(predict_orbit)(w)
                                     for w in tqdm(windows))
    # Join all of the window prediction DataFrames into a single DataFrame
    physics_pred_df = pd.concat(window_dfs).reset_index(drop=True)
    return physics_pred_df
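
# Illustrative sketch (not part of this commit) of how the grouping above
# buckets rows: `pd.Grouper(freq='5d')` on a DatetimeIndex splits each ASO's
# observations into fixed 5-day bins, and each bin becomes one `window`:
#
#   import pandas as pd
#   idx = pd.to_datetime(['2020-01-01', '2020-01-02', '2020-01-06'])
#   demo = pd.DataFrame({'aso_id': ['a', 'a', 'a'], 'r_x': [1.0, 2.0, 3.0]},
#                       index=idx)
#   for _, window in demo.groupby(['aso_id', pd.Grouper(freq='5d')]):
#       print(window)  # two windows: Jan 1-2 and Jan 6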


def calc_physics_error(df):
    """Calculates the error in the state vector components between the ground
    truth observations and the physics model predictions.

    :param df: The DataFrame containing the ground truth observations and the
        physics model predictions
    :type df: pandas.DataFrame

    :return: The input DataFrame with the physics model error columns added
    :rtype: pandas.DataFrame
    """
    comps = ['x', 'y', 'z']
    vects = ['r', 'v']
    for vect, comp in itertools.product(vects, comps):
        comp_col = f'{vect}_{comp}'
        err_col = f'physics_err_{comp_col}'
        err_val = df[f'physics_pred_{comp_col}'] - df[comp_col]
        df[err_col] = err_val
    return df
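
# Hedged note on the sign convention (illustration, not code from this commit):
# since physics_err_* = physics_pred_* - observed, a downstream error model
# corrects a physics prediction by *subtracting* its predicted error, e.g.
#
#   corrected_r_x = df['physics_pred_r_x'] - predicted_err_r_x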


def run():
    """Builds a training data set of physics model errors from the
    hard-coded input parquet file.
    """
    logger.info('Loading input DataFrame...')
    input_df = pd.read_parquet("/home/lj020/Downloads/data6.parquet")
    logger.info('Predicting orbits...')
    physics_pred_df = predict_orbits(input_df,
                                     last_n_days=None,
                                     n_pred_days=5)
    logger.info('Calculating physics model error...')
    physics_pred_df = calc_physics_error(physics_pred_df)
    logger.info('Serializing results...')
    physics_pred_df.to_parquet("/home/lj020/Downloads/train_result.parquet")

    with open("/home/lj020/Downloads/train_result.txt", "w") as f:
        f.write(physics_pred_df.to_string())


run()
@@ -0,0 +1,72 @@
from tletools import TLE
import pandas as pd
from astropy import units as u


def get_tles(raw_tle_str):
    """Parses the raw TLE string and converts it to TLE objects.

    :param raw_tle_str: The raw string form of the TLEs
    :type raw_tle_str: str

    :return: The parsed object representations of the TLEs
    :rtype: [tletools.TLE]
    """
    cutnum = 2  # each element set spans two lines in the raw file
    all_tle_lines = raw_tle_str.strip().splitlines()
    tles = []
    for i in range(len(all_tle_lines) // cutnum):
        # Calculate the offset of this element set in the line list
        j = i * cutnum
        # The raw file has no object-name lines, so prepend one
        tle_lines = ["noaa-06"] + all_tle_lines[j:j + cutnum]
        print(tle_lines)
        # Strip line number from object name line
        # tle_lines[0] = tle_lines[0][2:]
        tle = TLE.from_lines(*tle_lines)
        tles.append(tle)
    # print(tles)
    return tles
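
# Assumed input layout (illustration only; the real noaa.txt is not in this
# diff): bare two-line element sets with no name line, i.e. pairs of lines
# starting with "1 " and "2 ", which is why the loop above prepends the
# "noaa-06" name line before calling TLE.from_lines().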


def get_aso_data(tles):
    """Extracts the necessary data from the TLE objects for doing orbital
    prediction.

    :param tles: The list of TLE objects to extract orbit information from
    :type tles: [tletools.TLE]

    :return: A DataFrame of the extracted TLE data
    :rtype: pandas.DataFrame
    """
    tles_data = []
    for tle in tles:
        aso_data = {}
        aso_data['aso_name'] = tle.name
        aso_data['aso_id'] = tle.norad
        aso_data['epoch'] = tle.epoch.to_datetime()
        # Convert the TLE object to a poliastro.twobody.Orbit instance
        orbit = tle.to_orbit()
        # Calculate the position and velocity vectors
        r, v = orbit.rv()
        # Convert the position vector from kilometers to meters
        r_m = r.to(u.m).to_value()
        # Convert the velocity vector from km/s to m/s
        v_ms = v.to(u.m / u.s).to_value()
        # Extract the components of the state vectors
        aso_data['r_x'], aso_data['r_y'], aso_data['r_z'] = r_m
        aso_data['v_x'], aso_data['v_y'], aso_data['v_z'] = v_ms
        tles_data.append(aso_data)
    return pd.DataFrame(tles_data)


full_tle = []
with open("/home/lj020/Downloads/noaa.txt", "r") as f:
    raw_tle = f.read()
    l = get_tles(raw_tle_str=raw_tle)
    full_tle += l

w = get_aso_data(full_tle)
w.to_parquet("/home/lj020/Downloads/data6.parquet")
with open("/home/lj020/Downloads/data6.txt", "w") as f:
    f.write(w.to_string())
# print(w)
File diff suppressed because it is too large
Binary file not shown.
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -0,0 +1,110 @@
# %%
import pandas as pd

from itertools import product


def get_state_vect_cols(prefix=''):
    if prefix:
        prefix += '_'
    vectors = ['r', 'v']
    components = ['x', 'y', 'z']
    col_names = [f'{prefix}{v}_{c}' for v, c in product(vectors, components)]
    return col_names
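
# For example (derived from the function above, for reference):
# get_state_vect_cols('physics_pred') returns
# ['physics_pred_r_x', 'physics_pred_r_y', 'physics_pred_r_z',
#  'physics_pred_v_x', 'physics_pred_v_y', 'physics_pred_v_z'].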


# %%
df = pd.read_parquet("traindata/physics_preds.parquet")
test_set = df[df['aso_id'] == "05277"]

# Drop the last 3 rows of each ASO to form the training set
train_set = df.groupby('aso_id').apply(lambda x: x.head(x.count()[0] - 3))
print(df.count()[0], train_set.count()[0], test_set.count()[0])

# %%
from sklearn.model_selection import train_test_split

feature_cols = [
    'elapsed_seconds'
] + get_state_vect_cols('physics_pred') + get_state_vect_cols('start')
print(feature_cols)
# The target values are the errors between the physics model predictions
# and the ground truth observations
target_cols = get_state_vect_cols('physics_err')
print(target_cols)
# Create feature and target matrices
X = df[feature_cols]
y = df[target_cols]
data_keys = ['X_train', 'X_test', 'y_train', 'y_test']
data_vals = train_test_split(X, y, test_size=0.2)
train_test_data = dict(zip(data_keys, data_vals))

# %%
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn import metrics

TestNet2 = lambda: nn.Sequential(
    nn.Linear(13, 64),
    nn.ReLU6(),
    nn.Linear(64, 64),
    nn.LeakyReLU(),
    nn.Linear(64, 1),
)

nets = {}

X_train = torch.tensor(train_test_data["X_train"].values,
                       dtype=torch.float32).cuda()
y_train = train_test_data["y_train"]
X_test = torch.tensor(train_test_data['X_test'].values,
                      dtype=torch.float32).cuda()
y_test = train_test_data['y_test']
r2s = []
for target_col in y_train.columns:
    y1 = torch.tensor(y_train[target_col].values,
                      dtype=torch.float32).reshape(-1, 1).cuda()
    print(X.shape, y1.shape)
    net = TestNet2().cuda()
    opti = torch.optim.SGD(net.parameters(), lr=0.02)
    loss_func = nn.MSELoss()
    train_dataloader = DataLoader(TensorDataset(X_train, y1), batch_size=320)
    for t in range(10000):
        for batch, (x, y) in enumerate(train_dataloader):
            pred = net(x)
            loss = loss_func(pred, y)
            opti.zero_grad()
            # Backpropagating sqrt(MSE) optimizes the RMSE directly
            torch.sqrt(loss).backward()
            opti.step()
        with torch.no_grad():
            y = y_test[target_col]
            y_hat = net(X_test).cpu().numpy()
            rmse = metrics.mean_squared_error(y, y_hat, squared=False)
            r2 = metrics.r2_score(y, y_hat)
            r2s.append(r2)
            print(f"Epoch {t}: r2={r2}, rmse={rmse}")
    nets[target_col] = net
    print(target_col)
    break

import matplotlib.pyplot as plt

plt.plot(r2s)
plt.show()

# %%
X, ys = train_test_data['X_test'], train_test_data['y_test']
evals = []
with torch.no_grad():
    for target_col, net in nets.items():
        # Move the features to the GPU to match the model's device
        y_hat = net(torch.tensor(X.values, dtype=torch.float32).cuda())  # fake
        y_hat = y_hat.cpu().numpy()
        y = ys[target_col]  # real
        print(y)
        print(y_hat)
        rmse = metrics.mean_squared_error(y, y_hat, squared=False)
        r2 = metrics.r2_score(y, y_hat)
        eval_dict = {'Error': target_col, 'RMSE': rmse, 'R^2': r2}
        evals.append(eval_dict)
print(pd.DataFrame(evals))
# %%
@@ -0,0 +1,343 @@
# Data preprocessing
import pandas as pd
from itertools import product
from torch.utils.data import DataLoader
from sklearn.metrics import r2_score
from sklearn.utils import shuffle

def get_state_vect_cols(prefix=''):
    if prefix:
        prefix += '_'
    vectors = ['r', 'v']
    components = ['x', 'y', 'z']
    col_names = [f'{prefix}{v}_{c}' for v, c in product(vectors, components)]
    return col_names

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)

# %%
df = pd.read_parquet("physics_preds.parquet")
test_set = df[df['aso_id'] == "05277"]

# Drop the last 3 rows of each ASO to form the training set
train_set = df.groupby('aso_id').apply(lambda x: x.head(x.count()[0] - 3))
print(df.count()[0], train_set.count()[0], test_set.count()[0])
data_count = df.count()[0]

# %%
feature_cols = [
    'elapsed_seconds'
] + get_state_vect_cols('physics_pred') + get_state_vect_cols('start')
print(feature_cols)
# The target values are the errors between the physics model predictions
# and the ground truth observations
target_cols = get_state_vect_cols('physics_err')
print(target_cols)

print("Normalizing all samples together")
all_cols = feature_cols + target_cols
df = df[all_cols]
df = shuffle(df)
df = (df - df.mean(axis=0)) / df.std(axis=0)
# Create feature and target matrices
X = df[feature_cols]
y_all = df[target_cols]
# y = df['physics_err_v_x']
# print(y)
# %%


# FNN neural network

import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

class Net(nn.Module):

    def __init__(self, features):
        super(Net, self).__init__()

        self.linear_relu1 = nn.Linear(features, 128)
        self.linear_relu2 = nn.Linear(128, 256)
        self.linear_relu3 = nn.Linear(256, 256)
        self.linear_relu4 = nn.Linear(256, 256)
        self.linear5 = nn.Linear(256, 1)

    def forward(self, x):
        y_pred = self.linear_relu1(x)
        y_pred = nn.functional.relu(y_pred)

        y_pred = self.linear_relu2(y_pred)
        y_pred = nn.functional.relu(y_pred)

        y_pred = self.linear_relu3(y_pred)
        y_pred = nn.functional.relu(y_pred)

        y_pred = self.linear_relu4(y_pred)
        y_pred = nn.functional.relu(y_pred)

        y_pred = self.linear5(y_pred)
        return y_pred

for target_col in y_all.columns:
    y = y_all[target_col]
    train_size = int(data_count * 0.80)
    # Training set features
    train_features = torch.tensor(X[:train_size].values, dtype=torch.float)
    train_features = train_features.to(device)
    # Training set targets
    train_labels = torch.tensor(y[:train_size].values, dtype=torch.float).view(-1, 1)
    train_labels = train_labels.to(device)
    # print("train data size: ", train_features.shape)
    # print("label data size: ", train_labels.shape)
    # print("test data size: ", test_features.shape)

    model = Net(features=train_features.shape[1])
    model = model.to(device)
    model.train()

    criterion = nn.MSELoss(reduction='mean')
    criterion = criterion.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

    losses = []
    epoch = 5000
    epoch_real = 0
    for t in range(epoch):
        y_pred = model(train_features)
        y_pred.to(device)

        loss = criterion(y_pred, train_labels)
        losses.append(loss.item())

        if torch.isnan(loss):
            break

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epoch_real = epoch_real + 1

    print(f"epoch:{epoch_real} loss:{losses[-1]} target:{target_col}")
    torch.save(model.state_dict(), './{}_FNN.pth'.format(target_col))

evals = []
for target_col in y_all.columns:
    y = y_all[target_col]
    # Test set features
    test_features = torch.tensor(X[train_size:].values, dtype=torch.float)
    test_features = test_features.to(device)
    model.load_state_dict(torch.load('./{}_FNN.pth'.format(target_col), map_location=lambda storage, loc: storage))
    model.to(device)
    pred_labels = model(test_features)
    pred_labels = pred_labels.cpu().data.numpy()
    real_labels = y[train_size:].values
    r2 = r2_score(real_labels, pred_labels)
    eval_dict = {'Error': target_col, 'R^2': r2}
    evals.append(eval_dict)
    # print(r2)
print(pd.DataFrame(evals))


# model_sequential = nn.Sequential(
#     nn.Linear(test_features.shape[1], 128),
#     nn.ReLU(),
#     nn.Linear(128, 256),
#     nn.ReLU(),
#     nn.Linear(256, 256),
#     nn.ReLU(),
#     nn.Linear(256, 256),
#     nn.ReLU(),
#     nn.Linear(256, 1)
# )


#
# '''
# STEP 1: LOADING DATASET
# '''
#
# train_dataset = dsets.MNIST(root='./data',
#                             train=True,
#                             transform=transforms.ToTensor(),
#                             download=True)
#
# test_dataset = dsets.MNIST(root='./data',
#                            train=False,
#                            transform=transforms.ToTensor())
#
# '''
# STEP 2: MAKING DATASET ITERABLE
# '''
#
# batch_size = 100
# n_iters = 3000
# num_epochs = n_iters / (len(train_dataset) / batch_size)
# num_epochs = int(num_epochs)
#
# train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
#                                            batch_size=batch_size,
#                                            shuffle=True)
#
# test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
#                                           batch_size=batch_size,
#                                           shuffle=False)
#
# '''
# STEP 3: CREATE MODEL CLASS
# '''
# class FeedforwardNeuralNetModel(nn.Module):
#     def __init__(self, input_dim, hidden_dim, output_dim):
#         super(FeedforwardNeuralNetModel, self).__init__()
#         # Linear function 1: 784 --> 100
#         self.fc1 = nn.Linear(input_dim, hidden_dim)
#         # Non-linearity 1
#         self.relu1 = nn.ReLU()
#
#         # Linear function 2: 100 --> 100
#         self.fc2 = nn.Linear(hidden_dim, hidden_dim)
#         # Non-linearity 2
#         self.relu2 = nn.ReLU()
#
#         # Linear function 3: 100 --> 100
#         self.fc3 = nn.Linear(hidden_dim, hidden_dim)
#         # Non-linearity 3
#         self.relu3 = nn.ReLU()
#
#         # Linear function 4 (readout): 100 --> 10
#         self.fc4 = nn.Linear(hidden_dim, output_dim)
#
#     def forward(self, x):
#         # Linear function 1
#         out = self.fc1(x)
#         # Non-linearity 1
#         out = self.relu1(out)
#
#         # Linear function 2
#         out = self.fc2(out)
#         # Non-linearity 2
#         out = self.relu2(out)
#
#         # Linear function 3
#         out = self.fc3(out)
#         # Non-linearity 3
#         out = self.relu3(out)
#
#         # Linear function 4 (readout)
#         out = self.fc4(out)
#         return out
# '''
# STEP 4: INSTANTIATE MODEL CLASS
# '''
# input_dim = 28*28
# hidden_dim = 100
# output_dim = 10
#
# model = FeedforwardNeuralNetModel(input_dim, hidden_dim, output_dim)
#
# #######################
# #  USE GPU FOR MODEL  #
# #######################
#
# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# model.to(device)
#
# '''
# STEP 5: INSTANTIATE LOSS CLASS
# '''
# criterion = nn.CrossEntropyLoss()
#
#
# '''
# STEP 6: INSTANTIATE OPTIMIZER CLASS
# '''
# learning_rate = 0.1
#
# optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
#
# '''
# STEP 7: TRAIN THE MODEL
# '''
# iter = 0
# for epoch in range(num_epochs):
#     for i, (images, labels) in enumerate(train_loader):
#
#         #######################
#         #  USE GPU FOR MODEL  #
#         #######################
#         images = images.view(-1, 28*28).requires_grad_().to(device)
#         labels = labels.to(device)
#
#         # Clear gradients w.r.t. parameters
#         optimizer.zero_grad()
#
#         # Forward pass to get output/logits
#         outputs = model(images)
#
#         # Calculate Loss: softmax --> cross entropy loss
#         loss = criterion(outputs, labels)
#
#         # Getting gradients w.r.t. parameters
#         loss.backward()
#
#         # Updating parameters
#         optimizer.step()
#
#         iter += 1
#
#         if iter % 500 == 0:
#             # Calculate Accuracy
#             correct = 0
#             total = 0
#             # Iterate through test dataset
#             for images, labels in test_loader:
#                 #######################
#                 #  USE GPU FOR MODEL  #
#                 #######################
#                 images = images.view(-1, 28*28).requires_grad_().to(device)
#
#                 # Forward pass only to get logits/output
#                 outputs = model(images)
#
#                 # Get predictions from the maximum value
#                 _, predicted = torch.max(outputs.data, 1)
#
#                 # Total number of labels
#                 total += labels.size(0)
#
#                 # Total correct predictions
#                 if torch.cuda.is_available():
#                     correct += (predicted.cpu() == labels.cpu()).sum()
#                 else:
#                     correct += (predicted == labels).sum()
#
#             accuracy = 100 * correct / total
#
#             # Print Loss
#             print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iter, loss.item(), accuracy))
@@ -0,0 +1,425 @@
# https://blog.csdn.net/weixin_35757704/article/details/115910672
# https://www.bilibili.com/video/BV1hE411t7RN?p=27
# https://www.zhihu.com/question/39792141
import numpy as np
from typing import Union
import pandas as pd
from itertools import product
import torch
from torch import nn
from torch.utils.data import DataLoader
import torchvision
from sklearn.model_selection import train_test_split
from sklearn.utils.validation import check_X_y
import joblib
from catboost import CatBoostRegressor
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from sklearn.metrics import r2_score
from CNN_architecture import CNN_architecture
from sklearn.utils import shuffle


def get_state_vect_cols(prefix=''):
    if prefix:
        prefix += '_'
    vectors = ['r', 'v']
    components = ['x', 'y', 'z']
    col_names = [f'{prefix}{v}_{c}' for v, c in product(vectors, components)]
    return col_names


pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)
# %%
df = pd.read_parquet("traindata/physics_preds.parquet")
test_set = df[df['aso_id'] == "05277"]

# Drop the last 3 rows of each ASO to form the training set
train_set = df.groupby('aso_id').apply(lambda x: x.head(x.count()[0] - 3))
print(df.count()[0], train_set.count()[0], test_set.count()[0])
data_count = df.count()[0]

# print(train_set)

# %%
feature_cols = [
    'elapsed_seconds'
] + get_state_vect_cols('physics_pred') + get_state_vect_cols('start')
print(feature_cols)
# The target values are the errors between the physics model predictions
# and the ground truth observations
target_cols = get_state_vect_cols('physics_err')
print(target_cols)

print("Normalizing all samples together")
all_cols = feature_cols + target_cols
df = df[all_cols]
df = shuffle(df)
df = (df - df.mean(axis=0)) / df.std(axis=0)
# Create feature and target matrices
feature_cols = ['physics_err_v_x'] + feature_cols
X = df[feature_cols]
y = df[target_cols]
# data_keys = ['X_train', 'X_test', 'y_train', 'y_test']
# data_vals = train_test_split(X, y, test_size=0.2)
# train_test_data = dict(zip(data_keys, data_vals))
# train_test_data['X_test'] = test_set[feature_cols]
# train_test_data['y_test'] = test_set[target_cols]
# train_test_data = {
#     'X_train': train_set[feature_cols],
#     'y_train': train_set[target_cols],
#     'X_test': test_set[feature_cols],
#     'y_test': test_set[target_cols],
# }

# %%

import numpy as np
import torch
from torch import nn

import matplotlib.pyplot as plt

"""
Github: Yonv1943 Zen4 Jia1 hao2
https://github.com/Yonv1943/DL_RL_Zoo/blob/master/RNN

The source of training data
https://github.com/L1aoXingyu/
code-of-learn-deep-learning-with-pytorch/blob/master/
chapter5_RNN/time-series/lstm-time-series.ipynb
"""

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"


def run_train_lstm(X):

    inp_dim = 14
    out_dim = 1
    mid_dim = 15
    mid_layers = 2
    batch_size = 12 * 4
    mod_dir = '.'

    '''load data'''
    data = X.values
    pd.set_option('display.max_rows', None)
    pd.set_option('display.max_columns', None)
    pd.set_option('display.width', 1000)
    # np.set_printoptions(threshold='nan')
    # print(data)
    data_x = data[:-1, :]
    data_y = data[+1:, 0]
    # print(data_y)
    assert data_x.shape[1] == inp_dim
    train_size = int(data_count * 0.80)

    train_x = data_x[:train_size]
    train_y = data_y[:train_size]
    train_x = train_x.reshape((train_size, inp_dim))
    train_y = train_y.reshape((train_size, out_dim))

    # print(train_y)

    # X, ys = train_test_data['X_train'], train_test_data['y_train']
    #
    # check_X_y(X, ys, multi_output=True)
    #
    # train_x = X.values
    # train_y = ys['physics_err_r_x'].values
    # train_y = ys.reshape(ys.shape[0], 1)

    '''build model'''
    # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    device = torch.device("cuda")
    net = RegLSTM(inp_dim, out_dim, mid_dim, mid_layers).to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=1e-2)

    '''train'''
    var_x = torch.tensor(train_x, dtype=torch.float32, device=device)
    var_y = torch.tensor(train_y, dtype=torch.float32, device=device)

    batch_var_x = list()
    batch_var_y = list()

    for i in range(batch_size):
        j = batch_size - i
        batch_var_x.append(var_x[j:])
        batch_var_y.append(var_y[j:])

    from torch.nn.utils.rnn import pad_sequence
    batch_var_x = pad_sequence(batch_var_x)
    batch_var_y = pad_sequence(batch_var_y)

    with torch.no_grad():
        weights = np.tanh(np.arange(len(train_y)) * (np.e / len(train_y)))
        weights = torch.tensor(weights, dtype=torch.float32, device=device)

    print("Training Start")
    for e in range(10):
        out = net(batch_var_x)

        # loss = criterion(out, batch_var_y)
        loss = (out - batch_var_y) ** 2 * weights
        loss = loss.mean()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if e % 64 == 0:
            print('Epoch: {:4}, Loss: {:.5f}'.format(e, loss.item()))
    torch.save(net.state_dict(), '{}/net.pth'.format(mod_dir))
    print("Save in:", '{}/net.pth'.format(mod_dir))

    '''eval'''
    net.load_state_dict(torch.load('{}/net.pth'.format(mod_dir), map_location=lambda storage, loc: storage))
    net = net.eval()

    # X1, ys1 = train_test_data['X_test'], train_test_data['y_test']
    # check_X_y(X1, ys1, multi_output=True)
    # test_x = X1.values
    # test_y = ys1['physics_err_r_x'].values
    # test_y = ys1.reshape(ys.shape[0], 1)
    # test_x =

    test_x = data_x.copy()
    test_x[train_size:, 0] = 0
    # print(test_x)
    test_x = test_x[:, np.newaxis, :]
    test_x = torch.tensor(test_x, dtype=torch.float32, device=device)

    '''simple way, but not elegant'''
    for i in range(train_size, len(data) - 2):
        test_y = net(test_x[:i])
        test_x[i, 0, 0] = test_y[-1]

    '''elegant way, but slightly complicated'''
    # eval_size = 1
    # zero_ten = torch.zeros((mid_layers, eval_size, mid_dim), dtype=torch.float32, device=device)
    # test_y, hc = net.output_y_hc(test_x[:train_size], (zero_ten, zero_ten))
    # test_x[train_size + 1, 0, 0] = test_y[-1]
    # for i in range(train_size + 1, len(data) - 2):
    #     test_y, hc = net.output_y_hc(test_x[i:i + 1], hc)
    #     test_x[i + 1, 0, 0] = test_y[-1]

    pred_y = test_x[1:, 0, 0]
    pred_y = pred_y.cpu().data.numpy()
    print("`````````````````````````")
    print(pred_y.shape)
    diff_y = pred_y[train_size:] - data_y[train_size:-1]
    print("------")
    # print(pred_y[train_size:])
    print("------")
    # print(data_y[train_size:-1])
    r2 = r2_score(data_y[train_size:-1], pred_y[train_size:], multioutput='uniform_average')
    evals = []
    eval_dict = {'Error': 'physics_err_v_x', 'R^2': r2}
    evals.append(eval_dict)
    print(pd.DataFrame(evals))

    l1_loss = np.mean(np.abs(diff_y))
    l2_loss = np.mean(diff_y ** 2)
    print("L1: {:.3f} L2: {:.3f}".format(l1_loss, l2_loss))

    plt.plot(pred_y, 'r', label='pred')
    plt.plot(data_y, 'b', label='real', alpha=0.3)
    plt.plot([train_size, train_size], [-1, 2], color='k', label='train | pred')
    plt.legend(loc='best')
    plt.savefig('lstm_reg.png')
    plt.pause(4)


def run_origin():
    inp_dim = 2
    out_dim = 1
    mod_dir = '.'

    '''load data'''
    data = load_data()  # axis1: number, year, month
    data_x = np.concatenate((data[:-2, 0:1], data[+1:-1, 0:1]), axis=1)
    data_y = data[2:, 0]

    train_size = int(len(data_x) * 0.75)
    train_x = data_x[:train_size]
    train_y = data_y[:train_size]

    train_x = train_x.reshape((-1, 1, inp_dim))
    train_y = train_y.reshape((-1, 1, out_dim))

    '''build model'''
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    net = RegLSTM(inp_dim, out_dim, mid_dim=4, mid_layers=2).to(device)
    criterion = nn.SmoothL1Loss()
    optimizer = torch.optim.Adam(net.parameters(), lr=1e-2)

    '''train'''
    var_x = torch.tensor(train_x, dtype=torch.float32, device=device)
    var_y = torch.tensor(train_y, dtype=torch.float32, device=device)
    print('var_x.size():', var_x.size())
    print('var_y.size():', var_y.size())

    for e in range(512):
        out = net(var_x)
        loss = criterion(out, var_y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (e + 1) % 100 == 0:  # print results every 100 epochs
            print('Epoch: {}, Loss: {:.5f}'.format(e + 1, loss.item()))

    torch.save(net.state_dict(), '{}/net.pth'.format(mod_dir))

    '''eval'''
    # net.load_state_dict(torch.load('{}/net.pth'.format(mod_dir), map_location=lambda storage, loc: storage))
    net = net.eval()  # switch to evaluation mode

    """
    inappropriate way of seq prediction:
    use all real data to predict the number of next month
    """
    test_x = data_x.reshape((-1, 1, inp_dim))
    var_data = torch.tensor(test_x, dtype=torch.float32, device=device)
    eval_y = net(var_data)  # predictions on the test set
    pred_y = eval_y.view(-1).cpu().data.numpy()

    plt.plot(pred_y[1:], 'r', label='pred inappr', alpha=0.3)
    plt.plot(data_y, 'b', label='real', alpha=0.3)
    plt.plot([train_size, train_size], [-1, 2], label='train | pred')

    """
    appropriate way of seq prediction:
    use real+pred data to predict the number of next 3 years.
    """
    test_x = data_x.reshape((-1, 1, inp_dim))
    test_x[train_size:] = 0  # delete the data of next 3 years.
    test_x = torch.tensor(test_x, dtype=torch.float32, device=device)
    for i in range(train_size, len(data) - 2):
        test_y = net(test_x[:i])
        test_x[i, 0, 0] = test_x[i - 1, 0, 1]
        test_x[i, 0, 1] = test_y[-1, 0]
    pred_y = test_x.cpu().data.numpy()
    pred_y = pred_y[:, 0, 0]
    plt.plot(pred_y[2:], 'g', label='pred appr')

    plt.legend(loc='best')
    plt.savefig('lstm_origin.png')
    plt.pause(4)


class RegLSTM(nn.Module):
    def __init__(self, inp_dim, out_dim, mid_dim, mid_layers):
        super(RegLSTM, self).__init__()

        self.rnn = nn.LSTM(inp_dim, mid_dim, mid_layers)  # rnn
        self.reg = nn.Sequential(
            nn.Linear(mid_dim, mid_dim),
            nn.Tanh(),
            nn.Linear(mid_dim, out_dim),
        )  # regression

    def forward(self, x):
        y = self.rnn(x)[0]  # y, (h, c) = self.rnn(x)

        seq_len, batch_size, hid_dim = y.shape
        y = y.view(-1, hid_dim)
        y = self.reg(y)
        y = y.view(seq_len, batch_size, -1)
        return y

    """
    PyCharm Ctrl+click on nn.LSTM() jumps to the PyTorch source:
    Examples::
        >>> rnn = nn.LSTM(10, 20, 2)
        >>> input = torch.randn(5, 3, 10)
        >>> h0 = torch.randn(2, 3, 20)
        >>> c0 = torch.randn(2, 3, 20)
        >>> output, (hn, cn) = rnn(input, (h0, c0))
    """

    def output_y_hc(self, x, hc):
        y, hc = self.rnn(x, hc)  # y, (h, c) = self.rnn(x)

        seq_len, batch_size, hid_dim = y.size()
        y = y.view(-1, hid_dim)
        y = self.reg(y)
        y = y.view(seq_len, batch_size, -1)
        return y, hc


class RegGRU(nn.Module):
    def __init__(self, inp_dim, out_dim, mod_dim, mid_layers):
        super(RegGRU, self).__init__()

        self.rnn = nn.GRU(inp_dim, mod_dim, mid_layers)
        self.reg = nn.Linear(mod_dim, out_dim)

    def forward(self, x):
        x, h = self.rnn(x)  # (seq, batch, hidden)

        seq_len, batch_size, hid_dim = x.shape
        x = x.view(-1, hid_dim)
        x = self.reg(x)
        x = x.view(seq_len, batch_size, -1)
        return x

    def output_y_h(self, x, h):
        y, h = self.rnn(x, h)

        seq_len, batch_size, hid_dim = y.size()
        y = y.view(-1, hid_dim)
        y = self.reg(y)
        y = y.view(seq_len, batch_size, -1)
        return y, h


def load_data():
    # passengers number of international airline, 1949-01 ~ 1960-12, per month
    seq_number = np.array(
        [112., 118., 132., 129., 121., 135., 148., 148., 136., 119., 104.,
         118., 115., 126., 141., 135., 125., 149., 170., 170., 158., 133.,
         114., 140., 145., 150., 178., 163., 172., 178., 199., 199., 184.,
         162., 146., 166., 171., 180., 193., 181., 183., 218., 230., 242.,
         209., 191., 172., 194., 196., 196., 236., 235., 229., 243., 264.,
         272., 237., 211., 180., 201., 204., 188., 235., 227., 234., 264.,
         302., 293., 259., 229., 203., 229., 242., 233., 267., 269., 270.,
         315., 364., 347., 312., 274., 237., 278., 284., 277., 317., 313.,
         318., 374., 413., 405., 355., 306., 271., 306., 315., 301., 356.,
         348., 355., 422., 465., 467., 404., 347., 305., 336., 340., 318.,
         362., 348., 363., 435., 491., 505., 404., 359., 310., 337., 360.,
         342., 406., 396., 420., 472., 548., 559., 463., 407., 362., 405.,
         417., 391., 419., 461., 472., 535., 622., 606., 508., 461., 390.,
         432.], dtype=np.float32)
    # assert seq_number.shape == (144, )
    # plt.plot(seq_number)
    # plt.ion()
    # plt.pause(1)
    seq_number = seq_number[:, np.newaxis]

    # print(repr(seq))
    # 1949~1960, 12 years, 12*12 == 144 months
    seq_year = np.arange(12)
    seq_month = np.arange(12)
    seq_year_month = np.transpose(
        [np.repeat(seq_year, len(seq_month)),
         np.tile(seq_month, len(seq_year))],
    )  # Cartesian product

    seq = np.concatenate((seq_number, seq_year_month), axis=1)

    # normalization
    seq = (seq - seq.mean(axis=0)) / seq.std(axis=0)
    return seq
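
# For reference (derived from the code above, not new behavior):
# seq_year_month starts [[0, 0], [0, 1], ..., [0, 11], [1, 0], ...], pairing a
# (year, month) index with each monthly passenger count before normalization.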


if __name__ == '__main__':
    run_train_lstm(X)
    # run_train_gru()
    # run_origin()
@@ -0,0 +1,117 @@
import itertools
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from keras import layers, models


def get_state_vect_cols(prefix=''):
    if prefix:
        prefix += '_'
    vectors = ['r', 'v']
    components = ['x', 'y', 'z']
    col_names = [f'{prefix}{v}_{c}'
                 for v, c
                 in itertools.product(vectors, components)]
    return col_names


def build_train_test_sets(df, test_size=0.2):
    # Features are the physics predicted state vectors and the amount of
    # time in seconds into the future the prediction was made
    feature_cols = ['elapsed_seconds'] + get_state_vect_cols('physics_pred') \
        + get_state_vect_cols('start')
    # The target values are the errors between the physics model predictions
    # and the ground truth observations
    target_cols = get_state_vect_cols('physics_err')
    # Create feature and target matrices
    X = df[feature_cols]
    y = df[target_cols]
    # Split feature and target data into training and test sets
    data_keys = ['X_train', 'X_test', 'y_train', 'y_test']
    data_vals = train_test_split(X, y, test_size=test_size)
    train_test_data = dict(zip(data_keys, data_vals))
    return train_test_data


def get_data(file_path):
    print('Loading physical model orbit prediction training data...')
    physics_pred_df = pd.read_parquet(file_path)
    print('Building training and test sets...')
    train_test_data = build_train_test_sets(physics_pred_df)
    x_train = train_test_data['X_train']
    x_test = train_test_data['X_test']
    y_train = train_test_data['y_train']
    y_test = train_test_data['y_test']
    return x_train, y_train, x_test, y_test


# Model definition
def build_model():
    network = models.Sequential()
    network.add(layers.Dense(64, activation='relu', input_shape=(13, )))
    network.add(layers.Dense(64, activation='relu'))
    network.add(layers.Dense(1))  # final layer outputs the prediction (identity activation)
    # Loss is MSE (mean squared error); the monitored metric is MAE (mean absolute error)
    network.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
    return network
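
# Note (derived from the columns above, for reference): input_shape=(13,)
# matches the feature count of 1 ('elapsed_seconds') + 6 physics_pred_* + 6
# start_* state vector components.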


file_path = r"ssa/traindata/physics_preds.parquet"
train_data, train_labels, test_data, test_labels = get_data(file_path)

# # Standardize the data: subtract the mean and divide by the standard
# # deviation (the test data also uses the training data's statistics)
# mean = train_data.mean(axis=0)
# train_data -= mean
# std = train_data.std(axis=0)
# train_data /= std
# test_data -= mean
# test_data /= std

x_train = np.array(train_data)
print(x_train.shape)
x_test = np.array(test_data)
print(x_test.shape)

features = ['physics_err_r_x', 'physics_err_r_y', 'physics_err_r_z',
            'physics_err_v_x', 'physics_err_v_y', 'physics_err_v_z']
ave_r2 = 0.
for i in range(6):
    y_train = np.array(train_labels[features[i]])
    print(y_train.shape)
    y_test = np.array(test_labels[features[i]])
    print(y_test.shape)

    network = build_model()
    network.summary()
    History = network.fit(x_train, y_train, epochs=100, batch_size=1)

    network.save('models/DNN{0}.h5'.format(i + 1))
    # Measure test-set accuracy with the trained model
    mse, mae = network.evaluate(x_test, y_test)
    rmse = mse ** 0.5
    r2 = 1 - mse / np.var(y_test)
    print(features[i], ": mse:", mse, " rmse:",
          rmse, " mae:", mae, " r2:", r2)
    ave_r2 += r2

    # # Predict with the trained network
    # y_p = network.predict(x_test)

    # Plot the training curve
    history_dict = History.history
    print(history_dict.keys())
    metric_list = history_dict['mae']

    x = range(1, len(metric_list) + 1)

    plt.figure(i)
    plt.plot(x, metric_list, label='training MAE')
    plt.title('Training_mae')
    plt.xlabel('Epochs')
    plt.ylabel('mean abs error')
    plt.legend()
    plt.show()

print('ave_r2: ', ave_r2 / 6)
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1 +1,113 @@
# wait for tomorrow!
from scipy.sparse.construct import random
from normal_use import *


sumRegressors = [LGBMRegressor, RandomForestRegressor, XGBRegressor, CatBoostRegressor]
# NOTE: this annotation is intended as "any of the regressor classes above";
# as written, Union[type(sumRegressors)] just resolves to `list`.
sumRegressor = Union[type(sumRegressors)]
sumParams = [{}, {}, {}, {"silent": True}]
weight = [0.1, 0.2, 0.3, 0.4]
Sums = {}
train_test_data = None
out_weights = []


oof_train = {}
oof_test = {}
# Some changes
# LinearRegression, Ridge, XGBRegressor, CatBoostRegressor, LGBMRegressor
# deepforest.CascadeForestRegressor
REG_TOTAL = Ridge


def get_random_small_train(X, y, Percentage=0.8, seed=0):
    # return X_train & y_train
    data_keys = ['X_train', 'X_test', 'y_train', 'y_test']
    data_vals = train_test_split(X, y, random_state=seed, test_size=(1 - Percentage))
    train_test_data = dict(zip(data_keys, data_vals))
    return train_test_data['X_train'], train_test_data['y_train']


def train_one_regressor(id, regType: sumRegressor, use_RFsample=False, seed=0):
    full_X, full_ys = train_test_data['X_train'], train_test_data['y_train']
    tX, tys = train_test_data['X_test'], train_test_data['y_test']
    X, ys = full_X, full_ys
    if use_RFsample:
        X, ys = get_random_small_train(X, ys, seed=seed)
        # which xxx_moon?
        # make_moons(n_samples=100, shuffle=True, noise=None, random_state=None)
        # pass
    check_X_y(X, ys, multi_output=True)
    models = {}
    evals = []
    for target_col in ys.columns:
        y = ys[target_col]
        reg = regType(**sumParams[id])
        reg.fit(X, y)
        models[target_col] = reg
        # predict on the full training set (stacking features)
        y_hat = reg.predict(full_X)
        oof_train[target_col].append(y_hat.reshape(-1, 1))
        # predict on the test set
        ty_hat = reg.predict(tX)
        oof_test[target_col].append(ty_hat.reshape(-1, 1))
        ty = tys[target_col]
        # per-target evaluation
        rmse = metrics.mean_squared_error(ty, ty_hat, squared=False)
        r2 = metrics.r2_score(ty, ty_hat)
        eval_dict = {'Error': target_col, 'RMSE': rmse, 'R^2': r2}
        evals.append(eval_dict)
    print(regType.__name__)
    print(pd.DataFrame(evals))
    print("Average R2: ", average_R2(evals))

    joblib.dump(models, f"linear/{regType.__name__}_study_{id}.model")


def train_linear_sumer():
    ys = train_test_data['y_train']
    tys = train_test_data['y_test']  # real
    evals = []
    for target_col in oof_train:
        X = np.hstack(oof_train[target_col])
        tX = np.hstack(oof_test[target_col])
        print(ys.shape, X.shape, tys.shape, tX.shape)
        y = ys[target_col]
        ty = tys[target_col]
        clf = REG_TOTAL()
        clf.fit(X, y)
        ty_hat = clf.predict(tX)  # fake
        rmse = metrics.mean_squared_error(ty, ty_hat, squared=False)
        r2 = metrics.r2_score(ty, ty_hat)
        eval_dict = {'Error': target_col, 'RMSE': rmse, 'R^2': r2}
        evals.append(eval_dict)

    print("linear *study* for {} regressors!".format(len(sumRegressors)))
    print(pd.DataFrame(evals))
    print("Average R2: ", average_R2(evals))


def study_linear(trainset):
    """
    Description
    -----------
    Create a linear combination of the base regressors; change the weights
    and regressors here.

    Parameters
    ----------
    trainset : dict
        train_data_set

    Returns
    -------
    Prints the result on screen.
    """
    global train_test_data
    train_test_data = trainset

    for target_col in train_test_data['y_train'].columns:
        oof_train[target_col] = []
        oof_test[target_col] = []

    for i, reg in enumerate(sumRegressors):
        train_one_regressor(i, reg, use_RFsample=True, seed=1024)

    train_linear_sumer()
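
# Hedged aside (not part of this commit): the manual oof_train/oof_test
# blending above is close to what sklearn's StackingRegressor automates
# (with proper cross-validated out-of-fold predictions), fitted once per
# target column since the targets here are multi-output:
#
#   from sklearn.ensemble import StackingRegressor
#   from sklearn.linear_model import Ridge
#   stack = StackingRegressor(
#       estimators=[('lgbm', LGBMRegressor()), ('xgb', XGBRegressor())],
#       final_estimator=Ridge())
#   stack.fit(train_test_data['X_train'], train_test_data['y_train'][col])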
@@ -0,0 +1,203 @@
# FNN

import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as dsets
import pandas as pd
from normal_use import *

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

class FNN_Net(nn.Module):

    def __init__(self):
        super(FNN_Net, self).__init__()
        self.features = 0
        self.linear_relu1 = nn.Linear(self.features, 128)
        self.linear_relu2 = nn.Linear(128, 256)
        self.linear_relu3 = nn.Linear(256, 256)
        self.linear_relu4 = nn.Linear(256, 256)
        self.linear5 = nn.Linear(256, 1)

    def forward(self, x):
        y_pred = self.linear_relu1(x)
        y_pred = nn.functional.relu(y_pred)

        y_pred = self.linear_relu2(y_pred)
        y_pred = nn.functional.relu(y_pred)

        y_pred = self.linear_relu3(y_pred)
        y_pred = nn.functional.relu(y_pred)

        y_pred = self.linear_relu4(y_pred)
        y_pred = nn.functional.relu(y_pred)

        y_pred = self.linear5(y_pred)
        return y_pred

    def fit(self, X, y_all):
        """
        Description
        -----------
        fit

        Parameters
        ----------
        X - train data X
        y_all - train data ys
        """

        y = y_all
        # Training set features
        train_features = torch.tensor(X.values, dtype=torch.float)
        train_features = train_features.to(device)
        # Training set targets
        train_labels = torch.tensor(y.values, dtype=torch.float).view(-1, 1)
        train_labels = train_labels.to(device)

        # Rebuild the input layer now that the feature count is known
        self.features = train_features.shape[1]
        self.linear_relu1 = nn.Linear(self.features, 128)
        self = self.to(device)
        self.train()

        criterion = nn.MSELoss(reduction='mean')
        criterion = criterion.to(device)
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-4)

        losses = []
        epoch = 5000
        epoch_real = 0
        for t in range(epoch):
            y_pred = self(train_features)
            y_pred.to(device)

            loss = criterion(y_pred, train_labels)
            losses.append(loss.item())

            if torch.isnan(loss):
                break

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_real = epoch_real + 1

        print(f"epoch:{epoch_real} loss:{losses[-1]}")

    def predict(self, X):
        self.to(device)
        test_features = torch.tensor(X.values, dtype=torch.float)
        test_features = test_features.to(device)
        pred_labels = self(test_features)
        pred_labels = pred_labels.cpu().data.numpy()
        return pred_labels


# # DNN equals the next NN
# import itertools
# from sklearn.model_selection import train_test_split
# import pandas as pd
# import numpy as np
# from keras import layers, models

# # Model definition

# class DNN_Net():
#     def build_model(self):
#         network = models.Sequential()
#         network.add(layers.Dense(64, activation='relu', input_shape=(13, )))
#         network.add(layers.Dense(64, activation='relu'))
#         network.add(layers.Dense(1))  # final layer outputs the prediction (identity activation)
#         # Loss is MSE; the monitored metric is MAE (mean absolute error)
#         network.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
#         return network

#     def __init__(self):
#         self.network = self.build_model()

#     def fit(self, X, ys):
#         self.network.summary()
#         self.network.fit(X, ys, epochs=1, batch_size=1)

#     def predict(self, X):
#         res = self.network.predict(X)
#         print(res)
#         return res


# NN

import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn import metrics

class NN_Net(nn.Module):
    def __init__(self):
        super(NN_Net, self).__init__()
        self.linear_relu1 = nn.Linear(13, 64)
        self.relu6 = nn.ReLU6()
        # self.relu6 = nn.ReLU()
        self.linear_relu2 = nn.Linear(64, 64)
        self.leaky = nn.LeakyReLU()
        # self.leaky = nn.ReLU()
        self.linear3 = nn.Linear(64, 1)

    def forward(self, x):
        y_pred = self.linear_relu1(x)
        y_pred = self.relu6(y_pred)

        y_pred = self.linear_relu2(y_pred)
        y_pred = self.leaky(y_pred)

        y_pred = self.linear3(y_pred)
        return y_pred

    def fit(self, X, y_all):
        """
        Description
        -----------
        fit

        Parameters
        ----------
        X - train data X
        y_all - train data ys
        """
        # Training set features
        train_features = torch.tensor(X.values, dtype=torch.float)
        train_features = train_features.to(device)
        # Training set targets
        train_labels = torch.tensor(y_all.values, dtype=torch.float).view(-1, 1)
        train_labels = train_labels.to(device)

        self = self.to(device)
        # self.train()

        opti = torch.optim.SGD(self.parameters(), lr=0.02)
        loss_func = nn.MSELoss()
        train_dataloader = DataLoader(TensorDataset(train_features, train_labels), batch_size=320)
        for t in range(1000):
            for batch, (x, y) in enumerate(train_dataloader):
                pred = self(x)
                loss = loss_func(pred, y)
                opti.zero_grad()
                # Backpropagating sqrt(MSE) optimizes the RMSE directly
                torch.sqrt(loss).backward()
                opti.step()
            print(t)

    def predict(self, X):
        self.to(device)
        test_features = torch.tensor(X.values, dtype=torch.float)
        test_features = test_features.to(device)
        pred_labels = self(test_features)
        pred_labels = pred_labels.cpu().data.numpy()
        return pred_labels
@@ -0,0 +1,68 @@
import keras
from keras.engine.data_adapter import KerasSequenceAdapter
from normal_use import *
from nn_use import FNN_Net, NN_Net


Regressors = [NN_Net]
# Params = ['','']
Regressor = Union[type(Regressors)]
train_test_data = None


def train_model(id, regType: Regressor):
    X, ys = train_test_data['X_train'], train_test_data['y_train']
    check_X_y(X, ys, multi_output=True)
    models = {}
    for target_col in ys.columns:
        y = ys[target_col]
        reg = regType()
        reg.fit(X, y)
        models[target_col] = reg
        print(regType.__name__, target_col)
    joblib.dump(models, f"nn_models/{regType.__name__}.model")
    # keras.models.save_model(models, f"nn_models/{regType.__name__}.model")


def eval_model(regType: Regressor):
    models = joblib.load(f"nn_models/{regType.__name__}.model")
    X, ys = train_test_data['X_test'], train_test_data['y_test']
    evals = []
    for target_col, reg in models.items():
        y_hat = reg.predict(X)  # fake
        y = ys[target_col]  # real
        rmse = metrics.mean_squared_error(y, y_hat, squared=False)
        r2 = metrics.r2_score(y, y_hat)
        eval_dict = {'Error': target_col, 'RMSE': rmse, 'R^2': r2}
        evals.append(eval_dict)
    print(regType.__name__)
    print(pd.DataFrame(evals))
    print("Average R2: ", average_R2(evals))


def train_one_models(trainsets):
    """
    Description
    -----------
    Call this to start training each regressor.

    Parameters
    ----------
    trainsets : dict
        Use joblib to extract the target dataset (create_datas) and pass it
        in here.

    Returns
    -------
    No return value, but saves models in the "nn_models" folder and prints
    R^2 on screen.
    """
    global train_test_data
    train_test_data = trainsets

    for i, reg in enumerate(Regressors):
        train_model(i, reg)

    for reg in Regressors:
        eval_model(reg)
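
# Hedged usage sketch (assumption: a `create_datas` step elsewhere dumps the
# train/test dict with joblib, as the docstring above suggests; the path
# below is hypothetical):
#
#   import joblib
#   trainsets = joblib.load("traindata/train_test_data.job")  # hypothetical path
#   train_one_models(trainsets)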