In [None]:
import numpy as np
import pandas as pd
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils import data
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from torch.optim import lr_scheduler
from torch.optim import lr_scheduler
import time
import matplotlib.pyplot as plt
from tqdm import tqdm

def normalization(x: list):
    """Linearly rescale a sequence of numbers onto the interval [-1, 1].

    The maximum of ``x`` is mapped to 1 and the minimum to -1, i.e. every
    value ``v`` becomes ``(v - (M + m) / 2) / ((M - m) / 2)``.

    Args:
        x: Non-empty mutable sequence of numbers. It is overwritten in place
            with the scaled values (and also returned).

    Returns:
        Tuple ``(M, m, x)``: the maximum and minimum of the input *before*
        scaling, followed by ``x`` itself holding the scaled values.

    Raises:
        ValueError: If ``x`` is empty (raised by ``np.max``).
    """
    values = np.asarray(x, dtype=np.float64)
    M, m = np.max(values), np.min(values)
    center, half_range = (M + m) / 2, (M - m) / 2
    if half_range == 0:
        # Constant input: the range is zero, so the formula would divide by
        # zero and produce NaN. Every value sits at the centre, so map to 0.
        scaled = np.zeros_like(values)
    else:
        scaled = (values - center) / half_range
    # Write back in place so callers holding a reference to x see the result.
    x[:] = scaled.tolist()
    # x in [-1, 1]
    return M, m, x

def ArrNorm(x: np.ndarray):
    """Normalise every row ``x[i]`` of an array to [-1, 1] independently.

    Each row is converted to a list and passed to ``normalization``; the
    per-row maxima and minima are collected so the scaling can be undone.

    Args:
        x: NumPy array; rows are taken along the first axis.

    Returns:
        Tuple ``(M_list, m_list, res)``: list of per-row maxima, list of
        per-row minima, and a new array built from the scaled rows.

    Raises:
        AssertionError: If ``x`` is not a ``numpy.ndarray``.
    """
    # NOTE(review): scaling is per row (first axis). If rows are time steps
    # and columns are features, each time step is scaled across its features
    # -- confirm this is the intended axis.
    assert isinstance(x, np.ndarray), "We need a numpy.ndarray"
    M_list, m_list, res = [], [], []
    for i in range(x.shape[0]):
        # tolist() yields a fresh list, so the input array is left untouched.
        row_max, row_min, scaled_row = normalization(x[i].tolist())
        res.append(scaled_row)
        M_list.append(row_max)
        m_list.append(row_min)
    return M_list, m_list, np.array(res)


def df2arr(x) -> np.ndarray:
    """Build and return a new float32 NumPy array from ``x``."""
    as_float32 = np.array(x, dtype=np.float32)
    return as_float32


In [None]:
# Read the raw workbook; header=None keeps the first spreadsheet row as data
# instead of promoting it to column names.
workbook_path = './data/A32.xlsx'
excel = pd.read_excel(workbook_path, header=None)
excel.shape

In [None]:
# Last row index of each of the three consecutive 1486-row blocks in the sheet.
sp = [1486, 2972, 4458]
# Row 0 is skipped (presumably a header row -- verify against the workbook);
# each block starts right after the previous one ends.
row_starts = [1] + [end + 1 for end in sp[:-1]]
row_stops = [end + 1 for end in sp]
# Columns 1..5 hold the values used below; column 0 is left out.
station_1, station_2, standard = [
    excel.iloc[start:stop, 1:6] for start, stop in zip(row_starts, row_stops)
]
standard

In [None]:
# Replace the three DataFrame slices by float32 NumPy arrays of the same data.
station_1, station_2, standard = (
    df2arr(frame) for frame in (station_1, station_2, standard)
)
station_1.shape, station_2.shape, standard.shape

In [None]:
# Element-wise offset and ratio of each station's readings relative to the
# standard series. NumPy yields inf/nan (with a warning) wherever `standard`
# contains zeros.
s1_minus_sd = station_1 - standard
s2_minus_sd = station_2 - standard
s1_div_sd = station_1 / standard
s2_div_sd = station_2 / standard
# Backward-compatible alias: the ratio was originally stored under this
# misspelled name.
s2_dic_sd = s2_div_sd

In [None]:
# Display the shapes of the two station-minus-standard arrays.
s1_minus_sd.shape, s2_minus_sd.shape

In [None]:
# Scale every row of the station-1 difference to [-1, 1] and keep the per-row
# maxima / minima so the scaling can be undone later.
# NOTE(review): this rebinds s1_minus_sd to its normalised version, so running
# the cell a second time replaces the saved M/m lists with the extrema of the
# already-normalised rows.
s1_minus_sd_M, s1_minus_sd_m, s1_minus_sd = ArrNorm(s1_minus_sd)

In [None]:
def GetDataset(input_arr: list, output_arr: list, seq: int):
    """Cut two aligned series into overlapping windows of length ``seq``.

    Window ``i`` covers rows ``i .. i + seq - 1`` of both series (stride 1).

    Args:
        input_arr: Array-like of shape ``(n, ...)`` holding the model inputs.
        output_arr: Array-like with the same length holding the targets.
        seq: Window length; must be positive.

    Returns:
        Tuple ``(inputs, outputs)`` of float32 tensors, each of shape
        ``(n - seq, seq, ...)``. When ``n <= seq`` both tensors are empty with
        shape ``(0, seq, ...)``.

    Raises:
        AssertionError: If the two series differ in length.
        ValueError: If ``seq`` is not positive.
    """
    assert(len(input_arr)==len(output_arr)), "Different size of input and output!"
    if seq <= 0:
        raise ValueError("seq must be a positive integer")
    # Accept plain lists as well as arrays, as the annotations advertise.
    inputs = np.asarray(input_arr)
    outputs = np.asarray(output_arr)
    # NOTE(review): n - seq windows are produced, so the final full window
    # (starting at row n - seq) is left out -- confirm whether that is intended.
    n_windows = max(inputs.shape[0] - seq, 0)

    def _windows(arr):
        # Stack into one ndarray first: building a tensor from a Python list
        # of ndarrays is far slower than converting a single array.
        if n_windows == 0:
            return np.empty((0, seq) + arr.shape[1:], dtype=np.float32)
        return np.stack([arr[i:i + seq] for i in range(n_windows)])

    return (torch.tensor(_windows(inputs), dtype=torch.float32),
            torch.tensor(_windows(outputs), dtype=torch.float32))

        
def load_array(data_arrays, batch_size, is_train=True):
    """Wrap tensors in a ``TensorDataset`` and return a ``DataLoader`` over it.

    Args:
        data_arrays: Iterable of tensors, unpacked into ``TensorDataset``.
        batch_size: Number of samples per batch.
        is_train: Passed as ``shuffle``; batches are shuffled when true.
    """
    tensor_dataset = data.TensorDataset(*data_arrays)
    loader = data.DataLoader(
        dataset=tensor_dataset,
        batch_size=batch_size,
        shuffle=is_train,
    )
    return loader

# Display the shape of the station-1 difference array.
s1_minus_sd.shape

In [None]:
# Build sliding windows: inputs come from the station-1 difference array,
# targets from the standard series.
window_len = 15
Input_Data_1, Output_Data_1 = GetDataset(
    input_arr=s1_minus_sd,
    output_arr=standard,
    seq=window_len,
)
Input_Data_1.shape


In [None]:
# one train demo

class Try(nn.Module):
    """Small CNN followed by an MLP mapping a window to a window of equal shape.

    Input is a batch of windows of shape ``(batch, seq, n_features)`` (or
    ``(batch, 1, seq, n_features)``); output has shape
    ``(batch, seq, n_features)`` so it can be compared element-wise with a
    target window.

    Each window is fed to the convolutions as a single-channel image. The
    batch dimension is never used as a channel count, so any batch size works,
    including a smaller final batch.

    Args:
        seq: Window length (number of rows per sample).
        batch_size: Kept for interface compatibility and stored as
            ``self.into``; the layers do not depend on it.
        scale: Stored as ``self.scale``; not used by the network.
        n_features: Number of columns per row of a window (default 5).
    """

    def __init__(self, seq, batch_size, scale=0, n_features=5):
        super(Try, self).__init__()
        self.scale = scale
        self.seq = seq
        self.into = batch_size
        self.n_features = n_features

        # Spatial size after the conv stack: the first conv (k=3, padding=2)
        # grows each side by 2, the second (k=3, padding=1) keeps the size and
        # the 2x2 max-pool halves it, rounding down.
        pooled_h = (self.seq + 2) // 2
        pooled_w = (self.n_features + 2) // 2
        flat_features = 60 * pooled_h * pooled_w

        self.linear = nn.Sequential(
            # Input size must equal the flattened conv output.
            nn.Linear(flat_features, (self.seq+1)*12),
            nn.Dropout(0.5),
            nn.ReLU(inplace=True),

            nn.Linear((self.seq+1)*12, 200),
            nn.Dropout(0.5),
            nn.ReLU(inplace=True),

            nn.Linear(200, (self.seq+1)*3),
            nn.Dropout(0.5),
            # One output per element of the window. nn.Linear's third
            # positional argument is `bias`, not a size, so only two sizes
            # are passed.
            nn.Linear((self.seq+1)*3, self.seq * self.n_features),
            # NOTE(review): this final ReLU restricts predictions to >= 0;
            # drop it if the targets can be negative.
            nn.ReLU(inplace=True)
        )
        self.conv = nn.Sequential(
            # 1 x seq x n_features
            nn.Conv2d(1, 60, kernel_size=(3,3), padding=2, bias=False), # (seq+2) x (n_features+2)
            nn.BatchNorm2d(60),
            nn.ReLU(inplace=True),

            nn.Conv2d(60, 120, kernel_size=(3,3), padding=1, bias=False), # (seq+2) x (n_features+2)
            nn.BatchNorm2d(120),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=(2,2)), # ((seq+2)//2) x ((n_features+2)//2)

            nn.Conv2d(120, 60, kernel_size=(1,1), padding=0, bias=True), # same spatial size
            nn.ReLU(inplace=True)
        )

    def forward(self, x):
        """Run the network on ``x`` and return ``(batch, seq, n_features)``."""
        if x.dim() == 3:
            # (batch, seq, n_features) -> (batch, 1, seq, n_features)
            x = x.unsqueeze(1)
        out = self.conv(x)
        out = out.view(out.size()[0], -1)
        out = self.linear(out)
        return out.view(out.size()[0], self.seq, self.n_features)


In [None]:
from torch.utils.data import random_split

# cr: fraction of the windows used for training; seed: seed of the split.
cr = 0.85
seed = 2

data_tot = torch.utils.data.TensorDataset(Input_Data_1, Output_Data_1)
train_size = int(Input_Data_1.shape[0] * cr)
test_size = Input_Data_1.shape[0] - train_size
# The generator is seeded from `seed` so the configured value controls the
# split (it was previously hard-coded to 0 and `seed` was ignored).
# NOTE(review): windows are built with stride 1 and therefore overlap, so a
# random split places rows shared with training windows into the test set;
# consider a chronological split if the test set must be unseen data.
train_set, test_set = random_split(
    data_tot,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(seed),
)

In [None]:
def R_square(A: torch.Tensor, B: torch.Tensor) -> list:
    """Coefficient of determination (R^2) for every output column.

    For each column, ``R^2 = 1 - SSE / SST`` with
    ``SSE = sum((B - A)^2)`` and ``SST = sum((B - mean(B))^2)``, both summed
    over the first (sample) dimension. A perfect prediction gives 1, always
    predicting the column mean gives 0, and worse predictions are negative.

    Args:
        A: Predictions, shape ``(n_samples, ...)``.
        B: Ground truth with the same shape as ``A``.

    Returns:
        List of 0-d float32 tensors, one per column. Inputs with more than two
        dimensions are flattened to ``(n_samples, -1)`` first. A column whose
        ground truth is constant has ``SST == 0`` and yields ``-inf`` or
        ``nan`` from the division.

    Raises:
        AssertionError: If ``A`` and ``B`` differ in shape.
    """
    assert A.shape == B.shape, "Predict value not match the Ground Truth"

    def _as_2d(t):
        # Detach from autograd and view as (n_samples, n_columns).
        t = t.detach().to(torch.float32)
        return t.unsqueeze(1) if t.dim() == 1 else t.flatten(start_dim=1)

    # A: predict   B: ground truth
    pred, truth = _as_2d(A), _as_2d(B)
    sse = torch.sum((truth - pred) ** 2, dim=0)
    sst = torch.sum((truth - torch.mean(truth, dim=0)) ** 2, dim=0)
    # SSR / SST only equals R^2 for a least-squares linear fit with intercept,
    # so the general definition 1 - SSE / SST is used.
    r2 = 1 - sse / sst
    return list(r2)

# Sanity check: a prediction identical to the ground truth must give R^2 = 1
# for every column.
A = torch.arange(12.).reshape(3,4)   # test
R_square(A, A)

In [None]:
x_plt, train_loss_plt = [], []
lr, num_epoch, batch_size = 0.001, 1000, 8
Data_Iter = data.DataLoader(train_set, batch_size, shuffle=True)

# Build the model for the window length actually present in the data rather
# than a separately hard-coded value.
seq_len = Input_Data_1.shape[1]
net = Try(batch_size=batch_size, seq=seq_len)
Loss = torch.nn.MSELoss()
optimizer = torch.optim.Adam(net.parameters(), lr)
# NOTE(review): gamma=0.9 per epoch shrinks the learning rate by a factor of
# about 2.7e-5 after 100 epochs, so most of the 1000 epochs barely update the
# weights -- confirm the schedule.
scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
loss_list = []
n_train = len(train_set)


print("Start Training...")
for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_loss = 0.0
    epoch_preds, epoch_targets = [], []
    net.train()
    for inputs, targets in Data_Iter:
        optimizer.zero_grad()
        train_pred = net(inputs)    # inputs.cuda()

        batch_loss = Loss(train_pred, targets)   # targets.cuda()
        batch_loss.backward()
        optimizer.step()

        n_batch = inputs.shape[0]
        # MSELoss returns the batch mean; weight it by the batch size so the
        # division below gives the mean loss per sample.
        train_loss += batch_loss.item() * n_batch
        # Keep flattened (n, columns) copies for an epoch-level R^2.
        epoch_preds.append(train_pred.detach().reshape(n_batch, -1))
        epoch_targets.append(targets.reshape(n_batch, -1))

    train_loss = train_loss / n_train
    # R_square returns one value per column; report the mean of the finite
    # ones as a single number.
    r2_cols = torch.stack([
        torch.as_tensor(v, dtype=torch.float32)
        for v in R_square(torch.cat(epoch_preds), torch.cat(epoch_targets))
    ])
    r2_finite = r2_cols[torch.isfinite(r2_cols)]
    R2 = r2_finite.mean().item() if r2_finite.numel() else float("nan")

    x_plt.append(epoch+1)
    train_loss_plt.append(train_loss)
    loss_list.append(train_loss)
    print("[%2d|%2d] %.2f(s) R-squared=%.6f Train_Loss=%.6f"%\
            (epoch+1,num_epoch,time.time()-epoch_start_time,R2,train_loss))
    scheduler.step()

fig, ax = plt.subplots()
ax.plot(x_plt,train_loss_plt,'rs-',label='all_train_loss')
ax.set_xlabel('epoch')
ax.set_ylabel('mean training MSE')
ax.legend()
plt.show()

# The file name is tagged with the number of epochs; the previous expression
# referenced an undefined name and raised NameError after training finished.
torch.save(net.state_dict(),'./model_cnn'+str(num_epoch)+'.pth')
print("Parameters Saved.")

