In [15]:
import numpy as np
import pandas as pd
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils import data
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from torch.optim import lr_scheduler
from torch.optim import lr_scheduler
import time
import matplotlib.pyplot as plt
from tqdm import tqdm

def normalization(x: list):
    M, m = np.max(x), np.min(x)
    for i in range(len(x)):
        x[i] = (x[i] - (M + m) / 2) / ((M - m) / 2)
    # x in [-1, 1]
    return M, m, x

def ArrNorm(x: np.ndarray):
    assert isinstance(x, np.ndarray), "We need a list"
    M_list, m_list, res = [], [], []
    for i in range(x.shape[0]):
        u = x[i].tolist()
        M, m, t = normalization(u)
        res.append(t)
        M_list.append(M)
        m_list.append(m)
    return M_list, m_list, np.array(res)


def df2arr(x) -> np.ndarray:
    return np.array(x, dtype=np.float32)


In [16]:
excel = pd.read_excel('./data/A32.xlsx', header=None)
excel.shape

(4459, 18)

In [17]:
sp = [1486, 2972, 4458]
station_1 = excel.iloc[1:sp[0]+1,1:6]
station_2 = excel.iloc[sp[0]+1:sp[1]+1,1:6]
standard = excel.iloc[sp[1]+1:sp[2]+1,1:6]
standard

Unnamed: 0,1,2,3,4,5
2973,16,33,15,1.5,74
2974,16,33,15,1.7,74
2975,16,38,16,1.8,76
2976,18,41,21,1.9,79
2977,17,41,23,1.7,78
...,...,...,...,...,...
4454,16,23,49,21,99
4455,18,26,55,20.7,99
4456,19,28,52,20.8,99
4457,22,31,41,21,98


In [18]:
station_1 = df2arr(station_1)
station_2 = df2arr(station_2)
standard = df2arr(standard)
station_1.shape, station_2.shape, standard.shape

((1486, 5), (1486, 5), (1486, 5))

In [19]:
s1_minus_sd = station_1 - standard
s2_minus_sd = station_2 - standard
s1_div_sd = station_1 / standard
s2_dic_sd = station_2 / standard

  s1_div_sd = station_1 / standard
  s2_dic_sd = station_2 / standard


In [20]:
s1_minus_sd.shape, s2_minus_sd.shape

((1486, 5), (1486, 5))

In [21]:
s1_minus_sd_M, s1_minus_sd_m, s1_minus_sd = ArrNorm(s1_minus_sd)

In [22]:
def GetDataset(input_arr: list, output_arr: list, seq: int):
    assert(len(input_arr)==len(output_arr)), "Different size of input and output!"
    Input = []
    Output = []
    for i in range(input_arr.shape[0]-seq):
        Input.append(input_arr[i:i+seq][:])
        Output.append(output_arr[i:i+seq][:])
    return torch.tensor(Input, dtype=torch.float32), torch.tensor(Output, dtype=torch.float32)

        
def load_array(data_arrays, batch_size, is_train=True):
    # data-iter
    dataset = data.TensorDataset(*data_arrays)
    return data.DataLoader(dataset, batch_size, shuffle=is_train)

s1_minus_sd.shape

(1486, 5)

In [23]:
Input_Data_1, Output_Data_1 = GetDataset(s1_minus_sd, standard, 15)
Input_Data_1.shape


torch.Size([1471, 15, 5])

In [24]:
Input_Data_1 = Input_Data_1.unsqueeze(1)
Output_Data_1 = Output_Data_1.unsqueeze(1)
Input_Data_1.shape, Output_Data_1.shape

(torch.Size([1471, 1, 15, 5]), torch.Size([1471, 1, 15, 5]))

In [39]:
# one train demo

class Try(nn.Module):
    def __init__(self, seq, batch_size, scale=0):
        super(Try, self).__init__()
        self.scale = scale
        self.seq = seq
        self.batch_size = batch_size
        self.linear = nn.Sequential(
            nn.Linear((self.seq+1)*6*60, (self.seq+1)*60),
            nn.Dropout(0.5),
            nn.Sigmoid(),

            nn.Linear((self.seq+1)*60, (self.seq+1)*6),
            nn.Dropout(0.5),
            nn.ReLU(inplace=True),

            nn.Linear((self.seq+1)*6, (self.seq+1)*6),
            nn.Dropout(0.5),
            nn.ReLU(inplace=True)
        )
        self.conv1 = nn.Sequential(
            # seq * 5 
            nn.Conv2d(1, 30, kernel_size=(3,3), padding=2, bias=False), # (seq+2) * 7
            nn.BatchNorm2d(30),
            nn.ReLU(inplace=True),

            nn.Conv2d(30, 60, kernel_size=(3,3), padding=1, bias=False), # (seq+2) * 7
            nn.BatchNorm2d(60),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=(2,2), stride=1), # (seq+1) * 6
            
        )
        
        self.conv2 = nn.Sequential(
            nn.Conv2d(1, 30, kernel_size=(2,2), padding=0, bias=False), # seq * 5
            nn.BatchNorm2d(30),
            nn.ReLU(inplace=True),

            nn.Conv2d(30, 1, kernel_size=(1,1), padding=0, bias=True), # seq * 5
            nn.ReLU(inplace=True)

        )

    def forward(self, x):
        out = self.conv1(x)
        out = out.view(out.size()[0], -1)
        out = self.linear(out)
        # print(out.shape)
        with torch.no_grad():
            out = out.reshape(self.batch_size, 1, self.seq+1, 6)
        out = self.conv2(out)      
        return out


In [26]:
from torch.utils.data import random_split

global cr, seed
cr = 0.85
seed = 2

data_tot = torch.utils.data.TensorDataset(Input_Data_1, Output_Data_1)
train_size = int(Input_Data_1.shape[0] * cr)
test_size = Input_Data_1.shape[0] - train_size
train_set , test_set = random_split(data_tot,[train_size,test_size],
                                   torch.Generator().manual_seed(0))
# DataIter = load_array((Input_Data_1, Output_Data_1), batch_size=8)


In [44]:
def R_square(A: torch.tensor, B: torch.tensor) -> torch.float32:
    assert A.shape == B.shape, "Predict value not match the Ground Truth"
    # A: predict   B: ground truth
    # shape: batch_size * 1 * w * h
    A = A.detach()
    B = B.detach()
    A = A.squeeze()
    B = B.squeeze()
    # batch_size * w * h
    *_, h = A.shape
    pre_bar = torch.mean(A, dim=[0,1], keepdim=False)
    gt_bar = torch.mean(B, dim=[0,1], keepdim=False)
    # print(pre_bar.shape[0])

    def sq_sum(x):
        # print(x.shape)
        x = torch.tensor(x, dtype=torch.float32)
        return torch.sum(x * x, dim=[0,1])
    # print(A[:,:,1].shape, pre_bar[1].shape)
    SST = [sq_sum(A[:,:,i] - pre_bar[i]) for i in range(h)]
    SSR = [sq_sum(B[:,:,i] - gt_bar[i]) for i in range(h)]


    return [ (SST[i] / SSR[i]) for i in range(h) ]

"""
R-squared = SSR / SST = 1 - SSE / SST
"""
A = torch.arange(48.*2).reshape(2,1,4,12)   # test
R_square(A, A)

  x = torch.tensor(x, dtype=torch.float32)


[tensor(1.),
 tensor(1.),
 tensor(1.),
 tensor(1.),
 tensor(1.),
 tensor(1.),
 tensor(1.),
 tensor(1.),
 tensor(1.),
 tensor(1.),
 tensor(1.),
 tensor(1.)]

In [41]:
x_plt, train_loss_plt = [], []
global lr, num_epoch, batch_size
lr, num_epoch, batch_size = 0.001, 1000, 10
Data_Iter = data.DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)

net = Try(batch_size=batch_size, seq=15)
Loss = torch.nn.MSELoss()
optimizer = torch.optim.Adam(net.parameters(), lr)
scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
loss_list = []


print("Start Training...")
for epoch in range(num_epoch):
    epoch_start_time = time.time()
    train_loss = 0.0
    net.train()
    for i, use in enumerate(tqdm(Data_Iter)):
        optimizer.zero_grad()
        # if use[0].shape[0]==2:
        #     print(use[0])
        train_pred = net(use[0])    # use[0].cuda()

        batch_loss = Loss(train_pred, use[1])   # use[1].cuda()
        batch_loss.backward()
        optimizer.step()
        R2 = R_square(train_pred, use[1])

        train_loss += batch_loss.item()

    train_loss = train_loss / train_size
    x_plt.append(epoch+1)
    train_loss_plt.append(train_loss)
    print("[%2d|%2d] %.2f(s) Train_Loss=%.6f (%%)"%\
            (epoch+1,num_epoch,time.time()-epoch_start_time,train_loss),end='')
    print("R-squared: ",end='')
    R2 = torch.Tensor(R2).tolist()
    print(R2)
    scheduler.step()

plt.figure(1)
# plt.plot(x_plt,train_acc_plt,'ob',label='all_train_acc')
# plt.figure(2)
plt.plot(x_plt,train_loss_plt,'rs-',label='all_train_loss')
plt.show()

torch.save(net.state_dict(),'./model_cnn.pth')
print("Parameters Saved.")  



Start Training...


  x = torch.tensor(x, dtype=torch.float32)
100%|██████████| 125/125 [00:01<00:00, 83.70it/s]


[ 1|1000] 1.51(s) Train_Loss=225.370374 (%)R-squared: [0.03175225108861923, 0.008131429553031921, 0.007069754414260387, 0.4068453013896942, 0.03185199573636055]


100%|██████████| 125/125 [00:01<00:00, 99.34it/s]


[ 2|1000] 1.26(s) Train_Loss=207.298831 (%)R-squared: [0.10405036807060242, 0.031928371638059616, 0.016146285459399223, 1.894513726234436, 0.06661615520715714]


100%|██████████| 125/125 [00:01<00:00, 98.27it/s]


[ 3|1000] 1.28(s) Train_Loss=185.708618 (%)R-squared: [0.20180198550224304, 0.05431358888745308, 0.02498510479927063, 1.302838921546936, 0.09180133789777756]


100%|██████████| 125/125 [00:01<00:00, 99.79it/s] 


[ 4|1000] 1.26(s) Train_Loss=162.685455 (%)R-squared: [0.15350385010242462, 0.05046546831727028, 0.0912424623966217, 4.729371070861816, 0.20897717773914337]


100%|██████████| 125/125 [00:01<00:00, 114.37it/s]


[ 5|1000] 1.10(s) Train_Loss=142.098632 (%)R-squared: [0.27406004071235657, 0.07554352283477783, 0.07886458188295364, 0.4753417372703552, 0.14170411229133606]


 91%|█████████ | 114/125 [00:00<00:00, 118.56it/s]


KeyboardInterrupt: 