In [1]:
import os
from time import time
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx



import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from scipy.sparse.linalg import eigs


# Select the compute device once for the whole notebook: the first GPU when CUDA
# is available, otherwise the CPU, so that later `.to(DEVICE)` calls do not fail
# on CPU-only machines.
USE_CUDA = torch.cuda.is_available()
DEVICE = torch.device('cuda:0' if USE_CUDA else 'cpu')
print("CUDA:", USE_CUDA, DEVICE)

from tensorboardX import SummaryWriter
# TensorBoard writer shared by the training and validation code below.
# Event files are written to ./Student_MSE; flush_secs=5 is the flush interval passed to the writer.
sw = SummaryWriter(logdir='./Student_MSE', flush_secs=5)

import math
from typing import Optional, List, Union

import torch
import torch.nn as nn
from torch.nn import Parameter
import torch.nn.functional as F

from torch_geometric.data import Data
from torch_geometric.typing import OptTensor
from torch_geometric.nn.conv import MessagePassing
from torch_geometric.transforms import LaplacianLambdaMax
from torch_geometric.utils import remove_self_loops, add_self_loops, get_laplacian
from torch_geometric.utils import to_dense_adj
from torch_scatter import scatter_add

CUDA: True cuda:0


In [2]:
def load_graphdata_channel1(graph_signal_matrix_filename, num_of_hours, num_of_days, num_of_weeks, batch_size,
                            shuffle=True, DEVICE = torch.device('cuda:0')):
    '''
    Load the pre-split train/val/test arrays from an ``.npz`` archive, keep only the
    first input feature, move everything to ``DEVICE`` and wrap it in DataLoaders.

    The archive is always read from the fixed directory ``./data/PEMS04/``; only the
    base name of ``graph_signal_matrix_filename`` (text before the first '.') is used
    to build the archive name ``<base>_r<hours>_d<days>_w<weeks>_astcgn.npz``.

    :param graph_signal_matrix_filename: str, path whose base name identifies the dataset
    :param num_of_hours: int, used only to build the archive name
    :param num_of_days: int, used only to build the archive name
    :param num_of_weeks: int, used only to build the archive name
    :param batch_size: int, batch size of all three DataLoaders
    :param shuffle: bool, whether the *training* loader is shuffled (val/test never are)
    :param DEVICE: torch.device that all tensors are moved to
    :return: tuple of eight items
        (train_loader, train_target_tensor,
         val_loader, val_target_tensor,
         test_loader, test_target_tensor,
         mean, std)
        Each loader yields ``(x, target)`` batches with
        x: (B, N_nodes, 1, T_input) and target: (B, N_nodes, T_output), both float32.
        ``mean`` and ``std`` are the numpy arrays stored in the archive, restricted
        to the first feature (index 0 on axis 2).
    '''

    def _to_device_tensor(array):
        # numpy array -> float32 tensor on the requested device
        return torch.from_numpy(array).type(torch.FloatTensor).to(DEVICE)

    def _make_loader(x_tensor, target_tensor, shuffle_batches):
        # Pair inputs with targets and batch them
        dataset = torch.utils.data.TensorDataset(x_tensor, target_tensor)
        return torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=shuffle_batches)

    file = os.path.basename(graph_signal_matrix_filename).split('.')[0]
    filename = os.path.join('./data/PEMS04/', file + '_r' + str(num_of_hours) + '_d' + str(num_of_days) + '_w' + str(num_of_weeks)) +'_astcgn'
    print('load file:', filename)

    # NpzFile keeps the archive open until closed; the context manager releases the
    # file handle as soon as every array has been read into memory.
    with np.load(filename + '.npz') as file_data:
        # Keep only feature 0 of the input signal (slice 0:1 preserves the feature axis).
        train_x = file_data['train_x'][:, :, 0:1, :]
        train_target = file_data['train_target']

        val_x = file_data['val_x'][:, :, 0:1, :]
        val_target = file_data['val_target']

        test_x = file_data['test_x'][:, :, 0:1, :]
        test_target = file_data['test_target']

        mean = file_data['mean'][:, :, 0:1, :]
        std = file_data['std'][:, :, 0:1, :]

    # ------- train_loader -------
    train_x_tensor = _to_device_tensor(train_x)  # (B, N, F, T)
    train_target_tensor = _to_device_tensor(train_target)  # (B, N, T)
    train_loader = _make_loader(train_x_tensor, train_target_tensor, shuffle)

    # ------- val_loader -------
    val_x_tensor = _to_device_tensor(val_x)
    val_target_tensor = _to_device_tensor(val_target)
    val_loader = _make_loader(val_x_tensor, val_target_tensor, False)

    # ------- test_loader -------
    test_x_tensor = _to_device_tensor(test_x)
    test_target_tensor = _to_device_tensor(test_target)
    test_loader = _make_loader(test_x_tensor, test_target_tensor, False)

    # print
    print('train:', train_x_tensor.size(), train_target_tensor.size())
    print('val:', val_x_tensor.size(), val_target_tensor.size())
    print('test:', test_x_tensor.size(), test_target_tensor.size())

    return train_loader, train_target_tensor, val_loader, val_target_tensor, test_loader, test_target_tensor, mean, std


In [3]:
# Dataset selection and batching. The three num_of_* values only select which
# pre-processed archive is opened (they end up in the "_r1_d1_w1" file-name suffix).
graph_signal_matrix_filename = './data/PEMS04/pems04.npz'
batch_size = 16
num_of_weeks = num_of_days = num_of_hours = 1

# Loaders plus the full target tensors for each split; _mean/_std are the
# normalisation statistics stored alongside the data.
(train_loader, train_target_tensor,
 val_loader, val_target_tensor,
 test_loader, test_target_tensor,
 _mean, _std) = load_graphdata_channel1(
    graph_signal_matrix_filename=graph_signal_matrix_filename,
    num_of_hours=num_of_hours,
    num_of_days=num_of_days,
    num_of_weeks=num_of_weeks,
    batch_size=batch_size,
)

load file: ./data/PEMS04/pems04_r1_d1_w1_astcgn
train: torch.Size([13575, 307, 1, 12]) torch.Size([13575, 307, 12])
val: torch.Size([1697, 307, 1, 12]) torch.Size([1697, 307, 12])
test: torch.Size([1697, 307, 1, 12]) torch.Size([1697, 307, 12])


In [4]:
def get_adjacency_matrix(distance_df_filename, num_of_vertices, id_filename=None):
    '''
    Build the binary adjacency matrix and the distance matrix of the sensor graph.

    Parameters
    ----------
    distance_df_filename: str, path of the edge file. A path ending in ``.npy`` is
        loaded directly as a pre-computed adjacency matrix; anything else is read as
        a CSV with one header line followed by ``from,to,distance`` rows.
    num_of_vertices: int, the number of vertices
    id_filename: str or None, optional text file with one node id per line; when
        given, the ids found in the CSV are mapped to their 0-based line index.

    Returns
    ----------
    (A, distaneA): tuple
        A: np.ndarray (num_of_vertices, num_of_vertices), float32, A[i, j] = 1 for every listed edge
        distaneA: np.ndarray of the same shape holding the edge distance.
        For a ``.npy`` input the result is ``(loaded_array, None)``.
    '''
    # Decide on the file extension only: a plain substring test would also match
    # CSV files that merely live under a path containing "npy".
    if str(distance_df_filename).endswith('.npy'):
        adj_mx = np.load(distance_df_filename)
        return adj_mx, None

    import csv

    n = int(num_of_vertices)
    A = np.zeros((n, n), dtype=np.float32)
    distaneA = np.zeros((n, n), dtype=np.float32)

    # Optional mapping from the node ids used in the CSV to 0-based matrix indices.
    id_dict = None
    if id_filename:
        with open(id_filename, 'r') as f:
            id_dict = {int(i): idx for idx, i in enumerate(f.read().strip().split('\n'))}

    with open(distance_df_filename, 'r') as f:
        f.readline()  # skip the header line
        for row in csv.reader(f):
            if len(row) != 3:
                continue  # ignore blank or malformed rows
            i, j, distance = int(row[0]), int(row[1]), float(row[2])
            if id_dict is not None:
                i, j = id_dict[i], id_dict[j]
            A[i, j] = 1
            distaneA[i, j] = distance
    return A, distaneA

In [5]:
id_filename = None
adj_filename = './data/PEMS04/PEMS04.csv'
num_of_vertices = 307
# adj_mx and distance_mx are both (num_of_vertices, num_of_vertices)
adj_mx, distance_mx = get_adjacency_matrix(adj_filename, num_of_vertices, id_filename)

# Coordinates of every non-zero adjacency entry, computed once and reused for both
# the networkx graph and the edge-index tensor.
rows, cols = np.where(adj_mx == 1)
edges = list(zip(rows.tolist(), cols.tolist()))  # materialised so it can be iterated again

gr = nx.Graph()
gr.add_edges_from(edges)
# nx.draw(gr, node_size=3)
plt.show()

# Edge list as a (2, num_edges) integer tensor: row 0 = source index, row 1 = target index.
edge_index_data = torch.LongTensor(np.array([rows, cols])).to(DEVICE)

In [6]:
from model.ASTGCN import ASTGCN

In [7]:
# Teacher hyper-parameters. How they surface in the model can be read off the
# printed module: two ASTGCNBlock entries, ChebConvAttention(1, 64, K=3) in the first
# block, 64-channel time convolutions and Dropout2d(p=0.01).
# NOTE(review): the exact meaning of each positional argument depends on the
# ASTGCN constructor in model/ASTGCN.py, which is not shown here - confirm there.
nb_block = 2
in_channels = 1
K = 3
nb_chev_filter =64
nb_time_filter = 64
# Tied to the dataset setting num_of_hours chosen in the data-loading cell.
time_strides = num_of_hours
num_for_predict = 12
len_input = 12
dropout=0.01

# Disabled leftovers from an earlier variant that precomputed Chebyshev polynomials.
#L_tilde = scaled_Laplacian(adj_mx)
#cheb_polynomials = [torch.from_numpy(i).type(torch.FloatTensor).to(DEVICE) for i in cheb_polynomial(L_tilde, K)]
# net = ASTGCN( nb_block, in_channels, K, nb_chev_filter, nb_time_filter, time_strides, num_for_predict, len_input, num_of_vertices).to(DEVICE)
Teacher=ASTGCN( nb_block, in_channels, K, nb_chev_filter, nb_time_filter, time_strides, num_for_predict, len_input, num_of_vertices,dropout).to(DEVICE)
print(Teacher)
print(time_strides)


ASTGCN(
  (_blocklist): ModuleList(
    (0): ASTGCNBlock(
      (_temporal_attention): TemporalAttention()
      (_spatial_attention): SpatialAttention()
      (_chebconv_attention): ChebConvAttention(1, 64, K=3, normalization=None)
      (_time_convolution): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
      (_residual_convolution): Conv2d(1, 64, kernel_size=(1, 1), stride=(1, 1))
      (_layer_norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
      (_dropout): Dropout2d(p=0.01, inplace=True)
    )
    (1): ASTGCNBlock(
      (_temporal_attention): TemporalAttention()
      (_spatial_attention): SpatialAttention()
      (_chebconv_attention): ChebConvAttention(64, 64, K=3, normalization=None)
      (_time_convolution): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
      (_residual_convolution): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
      (_layer_norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
      (_dropout): 

In [8]:
# Student hyper-parameters: same filter counts and dropout as the teacher cell, but a
# single block (nb_blockstdn = 1), as the printed module confirms.
# in_channels, K, time_strides, num_for_predict, len_input and num_of_vertices are
# reused from earlier cells.
nb_chev_filterstdn = 64
nb_time_filterstdn = 64
nb_blockstdn = 1
dropouts=0.01
Student=ASTGCN( nb_blockstdn, in_channels, K, nb_chev_filterstdn, nb_time_filterstdn, time_strides, num_for_predict, len_input, num_of_vertices,dropouts).to(DEVICE)
print(Student)

ASTGCN(
  (_blocklist): ModuleList(
    (0): ASTGCNBlock(
      (_temporal_attention): TemporalAttention()
      (_spatial_attention): SpatialAttention()
      (_chebconv_attention): ChebConvAttention(1, 64, K=3, normalization=None)
      (_time_convolution): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
      (_residual_convolution): Conv2d(1, 64, kernel_size=(1, 1), stride=(1, 1))
      (_layer_norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
      (_dropout): Dropout2d(p=0.01, inplace=True)
    )
  )
  (_final_conv): Conv2d(12, 12, kernel_size=(1, 64), stride=(1, 1))
)


# Initialized teacher model

In [9]:
#------------------------------------------------------
learning_rate = 0.001
# Adam optimizer for the teacher; a positive weight_decay adds an L2 penalty on the weights.
optimizerTeacher = optim.Adam(Teacher.parameters(), lr=learning_rate, weight_decay=1e-6)

print('Teacher\'s state_dict:')
# Build the state_dict once instead of on every access inside the loop.
teacher_state = Teacher.state_dict()
total_param = 0
for param_tensor, value in teacher_state.items():
    print(param_tensor, '\t', value.size(), '\t', value.device)
    # numel() is an exact integer count (also for 0-dim entries).
    # The state_dict may include buffers as well as trainable parameters.
    total_param += value.numel()
print('Teacher\'s total params:', total_param)
#--------------------------------------------------
print('Optimizer\'s state_dict:')
for var_name, value in optimizerTeacher.state_dict().items():
    print(var_name, '\t', value)


Teacher's state_dict:
_blocklist.0._temporal_attention._U1 	 torch.Size([307]) 	 cuda:0
_blocklist.0._temporal_attention._U2 	 torch.Size([1, 307]) 	 cuda:0
_blocklist.0._temporal_attention._U3 	 torch.Size([1]) 	 cuda:0
_blocklist.0._temporal_attention._be 	 torch.Size([1, 12, 12]) 	 cuda:0
_blocklist.0._temporal_attention._Ve 	 torch.Size([12, 12]) 	 cuda:0
_blocklist.0._spatial_attention._W1 	 torch.Size([12]) 	 cuda:0
_blocklist.0._spatial_attention._W2 	 torch.Size([1, 12]) 	 cuda:0
_blocklist.0._spatial_attention._W3 	 torch.Size([1]) 	 cuda:0
_blocklist.0._spatial_attention._bs 	 torch.Size([1, 307, 307]) 	 cuda:0
_blocklist.0._spatial_attention._Vs 	 torch.Size([307, 307]) 	 cuda:0
_blocklist.0._chebconv_attention._weight 	 torch.Size([3, 1, 64]) 	 cuda:0
_blocklist.0._chebconv_attention._bias 	 torch.Size([64]) 	 cuda:0
_blocklist.0._time_convolution.weight 	 torch.Size([64, 64, 1, 3]) 	 cuda:0
_blocklist.0._time_convolution.bias 	 torch.Size([64]) 	 cuda:0
_blocklist.0._resid

## Initialized student model

In [10]:
#------------------------------------------------------
# Adam optimizer for the student, with the same learning rate and weight decay as the teacher.
optimizerStudent = optim.Adam(Student.parameters(), lr=1e-3, weight_decay=1e-6)

# The header names the student (it previously printed "Teacher's state_dict:").
print('Student\'s state_dict:')
# Build the state_dict once instead of on every access inside the loop.
student_state = Student.state_dict()
total_param = 0
for param_tensor, value in student_state.items():
    print(param_tensor, '\t', value.size(), '\t', value.device)
    # numel() is an exact integer count (also for 0-dim entries).
    # The state_dict may include buffers as well as trainable parameters.
    total_param += value.numel()
print('Student\'s total params:', total_param)
#--------------------------------------------------
print('Optimizer\'s state_dict:')
for var_name, value in optimizerStudent.state_dict().items():
    print(var_name, '\t', value)


Teacher's state_dict:
_blocklist.0._temporal_attention._U1 	 torch.Size([307]) 	 cuda:0
_blocklist.0._temporal_attention._U2 	 torch.Size([1, 307]) 	 cuda:0
_blocklist.0._temporal_attention._U3 	 torch.Size([1]) 	 cuda:0
_blocklist.0._temporal_attention._be 	 torch.Size([1, 12, 12]) 	 cuda:0
_blocklist.0._temporal_attention._Ve 	 torch.Size([12, 12]) 	 cuda:0
_blocklist.0._spatial_attention._W1 	 torch.Size([12]) 	 cuda:0
_blocklist.0._spatial_attention._W2 	 torch.Size([1, 12]) 	 cuda:0
_blocklist.0._spatial_attention._W3 	 torch.Size([1]) 	 cuda:0
_blocklist.0._spatial_attention._bs 	 torch.Size([1, 307, 307]) 	 cuda:0
_blocklist.0._spatial_attention._Vs 	 torch.Size([307, 307]) 	 cuda:0
_blocklist.0._chebconv_attention._weight 	 torch.Size([3, 1, 64]) 	 cuda:0
_blocklist.0._chebconv_attention._bias 	 torch.Size([64]) 	 cuda:0
_blocklist.0._time_convolution.weight 	 torch.Size([64, 64, 1, 3]) 	 cuda:0
_blocklist.0._time_convolution.bias 	 torch.Size([64]) 	 cuda:0
_blocklist.0._resid

In [11]:
def masked_mae(preds, labels, null_val=np.nan):
    """Mean absolute error that ignores label entries equal to ``null_val``.

    Entries whose label is ``null_val`` (or NaN when ``null_val`` is NaN) get weight 0;
    the remaining weights are rescaled by the mean of the 0/1 mask. NaNs produced
    along the way (e.g. when every entry is masked) are replaced by 0 before averaging
    over all elements.
    """
    def _zero_out_nan(values):
        return torch.where(torch.isnan(values), torch.zeros_like(values), values)

    valid = ~torch.isnan(labels) if np.isnan(null_val) else (labels != null_val)
    weights = valid.float()
    weights = _zero_out_nan(weights / torch.mean(weights))
    weighted_abs_err = _zero_out_nan(torch.abs(preds - labels) * weights)
    return torch.mean(weighted_abs_err)


In [12]:
class RMSELoss(nn.Module):
    """Root-mean-square error: the square root of ``nn.MSELoss`` with its default settings."""

    def __init__(self):
        super(RMSELoss, self).__init__()
        self.mse = nn.MSELoss()

    def forward(self,ytil,y):
        """Return sqrt(MSE(ytil, y)) as a 0-dim tensor."""
        mean_squared_error = self.mse(ytil, y)
        return mean_squared_error.sqrt()

In [13]:
# Distillation weighting: the training loop weights the student's own loss by alpha
# and the imitation (student-vs-teacher) loss by (1 - alpha).
alpha=0.4
masked_flag=0
criterionStudent=RMSELoss().to(DEVICE)

criterion_masked = masked_mae
loss_function = 'mae'

metric_method = 'unmask'
missing_value=0.0

# Select the auxiliary criterion by name; masked_flag records whether the chosen
# criterion is one of the masked variants.
if loss_function=='masked_mse':
    # NOTE(review): masked_mse is not defined in any cell of this notebook; selecting
    # 'masked_mse' raises NameError until an implementation is provided.
    criterion_masked = masked_mse
    masked_flag=1
elif loss_function=='masked_mae':
    criterion_masked = masked_mae
    masked_flag = 1
elif loss_function == 'mae':
    criterion = nn.L1Loss().to(DEVICE)
    masked_flag = 0
elif loss_function == 'rmse':
    # NOTE(review): this option is named 'rmse' but uses plain MSE - confirm the intent.
    criterion = nn.MSELoss().to(DEVICE)
    masked_flag= 0
else:
    # Fail here instead of leaving `criterion` silently undefined for a mistyped name.
    raise ValueError("unknown loss_function: %r (expected 'masked_mse', 'masked_mae', 'mae' or 'rmse')" % (loss_function,))

In [14]:
# Per-epoch histories filled during training (validation loss, distillation loss);
# `distloss` is created here but not written by any visible cell.
ValLoss, TrainLoss, distloss = [], [], []

In [15]:
def compute_val_loss_mstgcn(net, val_loader, criterion,  masked_flag,missing_value,sw, epoch, edge_index_data, limit=None):
    '''
    Compute the mean per-batch loss of ``net`` on the validation set.

    Side effects: switches ``net`` to evaluation mode (it is not switched back),
    appends the result to the module-level ``ValLoss`` list and logs it to ``sw``
    under the tag 'validation_loss'.

    :param net: model, called as ``net(encoder_inputs, edge_index_data)``
    :param val_loader: iterable of ``(encoder_inputs, labels)`` batches supporting ``len()``
    :param criterion: callable returning a scalar loss tensor
    :param masked_flag: truthy when ``criterion`` is a masked loss; it is then called as
        ``criterion(outputs, labels, missing_value)``, otherwise as ``criterion(outputs, labels)``
    :param missing_value: label value the masked criterion should ignore
    :param sw: tensorboardX.SummaryWriter
    :param epoch: int, x-axis value used when logging the validation loss
    :param edge_index_data: graph edge index forwarded to ``net``
    :param limit: int or None, stop after the batch with this 0-based index has been processed
    :return: val_loss (float)
    :raises ValueError: if ``val_loader`` yields no batches
    '''
    net.train(False)  # ensure dropout layers are in evaluation mode
    with torch.no_grad():
        val_loader_length = len(val_loader)  # nb of batch
        tmp = []  # batch loss
        for batch_index, batch_data in enumerate(val_loader):
            encoder_inputs, labels = batch_data
            outputs = net(encoder_inputs, edge_index_data)
            if masked_flag:
                # Masked criteria need to know which label value marks missing data.
                loss = criterion(outputs, labels, missing_value)
            else:
                loss = criterion(outputs, labels)
            tmp.append(loss.item())
            if batch_index % 20 == 0:
                print('validation batch %s / %s, loss: %.2f' % (batch_index + 1, val_loader_length, loss.item()))
            if (limit is not None) and batch_index >= limit:
                break

        if not tmp:
            raise ValueError('val_loader produced no batches; cannot compute a validation loss')

        validation_loss = sum(tmp) / len(tmp)
        ValLoss.append(validation_loss)
        sw.add_scalar('validation_loss', validation_loss, epoch)
        print("rata rata val_loss",validation_loss,"pada epoch:",epoch)
    return validation_loss

In [16]:
# Bookkeeping for the training run: step counter, best validation result so far
# and the wall-clock reference used in progress messages.
global_step = best_epoch = 0
best_val_loss = np.inf
start_time = time()

# load weights from teacher

In [17]:
# Restore the trained teacher (weights and optimizer state) from its saved checkpoint.
epoch = 22
params_filename = os.path.join('./Teacher/', 'checkpoint_%s.pth' % epoch)
# map_location places the stored tensors on the device this notebook runs on, so a
# checkpoint saved on a GPU can also be restored on a CPU-only machine.
# NOTE: torch.load unpickles the file - only load checkpoints from a trusted source.
loaded_checkpoint = torch.load(params_filename, map_location=DEVICE)
epoch = loaded_checkpoint['epoch']
Teacher.load_state_dict(loaded_checkpoint['model_state'])
optimizerTeacher.load_state_dict(loaded_checkpoint['optimizer_state'])
Teacher.eval()

ASTGCN(
  (_blocklist): ModuleList(
    (0): ASTGCNBlock(
      (_temporal_attention): TemporalAttention()
      (_spatial_attention): SpatialAttention()
      (_chebconv_attention): ChebConvAttention(1, 64, K=3, normalization=None)
      (_time_convolution): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
      (_residual_convolution): Conv2d(1, 64, kernel_size=(1, 1), stride=(1, 1))
      (_layer_norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
      (_dropout): Dropout2d(p=0.01, inplace=True)
    )
    (1): ASTGCNBlock(
      (_temporal_attention): TemporalAttention()
      (_spatial_attention): SpatialAttention()
      (_chebconv_attention): ChebConvAttention(64, 64, K=3, normalization=None)
      (_time_convolution): Conv2d(64, 64, kernel_size=(1, 3), stride=(1, 1), padding=(0, 1))
      (_residual_convolution): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
      (_layer_norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
      (_dropout): 

In [18]:
# Dump every entry of the restored teacher's state_dict (prints full tensors, so the output is very long).
print(Teacher.state_dict())

OrderedDict([('_blocklist.0._temporal_attention._U1', tensor([ 5.5869e-01, -1.2616e-01,  1.7045e-01,  2.3812e-02,  4.8195e-01,
         6.3830e-01,  9.7228e-02,  6.9284e-01,  6.2028e-02,  1.3427e-01,
         5.9608e-02,  2.1886e-01, -2.9185e-02, -2.1210e-01,  4.1344e-01,
         4.8402e-01,  4.7639e-01,  6.2355e-01, -3.7197e-01,  3.5150e-01,
         2.9970e-01, -5.1058e-02, -4.0474e-02,  3.1314e-01,  3.2762e-02,
         8.3597e-01,  3.0440e-01,  1.0644e-02,  7.0973e-02, -2.7030e-01,
        -4.8443e-02,  3.3125e-01, -1.4869e-01,  4.1654e-01,  1.9244e-01,
         4.4903e-01,  5.4413e-01,  1.9074e-01,  1.0784e+00, -1.2609e-01,
         1.2166e-01,  1.0992e-01, -5.5038e-01, -3.9896e-01,  4.6273e-02,
         7.7185e-01,  6.4420e-01,  5.1098e-01,  5.1247e-01, -1.4301e-01,
        -5.7959e-01, -3.9534e-01,  1.8895e-01,  6.8797e-01,  3.2177e-01,
        -2.0934e-01,  4.2424e-01,  4.4988e-03, -1.9095e-01, -1.6160e-02,
         4.4068e-01, -1.2400e-01,  7.1368e-02,  7.2863e-02,  5.0593e-0

In [19]:
# Show the restored optimizer state, one top-level state_dict entry per line.
teacher_optimizer_state = optimizerTeacher.state_dict()
for var_name, value in teacher_optimizer_state.items():
    print(var_name, "\t", value)

state 	 {0: {'step': tensor(19527.), 'exp_avg': tensor([ 1.0126e-03, -5.4395e-05,  4.7144e-04,  9.2481e-04, -1.1797e-03,
         2.1716e-03, -1.8800e-04, -1.4909e-03, -5.6754e-04,  9.3604e-04,
         2.5555e-03,  8.1660e-04,  2.6403e-03, -4.5370e-04, -6.5035e-05,
         5.9047e-04,  2.2746e-03,  2.1456e-03,  2.3619e-03,  3.2418e-03,
         1.4282e-04, -1.0995e-03, -9.4018e-04, -6.8637e-04,  1.2438e-03,
         9.0418e-04,  1.5194e-03, -8.3317e-04,  1.2811e-03,  6.2705e-04,
         9.1821e-04, -1.0021e-03,  2.9116e-03,  7.3326e-04,  7.7511e-04,
         1.6662e-03,  2.1342e-03,  1.1616e-03,  3.9137e-04,  4.0767e-04,
         1.5526e-03,  5.3927e-03,  3.1605e-03,  2.9166e-03, -1.5444e-03,
         2.4302e-03, -1.3441e-03, -4.5856e-04,  1.1417e-03, -8.8680e-04,
        -5.8099e-05,  3.3293e-03,  3.7254e-04,  1.4927e-03,  6.3597e-04,
         1.3350e-03,  3.1470e-03,  4.1953e-03,  5.3009e-03,  2.3086e-03,
        -1.9088e-03,  2.9275e-03, -6.1441e-04,  1.9771e-03,  7.5513e-04,
   

In [20]:
# Knowledge distillation: train the student against the ground truth and, while it
# is still worse than the frozen teacher on a batch, also against the teacher's output.
Studloss1=[]
for epoch in range(40):
    params_filename = os.path.join('./Student_MSE/', 'checkpoint_%s.pth' % epoch)
    tmp=[]   # per-batch distillation (total) loss
    tmp0=[]  # per-batch student-vs-labels loss
    Student.train()  # ensure dropout layers are in train mode
    Teacher.eval()
    for batch_index, batch_data in enumerate(train_loader):
        encoder_inputs, labels = batch_data   # encoder_inputs (B, 307, 1, 12), labels (B, 307, 12)
        optimizerStudent.zero_grad()
        P_s = Student(encoder_inputs, edge_index_data)  # student prediction
        with torch.no_grad():
            P_t=Teacher(encoder_inputs, edge_index_data)  # teacher prediction, no gradients needed
        P_tloss=criterionStudent(P_t,labels)
        P_sloss=criterionStudent(P_s,labels)
        if P_sloss > P_tloss:
            # Student is worse than the teacher on this batch: add the imitation term.
            loss=P_sloss * (alpha) + criterionStudent(P_s, P_t) * (1-alpha)
        else:
            # Student already matches or beats the teacher: imitation term is zero.
            loss=P_sloss*alpha
        loss.backward()

        optimizerStudent.step()
        distilation_loss = loss.item()
        # Convert once to a plain float so logging and printing do not hold on to
        # the autograd graph of the loss tensor.
        student_loss = P_sloss.item()
        tmp.append(distilation_loss)
        tmp0.append(student_loss)
        global_step += 1
        sw.add_scalar('training_loss', student_loss, global_step)
        if global_step % 100 == 0:
            print('global step: %s, Training loss: %.2f, Distilation loss: %.2f, time: %.2fs' % (global_step,student_loss,distilation_loss, time() - start_time))
    studloss=sum(tmp0)/len(tmp0)
    rtloss_t=sum(tmp) / len(tmp)
    Studloss1.append(studloss)
    TrainLoss.append(rtloss_t)
    print("rata rata Trainingloss: ",studloss,"rata rata Distilation loss",rtloss_t,"pada epoch:",epoch)
    val_loss = compute_val_loss_mstgcn(Student, val_loader, criterionStudent, masked_flag, missing_value, sw, epoch,edge_index_data)

    # Checkpoint only when the validation loss improves.
    if val_loss < best_val_loss:
        best_epoch=epoch
        best_val_loss = val_loss
        checkpoint = {
            'epoch': best_epoch,
            'model_state': Student.state_dict(),
            'optimizer_state': optimizerStudent.state_dict()
        }
        torch.save(checkpoint, params_filename)
        print('save parameters to file: %s' % params_filename)

print('best epoch:', best_epoch)
path=os.path.join("./Student_MSE/",'losshistval.npy')
path1=os.path.join("./Student_MSE/",'losshistdistilation.npy')
path2=os.path.join("./Student_MSE/",'losshisttrain.npy')

# Loss histories are written only after all epochs finish; ValLoss and TrainLoss are
# notebook-level lists, so they keep growing if this cell is re-run without resetting them.
np.save(path,ValLoss)
np.save(path1,TrainLoss)
np.save(path2,Studloss1)

KeyboardInterrupt: 

In [None]:
# NOTE(review): `output` is not assigned in any cell visible in this notebook (the
# evaluation cell further down defines `outputs`); on a fresh kernel this cell raises
# NameError - confirm the intended variable or remove the cell.
output

* label smoothing

In [None]:
# Restore a previously trained student (weights and optimizer state).
# NOTE(review): this reads from the './Student_MSE/alpha 0.6/' sub-folder, whereas the
# training cell writes checkpoints directly into './Student_MSE/' - confirm the path.
epoch =  37
params_filename = './Student_MSE/alpha 0.6/checkpoint_%s.pth' % epoch
# map_location places the stored tensors on the device this notebook runs on, so a
# checkpoint saved on a GPU can also be restored on a CPU-only machine.
# NOTE: torch.load unpickles the file - only load checkpoints from a trusted source.
loaded_checkpoint = torch.load(params_filename, map_location=DEVICE)
epoch = loaded_checkpoint['epoch']
Student.load_state_dict(loaded_checkpoint['model_state'])
optimizerStudent.load_state_dict(loaded_checkpoint['optimizer_state'])
Student.eval()

In [None]:
from time import process_time as ps

# Evaluate the student on the test split: per-batch loss, their mean, and the CPU
# time (process_time) spent in the loop.
start = ps()
Student.eval()  # ensure dropout layers are in evaluation mode
with torch.no_grad():
    test_loader_length = len(test_loader)  # nb of batch
    tmp = []  # batch loss
    for batch_number, batch_data in enumerate(test_loader, start=1):
        encoder_inputs, labels = batch_data
        outputs = Student(encoder_inputs, edge_index_data)
        loss = criterionStudent(outputs, labels)
        tmp.append(loss.item())
        print('test_loss batch %s / %s, loss: %.2f ' % (batch_number, test_loader_length, loss.item()))

    test_loss = sum(tmp) / len(tmp)
end = ps()
print("exec time : ",end-start, "second", "Test loss : ",test_loss)

# Persist the per-batch test losses.
np.save('./Student_MSE/test.npy',tmp)

In [None]:
# First sample of the batch left in `outputs` / `labels` by the most recently run
# evaluation loop: prediction and ground truth.
sample_output, sample_labels = outputs[0], labels[0]
print(sample_output.shape, sample_labels.shape)

In [None]:
import datetime
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np

In [None]:
# Hourly timestamps covering 2018-01-01 (24 entries, starting at midnight).
base = datetime.datetime(2018, 1, 1)
dates = []
for hour in range(24):
    dates.append(base + datetime.timedelta(hours=hour))

In [None]:
# Placeholder data: 24 random integers drawn from 0-9. No seed is set, so the values
# differ between runs; `y` is not read by any later cell visible in this notebook.
y = np.random.choice(range(10), 24)

In [None]:
# X-axis labels for the 12 plotted time steps (used by the scatter plot below).
# NOTE(review): this rebinds the name `time`, which the first cell imported as the
# function `from time import time`; any cell calling `time()` (e.g. the training loop)
# fails if re-run after this cell until the import is executed again.
time=['09:00','10:00','11:00','12:00','13:00','14:00','15:00','16:00','17:00','18:00','19:00','20:00']

In [None]:
from matplotlib.pyplot import figure

# Prediction (red) versus ground truth (blue) for the first two nodes of the sample,
# drawn side by side: node k occupies x positions 12*k .. 12*k + 11.
figure(figsize=(30,4), dpi=80)
for node in (0, 1):
    steps = range(12 * node, 12 * node + 12)
    predicted = sample_output[node].detach().cpu().numpy()
    observed = sample_labels[node].cpu().numpy()
    plt.plot(steps, predicted, color = 'red')
    plt.plot(steps, observed, color='blue')
plt.show()

In [None]:
# Scatter the ground-truth series of 14 selected rows of `sample_labels` against the
# time labels and save the figure.
fig, ax = plt.subplots(figsize=(20,10),dpi=60)

# Row index into sample_labels paired with the colour used for it.
# NOTE(review): the indices go 0, 24, 36, ... (12 is skipped) - confirm this is intended.
series_styles = [
    (0, 'blue'), (24, 'magenta'), (36, 'black'), (48, 'cyan'), (60, 'pink'),
    (72, 'purple'), (84, 'gray'), (96, 'olive'), (108, 'yellow'), (120, 'palegreen'),
    (132, 'peru'), (144, 'teal'), (156, 'cadetblue'), (168, 'indigo'),
]

print(sample_labels[0].cpu().numpy())
for row_index, colour in series_styles:
    ax.scatter(time, sample_labels[row_index].cpu().numpy(), color=colour)

# Legend entries are listed in the same order as the scatter calls above.
ax.legend(['73','154','263','56','96','42','58','95','72','271','68','134','107','130'], loc=1,fontsize = 25, fancybox = True, title=' ID detektor',title_fontsize=25,bbox_to_anchor=(1.2, 1.05))

plt.xticks(fontsize = 30)
plt.yticks(fontsize = 30)
plt.xlabel('Time Stamp', fontsize=30)
plt.ylabel('Flow', fontsize=30)
fig.savefig("Volume lalulintas.png",dpi=400,bbox_inches='tight')

In [None]:
import pandas as pd 

In [None]:
# Load the edge CSV (the same file used to build the adjacency matrix) into a DataFrame for inspection.
df = pd.read_csv(adj_filename)

In [None]:
# Preview the first 15 rows of the edge list.
df[:15]

In [None]:
# Show the student architecture.
print(Student)

In [None]:
# Temporal-attention parameter _U1 of the student's first block (size [307] in the state_dict listing).
Student._blocklist[0]._temporal_attention._U1

In [None]:
# Temporal-attention parameter _U2 of the student's first block (size [1, 307] in the state_dict listing).
Student._blocklist[0]._temporal_attention._U2

In [None]:
# Temporal-attention parameter _U3 of the student's first block (size [1] in the state_dict listing).
Student._blocklist[0]._temporal_attention._U3


In [None]:
# Temporal-attention parameter _Ve of the student's first block (size [12, 12] in the state_dict listing).
Student._blocklist[0]._temporal_attention._Ve

In [None]:
# Temporal-attention parameter _be of the student's first block (size [1, 12, 12] in the state_dict listing).
Student._blocklist[0]._temporal_attention._be

In [None]:
# Keep a handle on spatial-attention parameter _W1 (size [12] in the state_dict listing);
# it is used by the matmul experiment in a later cell.
w1=Student._blocklist[0]._spatial_attention._W1

In [None]:
# Keep a handle on spatial-attention parameter _W2 (size [1, 12] in the state_dict listing);
# not read by any later cell visible in this notebook.
w2=Student._blocklist[0]._spatial_attention._W2

In [None]:
# Spatial-attention parameter _W3 of the student's first block (size [1] in the state_dict listing).
Student._blocklist[0]._spatial_attention._W3

In [None]:
# First 12 entries of row 0 of spatial-attention parameter _Vs (full size [307, 307]).
Student._blocklist[0]._spatial_attention._Vs[0,:12]

In [None]:
# First 12 entries of row 0 of spatial-attention parameter _bs (full size [1, 307, 307]).
Student._blocklist[0]._spatial_attention._bs[0,0,:12]

In [None]:
 # Multiply every test batch by the spatial-attention weight w1 in NumPy.
 # Both operands are first detached and copied to host memory: detach(), cpu() and
 # numpy() are methods and must be called, and np.matmul cannot consume a torch
 # Parameter directly.
 w1_np = w1.detach().cpu().numpy()  # size [12] per the state_dict listing
 for batch_index, batch_data in enumerate(test_loader):
     encoder_inputs, labels = batch_data
     # (B, N, 1, 12) @ (12,) -> (B, N, 1); the result of the last batch stays available for inspection
     projected = np.matmul(encoder_inputs.detach().cpu().numpy(), w1_np)
        

In [None]:
# Shape of the input batch left over from the most recently executed loop.
encoder_inputs.size()

In [None]:
# Target series of the first sample's first node (labels are laid out as (B, N, T)).
labels[0,0]

In [None]:
# NOTE(review): `a` and `b` are not defined in any cell visible in this notebook; on a
# fresh kernel this cell raises NameError - define the operands or remove the cell.
np.matmul(a,b)