In [None]:
import os
from time import time
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx



import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from scipy.sparse.linalg import eigs


# Select the compute device once. Fall back to the CPU when CUDA is not
# available so that every later `.to(DEVICE)` works on machines without a GPU.
USE_CUDA = torch.cuda.is_available()
DEVICE = torch.device('cuda:0' if USE_CUDA else 'cpu')
print("CUDA:", USE_CUDA, DEVICE)

# TensorBoard writer shared by the training and validation code below;
# its log directory is ./Student_AIL.
from tensorboardX import SummaryWriter
sw = SummaryWriter(logdir='./Student_AIL', flush_secs=5)

import math
from typing import Optional, List, Union

import torch
import torch.nn as nn
from torch.nn import Parameter
import torch.nn.functional as F

from torch_geometric.data import Data
from torch_geometric.typing import OptTensor
from torch_geometric.nn.conv import MessagePassing
from torch_geometric.transforms import LaplacianLambdaMax
from torch_geometric.utils import remove_self_loops, add_self_loops, get_laplacian
from torch_geometric.utils import to_dense_adj
from torch_scatter import scatter_add

In [None]:
def _build_split_loader(x, target, batch_size, shuffle, device):
    """Turn one (inputs, targets) NumPy split into float32 tensors on ``device`` and a DataLoader over them."""
    x_tensor = torch.from_numpy(x).type(torch.FloatTensor).to(device)
    target_tensor = torch.from_numpy(target).type(torch.FloatTensor).to(device)
    dataset = torch.utils.data.TensorDataset(x_tensor, target_tensor)
    loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
    return loader, x_tensor, target_tensor


def load_graphdata_channel1(graph_signal_matrix_filename, num_of_hours, num_of_days, num_of_weeks, batch_size,
                            shuffle=True, DEVICE = torch.device('cuda:0')):
    '''
    Load the pre-processed train/val/test splits and wrap them in DataLoaders.

    The archive that is read is
    ``./data/PEMS04/<basename>_r<num_of_hours>_d<num_of_days>_w<num_of_weeks>_astcgn.npz``,
    where ``<basename>`` is the file name of ``graph_signal_matrix_filename``
    up to its first dot. Only the first feature channel of every ``*_x`` array
    (and of ``mean`` / ``std``) is kept.

    :param graph_signal_matrix_filename: str, only its base name is used
    :param num_of_hours: int, part of the archive name
    :param num_of_days: int, part of the archive name
    :param num_of_weeks: int, part of the archive name
    :param batch_size: int
    :param shuffle: bool, shuffle the training loader (val/test are never shuffled)
    :param DEVICE: torch.device on which all tensors are placed
    :return: tuple
        (train_loader, train_target_tensor,
         val_loader, val_target_tensor,
         test_loader, test_target_tensor,
         mean, std)
        Each loader yields (x, target) batches; per the original shape notes x is
        (B, N_nodes, 1, T_input) and target is (B, N_nodes, T_output).
        ``mean`` and ``std`` are NumPy arrays restricted to the first feature.
    '''

    file = os.path.basename(graph_signal_matrix_filename).split('.')[0]
    filename = os.path.join('./data/PEMS04/', file + '_r' + str(num_of_hours) + '_d' + str(num_of_days) + '_w' + str(num_of_weeks)) +'_astcgn'
    print('load file:', filename)

    # np.load on an .npz returns a lazily-read NpzFile that keeps the archive
    # open; the context manager closes it once every array has been extracted.
    with np.load(filename + '.npz') as file_data:
        train_x = file_data['train_x'][:, :, 0:1, :]
        train_target = file_data['train_target']

        val_x = file_data['val_x'][:, :, 0:1, :]
        val_target = file_data['val_target']

        test_x = file_data['test_x'][:, :, 0:1, :]
        test_target = file_data['test_target']

        mean = file_data['mean'][:, :, 0:1, :]
        std = file_data['std'][:, :, 0:1, :]

    train_loader, train_x_tensor, train_target_tensor = _build_split_loader(
        train_x, train_target, batch_size, shuffle, DEVICE)
    # Validation and test order is kept fixed.
    val_loader, val_x_tensor, val_target_tensor = _build_split_loader(
        val_x, val_target, batch_size, False, DEVICE)
    test_loader, test_x_tensor, test_target_tensor = _build_split_loader(
        test_x, test_target, batch_size, False, DEVICE)

    # print
    print('train:', train_x_tensor.size(), train_target_tensor.size())
    print('val:', val_x_tensor.size(), val_target_tensor.size())
    print('test:', test_x_tensor.size(), test_target_tensor.size())

    return train_loader, train_target_tensor, val_loader, val_target_tensor, test_loader, test_target_tensor, mean, std


In [None]:
# Dataset location and batching configuration.
graph_signal_matrix_filename = './data/PEMS04/pems04.npz'
batch_size = 16
# These three counts are also part of the name of the pre-processed archive.
num_of_weeks = 1
num_of_days = 1
num_of_hours = 1

# Pass the notebook-wide DEVICE explicitly so the data tensors live on the same
# device as the models instead of relying on the loader's own default.
train_loader, train_target_tensor, val_loader, val_target_tensor, test_loader, test_target_tensor, _mean, _std = load_graphdata_channel1(
    graph_signal_matrix_filename, num_of_hours, num_of_days, num_of_weeks, batch_size, DEVICE=DEVICE)

In [None]:
def get_adjacency_matrix(distance_df_filename, num_of_vertices, id_filename=None):
    '''
    Build the binary adjacency matrix and the distance matrix of the graph.

    Parameters
    ----------
    distance_df_filename: str, path of either a ``.npy`` file holding a
        ready-made adjacency matrix, or a csv file whose rows are
        ``from,to,distance`` (the first line is skipped as a header and rows
        that do not have exactly three fields are ignored)
    num_of_vertices: int, the number of vertices
    id_filename: str or None, optional text file with one node id per line;
        when given, the ids found in the csv are mapped to their zero-based
        line index before being used as matrix indices

    Returns
    ----------
    A: np.ndarray, adjacency matrix (for ``.npy`` input: the loaded array)
    distaneA: np.ndarray with the distance of every listed edge, or None for
        ``.npy`` input
    '''
    # Check the extension rather than a substring: a csv stored under a path
    # that merely contains "npy" must still be parsed as csv.
    if distance_df_filename.endswith('.npy'):
        adj_mx = np.load(distance_df_filename)
        return adj_mx, None

    import csv

    n = int(num_of_vertices)
    A = np.zeros((n, n), dtype=np.float32)
    distaneA = np.zeros((n, n), dtype=np.float32)

    id_dict = None
    if id_filename:
        with open(id_filename, 'r') as f:
            # Map each node id to a zero-based index (its line number in the file).
            id_dict = {int(i): idx for idx, i in enumerate(f.read().strip().split('\n'))}

    with open(distance_df_filename, 'r') as f:
        f.readline()  # skip the header line
        for row in csv.reader(f):
            if len(row) != 3:
                continue
            i, j, distance = int(row[0]), int(row[1]), float(row[2])
            if id_dict is not None:
                i, j = id_dict[i], id_dict[j]
            A[i, j] = 1
            distaneA[i, j] = distance
    return A, distaneA

In [None]:
id_filename = None
adj_filename = './data/PEMS04/PEMS04.csv'
num_of_vertices = 307
# For csv input both matrices are (num_of_vertices, num_of_vertices).
adj_mx, distance_mx = get_adjacency_matrix(adj_filename, num_of_vertices, id_filename)

# One (row, col) pair per non-zero adjacency entry. Computed once and stored as
# a list so it can be reused (a bare zip would be exhausted after one pass).
rows, cols = np.where(adj_mx == 1)
edges = list(zip(rows.tolist(), cols.tolist()))

# networkx view of the graph, only needed for the optional drawing below.
gr = nx.Graph()
gr.add_edges_from(edges)
# nx.draw(gr, node_size=3)
plt.show()

# Edge list as a (2, num_edges) integer tensor, the second input of the models.
edge_index_data = torch.LongTensor(np.array([rows, cols])).to(DEVICE)

In [None]:
from model.ASTGCN import ASTGCN

In [None]:
# Teacher hyper-parameters, passed positionally to ASTGCN below.
# NOTE(review): what each value means is defined by model.ASTGCN, which is not
# visible in this notebook; confirm against that module before changing them.
nb_block = 2
in_channels = 1
K = 3
nb_chev_filter = 64
nb_time_filter = 64
time_strides = num_of_hours
num_for_predict = 12
len_input = 12
dropout = 0.01

Teacher = ASTGCN(
    nb_block, in_channels, K, nb_chev_filter, nb_time_filter, time_strides,
    num_for_predict, len_input, num_of_vertices, dropout,
).to(DEVICE)
print(Teacher)



In [None]:
# Student hyper-parameters: same ASTGCN class as the teacher, built with a
# single block (nb_blockstdn = 1). The remaining arguments reuse the values
# defined for the teacher.
nb_chev_filterstdn = 64
nb_time_filterstdn = 64
nb_blockstdn = 1
dropouts = 0.01

Student = ASTGCN(
    nb_blockstdn, in_channels, K, nb_chev_filterstdn, nb_time_filterstdn, time_strides,
    num_for_predict, len_input, num_of_vertices, dropouts,
).to(DEVICE)
print(Student)

# Initialized teacher model

In [None]:
#------------------------------------------------------
learning_rate = 0.001
# Adam with a small L2 penalty (a weight_decay > 0 enables it).
optimizerTeacher = optim.Adam(Teacher.parameters(), lr=learning_rate, weight_decay=1e-6)

print('Teacher\'s state_dict:')
total_param = 0
# Build the state dict once instead of on every access. numel() keeps the total
# an int, whereas np.prod of an empty torch.Size would turn it into a float.
for param_name, param_value in Teacher.state_dict().items():
    print(param_name, '\t', param_value.size(), '\t', param_value.device)
    total_param += param_value.numel()
print('Teacher\'s total params:', total_param)
#--------------------------------------------------
print('Optimizer\'s state_dict:')
for var_name, var_value in optimizerTeacher.state_dict().items():
    print(var_name, '\t', var_value)


## Initialized student model

In [None]:
#------------------------------------------------------
# Adam with a small L2 penalty (a weight_decay > 0 enables it).
optimizerStudent = optim.Adam(Student.parameters(), lr=1e-3, weight_decay=1e-6)

# The header names the model that is actually printed (it used to say "Teacher").
print('Student\'s state_dict:')
total_param = 0
# Build the state dict once instead of on every access. numel() keeps the total
# an int, whereas np.prod of an empty torch.Size would turn it into a float.
for param_name, param_value in Student.state_dict().items():
    print(param_name, '\t', param_value.size(), '\t', param_value.device)
    total_param += param_value.numel()
print('Student\'s total params:', total_param)
#--------------------------------------------------
print('Optimizer\'s state_dict:')
for var_name, var_value in optimizerStudent.state_dict().items():
    print(var_name, '\t', var_value)


In [None]:
def masked_mae(preds, labels, null_val=np.nan):
    """Mean absolute error that ignores label entries equal to ``null_val``.

    Entries whose label equals ``null_val`` (or is NaN when ``null_val`` is NaN)
    get weight 0. The remaining weights are rescaled so that all weights
    average to 1. NaNs produced along the way (for example when every entry is
    masked) are replaced by 0, so the result is then 0.

    :param preds: tensor of predictions
    :param labels: tensor of targets, same shape as ``preds``
    :param null_val: value marking a missing label
    :return: scalar tensor, the mean of the weighted absolute errors
    """
    if np.isnan(null_val):
        keep = torch.isnan(labels).logical_not()
    else:
        keep = labels.ne(null_val)

    weights = keep.float()
    weights = weights / weights.mean()
    weights = torch.where(torch.isnan(weights), torch.zeros_like(weights), weights)

    weighted_err = (preds - labels).abs() * weights
    weighted_err = torch.where(torch.isnan(weighted_err), torch.zeros_like(weighted_err), weighted_err)
    return weighted_err.mean()


In [None]:
class RMSELoss(nn.Module):
    """Root-mean-square error: the square root of ``nn.MSELoss``."""

    def __init__(self):
        super().__init__()
        # Default-configured mean squared error; forward() takes its square root.
        self.mse = nn.MSELoss()

    def forward(self, ytil, y):
        """Return sqrt(MSE(ytil, y)) for prediction ``ytil`` and target ``y``."""
        mean_squared = self.mse(ytil, y)
        return mean_squared.sqrt()

In [None]:

# Loss configuration for the distillation run.
# alpha weights the student-vs-label loss in the training loop and (1 - alpha)
# weights the student-vs-teacher term, so alpha=0.1 means 10% labels / 90% teacher.
alpha=0.1
masked_flag=0
criterionStudent=RMSELoss().to(DEVICE)

criterion_masked = masked_mae
loss_function = 'mae'

metric_method = 'unmask'
missing_value=0.0

# Pick the auxiliary criterion from loss_function. masked_flag records whether
# the selected loss is a masked one.
# NOTE(review): `criterion` is only assigned in the two unmasked branches, and
# the visible training and evaluation code uses criterionStudent instead.
if loss_function=='masked_mse':
    # NOTE(review): masked_mse is not defined in the visible part of this
    # notebook; this branch raises NameError unless it is defined elsewhere.
    criterion_masked = masked_mse
    masked_flag=1
elif loss_function=='masked_mae':
    criterion_masked = masked_mae
    masked_flag = 1
elif loss_function == 'mae':
    criterion = nn.L1Loss().to(DEVICE)
    masked_flag = 0
elif loss_function == 'rmse':
    # NOTE(review): this is plain MSE, not its square root.
    criterion = nn.MSELoss().to(DEVICE)
    masked_flag= 0

In [None]:
# Per-epoch loss histories: compute_val_loss_mstgcn appends to ValLoss, and the
# training loop appends the mean distillation loss to TrainLoss.
ValLoss=[]
TrainLoss=[]

In [None]:
def compute_val_loss_mstgcn(net, val_loader, criterion, masked_flag, missing_value, sw, epoch, edge_index_data, limit=None):
    '''
    Compute the mean per-batch loss of ``net`` on the validation set.

    Side effects: switches ``net`` to evaluation mode, appends the mean loss to
    the notebook-level ``ValLoss`` list and logs it to ``sw`` under the tag
    'validation_loss'.

    :param net: model, called as ``net(encoder_inputs, edge_index_data)``
    :param val_loader: iterable of (encoder_inputs, labels) batches that supports len()
    :param criterion: loss callable, called as ``criterion(outputs, labels)`` or,
        when ``masked_flag`` is truthy, ``criterion(outputs, labels, missing_value)``
    :param masked_flag: truthy when ``criterion`` is a masked loss
    :param missing_value: label value treated as missing by a masked loss
    :param sw: tensorboardX.SummaryWriter
    :param epoch: int, step under which the scalar is logged
    :param edge_index_data: second positional argument forwarded to ``net``
    :param limit: int or None, stop after the batch with this index
    :return: val_loss, float
    '''
    net.eval()  # ensure dropout layers are in evaluation mode
    with torch.no_grad():
        val_loader_length = len(val_loader)  # nb of batch
        tmp = []  # batch loss
        for batch_index, (encoder_inputs, labels) in enumerate(val_loader):
            outputs = net(encoder_inputs, edge_index_data)
            if masked_flag:
                # A masked loss needs to know which label value marks missing data;
                # without it the mask silently falls back to the loss's default.
                loss = criterion(outputs, labels, missing_value)
            else:
                loss = criterion(outputs, labels)
            batch_loss = loss.item()
            tmp.append(batch_loss)
            if batch_index % 20 == 0:
                print('validation batch %s / %s, loss: %.2f' % (batch_index + 1, val_loader_length, batch_loss))
            if (limit is not None) and batch_index >= limit:
                break

        validation_loss = sum(tmp) / len(tmp)
        ValLoss.append(validation_loss)
        sw.add_scalar('validation_loss', validation_loss, epoch)
        print("rata rata val_loss",validation_loss,"pada epoch:",epoch)
    return validation_loss

In [None]:
# Training bookkeeping used by the distillation loop.
global_step = 0          # number of optimisation steps taken so far
best_epoch = 0           # epoch with the lowest validation loss seen so far
best_val_loss = np.inf   # that lowest validation loss
start_time= time()       # reference point for the elapsed time printed during training

# load weights from teacher

In [None]:
# Restore the pre-trained teacher (weights and optimizer state) from its checkpoint.
epoch = 13
params_filename = os.path.join('./Teacher/', 'checkpoint_%s.pth' % epoch)
# map_location keeps the load working when the checkpoint was written on a
# different device from the one used now.
# NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
loaded_checkpoint = torch.load(params_filename, map_location=DEVICE)
epoch = loaded_checkpoint['epoch']
Teacher.load_state_dict(loaded_checkpoint['model_state'])
optimizerTeacher.load_state_dict(loaded_checkpoint['optimizer_state'])
Teacher.eval()

In [None]:
# Print the teacher's complete state dict (every tensor in full).
print(Teacher.state_dict())

In [None]:
# Show every top-level entry of the teacher optimizer's state dict.
for var_name, var_value in optimizerTeacher.state_dict().items():
    print(var_name, "\t", var_value)

In [None]:
# Load the normalization constant N

In [None]:
# Load the constant N stored in the teacher directory. It normalises the
# teacher's loss in the distillation loop.
a = os.path.join('./Teacher/', 'N.npy')
# Tensor.to() returns a new tensor instead of moving the tensor in place, so
# its result has to be assigned; otherwise N would stay on the CPU.
N = torch.tensor(np.load(a)).to(DEVICE)
N

In [None]:
# Student knowledge distillation: train the student for 40 epochs against both
# the ground-truth labels and the frozen teacher's predictions. A checkpoint is
# written whenever the validation loss improves.
Studloss1=[]  # per-epoch mean of the student's loss against the labels
for epoch in range(40):
    params_filename = os.path.join('./Student_AIL/', 'checkpoint_%s.pth' % epoch)
    tmp=[]   # per-batch distillation (total) loss of this epoch
    tmp0=[]  # per-batch student-vs-label loss of this epoch
    Student.train()  # ensure dropout layers are in train mode
    Teacher.eval()   # the teacher is only used for inference
    for batch_index, batch_data in enumerate(train_loader):
        # Per the loader's shape notes: inputs are (B, N, 1, T), labels are (B, N, T).
        encoder_inputs, labels = batch_data
        optimizerStudent.zero_grad()
        P_s = Student(encoder_inputs, edge_index_data)  # student prediction
        with torch.no_grad():
            P_t=Teacher(encoder_inputs, edge_index_data)  # teacher prediction, no gradients
        P_tloss=criterionStudent(P_t,labels)  # teacher error on this batch
        P_sloss=criterionStudent(P_s,labels)  # student error on this batch
        # psi shrinks as the teacher's own error grows relative to N.
        # NOTE(review): psi becomes negative when P_tloss > N; confirm that N
        # bounds the teacher loss.
        psi=(1-(P_tloss/N))
        # Total loss: alpha * label loss + (1 - alpha) * psi * student-vs-teacher
        # loss, i.e. the strength of the distillation term depends on the teacher's output.
        loss=P_sloss * alpha + psi*criterionStudent(P_s, P_t) * (1-alpha)
        loss.backward()
        
        optimizerStudent.step()
        distilation_loss = loss.item()
        tmp.append(distilation_loss)
        tmp0.append(P_sloss.item())
        global_step += 1
        sw.add_scalar('training_loss', P_sloss, global_step)
        if global_step % 100 == 0:
            print('global step: %s, Training loss: %.2f, Distilation loss: %.2f, time: %.2fs' % (global_step,P_sloss,distilation_loss, time() - start_time))
    # Epoch means of the two tracked losses.
    studloss=sum(tmp0)/len(tmp0)
    rtloss_t=sum(tmp) / len(tmp)
    Studloss1.append(studloss)
    TrainLoss.append(rtloss_t)
    print("rata rata Trainingloss: ",studloss,"rata rata Distilation loss",rtloss_t,"pada epoch:",epoch)
    val_loss = compute_val_loss_mstgcn(Student, val_loader, criterionStudent, masked_flag, missing_value, sw, epoch,edge_index_data)

    # Keep a checkpoint only for epochs that improve the validation loss.
    if val_loss < best_val_loss:
        best_epoch=epoch
        best_val_loss = val_loss
        checkpoint = {
            'epoch': best_epoch,
            'model_state': Student.state_dict(),
            'optimizer_state': optimizerStudent.state_dict()
        }
        torch.save(checkpoint, params_filename)
        print('save parameters to file: %s' % params_filename)

print('best epoch:', best_epoch)
# Persist the loss histories: validation, distillation (total) and student-vs-label.
path=os.path.join("./Student_AIL/",'losshistval.npy')
path1=os.path.join("./Student_AIL/",'losshistdistilation.npy')
path2=os.path.join("./Student_AIL/",'losshisttrain.npy')

np.save(path,ValLoss)
np.save(path1,TrainLoss)
np.save(path2,Studloss1)

* label smoothing

In [None]:
# Reload the student checkpoint of the best epoch found during training.
# (`best` was never defined; the training loop tracks it as `best_epoch`.)
epoch = best_epoch
params_filename = os.path.join('./Student_AIL/', 'checkpoint_%s.pth' % epoch)
# map_location keeps the load working when the checkpoint was written on a
# different device from the one used now.
# NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
loaded_checkpoint = torch.load(params_filename, map_location=DEVICE)
epoch = loaded_checkpoint['epoch']
Student.load_state_dict(loaded_checkpoint['model_state'])
optimizerStudent.load_state_dict(loaded_checkpoint['optimizer_state'])
Student.eval()

In [None]:
# Write the current student and optimizer state, tagged with best_epoch, to
# params_filename (this overwrites the file at that path).
checkpoint = {
            'epoch': best_epoch,
            'model_state': Student.state_dict(),
            'optimizer_state': optimizerStudent.state_dict()
        }
torch.save(checkpoint, params_filename)
print('save parameters to file: %s' % params_filename)

In [None]:
from time import process_time as ps

# Evaluate the student on the test split, measuring CPU process time.
start = ps()
Student.eval()  # ensure dropout layers are in evaluation mode
with torch.no_grad():
    test_loader_length = len(test_loader)  # nb of batch
    tmp = []  # batch loss
    for batch_number, (encoder_inputs, labels) in enumerate(test_loader, start=1):
        outputs = Student(encoder_inputs, edge_index_data)
        loss = criterionStudent(outputs, labels)
        batch_loss = loss.item()
        tmp.append(batch_loss)
        print('test_loss batch %s / %s, loss: %.2f ' % (batch_number, test_loader_length, batch_loss))

    test_loss = sum(tmp) / len(tmp)
end = ps()
print("rate test : ",end-start, "second", "Test loss : ",test_loss)

# Keep the per-batch test losses next to the other run artefacts.
np.save('./Student_AIL/test.npy',tmp)