In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import time
import os
import scipy.sparse as sp

# os.environ['CUDA_VISIBLE_DEVICES'] = str('1')

## Parameters

In [None]:
# Per-node feature length: passed to the model as `nfeat` and used to reshape
# each input sample to (node, keep_len).
keep_len = 12
# NOTE(review): not referenced anywhere else in the visible notebook — confirm it is still needed.
sensor_num = 6
# Number of support matrices built per sample; 2 adds the random walk on the
# transposed adjacency matrix.
direction = 2
# Diffusion order handed to every DiffusionGCN layer.
order_num = 3

## Load data

In [None]:

# Adjacency matrices (one per sample).
train_adjmatrix = np.load('./data/train_adjmatrix.npy')
test_adjmatrix = np.load('./data/test_adjmatrix.npy')

# Model inputs.
train_inputdata = np.load('./data/train_inputdata.npy')
test_inputdata = np.load('./data/test_inputdata.npy')

# Targets; the last column is later used as the class label.
train_pollutiondata = np.load('./data/train_pollutiondata.npy')
test_pollutiondata = np.load('./data/test_pollutiondata.npy')

# Report the shape of every array, in the same order they were loaded.
for _loaded in (
    train_adjmatrix, test_adjmatrix,
    train_inputdata, test_inputdata,
    train_pollutiondata, test_pollutiondata,
):
    print(_loaded.shape)


In [None]:
def calculate_random_walk_matrix(adj_mx):
    """Row-normalise an adjacency matrix into a random-walk transition matrix.

    Computes ``D^-1 A`` where ``D`` is the diagonal matrix holding the row sums
    of ``A``. Rows whose sum is zero stay all-zero instead of becoming inf/NaN.

    Parameters
    ----------
    adj_mx : 2-D array-like or scipy sparse matrix
        Adjacency matrix. Integer and boolean inputs are accepted; they are
        converted to float first because NumPy refuses to raise integers to a
        negative power.

    Returns
    -------
    numpy.matrix
        Dense matrix with the same shape as ``adj_mx``.
    """
    adj_mx = sp.coo_matrix(adj_mx)
    if not np.issubdtype(adj_mx.dtype, np.inexact):
        # np.power(int_array, -1) raises ValueError, so promote to float.
        adj_mx = adj_mx.astype(float)
    d = np.asarray(adj_mx.sum(1)).flatten()
    # Zero-degree rows produce inf here; that is expected and zeroed out below,
    # so silence the divide-by-zero warning instead of spamming the notebook.
    with np.errstate(divide='ignore'):
        d_inv = np.power(d, -1)
    d_inv[np.isinf(d_inv)] = 0.
    d_mat_inv = sp.diags(d_inv)
    random_walk_mx = d_mat_inv.dot(adj_mx).todense()
    return random_walk_mx

In [None]:
def _build_supports(adjmatrices, n_directions):
    """Build the stacked random-walk support matrices for a set of samples.

    For each adjacency matrix the identity is subtracted, then the transposed
    random-walk matrix is computed. When ``n_directions == 2`` the same is
    done for the transposed adjacency matrix, giving two supports per sample.

    Parameters
    ----------
    adjmatrices : sequence of square 2-D arrays
        One adjacency matrix per sample.
    n_directions : int
        2 to include the reverse-direction support, anything else for one.

    Returns
    -------
    numpy.ndarray
        Array stacking the per-sample list of supports.
    """
    all_supports = []
    for adjmatrix in adjmatrices:
        adj_sample = adjmatrix.astype(float)
        # Identity is sized from the sample itself rather than a hard-coded 100.
        # NOTE(review): presumably this removes self-loops, which assumes the
        # stored matrices have ones on the diagonal — confirm against the data.
        adj_sample = adj_sample - np.eye(adj_sample.shape[0])
        supports_sample = [calculate_random_walk_matrix(adj_sample).T]
        if n_directions == 2:
            supports_sample.append(calculate_random_walk_matrix(adj_sample.T).T)
        all_supports.append(supports_sample)
    return np.array(all_supports)


train_supports = _build_supports(train_adjmatrix, direction)
print(train_supports.shape)

test_supports = _build_supports(test_adjmatrix, direction)
print(test_supports.shape)

In [None]:
# Number of samples in each split.
train_num, test_num = train_inputdata.shape[0], test_inputdata.shape[0]

# Flatten every sample into one row: [supports | inputs | pollution columns].
# The training loop later slices these rows apart again.
train_dataset = np.concatenate(
    [
        train_supports.reshape(train_num, -1),
        train_inputdata.reshape(train_num, -1),
        train_pollutiondata,
    ],
    axis=1,
)
test_dataset = np.concatenate(
    [
        test_supports.reshape(test_num, -1),
        test_inputdata.reshape(test_num, -1),
        test_pollutiondata,
    ],
    axis=1,
)

print(train_dataset.shape)
print(test_dataset.shape)

# CNN+DCNN

In [None]:
import torch.nn as nn
import torch
# import torch.nn.functional as F

class DiffusionGCN(nn.Module):
    """Diffusion graph convolution over per-sample support matrices.

    For every support ``S`` the layer builds the Chebyshev-style sequence
    ``T_1 = S x``, ``T_k = 2 S T_{k-1} - T_{k-2}`` (with ``T_0 = x``) up to
    ``order``, concatenates ``x`` and all terms along the feature axis and
    applies one shared linear projection.

    Parameters
    ----------
    node_num : int
        Number of graph nodes ``N``.
    dim_in : int
        Input feature size ``D`` per node.
    dim_out : int
        Output feature size per node.
    order : int
        Number of diffusion steps per support (must be an integer).
    supports_len : int, default 2
        Number of support matrices passed to ``forward``.
    """

    def __init__(self, node_num, dim_in, dim_out, order, supports_len=2):
        super(DiffusionGCN, self).__init__()
        self.node_num = node_num

        self.dim_in = dim_in
        self.dim_out = dim_out
        self.order = order
        self.supports_len = supports_len

        # One block of dim_in rows for x itself plus one per (support, step) term.
        # torch.empty replaces the legacy FloatTensor(size=...) constructor; the
        # values are overwritten by the initialisers below.
        self.weight = nn.Parameter(torch.empty(dim_in * (order * self.supports_len + 1), dim_out))
        self.biases = nn.Parameter(torch.empty(dim_out))

        nn.init.xavier_normal_(self.weight.data, gain=1.414)
        nn.init.constant_(self.biases.data, val=0.)

    def forward(self, x, supports):
        """Apply the diffusion convolution.

        Parameters
        ----------
        x : Tensor, shape [B, N, D]
        supports : Tensor or sequence, shape [supports_len, B, N, N]
            Iterated along the first axis; each item is batch-multiplied with x.

        Returns
        -------
        Tensor, shape [B, N, dim_out]
        """
        batch_size = x.shape[0]
        assert x.shape[1] == self.node_num and self.dim_in == x.shape[2]
        # Fail early with a clear message rather than with a matmul shape error.
        assert len(supports) == self.supports_len, \
            'expected {} supports, got {}'.format(self.supports_len, len(supports))

        out = [x]
        if self.order >= 1:
            for support in supports:
                # Restart the recurrence from the raw input for every support;
                # carrying x0 over from the previous support would mix the two
                # diffusion directions in the higher-order terms.
                x0 = x
                x1 = torch.einsum('bij, bjk -> bik', support, x0)
                out.append(x1)
                for k in range(2, self.order + 1):
                    x2 = 2 * torch.einsum('bij, bjk -> bik', support, x1) - x0
                    out.append(x2)
                    x1, x0 = x2, x1
        out = torch.cat(out, dim=-1)     # [B, N, D * (order * supports_len + 1)]
        out = out.reshape(batch_size * self.node_num, -1)     # [B*N, D * (order * supports_len + 1)]
        out = torch.matmul(out, self.weight)  # [B*N, dim_out]
        out = torch.add(out, self.biases)
        out = out.reshape(batch_size, self.node_num, self.dim_out)

        return out

In [None]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class GCN(nn.Module):
    """CNN feature extractor followed by two diffusion graph convolutions.

    The per-node feature sequence is first passed through three 2-D
    convolutions whose kernels only span the feature axis, then through two
    ``DiffusionGCN`` layers, and finally through a linear layer that outputs
    one score per node.

    Parameters
    ----------
    nnode : int
        Number of graph nodes.
    nfeat : int
        Feature length per node.
    dropout : float
        Dropout probability applied after each graph convolution.
    order : int, optional
        Diffusion order for both graph layers. Defaults to the notebook-level
        ``order_num`` so existing ``GCN(nnode, nfeat, dropout)`` calls behave
        as before.
    """

    def __init__(self, nnode, nfeat, dropout, order=None):
        super(GCN, self).__init__()
        if order is None:
            order = order_num

        # Kernels are (1, k): they slide along the feature axis only, so nodes
        # are never mixed here and the (nnode, nfeat) layout is preserved.
        self.cnn1 = nn.Conv2d(in_channels=1, out_channels=4, kernel_size=(1, 3), padding=(0, 1))
        self.cnn2 = nn.Conv2d(in_channels=4, out_channels=16, kernel_size=(1, 3), padding=(0, 1))
        self.cnn3 = nn.Conv2d(in_channels=16, out_channels=1, kernel_size=(1, 1))

        # dim_in follows nfeat (was hard-coded to 12) because the CNN stack
        # keeps the feature length unchanged.
        self.gc1 = DiffusionGCN(node_num=nnode, dim_in=nfeat, dim_out=24, order=order, supports_len=2)
        self.gc2 = DiffusionGCN(node_num=nnode, dim_in=24, dim_out=1, order=order, supports_len=2)
        # in_features follows nnode (was hard-coded to 100): forward() flattens
        # gc2's single output channel to (batch, nnode) before this layer.
        self.fc = nn.Linear(nnode, nnode)

        self.nnode = nnode
        self.nfeat = nfeat
        self.dropout = dropout

    def forward(self, x, supports):
        """Return per-node scores of shape (batch, nnode).

        ``x`` is reshaped to (batch, 1, nnode, nfeat); ``supports`` is passed
        unchanged to both ``DiffusionGCN`` layers.
        """
        x = x.reshape(-1, 1, self.nnode, self.nfeat)

        x = F.relu(self.cnn1(x))
        x = F.relu(self.cnn2(x))
        x = F.relu(self.cnn3(x))
        # Drop the single channel axis again.
        x = x.reshape(-1, self.nnode, self.nfeat)

        x = F.relu(self.gc1(x, supports))
        x = F.dropout(x, self.dropout, training=self.training)

        x = F.relu(self.gc2(x, supports))
        x = F.dropout(x, self.dropout, training=self.training)

        x = x.reshape(-1, self.nnode)
        x = self.fc(x)

        return x

In [None]:
from torch.utils.data import DataLoader
# Both loaders share batch size and worker count; only the training loader shuffles.
_loader_options = dict(batch_size=64, num_workers=4)
trainloader = DataLoader(train_dataset, shuffle=True, **_loader_options)
testloader = DataLoader(test_dataset, shuffle=False, **_loader_options)


In [None]:
# Network, optimiser, loss and learning-rate schedule.
model = GCN(nnode=100, nfeat=keep_len, dropout=0.1)

optimizer = optim.Adam(model.parameters(), lr=1e-2, weight_decay=1e-2)

# The model outputs one score per node and the label is a node index.
criterion = nn.CrossEntropyLoss()

# Moves the parameters to the GPU in place; requires a CUDA device.
model.cuda()

# Halve the learning rate at each milestone epoch.
lr_milestones = [30, 50]
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, gamma=0.5, milestones=lr_milestones)



In [None]:
class AverageMeter(object):
    """Track the latest value and the weighted running mean of a metric.

    Attributes set by ``reset``/``update``: ``val`` (last value), ``sum``
    (weighted total), ``count`` (total weight) and ``avg`` (``sum / count``).
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero every statistic."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` and refresh the running mean."""
        self.val = val
        weighted = val * n
        self.sum += weighted
        self.count += n
        self.avg = self.sum / self.count
    

In [None]:
import time
import math

os.makedirs('checkpoint', exist_ok=True)
os.makedirs('results', exist_ok=True)

# Run on whichever device the model already lives on instead of assuming CUDA here.
device = next(model.parameters()).device
num_nodes = model.nnode


def _unpack_batch(batch):
    """Split a batch of flat dataset rows into (supports, node features, labels).

    Each row is laid out as [supports | inputs | label]. Supports are returned
    as [direction, B, N, N], features as [B, N, keep_len], labels as int64 [B];
    all three are moved to ``device``.
    """
    n_samples = len(batch)
    support_width = direction * num_nodes * num_nodes

    supports = batch[:, :support_width].float().reshape(n_samples, direction, num_nodes, num_nodes)
    # The model iterates over supports along the first axis.
    supports = torch.transpose(supports, 0, 1)
    features = batch[:, support_width:-1].float().reshape(n_samples, num_nodes, keep_len)
    labels = batch[:, -1].long()
    return supports.to(device), features.to(device), labels.to(device)


best_loss = float('inf')
# Train model
for epoch in range(60):
    model.train()
    total_loss_train = AverageMeter()
    for index, data in enumerate(trainloader):
        # Batch-local names: do not overwrite the notebook-level
        # train_supports / test_supports arrays built in earlier cells.
        batch_supports, batch_x, batch_Y = _unpack_batch(data)

        batch_Y_hat = model(batch_x, batch_supports)
        batch_loss = criterion(batch_Y_hat, batch_Y)

        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()
        total_loss_train.update(batch_loss.item(), len(data))
        if index % 10 == 0:
            print('Epoch: {}, index: {}, train loss: {:.4f}'.format(epoch+1, int(index), total_loss_train.avg))
            # `with` guarantees the file is closed; the old `file.close`
            # (no parentheses) never actually closed it.
            with open("./results/loss.txt", "a") as log_file:
                log_file.write("Epoch = {}, index = {}  ".format(epoch+1, int(index)))
                log_file.write("\n")
                log_file.write("train_loss = {:.4f}  ".format(total_loss_train.avg))
                log_file.write("\n")

    # Step the schedule after the epoch's optimizer updates; stepping before
    # them shifts the milestones by one epoch on PyTorch >= 1.1.
    scheduler.step()

    model.eval()
    total_loss_test = AverageMeter()
    right_num_top1 = 0
    right_num_top5 = 0
    right_num_top10 = 0
    # No gradients are needed for evaluation; this saves memory and time.
    with torch.no_grad():
        for index_test, test_data in enumerate(testloader):
            batch_supports, batch_x, batch_Y = _unpack_batch(test_data)

            batch_Y_hat = model(batch_x, batch_supports)
            loss_test = criterion(batch_Y_hat, batch_Y)
            total_loss_test.update(loss_test.item(), batch_x.size(0))

            # Rank on raw logits: log-softmax is monotonic, so the ordering is
            # the same. topk returns indices sorted by score, so the first
            # 1 / 5 / 10 columns are the top-1 / top-5 / top-10 predictions.
            k_max = min(10, batch_Y_hat.size(1))
            _, ranked_nodes = batch_Y_hat.topk(k_max, dim=1)
            hits = ranked_nodes.eq(batch_Y.unsqueeze(1))
            right_num_top1 += hits[:, :1].sum().item()
            right_num_top5 += hits[:, :5].sum().item()
            right_num_top10 += hits[:, :10].sum().item()

    print('*'*50)
    print('Epoch = ', epoch+1)
    print('test loss: {:.4f}, test accuracy top1:{:.5f}, test accuracy top5:{:.5f}, test accuracy top10:{:.5f}'.format(total_loss_test.avg, right_num_top1/len(test_dataset), right_num_top5/len(test_dataset), right_num_top10/len(test_dataset)))

    with open("./results/results.txt", "a") as results_file:
        results_file.write("test_loss = {:.4f}  ".format(total_loss_test.avg))
        results_file.write("test_accuracy_top1 = {:.5f}  ".format(right_num_top1/len(test_dataset)))
        results_file.write("test_accuracy_top5 = {:.5f}  ".format(right_num_top5/len(test_dataset)))
        results_file.write("test_accuracy_top10 = {:.5f}  ".format(right_num_top10/len(test_dataset)))
        results_file.write("\n")

    # Keep only the checkpoint with the lowest test loss seen so far.
    if total_loss_test.avg < best_loss:
        best_loss = total_loss_test.avg
        model_state = {
            'net_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'epoch': epoch,
            'best_loss': best_loss
        }

        save_point = './checkpoint/CNN_DCN'
        torch.save(model_state, save_point)

