In [1]:
# Importing required libraries
from IPython.display import display
import numpy as np
import pandas as pd
import random
import argparse
from torch.autograd import Variable
import torch.nn as nn
import torch.optim as optim
import torch
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
import os
import glob

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def mape(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / max(y_true))) * 100

def RMSELoss(yhat,y):
    return torch.sqrt(torch.mean((yhat-y)**2))

In [4]:
class GraphAttentionLayer(nn.Module):

    def __init__(self, in_features, out_features, dropout, alpha, concat=True):
        super(GraphAttentionLayer, self).__init__()
        self.dropout = dropout
        self.in_features = in_features
        self.out_features = out_features
        self.alpha = alpha
        self.concat = concat
        
        # Initializing weights and activations
        self.W = nn.Parameter(torch.zeros(size=(in_features, out_features)))
        nn.init.xavier_uniform_(self.W.data, gain=1.414)
        self.a = nn.Parameter(torch.zeros(size=(2*out_features, 1)))
        nn.init.xavier_uniform_(self.a.data, gain=1.414)

        self.leakyrelu = nn.LeakyReLU(self.alpha)
        
        
    def _prepare_attentional_mechanism_input(self, Wh):
        Wh1 = torch.matmul(Wh, self.a[:self.out_features, :])
        Wh2 = torch.matmul(Wh, self.a[self.out_features:, :])
        e = Wh1 + Wh2.T
        return self.leakyrelu(e)
    

    def forward(self, input, adj):
        
        # Applying linear transformation to each node
        Wh = torch.mm(input, self.W)
        
        # Performing the dot product of a_input & activations and applying leakyReLU to the dot product
        e = self._prepare_attentional_mechanism_input(Wh)
        
        # Returning a tensor filled with 1, with the same shape
        zero_vec = -9e15*torch.ones_like(e) 
        
        # If nodes not connected then put attention equal to zero
        attention = torch.where(adj > 0, e, zero_vec)
        
        # Applying softmax to the attentions
        attention = F.softmax(attention, dim=1)
        # attention = F.softmax(e, dim=1)
        
        # Applying dropout to avoid overfitting
        attention = F.dropout(attention, self.dropout, training=self.training)
        
        # Embeddings are aggregated from neighbors and scaled by the attentions
        h_prime = torch.matmul(attention, Wh)
        
        # For aggregation in intermediate layers, we apply concat and in final layer aggregation we apply average.
        if self.concat:
            return F.elu(h_prime)
        else:
            return h_prime
    

    def __repr__(self):
        return self.__class__.__name__ + ' (' + str(self.in_features) + ' -> ' + str(self.out_features) + ')'

In [5]:
class GAT(nn.Module):
    def __init__(self, nfeat, nhid, nclass, dropout, alpha, nheads):
        """Dense version of GAT."""
        super(GAT, self).__init__()
        self.dropout = dropout
        self.nclass = nclass
        
        # Concatenating the outputs with all the heads from intermediate layers 
        self.attentions = [GraphAttentionLayer(nfeat, nhid, dropout=dropout, alpha=alpha, concat=True) for _ in range(nheads)]
        for i, attention in enumerate(self.attentions):
            self.add_module('attention_{}'.format(i), attention)
  
        self.out_att = GraphAttentionLayer(nhid * nheads, nclass, dropout=dropout, alpha=alpha, concat=False)
        
        self.linears = torch.nn.ModuleList([torch.nn.Linear(nclass, 1) for _ in range(nclass)])
        
        

    def forward(self, x, adj):
        x = F.dropout(x, self.dropout, training=self.training)
        x = torch.cat([att(x, adj) for att in self.attentions], dim=1)
        x = F.dropout(x, self.dropout, training=self.training)
        x = F.relu(self.out_att(x, adj))
        out = [self.linears[i](x[i, :]) for i in range(self.nclass)]

        return out

## Processing the file to input into the model

In [6]:
# Specifying the path of file
path= r"C:\Users\sashah8\OneDrive - North Carolina State University\Desktop\DOE\DOE_Q1\L4\data_cleaned"

# Listing just the required files
fileList = os.listdir(path)
fileList.remove("Date.csv")

# Previewing the required file names
print(fileList)

['Capital.csv', 'Central.csv', 'Dunwoody.csv', 'Genese.csv', 'Hudson.csv', 'LongIsland.csv', 'Milwood.csv', 'NYC.csv']


In [7]:
# Just for previewing the column name
pd.read_csv(os.path.join(path, fileList[0]))

Unnamed: 0,Demand,Normalized_net,Temprature
0,1280.8,0.531006,-5.8
1,1230.8,0.510277,-6.1
2,1198.5,0.496886,-7.5
3,1188.9,0.492906,-8.4
4,1190.9,0.493735,-7.4
...,...,...,...
43819,1452.6,0.602233,3.4
43820,1399.7,0.580301,3.3
43821,1339.0,0.555136,2.6
43822,1280.9,0.531048,2.7


## Processing the load data into train-test split

In [8]:
# Fetching and concatenating the data
Load_DS = pd.concat([pd.read_csv(os.path.join(path, fileName), usecols= ["Demand"]) for fileName in fileList], axis= 1)
Load_DS.columns = [i.removesuffix('.csv') for i in fileList]
Load_DS = Load_DS.add_prefix("LOAD_")

Load_DS

Unnamed: 0,LOAD_Capital,LOAD_Central,LOAD_Dunwoody,LOAD_Genese,LOAD_Hudson,LOAD_LongIsland,LOAD_Milwood,LOAD_NYC
0,1280.8,1813.7,643.8,1042.4,1069.8,2338.0,336.5,5272.0
1,1230.8,1737.4,612.9,991.2,1020.4,2164.1,317.7,5092.6
2,1198.5,1700.7,587.9,961.5,982.8,2060.0,307.0,4909.5
3,1188.9,1683.3,570.0,947.1,962.1,1989.3,300.6,4771.4
4,1190.9,1688.4,563.5,951.2,957.5,1968.8,298.4,4698.5
...,...,...,...,...,...,...,...,...
43819,1452.6,1947.5,696.4,1179.4,1180.2,2574.2,385.7,5909.3
43820,1399.7,1881.5,668.5,1136.7,1134.0,2446.3,366.2,5689.2
43821,1339.0,1804.6,637.9,1094.5,1091.2,2324.1,347.3,5488.3
43822,1280.9,1734.2,608.4,1049.6,1041.3,2201.8,326.5,5276.0


In [9]:
# Creating 1-day lag loads
Load_lag_1 = Load_DS.shift(24).fillna(0)
Load_lag_1.columns = [i.removeprefix('LOAD_') for i in Load_lag_1.columns]
Load_lag_1 = Load_lag_1.add_prefix("LAG1_")
Load_lag_1

Unnamed: 0,LAG1_Capital,LAG1_Central,LAG1_Dunwoody,LAG1_Genese,LAG1_Hudson,LAG1_LongIsland,LAG1_Milwood,LAG1_NYC
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...
43819,1553.3,2011.3,745.3,1205.7,1268.3,2709.7,425.6,6199.9
43820,1491.2,1955.1,718.9,1166.7,1228.9,2615.3,411.0,5997.1
43821,1413.1,1853.3,683.9,1110.0,1167.4,2487.5,395.1,5793.5
43822,1310.5,1734.9,639.0,1038.7,1088.6,2306.0,358.1,5505.2


In [10]:
# Creating 7-day lag loads
Load_lag_7 = Load_DS.shift(24*7).fillna(0)
Load_lag_7.columns = [i.removeprefix('LOAD_') for i in Load_lag_7.columns]
Load_lag_7 = Load_lag_7.add_prefix("LAG7_")
Load_lag_7

Unnamed: 0,LAG7_Capital,LAG7_Central,LAG7_Dunwoody,LAG7_Genese,LAG7_Hudson,LAG7_LongIsland,LAG7_Milwood,LAG7_NYC
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...
43819,1394.4,1846.2,693.1,1119.7,1172.9,2508.5,369.0,5756.6
43820,1375.5,1826.5,672.7,1105.0,1151.3,2438.5,364.1,5594.8
43821,1351.5,1805.3,655.1,1085.8,1130.6,2376.6,352.2,5431.0
43822,1304.1,1738.8,630.8,1046.8,1094.8,2292.6,341.5,5244.8


In [11]:
# Setting the train-test split
ratio = 0.2
Num_test, Num_train = int(len(Load_DS) * ratio), len(Load_DS) - int(len(Load_DS) * ratio)

In [12]:
# Scaling the load data wrt WEST
mmScaler_load = MinMaxScaler()

# Splitting the data into train and test [LOAD]
Load_train, Load_test = Load_DS[:Num_train], Load_DS[Num_train:]
print("Raw load - Train: ")
display(Load_train.head(2))
print("\n")

# Splitting the data into train and test [LAG 1]
Load_Lag1_train, Load_Lag1_test = Load_lag_1[:Num_train], Load_lag_1[Num_train:]
print("Raw lag 1 load - Train: ")
display(Load_Lag1_train.head(2))
print("\n")

# Splitting the data into train and test [LAG 7]
Load_Lag7_train, Load_Lag7_test = Load_lag_7[:Num_train], Load_lag_7[Num_train:]
print("Raw lag 7 load - Train: ")
display(Load_Lag7_train.head(2))
print("\n")

Raw load - Train: 


Unnamed: 0,LOAD_Capital,LOAD_Central,LOAD_Dunwoody,LOAD_Genese,LOAD_Hudson,LOAD_LongIsland,LOAD_Milwood,LOAD_NYC
0,1280.8,1813.7,643.8,1042.4,1069.8,2338.0,336.5,5272.0
1,1230.8,1737.4,612.9,991.2,1020.4,2164.1,317.7,5092.6




Raw lag 1 load - Train: 


Unnamed: 0,LAG1_Capital,LAG1_Central,LAG1_Dunwoody,LAG1_Genese,LAG1_Hudson,LAG1_LongIsland,LAG1_Milwood,LAG1_NYC
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0




Raw lag 7 load - Train: 


Unnamed: 0,LAG7_Capital,LAG7_Central,LAG7_Dunwoody,LAG7_Genese,LAG7_Hudson,LAG7_LongIsland,LAG7_Milwood,LAG7_NYC
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0






In [13]:
# Scaling the data using mix-max scaler [TRAINING]
Load_train = mmScaler_load.fit_transform(Load_train)
print("Scaled load - Train: ")
display(Load_train)

Load_Lag1_train = mmScaler_load.transform(Load_Lag1_train)
print("Scaled lag 1 load - Train: ")
display(Load_Lag1_train)

Load_Lag7_train = mmScaler_load.transform(Load_Lag7_train)
print("Scaled lag 7 load - Train: ")
display(Load_Lag7_train)

Scaled load - Train: 


array([[0.311464  , 0.35276697, 0.29652515, ..., 0.3342417 , 0.45202772,
        0.25254137],
       [0.28103076, 0.31017882, 0.26693885, ..., 0.29453435, 0.41817267,
        0.22850567],
       [0.26137088, 0.28969409, 0.24300172, ..., 0.27076474, 0.39890411,
        0.20397424],
       ...,
       [0.47221242, 0.49934165, 0.42770065, ..., 0.43269948, 0.53432431,
        0.39457175],
       [0.44823102, 0.47561955, 0.41822154, ..., 0.42671711, 0.53270359,
        0.37405968],
       [0.4092156 , 0.43738511, 0.39351842, ..., 0.39892882, 0.51343502,
        0.34837604]])

Scaled lag 1 load - Train: 


Feature names unseen at fit time:
- LAG1_Capital
- LAG1_Central
- LAG1_Dunwoody
- LAG1_Genese
- LAG1_Hudson
- ...
Feature names seen at fit time, yet now missing:
- LOAD_Capital
- LOAD_Central
- LOAD_Dunwoody
- LOAD_Genese
- LOAD_Hudson
- ...



array([[-0.46811408, -0.65958071, -0.31990393, ..., -0.19960405,
        -0.15394168, -0.45379215],
       [-0.46811408, -0.65958071, -0.31990393, ..., -0.19960405,
        -0.15394168, -0.45379215],
       [-0.46811408, -0.65958071, -0.31990393, ..., -0.19960405,
        -0.15394168, -0.45379215],
       ...,
       [ 0.49400263,  0.50090452,  0.38394357, ...,  0.41148721,
         0.52387967,  0.32300055],
       [ 0.47884687,  0.49091333,  0.38279458, ...,  0.41189821,
         0.53090279,  0.32116505],
       [ 0.45711753,  0.47355433,  0.37254949, ...,  0.39963666,
         0.51397526,  0.31216171]])

Scaled lag 7 load - Train: 


Feature names unseen at fit time:
- LAG7_Capital
- LAG7_Central
- LAG7_Dunwoody
- LAG7_Genese
- LAG7_Hudson
- ...
Feature names seen at fit time, yet now missing:
- LOAD_Capital
- LOAD_Central
- LOAD_Dunwoody
- LOAD_Genese
- LOAD_Hudson
- ...



array([[-0.46811408, -0.65958071, -0.31990393, ..., -0.19960405,
        -0.15394168, -0.45379215],
       [-0.46811408, -0.65958071, -0.31990393, ..., -0.19960405,
        -0.15394168, -0.45379215],
       [-0.46811408, -0.65958071, -0.31990393, ..., -0.19960405,
        -0.15394168, -0.45379215],
       ...,
       [ 0.44117051,  0.4500555 ,  0.40280603, ...,  0.4242511 ,
         0.53162311,  0.36950441],
       [ 0.41603265,  0.42393328,  0.38442231, ...,  0.40979754,
         0.52568047,  0.34684869],
       [ 0.3852342 ,  0.40032281,  0.3654641 , ...,  0.38666727,
         0.50317046,  0.32450111]])

In [14]:
# Scaling the data using mix-max scaler [TESTING]
Load_test = mmScaler_load.transform(Load_test)
print("Scaled load - Test: ")
display(Load_test)

Load_Lag1_test = mmScaler_load.transform(Load_Lag1_test)
print("Scaled lag 1 load - Test: ")
display(Load_Lag1_test)

Load_Lag7_test = mmScaler_load.transform(Load_Lag7_test)
print("Scaled lag 7 load - Test: ")
display(Load_Lag7_test)

Scaled load - Test: 


array([[0.37446083, 0.39931811, 0.36661308, ..., 0.3713917 , 0.48966446,
        0.3208301 ],
       [0.3361758 , 0.35628342, 0.33731403, ..., 0.34026972, 0.4423034 ,
        0.29229273],
       [0.29691691, 0.31285802, 0.30408929, ..., 0.31106576, 0.41024915,
        0.2604059 ],
       ...,
       [0.34688831, 0.34768765, 0.29087599, ..., 0.33106785, 0.47147637,
        0.28152087],
       [0.31152487, 0.30839269, 0.26263017, ..., 0.30314256, 0.43401972,
        0.25307728],
       [0.2780483 , 0.27685624, 0.23524609, ..., 0.27766044, 0.40412643,
        0.2245935 ]])

Scaled lag 1 load - Test: 


Feature names unseen at fit time:
- LAG1_Capital
- LAG1_Central
- LAG1_Dunwoody
- LAG1_Genese
- LAG1_Hudson
- ...
Feature names seen at fit time, yet now missing:
- LOAD_Capital
- LOAD_Central
- LOAD_Dunwoody
- LOAD_Genese
- LOAD_Hudson
- ...



array([[0.43313613, 0.45055785, 0.36297464, ..., 0.38589093, 0.50028918,
        0.30029123],
       [0.39162518, 0.41561659, 0.34076098, ..., 0.36376533, 0.47327717,
        0.28403967],
       [0.33927999, 0.37163302, 0.3094512 , ..., 0.32988051, 0.42987788,
        0.25725741],
       ...,
       [0.39199038, 0.37487039, 0.33492031, ..., 0.36837768, 0.55755464,
        0.32241105],
       [0.32954135, 0.3087834 , 0.29192922, ..., 0.326935  , 0.49092502,
        0.28378511],
       [0.27427458, 0.26574872, 0.24788489, ..., 0.28206729, 0.4260962 ,
        0.24138098]])

Scaled lag 7 load - Test: 


Feature names unseen at fit time:
- LAG7_Capital
- LAG7_Central
- LAG7_Dunwoody
- LAG7_Genese
- LAG7_Hudson
- ...
Feature names seen at fit time, yet now missing:
- LOAD_Capital
- LOAD_Central
- LOAD_Dunwoody
- LOAD_Genese
- LOAD_Hudson
- ...



array([[0.36660905, 0.37933573, 0.3488996 , ..., 0.36890285, 0.47561821,
        0.30258226],
       [0.34938383, 0.36058131, 0.33195211, ..., 0.35396979, 0.47075605,
        0.2813735 ],
       [0.31280307, 0.32117471, 0.30236581, ..., 0.33325986, 0.45743013,
        0.25369358],
       ...,
       [0.35449662, 0.34807837, 0.30734474, ..., 0.3430554 , 0.48030029,
        0.27384392],
       [0.3256459 , 0.31096026, 0.28407784, ..., 0.32387532, 0.46103173,
        0.24889716],
       [0.28480448, 0.26809302, 0.25678951, ..., 0.2991467 , 0.4269966 ,
        0.22129763]])

In [15]:
## Transformation assigns perfectly.
Load_train[0]

array([0.311464  , 0.35276697, 0.29652515, 0.31030523, 0.32509539,
       0.3342417 , 0.45202772, 0.25254137])

In [16]:
Load_Lag1_train[24]

array([0.311464  , 0.35276697, 0.29652515, 0.31030523, 0.32509539,
       0.3342417 , 0.45202772, 0.25254137])

In [17]:
Load_Lag7_train[24*7]

array([0.311464  , 0.35276697, 0.29652515, 0.31030523, 0.32509539,
       0.3342417 , 0.45202772, 0.25254137])

In [18]:
## Transformation assigns perfectly.
Load_test[5]

array([0.19046141, 0.19977337, 0.21667087, 0.18132939, 0.21946732,
       0.22256336, 0.31426648, 0.17853177])

In [19]:
Load_Lag1_test[24+5]

array([0.19046141, 0.19977337, 0.21667087, 0.18132939, 0.21946732,
       0.22256336, 0.31426648, 0.17853177])

In [20]:
Load_Lag7_test[24*7+5]

array([0.19046141, 0.19977337, 0.21667087, 0.18132939, 0.21946732,
       0.22256336, 0.31426648, 0.17853177])

## Processing the temperature data in train test split

In [21]:
# Fetching and concatenating the data
Temp_DS = pd.concat([pd.read_csv(os.path.join(path, fileName), usecols= ["Temprature"]) for fileName in fileList], axis= 1)
Temp_DS.columns = [i.removesuffix('.csv') for i in fileList]
Temp_DS = Temp_DS.add_prefix("TEMP_")

Temp_DS

Unnamed: 0,TEMP_Capital,TEMP_Central,TEMP_Dunwoody,TEMP_Genese,TEMP_Hudson,TEMP_LongIsland,TEMP_Milwood,TEMP_NYC
0,-5.8,-5.8,-5.8,-8.1,-5.8,-5.8,-5.8,-5.8
1,-6.1,-6.1,-6.1,-8.0,-6.1,-6.1,-6.1,-6.1
2,-7.5,-7.5,-7.5,-7.9,-7.5,-7.5,-7.5,-7.5
3,-8.4,-8.4,-8.4,-7.7,-8.4,-8.4,-8.4,-8.4
4,-7.4,-7.4,-7.4,-7.7,-7.4,-7.4,-7.4,-7.4
...,...,...,...,...,...,...,...,...
43819,3.4,3.4,3.4,3.2,3.4,3.4,3.4,3.4
43820,3.3,3.3,3.3,2.6,3.3,3.3,3.3,3.3
43821,2.6,2.6,2.6,1.9,2.6,2.6,2.6,2.6
43822,2.7,2.7,2.7,0.1,2.7,2.7,2.7,2.7


In [22]:
# Scaling the temperature data for each individual region
mmScaler_temp = MinMaxScaler()

# Splitting the data into train and test
Temp_train, Temp_test = Temp_DS[:Num_train], Temp_DS[Num_train:]
print("Raw temperature - Train: ")
display(Temp_train.head(5))
print("Raw temperature - Test: ")
display(Temp_test.head(5))
print("\n")

# Scaling the data using mix-max scaler
Temp_train = mmScaler_temp.fit_transform(Temp_train)
print("Scaled temperature - Train: ")
display(Temp_train)

Temp_test = mmScaler_temp.transform(Temp_test)
print("Scaled temperature - Test: ")
display(Temp_test)

Raw temperature - Train: 


Unnamed: 0,TEMP_Capital,TEMP_Central,TEMP_Dunwoody,TEMP_Genese,TEMP_Hudson,TEMP_LongIsland,TEMP_Milwood,TEMP_NYC
0,-5.8,-5.8,-5.8,-8.1,-5.8,-5.8,-5.8,-5.8
1,-6.1,-6.1,-6.1,-8.0,-6.1,-6.1,-6.1,-6.1
2,-7.5,-7.5,-7.5,-7.9,-7.5,-7.5,-7.5,-7.5
3,-8.4,-8.4,-8.4,-7.7,-8.4,-8.4,-8.4,-8.4
4,-7.4,-7.4,-7.4,-7.7,-7.4,-7.4,-7.4,-7.4


Raw temperature - Test: 


Unnamed: 0,TEMP_Capital,TEMP_Central,TEMP_Dunwoody,TEMP_Genese,TEMP_Hudson,TEMP_LongIsland,TEMP_Milwood,TEMP_NYC
35060,4.9,4.9,4.9,2.2,4.9,4.9,4.9,4.9
35061,4.7,4.7,4.7,2.1,4.7,4.7,4.7,4.7
35062,3.8,3.8,3.8,2.5,3.8,3.8,3.8,3.8
35063,3.5,3.5,3.5,2.6,3.5,3.5,3.5,3.5
35064,3.5,3.5,3.5,2.8,3.5,3.5,3.5,3.5




Scaled temperature - Train: 


array([[0.34471545, 0.34471545, 0.34471545, ..., 0.34471545, 0.34471545,
        0.34471545],
       [0.3398374 , 0.3398374 , 0.3398374 , ..., 0.3398374 , 0.3398374 ,
        0.3398374 ],
       [0.31707317, 0.31707317, 0.31707317, ..., 0.31707317, 0.31707317,
        0.31707317],
       ...,
       [0.50406504, 0.50406504, 0.50406504, ..., 0.50406504, 0.50406504,
        0.50406504],
       [0.51219512, 0.51219512, 0.51219512, ..., 0.51219512, 0.51219512,
        0.51219512],
       [0.51544715, 0.51544715, 0.51544715, ..., 0.51544715, 0.51544715,
        0.51544715]])

Scaled temperature - Test: 


array([[0.51869919, 0.51869919, 0.51869919, ..., 0.51869919, 0.51869919,
        0.51869919],
       [0.51544715, 0.51544715, 0.51544715, ..., 0.51544715, 0.51544715,
        0.51544715],
       [0.50081301, 0.50081301, 0.50081301, ..., 0.50081301, 0.50081301,
        0.50081301],
       ...,
       [0.48130081, 0.48130081, 0.48130081, ..., 0.48130081, 0.48130081,
        0.48130081],
       [0.48292683, 0.48292683, 0.48292683, ..., 0.48292683, 0.48292683,
        0.48292683],
       [0.48780488, 0.48780488, 0.48780488, ..., 0.48780488, 0.48780488,
        0.48780488]])

# Preparing the training and testing split for model input

In [23]:
X_train = np.array([[[i, j, k] for i,j,k in zip(Temp_train[m], Load_Lag1_train[m], Load_Lag7_train[m])] for m in range(len(Temp_train))])
print(X_train.shape)
print(X_train[0])

(35060, 8, 3)
[[ 0.34471545 -0.46811408 -0.46811408]
 [ 0.34471545 -0.65958071 -0.65958071]
 [ 0.34471545 -0.31990393 -0.31990393]
 [ 0.33873582 -0.43084369 -0.43084369]
 [ 0.34471545 -0.32658229 -0.32658229]
 [ 0.34471545 -0.19960405 -0.19960405]
 [ 0.34471545 -0.15394168 -0.15394168]
 [ 0.34471545 -0.45379215 -0.45379215]]


In [24]:
X_test = np.array([[[i, j, k] for i,j,k in zip(Temp_test[m], Load_Lag1_test[m], Load_Lag7_test[m])] for m in range(len(Temp_test))])
print(X_test.shape)
print(X_test[0])

(8764, 8, 3)
[[0.51869919 0.43313613 0.36660905]
 [0.51869919 0.45055785 0.37933573]
 [0.51869919 0.36297464 0.3488996 ]
 [0.50567261 0.40565066 0.36746983]
 [0.51869919 0.40641804 0.37583829]
 [0.51869919 0.38589093 0.36890285]
 [0.51869919 0.50028918 0.47561821]
 [0.51869919 0.30029123 0.30258226]]


In [25]:
# Setting up the batch and node parameters
num_batch, num_node = Load_DS.shape

number_feat = X_train.shape[2]

print("No. of batches: ", num_batch)
print("No. of nodes: " , num_node)
print("No. of features: ", number_feat)

No. of batches:  43824
No. of nodes:  8
No. of features:  3


In [26]:
X_train, X_test = Variable(torch.FloatTensor(X_train)), torch.FloatTensor(X_test)
Y_train, Y_test = Variable(torch.FloatTensor(Load_train)), Load_test

## DEFINING THE MODEL

In [27]:
adj = np.ones((num_node, num_node))/num_node
adj = Variable(torch.FloatTensor(adj))
adj

tensor([[0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250],
        [0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250],
        [0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250],
        [0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250],
        [0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250],
        [0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250],
        [0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250],
        [0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250, 0.1250]])

In [28]:
parser = argparse.ArgumentParser()
parser.add_argument('--no-cuda', action='store_true', default=False, help='Disables CUDA training.')
parser.add_argument('--seed', type=int, default=20, help='Random seed.')
parser.add_argument('--epochs', type=int, default=10000, help='Number of epochs to train.')
parser.add_argument('--lr', type=float, default=0.0001, help='Initial learning rate.')
parser.add_argument('--weight_decay', type=float, default=5e-4, help='Weight decay (L2 loss on parameters).')
parser.add_argument('--hidden', type=int, default=12, help='Number of hidden units.')
parser.add_argument('--nb_heads', type=int, default=3, help='Number of head attentions.')
parser.add_argument('--dropout', type=float, default=0., help='Dropout rate (1 - keep probability).')
parser.add_argument('--alpha', type=float, default=0.2, help='Alpha for the leaky_relu.')
parser.add_argument('--batch_size', type=float, default=Num_train, help='batch size')

parser.add_argument("-f", "--fff", help="a dummy argument to fool ipython", default="1")

args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()

In [29]:
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)

<torch._C.Generator at 0x2bfe0a0d8b0>

In [30]:
model = GAT(nfeat=number_feat, 
            nhid=args.hidden, 
            nclass= num_node, #1
            dropout=args.dropout, 
            nheads=args.nb_heads,
            alpha=args.alpha)

In [31]:
optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)

In [33]:
if args.cuda:
    torch.cuda.manual_seed(args.seed)
    X_train = X_train.to(device)
    adj = adj.to(device)
    Y_train = Y_train.to(device)
    model.to(device)
    print("Yes")

In [34]:
Epochs = 150

dataset = TensorDataset(X_train, Y_train)    
loader = DataLoader(dataset, batch_size = 64, shuffle=False) 

In [35]:
model.train()

for epoch in range(Epochs):
    loss = 0
    for step, (x, y) in enumerate(loader):
        optimizer.zero_grad()
        if args.cuda:
            x = x.cuda()
            y = y.cuda()
            adj = adj.cuda()
        loss_train = 0
        for i in range(x.size(0)):
            output = model(x[i], adj)
            loss_train += F.l1_loss(output[0][0], y[i,0]) + F.l1_loss(output[1][0], y[i,1]) + F.l1_loss(output[2][0], y[i,2]) + F.l1_loss(output[3][0], y[i,3]) + F.l1_loss(output[4][0], y[i,4]) + F.l1_loss(output[5][0], y[i,5]) + F.l1_loss(output[6][0], y[i,6]) + F.l1_loss(output[7][0], y[i,7]) 
        loss += loss_train/i
        loss_train.backward()
        optimizer.step()
    loss = loss/step
    print( "Epoch {}: the train loss = {:.4f}".format(epoch+1, loss))

Epoch 1: the train loss = 2.5757
Epoch 2: the train loss = 1.2889
Epoch 3: the train loss = 0.8237
Epoch 4: the train loss = 0.6993
Epoch 5: the train loss = 0.6492
Epoch 6: the train loss = 0.6222
Epoch 7: the train loss = 0.6049
Epoch 8: the train loss = 0.5913
Epoch 9: the train loss = 0.5797
Epoch 10: the train loss = 0.5693
Epoch 11: the train loss = 0.5600
Epoch 12: the train loss = 0.5516
Epoch 13: the train loss = 0.5440
Epoch 14: the train loss = 0.5371
Epoch 15: the train loss = 0.5307
Epoch 16: the train loss = 0.5250
Epoch 17: the train loss = 0.5197
Epoch 18: the train loss = 0.5148
Epoch 19: the train loss = 0.5104
Epoch 20: the train loss = 0.5063
Epoch 21: the train loss = 0.5025
Epoch 22: the train loss = 0.4989
Epoch 23: the train loss = 0.4957
Epoch 24: the train loss = 0.4926
Epoch 25: the train loss = 0.4897
Epoch 26: the train loss = 0.4870
Epoch 27: the train loss = 0.4845
Epoch 28: the train loss = 0.4821
Epoch 29: the train loss = 0.4798
Epoch 30: the train los

## MAKING PREDICTIONS

In [36]:
predictions = []

# Switching to eval mode
model.eval()

with torch.no_grad():
    for i in range(X_test.size(0)):
        p = model(X_test[i], adj)
        predictions.append(torch.cat([p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7]], dim=-1).tolist())

predictions = np.array(predictions)

In [37]:
predictions

array([[0.42611918, 0.43221018, 0.3452087 , ..., 0.35635963, 0.45229053,
        0.35521784],
       [0.40019122, 0.40610167, 0.32072282, ..., 0.33343276, 0.4288066 ,
        0.32878712],
       [0.36232552, 0.36987269, 0.28459451, ..., 0.29893437, 0.39619374,
        0.28845581],
       ...,
       [0.40599144, 0.41913471, 0.32238454, ..., 0.33288863, 0.43881303,
        0.32691672],
       [0.35847518, 0.37069401, 0.27774867, ..., 0.29124638, 0.39525068,
        0.279183  ],
       [0.31161651, 0.3213309 , 0.2365512 , ..., 0.25273535, 0.3531996 ,
        0.23468503]])

In [38]:
inversed_predictions = mmScaler_load.inverse_transform(predictions)
inversed_predictions

array([[1469.17156786, 1956.02871167,  694.64521891, ..., 2434.86642936,
         336.64594145, 6038.36653008],
       [1426.57348462, 1909.25328738,  669.07210113, ..., 2334.45724746,
         323.60511275, 5841.09033648],
       [1364.36241536, 1844.34622993,  631.33960564, ..., 2183.37007664,
         305.49491201, 5540.06167254],
       ...,
       [1436.1028936 , 1932.60298357,  670.80760326, ..., 2332.07420364,
         329.16176784, 5827.12991181],
       [1358.03653649, 1845.81769284,  624.18980252, ..., 2149.70021786,
         304.97122011, 5470.85035363],
       [1281.05055739, 1757.3798295 ,  581.16305754, ..., 1981.03950199,
         281.61989676, 5138.72215217]])

In [39]:
inversed_Y_test = mmScaler_load.inverse_transform(Y_test)
inversed_Y_test

array([[1384.3, 1897.1,  717. , ..., 2500.7,  357.4, 5781.7],
       [1321.4, 1820. ,  686.4, ..., 2364.4,  331.1, 5568.7],
       [1256.9, 1742.2,  651.7, ..., 2236.5,  313.3, 5330.7],
       ...,
       [1339. , 1804.6,  637.9, ..., 2324.1,  347.3, 5488.3],
       [1280.9, 1734.2,  608.4, ..., 2201.8,  326.5, 5276. ],
       [1225.9, 1677.7,  579.8, ..., 2090.2,  309.9, 5063.4]])

In [40]:
MAPE1 = mape(inversed_Y_test[:, 0].reshape(-1), inversed_predictions[:, 0].reshape(-1))
MAPE2 = mape(inversed_Y_test[:, 1].reshape(-1), inversed_predictions[:, 1].reshape(-1))
MAPE3 = mape(inversed_Y_test[:, 2].reshape(-1), inversed_predictions[:, 2].reshape(-1))
MAPE4 = mape(inversed_Y_test[:, 3].reshape(-1), inversed_predictions[:, 3].reshape(-1))
MAPE5 = mape(inversed_Y_test[:, 4].reshape(-1), inversed_predictions[:, 4].reshape(-1))
MAPE6 = mape(inversed_Y_test[:, 5].reshape(-1), inversed_predictions[:, 5].reshape(-1))
MAPE7 = mape(inversed_Y_test[:, 6].reshape(-1), inversed_predictions[:, 6].reshape(-1))
MAPE8 = mape(inversed_Y_test[:, 7].reshape(-1), inversed_predictions[:, 7].reshape(-1))

print( "The test mape is {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}, {:.3f}".format(MAPE1, MAPE2, MAPE3, MAPE4, MAPE5, MAPE6, MAPE7, MAPE8))

The test mape is 3.436, 4.248, 3.733, 4.378, 3.973, 3.884, 4.889, 3.942


#### AVERAGE MAPE

In [41]:
meanMAPE = np.mean([MAPE1, MAPE2, MAPE3, MAPE4, MAPE5, MAPE6, MAPE7, MAPE8])
meanMAPE

4.060341307854638

#### WEIGHTED MAPE

In [42]:
# Scaled MAPE with max values
weightMax = pd.Series(Load_DS.max() / sum(Load_DS.max())).reset_index(drop= True)
print(weightMax)
print("\n")
mapeMetric = pd.Series([MAPE1, MAPE2, MAPE3, MAPE4, MAPE5, MAPE6, MAPE7, MAPE8])
print(mapeMetric)
print("\n")

weightedMAPE = weightMax.multiply(mapeMetric)
weightedMAPE = sum(weightedMAPE)
print("Weighted using the max load of a region: ", weightedMAPE)

0    0.087018
1    0.107266
2    0.049732
3    0.072602
4    0.078565
5    0.190233
6    0.023118
7    0.391466
dtype: float64


0    3.435842
1    4.247656
2    3.733016
3    4.378423
4    3.972623
5    3.883834
6    4.889228
7    3.942108
dtype: float64


Weighted using the max load of a region:  3.965314470811466


In [43]:
# Scaled MAPE with average values
weightMax = pd.Series(Load_DS.mean() / sum(Load_DS.mean())).reset_index(drop= True)
print(weightMax)
print("\n")
mapeMetric = pd.Series([MAPE1, MAPE2, MAPE3, MAPE4, MAPE5, MAPE6, MAPE7, MAPE8])
print(mapeMetric)
print("\n")

weightedMAPE = weightMax.multiply(mapeMetric)
weightedMAPE = sum(weightedMAPE)
print("Weighted using the mean load of a region: ", weightedMAPE)

0    0.094499
1    0.124278
2    0.046050
3    0.075759
4    0.074729
5    0.159653
6    0.021565
7    0.403467
dtype: float64


0    3.435842
1    4.247656
2    3.733016
3    4.378423
4    3.972623
5    3.883834
6    4.889228
7    3.942108
dtype: float64


Weighted using the mean load of a region:  3.9690672720529707
