In [1]:
!pip install pytorch_lightning



In [2]:
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
import math
import matplotlib

import pandas as pd
import numpy as np
from tqdm.notebook import tqdm
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.preprocessing import MinMaxScaler

import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.utils.data import Dataset, DataLoader

from collections import defaultdict



In [3]:
%matplotlib inline
%config InlineBackend.figure_format='retina'

sns.set(style='whitegrid', palette='muted', font_scale=1.5)

HAPPY_COLORS_PALETTE = ["#01BEFE", "#FFDD00", "#FF7D00", "#FF006D", "#ADFF02", "#8F00FF"]

sns.set_palette(sns.color_palette(HAPPY_COLORS_PALETTE))

rcParams['figure.figsize'] = 12, 8

tqdm.pandas()

In [4]:
# Fix the global random seed through Lightning's helper; the call returns the seed it set.
pl.seed_everything(42)

INFO:lightning_fabric.utilities.seed:Global seed set to 42


42

In [9]:
# Colab-only: open the browser file picker and keep the uploaded files in memory.
# `uploaded` is indexed by file name in the next cell to obtain the raw bytes.
from google.colab import files

uploaded = files.upload()

Saving load_weather_programs_df.csv to load_weather_programs_df.csv


In [10]:
import io

# Wrap the uploaded bytes in a file-like object so pandas can parse them,
# converting `intervalStart` to datetimes while reading.
raw_csv = uploaded['load_weather_programs_df.csv']
df = pd.read_csv(io.BytesIO(raw_csv), parse_dates=['intervalStart'])
df.head()

Unnamed: 0.1,Unnamed: 0,intervalStart,Connexus_kWh,unixTime,latitude,longitude,station,apparentTemperature,cloudCover,dewPoint,...,windGust,windSpeed,DVR_duration_mins,CampusGen_duration_mins,CIGen_duration_mins,ACST_duration_mins,PTR_duration_mins,Interruptible Irrigation_duration_mins,Cycled Air Conditioning_duration_mins,Interruptible Water Heating_duration_mins
0,0,2017-01-01 01:00:00,194634.4219,1483254000.0,45.395556,-93.386667,0.0,18.93,0.39,19.23,...,10.51,5.48,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,2017-01-01 02:00:00,185003.5234,1483258000.0,45.395556,-93.386667,0.0,20.16,0.0,18.98,...,8.88,4.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,2017-01-01 03:00:00,179523.5938,1483261000.0,45.395556,-93.386667,0.0,20.4,0.04,18.58,...,6.34,3.59,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3,2017-01-01 04:00:00,178223.6797,1483265000.0,45.395556,-93.386667,0.0,24.46,0.2,18.62,...,5.83,2.88,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4,2017-01-01 05:00:00,180018.2031,1483268000.0,45.395556,-93.386667,0.0,23.61,0.11,18.69,...,3.13,1.66,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
# (rows, columns) of the freshly loaded frame.
df.shape

(58634, 33)

### Preprocessing

In [12]:
# Previous row's load reading, aligned to the current row (NaN for the first row).
# Only the needed column is shifted; shifting the whole frame just to pick one
# column afterwards copies every other column for nothing.
df['prev_Connexus_kWh'] = df['Connexus_kWh'].shift(1)

In [13]:
# Change in load relative to the previous row, computed column-wise instead of
# calling a Python lambda once per row.
# Semantics match the row-wise version: where the previous reading is missing
# (e.g. the first row) the change is 0; otherwise it is current minus previous.
df['Connexus_kWh_change'] = (
    (df['Connexus_kWh'] - df['prev_Connexus_kWh'])
    .where(df['prev_Connexus_kWh'].notna(), 0)
)

  0%|          | 0/58634 [00:00<?, ?it/s]

In [15]:
# Preview the frame again, now including the two derived load columns.
df.head()

Unnamed: 0.1,Unnamed: 0,intervalStart,Connexus_kWh,unixTime,latitude,longitude,station,apparentTemperature,cloudCover,dewPoint,...,DVR_duration_mins,CampusGen_duration_mins,CIGen_duration_mins,ACST_duration_mins,PTR_duration_mins,Interruptible Irrigation_duration_mins,Cycled Air Conditioning_duration_mins,Interruptible Water Heating_duration_mins,prev_Connexus_kWh,Connexus_kWh_change
0,0,2017-01-01 01:00:00,194634.4219,1483254000.0,45.395556,-93.386667,0.0,18.93,0.39,19.23,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0
1,1,2017-01-01 02:00:00,185003.5234,1483258000.0,45.395556,-93.386667,0.0,20.16,0.0,18.98,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,194634.4219,-9630.8985
2,2,2017-01-01 03:00:00,179523.5938,1483261000.0,45.395556,-93.386667,0.0,20.4,0.04,18.58,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,185003.5234,-5479.9296
3,3,2017-01-01 04:00:00,178223.6797,1483265000.0,45.395556,-93.386667,0.0,24.46,0.2,18.62,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,179523.5938,-1299.9141
4,4,2017-01-01 05:00:00,180018.2031,1483268000.0,45.395556,-93.386667,0.0,23.61,0.11,18.69,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,178223.6797,1794.5234


In [16]:
# List every column name currently present in the frame.
df.columns

Index(['Unnamed: 0', 'intervalStart', 'Connexus_kWh', 'unixTime', 'latitude',
       'longitude', 'station', 'apparentTemperature', 'cloudCover', 'dewPoint',
       'humidity', 'icon', 'precipAccumulation', 'precipIntensity',
       'precipProbability', 'precipType', 'pressure', 'snowAccumulation',
       'snowIntensity', 'temperature', 'uvIndex', 'visibility', 'windBearing',
       'windGust', 'windSpeed', 'DVR_duration_mins', 'CampusGen_duration_mins',
       'CIGen_duration_mins', 'ACST_duration_mins', 'PTR_duration_mins',
       'Interruptible Irrigation_duration_mins',
       'Cycled Air Conditioning_duration_mins',
       'Interruptible Water Heating_duration_mins', 'prev_Connexus_kWh',
       'Connexus_kWh_change'],
      dtype='object')

In [26]:
# Build the feature table: every column of `df` is carried over unchanged except
# `intervalStart`, which is replaced, at the same column position, by four
# calendar features derived from it.
# Vectorized `.dt` accessors replace the per-row iterrows() loop.
timestamps = df['intervalStart']

iso_week = timestamps.dt.isocalendar().week
# isocalendar() yields pandas' nullable UInt32 dtype; convert it to a plain
# NumPy dtype (float only when a timestamp is missing, so NaN can be stored).
iso_week = iso_week.astype('float64' if iso_week.isna().any() else 'int64')

calendar_df = pd.DataFrame({
    'day_of_week': timestamps.dt.dayofweek,
    'day_of_month': timestamps.dt.day,
    'week_of_year': iso_week,
    'month': timestamps.dt.month,
})

# Splice the calendar columns in where `intervalStart` used to be, so the
# resulting column order is the same as the one the row loop produced.
ts_position = df.columns.get_loc('intervalStart')
features_df = pd.concat(
    [df.iloc[:, :ts_position], calendar_df, df.iloc[:, ts_position + 1:]],
    axis=1,
).reset_index(drop=True)  # fresh 0..n-1 index, as a frame built from a list of dicts has
features_df.head()

  0%|          | 0/58634 [00:00<?, ?it/s]

Unnamed: 0.1,Unnamed: 0,day_of_week,day_of_month,week_of_year,month,Connexus_kWh,unixTime,latitude,longitude,station,...,DVR_duration_mins,CampusGen_duration_mins,CIGen_duration_mins,ACST_duration_mins,PTR_duration_mins,Interruptible Irrigation_duration_mins,Cycled Air Conditioning_duration_mins,Interruptible Water Heating_duration_mins,prev_Connexus_kWh,Connexus_kWh_change
0,0,6,1,52,1,194634.4219,1483254000.0,45.395556,-93.386667,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0
1,1,6,1,52,1,185003.5234,1483258000.0,45.395556,-93.386667,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,194634.4219,-9630.8985
2,2,6,1,52,1,179523.5938,1483261000.0,45.395556,-93.386667,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,185003.5234,-5479.9296
3,3,6,1,52,1,178223.6797,1483265000.0,45.395556,-93.386667,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,179523.5938,-1299.9141
4,4,6,1,52,1,180018.2031,1483268000.0,45.395556,-93.386667,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,178223.6797,1794.5234


In [42]:
# Remove the 'Unnamed: 0' column (it appears to be a leftover row counter).
# errors='ignore' makes the cell safe to re-run: without it, a second execution
# raises KeyError because the column has already been dropped.
features_df = features_df.drop(columns=['Unnamed: 0'], errors='ignore')
features_df.shape

(58634, 37)

In [43]:
# Number of leading rows assigned to the training split (90% of all rows, truncated).
train_size = int(0.9 * len(features_df))
train_size

52770

In [44]:
# Positional split that preserves row order: first `train_size` rows for
# training, the remainder for testing.
train_df = features_df.iloc[:train_size]
test_df = features_df.iloc[train_size:]
train_df.shape, test_df.shape


((52770, 37), (5864, 37))

In [45]:
# Fit the [-1, 1] min-max scaler on the training rows only; the test rows are
# transformed later with these same fitted ranges.
scaler = MinMaxScaler(feature_range=(-1, 1)).fit(train_df)

In [46]:
# Apply the fitted scaler to both splits. scaler.transform returns a bare array,
# so each result is wrapped back into a DataFrame with the original columns and index.
train_df, test_df = (
    pd.DataFrame(scaler.transform(frame), columns=frame.columns, index=frame.index)
    for frame in (train_df, test_df)
)

In [47]:
# Preview the scaled training frame.
train_df.head()

Unnamed: 0,day_of_week,day_of_month,week_of_year,month,Connexus_kWh,unixTime,latitude,longitude,station,apparentTemperature,...,DVR_duration_mins,CampusGen_duration_mins,CIGen_duration_mins,ACST_duration_mins,PTR_duration_mins,Interruptible Irrigation_duration_mins,Cycled Air Conditioning_duration_mins,Interruptible Water Heating_duration_mins,prev_Connexus_kWh,Connexus_kWh_change
0,1.0,-1.0,0.961538,-1.0,-0.686765,-1.0,-1.0,-1.0,-1.0,-0.110541,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,,0.20137
1,1.0,-1.0,0.961538,-1.0,-0.733464,-0.999962,-1.0,-1.0,-1.0,-0.094758,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-0.686765,0.06801
2,1.0,-1.0,0.961538,-1.0,-0.760036,-0.999925,-1.0,-1.0,-1.0,-0.091679,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-0.733464,0.125489
3,1.0,-1.0,0.961538,-1.0,-0.766339,-0.999887,-1.0,-1.0,-1.0,-0.039584,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-0.760036,0.18337
4,1.0,-1.0,0.961538,-1.0,-0.757638,-0.999849,-1.0,-1.0,-1.0,-0.050491,...,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-0.766339,0.226219


In [None]:
# Summary statistics restricted to object-dtype columns of the raw frame.
# NOTE(review): this cell has no execution count or output — confirm it is still needed.
df.describe(include=object)


In [None]:
# NOTE(review): repeats the 90% split-size computation done earlier on
# `features_df`, this time on the raw `df`. The cell has no execution count —
# it looks like a leftover; confirm whether it should be removed.
train_size = int(len(df)*0.9)
train_size

In [None]:
# NOTE(review): if run, this rebinds `train_df` / `test_df` to unscaled slices of
# the raw `df`, replacing the scaled feature frames assigned earlier in the
# notebook. The cell has no execution count — it looks like a leftover; confirm.
train_df, test_df = df[:train_size], df[train_size:]
train_df.shape, test_df.shape