In [1]:
import pandas as pd
import numpy as np
import matplotlib as plt
import collections
from pathlib import Path
from tqdm import tqdm



import math
%pylab inline
%reload_ext autoreload
%autoreload 2

Populating the interactive namespace from numpy and matplotlib
`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"


In [2]:
import logging
import sys  # needed for sys.stdout below; previously only available via %pylab's star-imports

# Route log records to stdout so they show up in the cell output, at INFO level.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
# Notebook-wide logger, used by the training loop to report skipped batches.
logger = logging.getLogger("smartmeters.ipynb")

In [3]:
import torch
from torch import nn
import torch.nn.functional as F

In [4]:
from src.models.model import LatentModel
from src.data.smart_meter import collate_fns, SmartMeterDataSet

In [5]:
# Params
# Use the GPU when one is visible to torch; otherwise fall back to the CPU so
# the notebook still runs (a hard-coded 'cuda' fails at `.to(device)`).
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Number of series per batch, used by both data loaders.
batch_size = 16

In [20]:
# Load the first two half-hourly CSV files (sorted by path) into one frame.
data_dir = Path('data/smart-meters-in-london/halfhourly_dataset')
csv_files = sorted(data_dir.glob('*.csv'))[:2]
if not csv_files:
    # pd.concat([]) would raise an opaque "No objects to concatenate" error.
    raise FileNotFoundError(f"no CSV files found in {data_dir}")
# parse_dates=[1] parses the second column as datetimes; the literal string
# 'Null' is read as NaN so the energy column is numeric.
df = pd.concat([pd.read_csv(f, parse_dates=[1], na_values=['Null']) for f in tqdm(csv_files)])
# DataFrame.info() prints its report itself and returns None, so it is not
# wrapped in print() (that only added a stray "None" to the output).
df.info()

df.head(3)

100%|██████████| 2/2 [00:04<00:00,  1.96s/it]<class 'pandas.core.frame.DataFrame'>
Int64Index: 2738535 entries, 0 to 1515864
Data columns (total 3 columns):
LCLid             object
tstp              datetime64[ns]
energy(kWh/hh)    float64
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 83.6+ MB
None



Unnamed: 0,LCLid,tstp,energy(kWh/hh)
0,MAC000002,2012-10-12 00:30:00,0.0
1,MAC000002,2012-10-12 01:00:00,0.0
2,MAC000002,2012-10-12 01:30:00,0.0


In [21]:
# Keep only timestamp + energy, drop missing readings, log-transform the
# energy, then sort by time and keep the first row for each timestamp.
eps = 1e-4  # added before the log so that zero readings stay finite
energy_col = 'energy(kWh/hh)'
df = (
    df[['tstp', energy_col]]
    .dropna()
    .assign(**{energy_col: lambda frame: np.log(frame[energy_col] + eps)})
    .sort_values('tstp')
    .drop_duplicates(subset=['tstp'])
)

df.describe()

Unnamed: 0,energy(kWh/hh)
count,39247.0
mean,-1.510907
std,1.127931
min,-9.21034
25%,-2.291645
50%,-1.569737
75%,-0.737935
max,1.948493


In [23]:
# Hold out the last 10% of rows as the test set (df is sorted by 'tstp').
n_test = int(len(df) * 0.1)
n_split = -n_test  # original negative split index, kept for any cell that still reads it
# Slice with a non-negative position: a negative index of 0 (fewer than 10
# rows) would make `df[:-0]` an empty training set instead of the full frame.
split_at = len(df) - n_test
df_train = df.iloc[:split_at]
df_test = df.iloc[split_at:]
len(df_train), len(df_test)

(35323, 3924)

In [24]:
def plot_functions(target_x, target_y, context_x, context_y, pred_y, std):
    """Plot the predicted mean and +/- one std band, the targets and the context points.

    Only the first element of the batch (index 0) is drawn. The figure is
    shown with ``plt.show()``; nothing is returned.

    Args:
        target_x: An array of shape [B,num_targets,1] that contains the
            x values of the target points.
        target_y: An array of shape [B,num_targets,1] that contains the
            y values of the target points.
        context_x: An array of shape [B,num_contexts,1] that contains
            the x values of the context points.
        context_y: An array of shape [B,num_contexts,1] that contains
            the y values of the context points.
        pred_y: An array of shape [B,num_targets,1] that contains the
            predicted means of the y values at the target points in target_x.
        std: An array of shape [B,num_targets,1] that contains the
            predicted std dev of the y values at the target points in target_x.
    """
    # Imported locally so this helper does not depend on the notebook-level
    # `plt` alias, which the import cell binds to the top-level `matplotlib`
    # package rather than to pyplot.
    import matplotlib.pyplot as plt

    # Plot everything: prediction (blue), ground truth (dotted black),
    # context points (black dots) and the one-std uncertainty band.
    plt.plot(target_x[0], pred_y[0], 'b', linewidth=2)
    plt.plot(target_x[0], target_y[0], 'k:', linewidth=2)
    plt.plot(context_x[0], context_y[0], 'ko', markersize=10)
    plt.fill_between(
        target_x[0, :, 0],
        pred_y[0, :, 0] - std[0, :, 0],
        pred_y[0, :, 0] + std[0, :, 0],
        alpha=0.5,
        facecolor='#65c9f7',
        interpolate=True)

    # Make the plot pretty
    plt.ylim([context_y.min() - 2, context_y.max() + 2])
    # Pass a real boolean: the string 'off' is truthy, so on current
    # matplotlib it switches the grid ON instead of off.
    plt.grid(False)
    plt.show()

In [25]:
# Sizes handed to SmartMeterDataSet here and to collate_fns in the loader cell.
num_context = 40
num_extra_target = 10
# One dataset per split, built with identical settings.
data_train, data_test = (
    SmartMeterDataSet(frame, num_context, num_extra_target)
    for frame in (df_train, df_test)
)
# data_train[0]

In [26]:
# Training loader: shuffled batches, collate function built with sample=True.
# NOTE(review): `sample` presumably toggles random sub-sampling of points per
# batch -- confirm in src.data.smart_meter.collate_fns.
loader_train = torch.utils.data.DataLoader(
    dataset=data_train,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collate_fns(num_context, num_extra_target, sample=True),
)

# Test loader: fixed order, collate function built with sample=False.
loader_test = torch.utils.data.DataLoader(
    dataset=data_test,
    batch_size=batch_size,
    shuffle=False,
    collate_fn=collate_fns(num_context, num_extra_target, sample=False),
)

In [27]:
# Draw a single training batch; the first two of its four tensors are kept as
# x and y (the training loop unpacks the same positions as context_x/context_y).
sample_batch = next(iter(loader_train))
x, y, _, _ = sample_batch
x.shape, y.shape, x[0], y[0]

(torch.Size([16, 19, 1]), torch.Size([16, 19, 1]), tensor([[0.0000],
         [0.0417],
         [0.0625],
         [0.1458],
         [0.1875],
         [0.2083],
         [0.2708],
         [0.2917],
         [0.3125],
         [0.4167],
         [0.4375],
         [0.4792],
         [0.6250],
         [0.6458],
         [0.6875],
         [0.7083],
         [0.7292],
         [0.7500],
         [0.7917]]), tensor([[-1.8382],
         [-1.8382],
         [-0.9111],
         [-0.1472],
         [-2.4068],
         [-1.7773],
         [-3.0769],
         [-0.6990],
         [-2.6023],
         [-1.2655],
         [-1.3899],
         [-2.5757],
         [-1.9944],
         [-2.9169],
         [-1.4435],
         [-2.0707],
         [-2.1707],
         [-2.2818],
         [-2.0318]]))

In [29]:
# Build the LatentModel; input/output widths come from the last dimension of
# the sample batch tensors x and y.
model_kwargs = dict(
    x_dim=x.shape[-1],
    y_dim=y.shape[-1],
    hidden_dim=32,
    latent_dim=32,
    dropout=0,
    num_heads=4,
    n_latent_encoder_layers=3,
    n_det_encoder_layers=3,
    n_decoder_layers=3,
    latent_enc_self_attn_type="multihead",
    det_enc_self_attn_type="multihead",
    det_enc_cross_attn_type="multihead",
)
model = LatentModel(**model_kwargs).to(device)

In [30]:
# AdamW over all model parameters with a learning rate of 1e-4.
optim = torch.optim.AdamW(params=model.parameters(), lr=1e-4)

In [31]:
# Train for `epochs` passes over loader_train; after each pass, run the model
# over loader_test in eval mode and plot the first series of every test batch.
epochs = 100
grad_clip = 10  # max gradient norm passed to clip_grad_norm_
for epoch in tqdm(range(epochs), desc='epochs'):
    model.train()
    for i, data in enumerate(tqdm(loader_train, mininterval=60, desc='Training')):
        # Fail fast on NaN/inf inputs before they reach the model.
        assert all(torch.isfinite(d).all() for d in data)
        data = [d.to(device) for d in data]
        context_x, context_y, target_x, target_y = data

        optim.zero_grad()
        y_pred, kl, loss, y_std = model(context_x, context_y, target_x, target_y)
        assert torch.isfinite(y_pred.sum())
        if not torch.isfinite(loss):
            # Skip the batch; gradients are reset by zero_grad() next iteration.
            logger.error("loss is not finite")
            continue
        loss.backward()
        # clip_grad_norm_ can return a tensor on `device`; np.isfinite cannot
        # convert a CUDA tensor, so take a plain Python float first.
        grad_norm = float(torch.nn.utils.clip_grad_norm_(model.parameters(), grad_clip))
        if not np.isfinite(grad_norm):
            logger.error("grad_norm is not finite")
            continue
        optim.step()

        if i % 100 == 0:
            print(f"train: {epoch}, {i}/{len(loader_train)}, loss: {loss.item():4.4g}, grad_norm: {grad_norm: 2.2g}")

    # Switch to eval mode and disable autograd once for the whole test pass
    # instead of re-entering both for every batch.
    model.eval()
    with torch.no_grad():
        for i, data in enumerate(tqdm(loader_test, mininterval=60, desc='Testing')):
            data = [d.to(device) for d in data]
            context_x, context_y, target_x, target_y = data

            # NOTE(review): the model is called without target_y here; whether
            # it still returns a usable loss depends on LatentModel.forward,
            # which is not visible in this notebook -- confirm.
            y_pred, kl, loss_test, y_std = model(context_x, context_y, target_x)

            # One figure per test batch (plot_functions draws batch index 0).
            plt.title(f"epoch {epoch}")
            plot_functions(target_x.detach().cpu().numpy(),
                           target_y.detach().cpu().numpy(),
                           context_x.detach().cpu().numpy(),
                           context_y.detach().cpu().numpy(),
                           y_pred.detach().cpu().numpy(),
                           y_std.detach().cpu().numpy())
    # Reports the losses of the last training batch and the last test batch.
    print(f"epoch: {epoch}, loss: {loss.item():4.4g}, loss_test:{loss_test.item():4.4g}")

epochs:   0%|          | 0/100 [00:00<?, ?it/s]
Training:   0%|          | 0/2205 [00:00<?, ?it/s][Atrain: 0, 0/2205, loss: 50.16, grad_norm:  2.6e+02
train: 0, 100/2205, loss: 33.41, grad_norm:  2.4e+02
train: 0, 200/2205, loss: 21.67, grad_norm:  1.4e+02
train: 0, 300/2205, loss: 13.5, grad_norm:  39

Training:  17%|█▋        | 376/2205 [01:00<04:52,  6.25it/s][Atrain: 0, 400/2205, loss: 10.09, grad_norm:  78

Training:  17%|█▋        | 376/2205 [01:20<04:52,  6.25it/s][Atrain: 0, 500/2205, loss: 10.32, grad_norm:  79
