In [None]:
!pip install icecream
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
import os
import torch
import numpy as np
import random
import matplotlib.pyplot as plt
from joblib import dump
from icecream import ic

Collecting icecream
  Downloading icecream-2.1.1-py2.py3-none-any.whl (8.1 kB)
Collecting executing>=0.3.1
  Downloading executing-0.8.2-py2.py3-none-any.whl (16 kB)
Collecting colorama>=0.3.9
  Downloading colorama-0.4.4-py2.py3-none-any.whl (16 kB)
Collecting asttokens>=2.0.1
  Downloading asttokens-2.0.5-py2.py3-none-any.whl (20 kB)
Installing collected packages: executing, colorama, asttokens, icecream
Successfully installed asttokens-2.0.5 colorama-0.4.4 executing-0.8.2 icecream-2.1.1


***Data Loader***

In [None]:
class SensorDataset(Dataset):
    """Random-window dataset over per-sensor time series stored in one CSV.

    Every item is one randomly positioned window taken from a single sensor
    (rows whose ``id`` column equals ``idx + 1``): ``training_length`` rows of
    input immediately followed by ``forecast_window`` rows of target.
    """

    # Columns returned for every time step, in tensor column order.
    # Column 0 is the only one that is min-max scaled in __getitem__.
    # NOTE(review): column 0 is "station" while the plots label the value
    # "Adv (%)" -- confirm that "station" is really the series to forecast.
    FEATURE_COLUMNS = ["station", "adv", "sin_second", "cos_second", "sin_minute", "cos_minute"]

    def __init__(self, csv_name, training_length, forecast_window):
        """Load the CSV and remember the window sizes.

        Args:
            csv_name: path of the CSV file to read.
            training_length: number of rows in each input window (stored as ``T``).
            forecast_window: number of rows in each target window (stored as ``S``).
        """
        # load raw data file (parsed once; spaces are stripped from column names)
        self.df = pd.read_csv(csv_name)
        self.df.columns = self.df.columns.str.replace(' ', '')
        self.transform = MinMaxScaler()
        self.T = training_length
        self.S = forecast_window

    def __len__(self):
        # return number of sensors (distinct values of the "id" column)
        return self.df["id"].nunique()

    # Will pull an index between 0 and __len__.
    def __getitem__(self, idx):
        """Return one random (input, target) window for sensor ``idx + 1``.

        Returns:
            Tuple ``(index_in, index_tar, _input, target, sensor_number)``:
            row positions of the input and target windows inside the sensor's
            series, the two feature tensors, and the sensor id as a string.

        Raises:
            ValueError: if the sensor has fewer than ``T + S`` rows.
        """
        # Sensors are indexed from 1
        sensor_id = idx + 1

        # Filter the sensor's rows once instead of once per derived tensor.
        sensor_df = self.df[self.df["id"] == sensor_id]
        window = self.T + self.S
        if len(sensor_df) < window:
            raise ValueError(
                f"sensor id {sensor_id} has {len(sensor_df)} rows; "
                f"at least {window} are needed for one window"
            )

        # Valid starts are 0 .. len - window inclusive; randint's upper bound is
        # exclusive, hence the + 1 (otherwise the last window is never drawn and
        # a series of exactly `window` rows raises).
        start = np.random.randint(0, len(sensor_df) - window + 1)

        sensor_number = str(sensor_df["sensor_id"].values[start:start + 1].item())
        index_in = torch.arange(start, start + self.T)
        index_tar = torch.arange(start + self.T, start + window)

        features = sensor_df[self.FEATURE_COLUMNS].values
        _input = torch.tensor(features[start : start + self.T])
        target = torch.tensor(features[start + self.T : start + window])

        # scalar is fit only to the input
        scaler = self.transform

        scaler.fit(_input[:,0].unsqueeze(-1))
        _input[:,0] = torch.tensor(scaler.transform(_input[:,0].unsqueeze(-1)).squeeze(-1))
        target[:,0] = torch.tensor(scaler.transform(target[:,0].unsqueeze(-1)).squeeze(-1))

        # save the scalar to be used later when inverse translating the data for plotting.
        # The file is overwritten on every call, so it always holds the scaler
        # of the most recently fetched item.
        dump(scaler, 'scalar_item.joblib')

        return index_in, index_tar, _input, target, sensor_number

***helpers***

In [None]:
import os, shutil

# save train or validation loss
def log_loss(loss_val : float, path_to_save_loss : str, train : bool = True):
    """Append one loss value as a line to the train or validation loss log.

    Args:
        loss_val: value to record; written as ``str(loss_val)``.
        path_to_save_loss: directory holding the log files, with or without a
            trailing separator. An empty string means the current directory.
        train: ``True`` writes to ``train_loss.txt``, ``False`` to ``val_loss.txt``.
    """
    file_name = "train_loss.txt" if train else "val_loss.txt"

    # os.path.join tolerates a missing trailing separator, so "save_loss" and
    # "save_loss/" address the same file (the one plot_loss reads back).
    path_to_file = os.path.join(path_to_save_loss, file_name)
    directory = os.path.dirname(path_to_file)
    if directory:  # os.makedirs("") raises, so skip it for the current directory
        os.makedirs(directory, exist_ok=True)
    with open(path_to_file, "a") as f:
        f.write(str(loss_val)+"\n")

# Exponential Moving Average
def EMA(values, alpha=0.1):
    """Return the exponential moving average of ``values`` as a list.

    The first output equals the first input; every later output is
    ``alpha * value + (1 - alpha) * previous_output``.

    Args:
        values: sequence of numbers (must support ``len`` and slicing).
        alpha: weight given to the newest value.

    Returns:
        A list with one smoothed value per input; ``[]`` for empty input.
    """
    if len(values) == 0:
        return []
    ema_values = [values[0]]
    for item in values[1:]:
        # ema_values[-1] is the smoothed value of the previous step
        ema_values.append(alpha*item + (1-alpha)*ema_values[-1])
    return ema_values

# Remove all files from previous executions and re-run the model.
def clean_directory():
    """Reset the output folders so a run starts from empty directories.

    Deletes ``save_loss``, ``save_model`` and ``save_predictions`` (relative to
    the current working directory) when they exist, then creates all three anew.
    """
    output_folders = ('save_loss', 'save_model', 'save_predictions')

    # First pass: drop whatever a previous execution left behind.
    for folder in output_folders:
        if os.path.exists(folder):
            shutil.rmtree(folder)

    # Second pass: recreate the folders empty.
    for folder in output_folders:
        os.mkdir(folder)

***model***

In [None]:
import torch.nn as nn
import torch, math
from icecream import ic
import time

class Transformer(nn.Module):
    """Encoder-only transformer that maps each time step to one scalar.

    A stack of ``nn.TransformerEncoderLayer`` blocks is applied with a causal
    mask, followed by a linear layer that reduces ``feature_size`` to 1.
    """

    # d_model : number of features
    def __init__(self,feature_size=6,num_layers=3,dropout=0,nhead=6):
        """Build the encoder stack and the linear output layer.

        Args:
            feature_size: size of the last input dimension (``d_model``).
            num_layers: number of stacked encoder layers.
            dropout: dropout probability inside the encoder layers.
            nhead: number of attention heads; must divide ``feature_size``.

        Raises:
            ValueError: if ``feature_size`` is not a multiple of ``nhead``.
        """
        super(Transformer, self).__init__()

        if feature_size % nhead != 0:
            raise ValueError(
                f"feature_size ({feature_size}) must be divisible by nhead ({nhead})"
            )

        # Attribute names are kept as-is so existing state_dict checkpoints still load.
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=feature_size, nhead=nhead, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)
        self.decoder = nn.Linear(feature_size,1)
        self.init_weights()

    def init_weights(self):
        """Zero the output layer's bias and draw its weights from U(-0.1, 0.1)."""
        initrange = 0.1
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def _generate_square_subsequent_mask(self, sz):
        """Return a ``(sz, sz)`` float mask: 0 on and below the diagonal, -inf above."""
        return torch.triu(torch.full((sz, sz), float('-inf')), diagonal=1)

    def forward(self, src, device):
        """Run the masked encoder and the output layer.

        Args:
            src: input tensor whose first dimension is the sequence length
                (the mask is sized from ``len(src)``).
            device: device the attention mask is moved to.

        Returns:
            Tensor shaped like ``src`` except that the last dimension is 1.
        """
        # Match the mask to src's dtype: the training code runs the model in
        # float64, and a float32 mask would otherwise be mixed into float64 scores.
        mask = self._generate_square_subsequent_mask(len(src)).to(device=device, dtype=src.dtype)
        output = self.transformer_encoder(src,mask)
        output = self.decoder(output)
        return output

In [None]:
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import logging
import time # debugging
from joblib import load
from icecream import ic

***inference***

In [None]:
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s %(message)s", datefmt="[%H:%M:%S]")
logger = logging.getLogger(__name__)

def inference(path_to_save_predictions, forecast_window, dataloader, device, path_to_save_model, best_model):
    """Run autoregressive forecasting with a saved model and plot the results.

    The dataloader is iterated 25 times. After each pass, the forecast of the
    last batch of that pass is inverse-scaled and written with
    ``plot_prediction``. The mean MSE over all evaluated batches is logged.

    Args:
        path_to_save_predictions: path prefix handed to ``plot_prediction``.
        forecast_window: number of target steps; the loop runs
            ``forecast_window - 1`` prediction steps and needs at least 2.
        dataloader: yields ``(index_in, index_tar, _input, target, sensor_number)``.
            NOTE(review): tensors are permuted with (1, 0, 2), so they are
            presumably (batch, time, feature) -- confirm against the dataset.
        device: device string or ``torch.device`` to run on.
        path_to_save_model: path prefix of the checkpoint directory.
        best_model: checkpoint file name, appended to ``path_to_save_model``.

    Raises:
        ValueError: if ``forecast_window`` is smaller than 2.
    """
    if forecast_window < 2:
        raise ValueError("forecast_window must be at least 2 for autoregressive inference")

    device = torch.device(device)

    model = Transformer().double().to(device)
    # map_location lets a checkpoint written on one device be restored on another.
    model.load_state_dict(torch.load(path_to_save_model+best_model, map_location=device))
    model.eval()
    criterion = torch.nn.MSELoss()

    num_plots = 25
    total_loss = 0.0   # running sum of per-batch MSE
    num_batches = 0    # number of batches behind total_loss

    with torch.no_grad():

        for plot in range(num_plots):

            seen_batch = False
            for index_in, index_tar, _input, target, sensor_number in dataloader:
                seen_batch = True

                # starting from 1 so that src matches with target, but has same length as when training
                src = _input.permute(1,0,2).double().to(device)[1:, :, :]
                target = target.permute(1,0,2).double().to(device)

                next_input_model = src
                all_predictions = None

                for i in range(forecast_window - 1):

                    prediction = model(next_input_model, device)

                    # The first pass keeps every one-step prediction; later
                    # passes contribute only their newest (last) step.
                    if all_predictions is None:
                        all_predictions = prediction
                    else:
                        all_predictions = torch.cat((all_predictions, prediction[-1,:,:].unsqueeze(0)))

                    # NOTE(review): the appended time features come from
                    # target[i + 1] although the appended prediction lines up
                    # with target[i] in `true` below, and the window shrinks by
                    # one row per step -- confirm both are intended.
                    pos_encoding_old_vals = src[i+1:, :, 1:]
                    pos_encoding_new_val = target[i + 1, :, 1:].unsqueeze(1)
                    pos_encodings = torch.cat((pos_encoding_old_vals, pos_encoding_new_val))

                    next_input_model = torch.cat((src[i+1:, :, 0].unsqueeze(-1), prediction[-1,:,:].unsqueeze(0)))
                    next_input_model = torch.cat((next_input_model, pos_encodings), dim = 2)

                true = torch.cat((src[1:,:,0],target[:-1,:,0]))
                loss = criterion(true, all_predictions[:,:,0])
                total_loss += loss.item()
                num_batches += 1

            if not seen_batch:
                logger.warning("inference: dataloader produced no batches; nothing to evaluate")
                return

            # Loaded after the pass so it is the scaler most recently written
            # to 'scalar_item.joblib'.
            scaler = load('scalar_item.joblib')
            src_adv = scaler.inverse_transform(src[:,:,0].cpu())
            target_adv = scaler.inverse_transform(target[:,:,0].cpu())
            prediction_adv = scaler.inverse_transform(all_predictions[:,:,0].detach().cpu().numpy())
            plot_prediction(plot, path_to_save_predictions, src_adv, target_adv, prediction_adv, sensor_number, index_in, index_tar)

        # Mean over every evaluated batch (previously divided by a fixed 10
        # after each pass, which did not yield an average).
        logger.info(f"Loss On Unseen Dataset: {total_loss / num_batches}")

***plot***

In [None]:
import matplotlib.pyplot as plt
from icecream import ic 
import numpy as np
import torch

def plot_loss(path_to_save, train=True):
    """Plot a logged loss curve and its exponential moving average.

    Args:
        path_to_save: directory containing the loss log; the figure is written
            there as ``Train.png`` or ``Validation.png``.
        train: ``True`` reads ``train_loss.txt``, ``False`` reads ``val_loss.txt``
            (the file names used by ``log_loss``).
    """
    plt.rcParams.update({'font.size': 10})
    if train:
        title, file_name = "Train", "train_loss.txt"
    else:
        title, file_name = "Validation", "val_loss.txt"
    with open(path_to_save + "/" + file_name, 'r') as f:
        # blank lines would make float() raise, so skip them
        loss_list = [float(line) for line in f if line.strip()]
    # Guard the empty case here so an empty log still yields an (empty) figure.
    EMA_loss = EMA(loss_list) if loss_list else []
    plt.plot(loss_list, label = "loss")
    plt.plot(EMA_loss, label="EMA loss")
    plt.xlabel("Epochs")
    plt.ylabel("Loss")
    plt.legend()
    plt.title(title+"_loss")
    plt.savefig(path_to_save+f"/{title}.png")
    plt.close()

def plot_prediction(title, path_to_save, src, tgt, prediction, sensor_number, index_in, index_tar):
    """Plot input, target and forecast of one sequence and save the figure.

    Args:
        title: identifier used in the file name ``Prediction_{title}.png``.
        path_to_save: path prefix the file name is appended to (no separator
            is inserted, so it should end with one).
        src, tgt, prediction: values drawn as the input, target and forecast
            curves; their lengths must match the x positions derived below.
        sensor_number: sequence whose first element is shown in the title.
        index_in, index_tar: index tensors of the input and target windows;
            only row 0 is read.
    """
    # The first input index is dropped to mirror the caller, which drops the
    # first input step before predicting.
    idx_scr = index_in[0, 1:].tolist()
    idx_tgt = index_tar[0].tolist()
    idx_pred = [i for i in range(idx_scr[0] +1, idx_tgt[-1])]

    plt.figure(figsize=(15,6))
    plt.rcParams.update({"font.size" : 16})

    # plotting
    plt.plot(idx_scr, src, '-', color = 'blue', label = 'Input', linewidth=2)
    plt.plot(idx_tgt, tgt, '-', color = 'indigo', label = 'Target', linewidth=2)
    plt.plot(idx_pred, prediction,'--', color = 'limegreen', label = 'Forecast', linewidth=2)

    #formatting
    # The on/off flag is passed positionally: its keyword was renamed from `b`
    # to `visible` in newer Matplotlib, and positional use works with both.
    plt.grid(True, which='major', linestyle = 'solid')
    plt.minorticks_on()
    plt.grid(True, which='minor', linestyle = 'dashed', alpha=0.5)
    plt.xlabel("Time Elapsed")
    plt.ylabel("Adv (%)")
    plt.legend()
    plt.title("Forecast from Sensor " + str(sensor_number[0]))

    # save
    plt.savefig(path_to_save+f"Prediction_{title}.png")
    plt.close()

def plot_training(epoch, path_to_save, src, prediction, sensor_number, index_in, index_tar):
    """Plot an input sequence against its one-step-ahead predictions.

    Args:
        epoch: epoch number, shown in the title and used in the file name.
        path_to_save: directory the figure ``Epoch_{epoch}.png`` is written to.
        src: input values, drawn at x = 0 .. len(src) - 1.
        prediction: predicted values, drawn shifted one step to the right
            (x = 1 .. len(prediction)).
        sensor_number: sequence whose first element is shown in the title.
        index_in, index_tar: accepted but not used by this function.
    """
    idx_scr = [i for i in range(len(src))]
    idx_pred = [i for i in range(1, len(prediction)+1)]

    plt.figure(figsize=(15,6))
    plt.rcParams.update({"font.size" : 18})
    # The on/off flag is passed positionally: its keyword was renamed from `b`
    # to `visible` in newer Matplotlib, and positional use works with both.
    plt.grid(True, which='major', linestyle = '-')
    plt.grid(True, which='minor', linestyle = '--', alpha=0.5)
    plt.minorticks_on()

    plt.plot(idx_scr, src, 'o-.', color = 'blue', label = 'input sequence', linewidth=1)
    plt.plot(idx_pred, prediction, 'o-.', color = 'limegreen', label = 'prediction sequence', linewidth=1)

    plt.title("Teaching Forcing from Sensor " + str(sensor_number[0]) + ", Epoch " + str(epoch))
    plt.xlabel("Time Elapsed")
    plt.ylabel("Adv (%)")
    plt.legend()
    plt.savefig(path_to_save+f"/Epoch_{str(epoch)}.png")
    plt.close()

def plot_training_3(epoch, path_to_save, src, sampled_src, prediction, sensor_number, index_in, index_tar):
    """Plot the input, the sampled input and the predictions of one sequence.

    Args:
        epoch: epoch number, shown in the title and used in the file name.
        path_to_save: directory the figure ``Epoch_{epoch}.png`` is written to.
        src: true input values, drawn at x = 0 .. len(src) - 1.
        sampled_src: input values actually fed to the model, drawn at
            x = 0 .. len(sampled_src) - 1.
        prediction: predicted values, drawn shifted one step to the right
            (x = 1 .. len(prediction)).
        sensor_number: sequence whose first element is shown in the title.
        index_in, index_tar: accepted but not used by this function.
    """
    idx_scr = [i for i in range(len(src))]
    idx_pred = [i for i in range(1, len(prediction)+1)]
    idx_sampled_src = [i for i in range(len(sampled_src))]

    plt.figure(figsize=(15,6))
    plt.rcParams.update({"font.size" : 18})
    # The on/off flag is passed positionally: its keyword was renamed from `b`
    # to `visible` in newer Matplotlib, and positional use works with both.
    plt.grid(True, which='major', linestyle = '-')
    plt.grid(True, which='minor', linestyle = '--', alpha=0.5)
    plt.minorticks_on()

    ## REMOVE DROPOUT FOR THIS PLOT TO APPEAR AS EXPECTED !! DROPOUT INTERFERES WITH HOW THE SAMPLED SOURCES ARE PLOTTED
    plt.plot(idx_sampled_src, sampled_src, 'o-.', color='red', label = 'sampled source', linewidth=1, markersize=10)
    plt.plot(idx_scr, src, 'o-.', color = 'blue', label = 'input sequence', linewidth=1)
    plt.plot(idx_pred, prediction, 'o-.', color = 'limegreen', label = 'prediction sequence', linewidth=1)
    plt.title("Teaching Forcing from Sensor " + str(sensor_number[0]) + ", Epoch " + str(epoch))
    plt.xlabel("Time Elapsed")
    plt.ylabel("Adv (%)")
    plt.legend()
    plt.savefig(path_to_save+f"/Epoch_{str(epoch)}.png")
    plt.close()

***Preprocessing***

In [None]:
import pandas as pd
import time
import numpy as np
import datetime
from icecream import ic

def process_data(source):
    """Read a CSV and add cyclical second/minute encodings of its ``time`` column.

    Args:
        source: path of the CSV to read. It must contain a ``time`` column of
            ``HH:MM:SS`` strings, optionally followed by ``+...`` (that suffix
            is discarded before parsing).

    Returns:
        DataFrame holding the rows that have a timestamp, with four extra
        columns: ``sin_second``, ``cos_second``, ``sin_minute``, ``cos_minute``.
    """
    # Read the file the caller asked for (the path used to be overwritten by a
    # hard-coded train CSV, so every call returned the training data).
    df = pd.read_csv(source)

    # Rows without a timestamp cannot be encoded; drop them before parsing.
    df = df[df['time'].notnull()].reset_index(drop=True)

    # Parse each timestamp once and reuse it for both encodings.
    parsed = [datetime.datetime.strptime(ts.split('+')[0], '%H:%M:%S') for ts in df['time']]
    timestamps_second = np.array([float(t.second) for t in parsed])
    timestamps_minute = np.array([float(t.minute) for t in parsed])

    second_in_minute = 60
    minute_in_hour = 60

    # sin/cos pairs place each value on a circle, so 59 and 0 end up adjacent.
    df['sin_second'] = np.sin(2*np.pi*timestamps_second/second_in_minute)
    df['cos_second'] = np.cos(2*np.pi*timestamps_second/second_in_minute)
    df['sin_minute'] = np.sin(2*np.pi*timestamps_minute/minute_in_hour)
    df['cos_minute'] = np.cos(2*np.pi*timestamps_minute/minute_in_hour)

    return df

# Run process_data once per CSV path; the results are kept in module-level
# variables named after the split.
train_dataset = process_data('/content/drive/MyDrive/train_tf.csv')
test_dataset = process_data('/content/drive/MyDrive/test_tf.csv')

# Persist both tables to Drive without the index column; these are the files
# that main() uses as its default train_csv / test_csv.
train_dataset.to_csv(r'/content/drive/MyDrive/train_dataset.csv', index=False)
test_dataset.to_csv(r'/content/drive/MyDrive/test_dataset.csv', index=False)


In [None]:
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import logging
import time # debugging
from joblib import load
from icecream import ic
from torch.optim.lr_scheduler import ReduceLROnPlateau

***train_teacher_forcing***

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the CSV paths used elsewhere in this
# notebook all live under /content/drive/MyDrive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:

logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s %(message)s", datefmt="[%H:%M:%S]")
logger = logging.getLogger(__name__)

def transformer(dataloader, EPOCH, frequency, path_to_save_model, path_to_save_loss, path_to_save_predictions, device):
    """Train the model with teacher forcing (always feeding the true inputs).

    Each batch's input window is split into ``src`` (all steps but the last)
    and a one-step-shifted ``target`` (all steps but the first); the model is
    fit to predict feature column 0 of the next step. The dataloader's own
    ``target`` element is not used for training.

    NOTE(review): a later cell defines another ``transformer`` that takes an
    extra ``k`` argument; in a shared notebook namespace that later definition
    replaces this one.

    Args:
        dataloader: yields ``(index_in, index_tar, _input, target, sensor_number)``.
        EPOCH: last epoch index; epochs ``0 .. EPOCH`` inclusive are run.
        frequency: log and plot every ``frequency`` epochs (0 disables both).
        path_to_save_model: path prefix for model/optimizer checkpoints.
        path_to_save_loss: directory passed to ``log_loss`` / ``plot_loss``.
        path_to_save_predictions: directory passed to ``plot_training``.
        device: device string or ``torch.device`` to train on.

    Returns:
        File name of the checkpoint written at the epoch with the lowest
        summed training loss.

    Raises:
        ValueError: if the dataloader has no batches.
    """
    if len(dataloader) == 0:
        raise ValueError("dataloader is empty; nothing to train on")

    device = torch.device(device)

    model = Transformer().double().to(device)
    optimizer = torch.optim.Adam(model.parameters())
    criterion = torch.nn.MSELoss()
    best_model = ""
    min_train_loss = float('inf')

    for epoch in range(EPOCH + 1):

        train_loss = 0

        ## TRAIN -- TEACHER FORCING
        model.train()
        for index_in, index_tar, _input, target, sensor_number in dataloader: # for each data set

            optimizer.zero_grad()

            src = _input.permute(1,0,2).double().to(device)[:-1,:,:]
            target = _input.permute(1,0,2).double().to(device)[1:,:,:]
            prediction = model(src, device)
            loss = criterion(prediction, target[:,:,0].unsqueeze(-1))
            loss.backward()
            optimizer.step()
            train_loss += loss.detach().item()

        # Checkpoint selection compares the loss summed over batches.
        if train_loss < min_train_loss:
            torch.save(model.state_dict(), path_to_save_model + f"best_train_{epoch}.pth")
            torch.save(optimizer.state_dict(), path_to_save_model + f"optimizer_{epoch}.pth")
            min_train_loss = train_loss
            best_model = f"best_train_{epoch}.pth"

        # Honour the caller's cadence (previously a hard-coded 100).
        if frequency and epoch % frequency == 0: # Plot 1-Step Predictions

            # The value logged here is the summed loss; the per-batch mean is
            # only computed below for the loss file.
            logger.info(f"Epoch: {epoch}, Training loss: {train_loss}")
            # Tensors plotted here are those of the last batch of this epoch.
            scaler = load('scalar_item.joblib')
            src_adv = scaler.inverse_transform(src[:,:,0].cpu())
            target_adv = scaler.inverse_transform(target[:,:,0].cpu())
            prediction_adv = scaler.inverse_transform(prediction[:,:,0].detach().cpu().numpy())
            plot_training(epoch, path_to_save_predictions, src_adv, prediction_adv, sensor_number, index_in, index_tar)

        train_loss /= len(dataloader)
        log_loss(train_loss, path_to_save_loss, train=True)

    plot_loss(path_to_save_loss, train=True)
    return best_model

In [None]:
from torch.utils.data import DataLoader
import torch
import torch.nn as nn
import logging
import time # debugging
from joblib import load
from icecream import ic
from torch.optim.lr_scheduler import ReduceLROnPlateau
import math, random

***train_with_sampling***

In [None]:

# Same logging configuration as the earlier cells; `logger` is the module-level
# logger used by transformer() below.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s %(message)s", datefmt="[%H:%M:%S]")
logger = logging.getLogger(__name__)

def flip_from_probability(p):
    """Return True with probability ``p``, using one draw from ``random.random()``."""
    draw = random.random()
    return draw < p

def transformer(dataloader, EPOCH, k, frequency, path_to_save_model, path_to_save_loss, path_to_save_predictions, device):
    """Train the model while gradually feeding it its own predictions.

    For every batch the model input is rebuilt one step at a time: each step
    appends either the true next row or the model's latest prediction (joined
    with the true remaining feature columns of that row). From step 60 onwards
    the choice is a coin flip that picks the true row with probability
    ``k / (k + exp(epoch / k))``; earlier steps always use the true row.

    Args:
        dataloader: yields ``(index_in, index_tar, _input, target, sensor_number)``;
            the yielded ``target`` is replaced by a shifted copy of ``_input``.
        EPOCH: last epoch index; epochs ``0 .. EPOCH`` inclusive are run.
        k: controls how fast the probability of using true values decays.
        frequency: accepted but not read in this function (logging/plotting
            happens every 10 epochs).
        path_to_save_model: path prefix for model/optimizer checkpoints.
        path_to_save_loss: directory passed to ``log_loss`` / ``plot_loss``.
        path_to_save_predictions: directory passed to ``plot_training_3``.
        device: device string or ``torch.device`` to train on.

    Returns:
        File name of the checkpoint written at the epoch with the lowest
        summed training loss.
    """

    device = torch.device(device)

    model = Transformer().double().to(device)
    optimizer = torch.optim.Adam(model.parameters())
    
    criterion = torch.nn.MSELoss()
    best_model = ""
    min_train_loss = float('inf')

    for epoch in range(EPOCH + 1):
        train_loss = 0
        # NOTE(review): val_loss is assigned but never read in this function.
        val_loss = 0

        ## TRAIN -- TEACHER FORCING
        model.train()
        for index_in, index_tar, _input, target, sensor_number in dataloader:
        

            optimizer.zero_grad()
            # src drops the last step, target drops the first: target[t] is the step after src[t].
            src = _input.permute(1,0,2).double().to(device)[:-1,:,:] 
            target = _input.permute(1,0,2).double().to(device)[1:,:,:] 
            # The model input starts with only the first true step and grows by one row per iteration.
            sampled_src = src[:1, :, :] 

            for i in range(len(target)-1):

                # Only the prediction of the final iteration reaches the loss below.
                prediction = model(sampled_src, device) 
                

                if i < 60: # One hour, enough data to make inferences about cycles
                    prob_true_val = True
                else:
                    ## coin flip
                    v = k/(k+math.exp(epoch/k)) # probability of heads/tails depends on the epoch, evolves with time.
                    prob_true_val = flip_from_probability(v) # starts with over 95 % probability of true val for each flip in epoch 0.
                    ## if using true value as new value

                # Both branches detach what they append, so the appended rows carry no gradient history.
                if prob_true_val: # Using true value as next value
                    sampled_src = torch.cat((sampled_src.detach(), src[i+1, :, :].unsqueeze(0).detach()))
                else: ## using prediction as new value
                    # Keep the true remaining feature columns; only column 0 comes from the model.
                    positional_encodings_new_val = src[i+1,:,1:].unsqueeze(0)
                    predicted_adv = torch.cat((prediction[-1,:,:].unsqueeze(0), positional_encodings_new_val), dim=2)
                    sampled_src = torch.cat((sampled_src.detach(), predicted_adv.detach()))
            
            """To update model after each sequence"""
            # The last prediction has len(target) - 1 steps, hence target[:-1].
            loss = criterion(target[:-1,:,0].unsqueeze(-1), prediction)
            loss.backward()
            optimizer.step()
            train_loss += loss.detach().item()

        # Checkpoint selection compares the loss summed over batches (the mean is taken further down).
        if train_loss < min_train_loss:
            torch.save(model.state_dict(), path_to_save_model + f"best_train_{epoch}.pth")
            torch.save(optimizer.state_dict(), path_to_save_model + f"optimizer_{epoch}.pth")
            min_train_loss = train_loss
            best_model = f"best_train_{epoch}.pth"


        # NOTE(review): cadence is fixed at 10 epochs; the `frequency` argument is not consulted.
        if epoch % 10 == 0: # Plot 1-Step Predictions

            # The value logged here is the summed loss; tensors plotted are those of the last batch.
            logger.info(f"Epoch: {epoch}, Training loss: {train_loss}")
            scaler = load('scalar_item.joblib')
            sampled_src_adv = scaler.inverse_transform(sampled_src[:,:,0].cpu()) 
            src_adv = scaler.inverse_transform(src[:,:,0].cpu()) 
            target_adv = scaler.inverse_transform(target[:,:,0].cpu()) 
            prediction_adv = scaler.inverse_transform(prediction[:,:,0].detach().cpu().numpy()) 
            plot_training_3(epoch, path_to_save_predictions, src_adv, sampled_src_adv, prediction_adv, sensor_number, index_in, index_tar)

        train_loss /= len(dataloader)
        log_loss(train_loss, path_to_save_loss, train=True)
        
    plot_loss(path_to_save_loss, train=True)
    return best_model

In [None]:
import argparse
from torch.utils.data import DataLoader
import torch.nn as nn
import torch

***main***

In [None]:
  def main(
    epoch: int = 100,
    k: int = 60,
    batch_size: int = 1,
    frequency: int = 100,
    training_length = 20,
    forecast_window = 10,
    train_csv = "/content/drive/MyDrive/train_dataset.csv",
    test_csv = "/content/drive/MyDrive/test_dataset.csv",
    path_to_save_model = "save_model/",
    path_to_save_loss = "save_loss/", 
    path_to_save_predictions = "save_predictions/", 
    device = "cpu"
):
    """Reset the output folders, train on the train CSV, then run inference on the test CSV.

    Args:
        epoch: passed to ``transformer`` as ``EPOCH``.
        k: passed to ``transformer``; the call passes eight arguments, matching
            the ``transformer`` definition that takes ``k``.
        batch_size: accepted but not used; both DataLoaders are built with
            ``batch_size=1``.
        frequency: passed to ``transformer``.
        training_length: input window length given to both datasets.
        forecast_window: target window length given to both datasets and to
            ``inference``.
        train_csv: CSV read by the training dataset.
        test_csv: CSV read by the test dataset.
        path_to_save_model: checkpoint path prefix.
        path_to_save_loss: loss-log directory.
        path_to_save_predictions: prediction-plot path prefix.
        device: device string used for training and inference.
    """
    # NOTE(review): the `def` line of this cell starts with two spaces and the
    # closing `):` with none; presumably this relies on the notebook stripping
    # leading indentation -- confirm before exporting the cell to a .py file.

    # Removes and recreates save_loss / save_model / save_predictions.
    clean_directory()

    train_dataset = SensorDataset(csv_name = train_csv, training_length = training_length, forecast_window = forecast_window)
    train_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=True)
    test_dataset = SensorDataset(csv_name = test_csv, training_length = training_length, forecast_window = forecast_window)
    test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=True)

    # transformer() returns the file name of the best checkpoint, which inference() then loads.
    best_model = transformer(train_dataloader, epoch, k, frequency, path_to_save_model, path_to_save_loss, path_to_save_predictions, device)
    inference(path_to_save_predictions, forecast_window, test_dataloader, device, path_to_save_model, best_model)

if __name__ == "__main__":
    # Command-line entry point: every option below is forwarded to main()
    # under the same name.
    parser = argparse.ArgumentParser()
    for flag, value_type, fallback in (
        ("--epoch", int, 1000),
        ("--k", int, 60),
        ("--batch_size", int, 1),
        ("--frequency", int, 100),
        ("--path_to_save_model", str, "save_model/"),
        ("--path_to_save_loss", str, "save_loss/"),
        ("--path_to_save_predictions", str, "save_predictions/"),
        ("--device", str, "cpu"),
    ):
        parser.add_argument(flag, type=value_type, default=fallback)
    # '-f' is accepted so that parsing does not fail when that flag is present
    # on the command line; its value is not forwarded to main().
    parser.add_argument('-f')
    args = parser.parse_args()

    main(**{name: value for name, value in vars(args).items() if name != "f"})



[16:15:03] [INFO] numexpr.utils NumExpr defaulting to 2 threads.
[16:15:03] [INFO] __main__ Epoch: 0, Training loss: 1.532418952139624
[16:15:06] [INFO] __main__ Epoch: 10, Training loss: 0.343826187792709
[16:15:08] [INFO] __main__ Epoch: 20, Training loss: 0.2900434066778815
[16:15:10] [INFO] __main__ Epoch: 30, Training loss: 0.2787524962637764
[16:15:12] [INFO] __main__ Epoch: 40, Training loss: 0.20757770208568357
[16:15:15] [INFO] __main__ Epoch: 50, Training loss: 0.19424026324049043
[16:15:17] [INFO] __main__ Epoch: 60, Training loss: 0.1516767547851013
[16:15:19] [INFO] __main__ Epoch: 70, Training loss: 0.1348850622758433
[16:15:21] [INFO] __main__ Epoch: 80, Training loss: 0.2685659794453261
[16:15:23] [INFO] __main__ Epoch: 90, Training loss: 0.15355413853275304
[16:15:26] [INFO] __main__ Epoch: 100, Training loss: 0.15213427433682508
[16:15:28] [INFO] __main__ Epoch: 110, Training loss: 0.16579302747608865
[16:15:30] [INFO] __main__ Epoch: 120, Training loss: 0.11186755597