# Timing the Pipeline

In this notebook, we explore how long it takes to go from having the sTEC data in float format to getting a prediction for that window (the machine learning part of the pipeline).

## Imports

In [17]:
import datetime
from datetime import timedelta
import fastai
from fastai.vision.all import *
import fastprogress
from hyperdash import monitor_cell, Experiment
import matplotlib.pyplot as plt
import natsort
import numpy as np
import os 
import seaborn as sns
from sklearn.preprocessing import minmax_scale
import time
from tqdm.notebook import tqdm
from typing import List, Tuple


# load any resources from this library
from src import data

from pyts.image import GramianAngularField # not currently working in env 

In [8]:
# day-of-year values to load: 290 through 304 inclusive
days = [*range(290, 305)]
days

[290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304]

In [9]:
# year and location of the data to load
year, location = 2012, "hawaii"

In [10]:
# Read one dataframe per requested day of year, in the order given by `days`.
dataframes = []
for day_of_year in days:

    # banner so the reader's progress output can be matched to its day
    print(f"\n--- {day_of_year}---")

    # read in the data for this day and keep it for later concatenation
    dataframes.append(
        data.read_day(location=location, year=year, day_of_year=day_of_year)
    )


  0%|          | 0/1656 [00:00<?, ?it/s]


--- 290---
Reading dataframes...


100%|██████████| 1656/1656 [00:30<00:00, 53.93it/s]


Concatenating dataframes...


  0%|          | 0/1654 [00:00<?, ?it/s]


--- 291---
Reading dataframes...


100%|██████████| 1654/1654 [00:30<00:00, 53.45it/s]


Concatenating dataframes...


  0%|          | 0/1646 [00:00<?, ?it/s]


--- 292---
Reading dataframes...


100%|██████████| 1646/1646 [00:27<00:00, 60.00it/s]


Concatenating dataframes...

--- 293---


  0%|          | 3/1649 [00:00<01:12, 22.72it/s]

Reading dataframes...


100%|██████████| 1649/1649 [00:29<00:00, 56.51it/s]


Concatenating dataframes...


  0%|          | 0/1655 [00:00<?, ?it/s]


--- 294---
Reading dataframes...


100%|██████████| 1655/1655 [00:30<00:00, 55.06it/s]


Concatenating dataframes...


  0%|          | 0/1648 [00:00<?, ?it/s]


--- 295---
Reading dataframes...


100%|██████████| 1648/1648 [00:31<00:00, 53.15it/s]


Concatenating dataframes...


  0%|          | 0/1655 [00:00<?, ?it/s]


--- 296---
Reading dataframes...


100%|██████████| 1655/1655 [00:29<00:00, 55.97it/s]


Concatenating dataframes...


  0%|          | 0/1647 [00:00<?, ?it/s]


--- 297---
Reading dataframes...


100%|██████████| 1647/1647 [00:29<00:00, 55.14it/s]


Concatenating dataframes...


  0%|          | 0/1657 [00:00<?, ?it/s]


--- 298---
Reading dataframes...


100%|██████████| 1657/1657 [00:29<00:00, 55.36it/s]


Concatenating dataframes...


  0%|          | 0/1656 [00:00<?, ?it/s]


--- 299---
Reading dataframes...


100%|██████████| 1656/1656 [00:30<00:00, 55.19it/s]


Concatenating dataframes...


  0%|          | 0/1595 [00:00<?, ?it/s]


--- 300---
Reading dataframes...


100%|██████████| 1595/1595 [00:30<00:00, 51.83it/s]


Concatenating dataframes...


  0%|          | 0/1596 [00:00<?, ?it/s]


--- 301---
Reading dataframes...


100%|██████████| 1596/1596 [00:30<00:00, 52.08it/s]


Concatenating dataframes...


  0%|          | 0/1597 [00:00<?, ?it/s]


--- 302---
Reading dataframes...


100%|██████████| 1597/1597 [00:30<00:00, 53.12it/s]


Concatenating dataframes...


  0%|          | 0/1597 [00:00<?, ?it/s]


--- 303---
Reading dataframes...


100%|██████████| 1597/1597 [00:30<00:00, 52.83it/s]


Concatenating dataframes...


  0%|          | 0/1596 [00:00<?, ?it/s]


--- 304---
Reading dataframes...


100%|██████████| 1596/1596 [00:29<00:00, 53.45it/s]


Concatenating dataframes...


In [11]:
# stack the per-day dataframes loaded previously into one large dataframe
df_all = pd.concat(dataframes)

# keep only the columns whose name matches 'ahup__G07', then average the
# samples onto a 1-minute grid
g07_columns = df_all.filter(regex='ahup__G07', axis=1)
df_model = g07_columns.resample("1min").mean()

In [12]:
# Cut df_model into pieces at every row position where a NaN occurs:
# np.where(...)[0] gives the row positions of the NaN entries and np.split
# breaks the frame at each of those positions.
events = np.split(df_model, np.where(np.isnan(df_model))[0])
# Mask out the NaN entries of each piece, skipping any piece that is a bare ndarray.
# NOTE(review): presumably the isinstance guard covers np.split handing back
# arrays instead of DataFrames on some numpy/pandas versions — confirm.
events = [ev[~np.isnan(ev)] for ev in events if not isinstance(ev, np.ndarray)]
# Keep only non-empty pieces with more than 100 rows (the row count is checked
# before dropna is applied), then drop any remaining NaN rows from the kept pieces.
events = [ev.dropna() for ev in events if not ev.empty and ev.shape[0] > 100]

In [13]:
# Rescale every event to the range [-1, 1] (each column independently) and tag
# each row with its second of day.
normalized_events = []
for ev in events:

    # minmax_scale operates column-wise on 2-D input, so a single call rescales
    # every column of the event independently to -1..1
    scaled_values = minmax_scale(ev, feature_range=(-1, 1))

    # take the timestamps straight from the event's own index rather than from
    # whichever column the inner loop happened to visit last
    df_period = pd.DataFrame(
        scaled_values,
        columns=list(ev.columns.values),
        index=ev.index.rename("timestamp"),
    )

    # convert to seconds of the day for later annotation
    timestamps = df_period.index
    df_period["sod"] = (timestamps.hour * 60 + timestamps.minute) * 60 + timestamps.second

    normalized_events.append(df_period)

In [22]:
# start the timing here
# perf_counter is a monotonic, high-resolution clock, so the measured interval
# cannot be distorted by system clock adjustments
# NOTE(review): the timed interval also includes loading the learner from disk
# and writing/reading the intermediate JPEG — confirm that is intended.
t1 = time.perf_counter()

WINDOW_SIZE = 60
period = events[0]
doy = days[0]
idx = 0
subset = period.iloc[idx:idx+WINDOW_SIZE, :]

# now generate the field
transformer = GramianAngularField()
X_new = transformer.fit_transform(np.array([subset["ahup__G07"]]))

# draw the field on a single axes that fills the whole figure; set_axis_off
# hides both axes (lines, ticks and labels), so no per-axis calls are needed
figure = plt.figure(figsize=(5,5), frameon=False)

ax = plt.Axes(figure, [0., 0., 1., 1.])
ax.set_axis_off()
figure.add_axes(ax)

image = ax.imshow(X_new[0], cmap='viridis', origin='lower')

# save and close this specific figure instead of relying on pyplot's
# "current figure" state
figure.savefig("../GAF_test.jpg")

plt.close(figure)

# now load the trained model and one particular image and make a prediction
# NOTE(review): hard-coded absolute path to a user's home directory; this only
# runs on that machine. The file is a pickle — only load it from a trusted source.
learner = load_learner("/home/vconstan/projects/sTEC-d-dt-Anomaly-Detection/models/model.pkl")


# load in the image and predict the classification
prediction = learner.predict("../GAF_test.jpg")

print(prediction[0])

t2 = time.perf_counter()

# elapsed wall-clock seconds for the whole window -> prediction path
print(t2 - t1)

normal
1.076570987701416
