In [3]:
import os
import json
import logging

import torch
from ts.torch_handler.base_handler import BaseHandler

import copy
from pathlib import Path
import warnings

import numpy as np
import pandas as pd
import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger
import torch

from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet


        


In [32]:
import logging
logger = logging.getLogger(__name__)

In [114]:
class ForcastHandler(BaseHandler):
    def __init__(self):
        super(ForcastHandler, self).__init__()
        self.initialized = False
        
    def initialize(self, ctx):
        """ Loads the model.pt file and initialized the model object.
        Instantiates Tokenizer for preprocessor to use
        Loads labels to name mapping file for post-processing inference response
        """
        self.manifest = ctx.manifest

        properties = ctx.system_properties
        model_dir = properties.get("model_dir")
        self.device = torch.device("cuda:" + str(properties.get("gpu_id")) if torch.cuda.is_available() else "cpu")

        # Read model serialize/pt file
        serialized_file = self.manifest["model"]["serializedFile"]
        model_pt_path = os.path.join(model_dir, serialized_file)
        print("DEBUG:", model_pt_path)
        if not os.path.isfile(model_pt_path):
            raise RuntimeError("Missing the model.pt or pytorch_model.bin file")
        
        # Load model
        self.model = torch.load(model_pt_path)
        self.model.to(self.device)
        self.model.eval()
        logger.debug('Forecasting model from path {0} loaded successfully'.format(model_dir))
        

        self.initialized = True
        
    def preprocess(self, data):
        data = data[0]['data']
        max_prediction_length = 24
        max_encoder_length = 120
        print("DEBUG:", type(data))
        data = pd.DataFrame.from_dict(data)
        print(data.columns)
        print(data.describe())
        data["time_idx"] =  data["Date_Time"].dt.year*365*24 + data["Date_Time"].dt.dayofyear * 24 + data["Date_Time"].dt.hour
        #data["time_idx"] =  data["Date_Time"].dt.hour
       
        data["time_idx"] -= data["time_idx"].min()
        training_cutoff = data["time_idx"].max() - 100*max_prediction_length

        time_varying_known_reals = [
            'p__mbar',
            'Tpot__K',
            'Tdew__degC',
            'rh__percent',
            'VPmax__mbar',
            'VPact__mbar',
            'VPdef__mbar',
            'sh__g_per_kg',
            'H2OC__mmol_per_mol',
            'rho__gm_per_cubic_m',
            'wv__m_per_s',
            'max_w__vm_per_s',
            'wd__deg',
            'time_idx'
        ]
        inference_set = TimeSeriesDataSet(
            data,
            time_idx="time_idx",
            target="T__degC",
            #categorical_encoders={"series": NaNLabelEncoder().fit(data.series)},
            group_ids=["series"],
            # only unknown variable is "value" - and N-Beats can also not take any additional variables
            time_varying_unknown_reals=["T__degC"],
            time_varying_known_reals=time_varying_known_reals,
            min_encoder_length=max_encoder_length // 2,  # keep encoder length long (as it is in the validation set)
            max_encoder_length=max_encoder_length,
            min_prediction_length=1,
            max_prediction_length=max_prediction_length,
            add_relative_time_idx=True,
            add_target_scales=True,
            add_encoder_length=True,
            allow_missing_timesteps=True,
        )

        # create validation set (predict=True) which means to predict the last max_prediction_length points in time
        # for each series
        
        return inference_set

        # create dataloaders for model
        batch_size = 128  # set this between 32 to 128        

        def inference(self, inputs):
            return model.predict(inputs)
        

In [115]:
from ts.torch_handler.unit_tests.test_utils.mock_context import MockContext

In [116]:
project_id = 'pytorch-tpu-nfs'
dataset_id = 'view_dataset'
table_id = 'weather_time_series_named'
location = 'us-central1'
staging_bucket = 'automl-samples'
experiment_prefix = 'pytorch-forecasting'
from google.cloud import bigquery

sql = f"""
SELECT *
FROM  `{project_id}.{dataset_id}.{table_id}`
LIMIT 240
"""
client = bigquery.Client(location=location, project=project_id)
query_job = client.query(
  sql,
  # Location must match that of the dataset(s) referenced in the query.
  location=location,
)  # API request - starts the query

data = query_job.to_dataframe()

In [117]:
data.describe()

Unnamed: 0,p__mbar,T__degC,Tpot__K,Tdew__degC,rh__percent,VPmax__mbar,VPact__mbar,VPdef__mbar,sh__g_per_kg,H2OC__mmol_per_mol,rho__gm_per_cubic_m,wv__m_per_s,max_w__vm_per_s,wd__deg
count,240.0,240.0,240.0,240.0,240.0,240.0,240.0,240.0,240.0,240.0,240.0,240.0,240.0,240.0
mean,997.59475,-8.572583,264.774375,-10.588625,85.743542,3.4615,2.959042,0.502875,1.849125,2.968667,1312.651042,1.31275,2.300292,166.619458
std,5.425166,5.704442,5.85247,5.670704,8.819942,1.365469,1.207863,0.451055,0.762419,1.222751,32.001549,1.28447,1.87073,80.410988
min,984.74,-22.76,250.85,-24.8,48.39,0.97,0.81,0.16,0.51,0.81,1259.56,0.05,0.38,0.12
25%,994.0125,-12.64,260.5175,-14.675,84.3,2.3075,1.9525,0.23,1.215,1.95,1290.4975,0.55,1.0,139.775
50%,998.495,-7.86,265.41,-10.1,88.6,3.375,2.83,0.36,1.765,2.835,1310.285,0.9,1.75,178.65
75%,1002.27,-3.835,269.4475,-5.815,91.1,4.6,3.9525,0.5325,2.465,3.9575,1336.4575,1.5525,2.75,214.075
max,1004.6,-0.7,273.48,-2.31,96.2,5.8,5.15,2.86,3.26,5.23,1382.1,7.29,10.38,356.4


In [118]:
data_dict = data.to_dict()

In [119]:
a = pd.DataFrame.from_dict(data_dict)

In [120]:
a['Date_Time']

0     2009-01-01 01:00:00+00:00
1     2009-01-01 02:00:00+00:00
2     2009-01-01 03:00:00+00:00
3     2009-01-01 04:00:00+00:00
4     2009-01-01 05:00:00+00:00
                 ...           
235   2009-01-10 20:00:00+00:00
236   2009-01-10 21:00:00+00:00
237   2009-01-10 22:00:00+00:00
238   2009-01-10 23:00:00+00:00
239   2009-01-11 00:00:00+00:00
Name: Date_Time, Length: 240, dtype: datetime64[ns, UTC]

In [121]:
# Test Handler
def model_setup(dataframe):
    context = MockContext(model_name="temp_forecasting", model_dir='/home/jupyter/ml-ops-patterns/view_demo/research',)
    sample = dataframe.to_dict()
    return (context, sample)

def test_initialize(model_setup):
    model_context, _ = model_setup
    handler = ForcastHandler()
    handler.initialize(model_context)

    assert(True)
    return handler

def test_handle(model_setup):
    context, data = model_setup
    handler = test_initialize(model_setup)
    test_data = [{'data': data}] * 2
    results = handler.handle(test_data, context)
    #assert(len(results) == 2)
    #assert('tiger_cat' in results[0])

In [122]:
_model_setup = model_setup(data)
#test_initialize(model_setup

In [123]:
test_initialize(_model_setup)

DEBUG: /home/jupyter/ml-ops-patterns/view_demo/research/model.pt


<__main__.ForcastHandler at 0x7fd3ad358b90>

In [124]:
test_handle(_model_setup)

DEBUG: /home/jupyter/ml-ops-patterns/view_demo/research/model.pt
DEBUG: <class 'dict'>
Index(['series', 'Date_Time', 'p__mbar', 'T__degC', 'Tpot__K', 'Tdew__degC',
       'rh__percent', 'VPmax__mbar', 'VPact__mbar', 'VPdef__mbar',
       'sh__g_per_kg', 'H2OC__mmol_per_mol', 'rho__gm_per_cubic_m',
       'wv__m_per_s', 'max_w__vm_per_s', 'wd__deg'],
      dtype='object')
           p__mbar     T__degC     Tpot__K  Tdew__degC  rh__percent  \
count   240.000000  240.000000  240.000000  240.000000   240.000000   
mean    997.594750   -8.572583  264.774375  -10.588625    85.743542   
std       5.425166    5.704442    5.852470    5.670704     8.819942   
min     984.740000  -22.760000  250.850000  -24.800000    48.390000   
25%     994.012500  -12.640000  260.517500  -14.675000    84.300000   
50%     998.495000   -7.860000  265.410000  -10.100000    88.600000   
75%    1002.270000   -3.835000  269.447500   -5.815000    91.100000   
max    1004.600000   -0.700000  273.480000   -2.310000    

TypeError: __init__() got an unexpected keyword argument 'stop_randomization'