This notebook demonstrates three strategies for handling incremental drift with explicit drift detectors. Four drift detectors are implemented (ADWIN, HDDDM, STEPD, MK); however, only ADWIN and HDDDM are used in this notebook.
The strategies comprise: 
- 1) incremental training/updating of a model after a drift was detected 
- 2) training a new model and discarding the old model
- 3) a combination of incremental training and new training after drift was detected (referred to as "Switching")

All strategies are applied to a custom feedforward MLP model that was trained to predict taxi demand in several areas of New York City simultaneously.

## Import Packages

In [22]:
# standard library
import datetime
import itertools
import os
import pickle

# third-party
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from dateutil.relativedelta import relativedelta

#from tqdm import tqdm

#import keras specific functions for storing and loading models
from keras.models import Model
from keras.models import model_from_json

#load custom deep Models (MLP)
from complex_mlp import ComplexMLP

#import custom functions to store all kinds of results on disk:
import save_files_collection as sv_files

#import all functions to perform drift detection & retraining
import drift_retraining_collection as dft


## Load & prepare Data

In [4]:
# Locations of the input data and of stored results, plus the input file name.
TRAIN_PATH = '/media/...'
Store_PATH = '/media/...'
file_final = 'preprocessed_data.csv'

# Read the preprocessed table; row 0 of the CSV supplies the column names.
df_m = pd.read_csv(TRAIN_PATH + file_final, header=0)

# Turn "date" into timezone-naive timestamps in one pass: parse as UTC, render
# the UTC wall-clock time as text, then parse that text without a timezone.
# "date" stays a regular column here (it becomes the index later), because it
# is still needed when the highest-demand columns are selected.
df_m['date'] = pd.to_datetime(
    pd.to_datetime(df_m['date'], utc=True).dt.strftime('%Y-%m-%d %H:%M:%S')
)
print(df_m.head())

                 date  1  2  3    4  5  6   7  8  9  ...  254  255  256  257  \
0 2009-01-01 05:00:00  0  0  0   91  0  0  30  0  0  ...    0   50   39    3   
1 2009-01-01 06:00:00  1  0  0  105  0  0  62  0  0  ...    0   77   67    5   
2 2009-01-01 07:00:00  0  0  0   96  0  0  79  0  0  ...    0   90   83    4   
3 2009-01-01 08:00:00  0  0  0   91  0  0  84  0  0  ...    0   54   77    3   
4 2009-01-01 09:00:00  2  0  0   82  0  0  85  0  1  ...    0   66   54    4   

   258  259  260  261  262  263  
0    1    0    3   52  127  326  
1    0    0   15   65  166  476  
2    0    0   19   39  125  460  
3    1    0   19   54   79  313  
4    0    0   13   24   47  224  

[5 rows x 264 columns]


In [5]:
'''filter areas with highest demand '''
#get time series with highest "demand patterns":

#function filters nlargest areas:
def get_nlargest_areas(nlargest, org_dataset = None):
    '''
    Return the ``nlargest`` area columns with the highest total demand.

    Parameters
    ----------
    nlargest : int
        Number of area columns to keep.
    org_dataset : pandas.DataFrame, optional
        Frame holding a "date" column plus the area columns. When omitted,
        the notebook-level ``df_m`` is used. It is looked up at call time, so
        re-running the data-loading cell is picked up without having to
        re-run this definition.

    Returns
    -------
    pandas.DataFrame
        Independent copy containing only the selected area columns (ordered
        by descending column total), indexed by "date" moved back five hours.
    '''
    if org_dataset is None:
        org_dataset = df_m

    #total per area over the whole frame; drop() already returns a new frame,
    #so the caller's data is not modified:
    area_totals = org_dataset.drop(columns=["date"]).sum(axis=0, numeric_only=True)

    #names of the nlargest areas, plus "date" which becomes the index below:
    idx_filter = list(area_totals.nlargest(nlargest).index.values)
    idx_filter.append("date")

    #filter columns with largest values:
    ts_largest = org_dataset[idx_filter].copy(deep=True)
    ts_largest = ts_largest.set_index("date")

    #shift datetimeindex to use local NYC time not UTC. Plain Timedelta
    #arithmetic is used instead of shift(freq='H') because the 'H' alias is
    #deprecated in recent pandas releases.
    #NOTE(review): a fixed -5h offset does not follow daylight saving time -
    #confirm this is intended.
    ts_largest.index = ts_largest.index - pd.Timedelta(hours=5)

    return ts_largest


In [6]:
# Build the two working subsets (top 20 first, then top 10) in one statement.
ts_20largest, ts_10largest = (get_nlargest_areas(n_areas) for n_areas in (20, 10))

# Report (rows, columns) of each subset as a quick sanity check.
print('20 largest areas: ', ts_20largest.shape)
print('10 largest areas: ', ts_10largest.shape)


20 largest areas:  (83231, 20)
10 largest areas:  (83231, 10)


## Load Models from Disk (trained on 2009-2010)

### function to load models

In [5]:
def load_pretrained_model_from_disk(model_type):
    '''
    Returns pre-trained model from disk

    Parameters
    ----------
    model_type : str
        Key of the model registry defined below. Types whose name contains
        'XGBoost' are unpickled from a single file; all other types are
        rebuilt from a Keras JSON architecture file and then receive their
        weights from a separate weights file.

    Returns
    -------
    The loaded model object: whatever ``model_from_json`` returns for the
    JSON-based types, or whatever object the pickle file contains for the
    XGBoost types.

    Raises
    ------
    KeyError
        If ``model_type`` is not a key of the registry.
    IndexError
        If ``model_type`` is a registered placeholder without any files.
    OSError
        If one of the model files cannot be opened.
    '''

    #root directory under which the JSON architecture files are looked up:
    model_PATH = '/media/...'

    #complex MLP:
    model_architecture_complex_MLP_PATH = '/media/...'
    complex_MLP_model_file = 'complex_MLP_early_stopping_W168_20areas__y2012.json'
    complex_MLP_weights = 'complex_MLP_early_stopping_W168_20areas__y2012_weights.h5'

    #multivar LSTM without additional features:
    multivar_LSTM_file = 'Model_Architectures_multivar20/multivariate_stacked_lstm_non_st_model_2H_256_32_batch512_drop03_clip_norm_shuffle_scaling_tanh_W168_20largest_areas__y2012.json'
    multivar_LSTM_weights = 'multivariate_stacked_lstm_non_st_model_2H_256_32_batch512_drop03_clip_norm_shuffle_scaling_tanh_W168_20largest_areas__y2012_weights.h5'

    #multivar LSTM with additional features:
    #NOTE(review): both LSTM variants share this one directory variable. It
    #used to be assigned twice, and only the last assignment ever took effect.
    model_architecture_multivar_20 = '/media/...'
    multivar_LSTM_feat_file = 'Model_Architectures_multivar20/multivariate_stacked_lstm_non_st_model_2H_256_128_batch512_drop03_clip_norm_shuffle_scaling_tanh_encoded_lag_feats_added_W168_20largest_areas__y2012.json'
    multivar_LSTM_feat_weights = 'multivariate_stacked_lstm_non_st_model_2H_256_128_batch512_drop03_clip_norm_shuffle_scaling_tanh_encoded_lag_feats_added_W168_20largest_areas__y2012_weights.h5'

    #XGBoost 2year n_estimators=2000:
    #NOTE(review): unlike the est1000 file this name has no '.pickle.dat'
    #suffix - confirm that it matches the file on disk.
    multivar_XGBoost_2000_model_file = 'multivar_XGBoost_est2000_maxdep7_s1_2009_e12_2010_2year_train'
    model_architecture_multivar_XGBoost_path = '/media/...'

    model_architecture_multivar_XGBoost_path_new = '/media/...'

    #XGBoost 2year n_estimators=1000:
    multivar_XGBoost_1000_model_file = 'multivar_XGBoost_est1000_maxdep7_20largest_areas_s1_2009_e12_2010_2year_train.pickle.dat'

    #registry layout:
    #  JSON-based types: (architecture file, weights directory, weights file)
    #  XGBoost types:    (pickle file, directory)
    #  empty tuple:      placeholder, nothing stored for this type
    instances_dict = {'SingleMLP': (),
                  'SingleLSTM': (),
                  'ComplexMLP': (complex_MLP_model_file, model_architecture_complex_MLP_PATH, complex_MLP_weights),
                  'MultivarLSTM': (multivar_LSTM_file, model_architecture_multivar_20, multivar_LSTM_weights),
                  'MultivarLSTM_lag_feat': (multivar_LSTM_feat_file, model_architecture_multivar_20, multivar_LSTM_feat_weights),
                  'MultivarXGBoost_2year_est2000': (multivar_XGBoost_2000_model_file, model_architecture_multivar_XGBoost_path),
                  'MultivarXGBoost_2year_est1000': (multivar_XGBoost_1000_model_file, model_architecture_multivar_XGBoost_path_new)

                 }

    #fail early and readably; the exception types match what the plain dict
    #and tuple lookups would raise anyway:
    if model_type not in instances_dict:
        raise KeyError("Unknown model_type %r; expected one of %s"
                       % (model_type, sorted(instances_dict)))
    model_files = instances_dict[model_type]
    if not model_files:
        raise IndexError("No files are registered for model_type %r" % (model_type,))

    if 'XGBoost' not in model_type:

        #read the architecture; the context manager closes the file even if
        #reading fails:
        with open(model_PATH + model_files[0], 'r') as json_file:
            loaded_model_json = json_file.read()
        prediction_model = model_from_json(loaded_model_json)

        #load weights of best model:
        prediction_model.load_weights(model_files[1] + model_files[2])

    else:
        #SECURITY: unpickling executes code embedded in the file - only load
        #model files that come from a trusted location.
        with open(model_files[1] + model_files[0], "rb") as model_file:
            prediction_model = pickle.load(model_file)

    return prediction_model



### Helper function to create a new model instance on every call

In [12]:
#function needed to create new instances, otherwise same instances is used!!
def create_model_instance(model_type, n_epochs_init = 150):
    '''
    Create and return a new model instance of the requested type.

    A new object is built on every call, so separate runs do not end up
    sharing one and the same instance.

    Parameters
    ----------
    model_type : str
        Key of the factory table below (currently only 'ComplexMLP').
    n_epochs_init : int, optional
        Value passed to the model constructor as ``n_epochs``.

    Returns
    -------
    The newly constructed model instance.

    Raises
    ------
    KeyError
        If ``model_type`` is not a key of the factory table.
    '''
    #zero-argument factories instead of ready-made objects: only the model
    #that is actually requested gets constructed, and an unknown model_type
    #fails before anything is built.
    model_factories = {'ComplexMLP': lambda: ComplexMLP(n_epochs = n_epochs_init),

                      }

    return model_factories[model_type]()

    
    

## ADWIN

### Strategy: training of a new model if drift is detected & discarding of old model
- Parameters for testing purposes only: number of epochs = 20 instead of 150; end of dataset: 2013

In [17]:
# Base name for this run; it encodes the parameters that were used.
model_name = 'complex_MLP_2H_128_32_batch512_drop03_clip_norm_shuffle_scaling_std_W168_20largest_areas_live_drift_retraining_adwin'

# New ComplexMLP instance that takes over the pre-trained model from disk.
cplxMLP_model_new = create_model_instance('ComplexMLP')
prediction_model = load_pretrained_model_from_disk('ComplexMLP')
cplxMLP_model_new.load_model(prediction_model)

# Hand a copy of the 20-area frame to the routine, not the frame itself.
ts_series_input = ts_20largest.copy()

# Drift detection & retraining with the ADWIN detector.
# NOTE(review): update_retrain_switch=False presumably selects the plain
# "train a new model" strategy named in the heading - confirm in
# drift_retraining_collection.
retraining_results_tuple = dft.drift_detection_retraining(
    model_instance = cplxMLP_model_new,
    org_ts_series = ts_series_input,
    model_name = model_name,
    detector_type = 'ADWIN',
    update_retrain_switch = False,
    first_forecast_range_days = 168,
    n_epochs_retrain = 20,
    overwrite_params = True,
    end_of_dataset_date = '2013-12-31 23:00:00',
    verbosity = 2,
)

# Give each position of the returned results a descriptive name.
all_MODELS_dict_MLP_adwin = retraining_results_tuple[0]
all_model_RESULTS_dict_MLP_adwin = retraining_results_tuple[1]
all_detected_DATES_dict_MLP_adwin = retraining_results_tuple[2]
avg_rmse_resulst_all_adwin = retraining_results_tuple[3]
all_retraining_dates_adwin = retraining_results_tuple[4]
all_weight_update_dates_adwin = retraining_results_tuple[5]





# Very first predictions are made for next 168 days..
## Assigned Dates are double checked..
 ++ Number of days contained in train_set used for scaling:  729
selected years for training:  ['2009', '2010']
year_list given:  ['2009', '2010', '2011-01-01 00:00:00', None]
#### Make predictions model: complex_MLP_2H_128_32_batch512_drop03_clip_norm_shuffle_scaling_std_W168_20largest_areas_live_drift_retraining_adwin ####
Shape of org. dataset after shift:  (4033, 20)
Shape of org. dataset after shift:  (1, 20)
## Avg. RMSE of recent predictions: 
[63.25613309572718]
## area  237
Share of wrongly classified observations:  0.16265807091495166
## area  237
Share of wrongly classified observations:  0.16265807091495166
## area  161
Share of wrongly classified observations:  0.2392759732209273
## area  161
Share of wrongly classified observations:  0.23927597322092736
## area  230
Share of wrongly classified observations:  0.25663277956855934
## area  230
Share of wrongly classified observations

## Change detected in area binary170, index: 1631
date: 2011-03-09 23:00:00
Drift detected at:  2011-03-09 23:00:00
>> Current Time:  23/01/2020 11:32:48
## ++ previous detected dates:  [Timestamp('2011-01-01 00:00:00'), Timestamp('2011-03-09 23:00:00')]
## ++ last training dates:  [Timestamp('2009-01-01 00:00:00')]
 ++ Number of days contained in train_set used for scaling/retraining:  730
#### Current dates: 
#### training_start_date:  2009-03-09 23:00:00
#### start_valid_set:  None
#### start_test_set:  None
### ### New Model is trained
selected years for training:  [Timestamp('2009-03-09 23:00:00'), Timestamp('2011-03-09 23:00:00')]
year_list given:  [Timestamp('2009-03-09 23:00:00'), Timestamp('2011-03-09 23:00:00'), None, None]
#### Train model: complex_MLP_2H_128_32_batch512_drop03_clip_norm_shuffle_scaling_std_W168_20largest_areas_live_drift_retraining_adwin_count1__trainsize730_s3_2009_e3_2011__stepsize1__p12_2013 ####
>> Dates are assigned...
>date_valid:  None
>date_test:  N

Shape of org. dataset after shift:  (1, 20)
## Avg. RMSE of recent predictions: 
[61.998517665137136]
## area  237
Share of wrongly classified observations:  0.1678729037952339
## area  237
Share of wrongly classified observations:  0.1678729037952339
## area  161
Share of wrongly classified observations:  0.2510150044130627
## area  161
Share of wrongly classified observations:  0.2510150044130627
## area  230
Share of wrongly classified observations:  0.24942630185348635
## area  230
Share of wrongly classified observations:  0.24942630185348633
## area  79
Share of wrongly classified observations:  0.24430714916151808
## area  79
Share of wrongly classified observations:  0.2443071491615181
## area  236
Share of wrongly classified observations:  0.15728155339805827
## area  236
Share of wrongly classified observations:  0.15728155339805824
## area  162
Share of wrongly classified observations:  0.22453662842012356
## area  162
Share of wrongly classified observations:  0.22453662842

# adjusted dates..
start_train_year:  2009-04-16 07:00:00
last_train_set_year:  2011-04-16 07:00:00
start_validation_set_year:  2011-04-16 07:00:00
start_test_set_year:  2011-04-16 07:00:00
end_validation_set_year:  2011-04-16 07:00:00
end_test_set_year:  2011-04-16 07:00:00
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data

## area  142
Share of wrongly classified observations:  0.21109586953208354
## area  107
Share of wrongly classified observations:  0.1800030483158055
## area  107
Share of wrongly classified observations:  0.18000304831580552
## area  163
Share of wrongly classified observations:  0.19387288523090995
## area  163
Share of wrongly classified observations:  0.19387288523090992
## area  68
Share of wrongly classified observations:  0.1970736168267032
## area  68
Share of wrongly classified observations:  0.19707361682670324
## area  239
Share of wrongly classified observations:  0.2014936747447036
## area  239
Share of wrongly classified observations:  0.20149367474470356
## area  164
Share of wrongly classified observations:  0.18533760097546104
## area  164
Share of wrongly classified observations:  0.18533760097546106
## area  141
Share of wrongly classified observations:  0.15622618503276942
## area  141
Share of wrongly classified observations:  0.1562261850327694
## area  249
Share

## area  186
Share of wrongly classified observations:  0.2714710252600297
## area  142
Share of wrongly classified observations:  0.21188707280832098
## area  142
Share of wrongly classified observations:  0.21188707280832095
## area  107
Share of wrongly classified observations:  0.18023774145616644
## area  107
Share of wrongly classified observations:  0.18023774145616642
## area  163
Share of wrongly classified observations:  0.19316493313521543
## area  163
Share of wrongly classified observations:  0.19316493313521546
## area  68
Share of wrongly classified observations:  0.19747399702823176
## area  68
Share of wrongly classified observations:  0.1974739970282318
## area  239
Share of wrongly classified observations:  0.20297176820208018
## area  239
Share of wrongly classified observations:  0.20297176820208024
## area  164
Share of wrongly classified observations:  0.18588410104011888
## area  164
Share of wrongly classified observations:  0.18588410104011888
## area  141
Sha

## area  48
Share of wrongly classified observations:  0.2062617770691405
## area  48
Share of wrongly classified observations:  0.20626177706914045
## area  186
Share of wrongly classified observations:  0.2704739817364835
## area  186
Share of wrongly classified observations:  0.27047398173648357
## area  142
Share of wrongly classified observations:  0.2120597187998261
## area  142
Share of wrongly classified observations:  0.21205971879982607
## area  107
Share of wrongly classified observations:  0.17959124510798663
## area  107
Share of wrongly classified observations:  0.17959124510798666
## area  163
Share of wrongly classified observations:  0.19336135671836496
## area  163
Share of wrongly classified observations:  0.193361356718365
## area  68
Share of wrongly classified observations:  0.19756486447311206
## area  68
Share of wrongly classified observations:  0.19756486447311206
## area  239
Share of wrongly classified observations:  0.20263806348746194
## area  239
Share of

## area  234
Share of wrongly classified observations:  0.1762874929258631
## area  234
Share of wrongly classified observations:  0.17628749292586304
## area  48
Share of wrongly classified observations:  0.20741369552914546
## area  48
Share of wrongly classified observations:  0.20741369552914543
## area  186
Share of wrongly classified observations:  0.2705149971703452
## area  186
Share of wrongly classified observations:  0.2705149971703452
## area  142
Share of wrongly classified observations:  0.21123372948500285
## area  142
Share of wrongly classified observations:  0.21123372948500282
## area  107
Share of wrongly classified observations:  0.17968307866440292
## area  107
Share of wrongly classified observations:  0.17968307866440295
## area  163
Share of wrongly classified observations:  0.19170911148839842
## area  163
Share of wrongly classified observations:  0.19170911148839842
## area  68
Share of wrongly classified observations:  0.1977928692699491
## area  68
Share o

## area  162
Share of wrongly classified observations:  0.22412601906867485
## area  170
Share of wrongly classified observations:  0.1699599281470222
## area  170
Share of wrongly classified observations:  0.16995992814702224
## area  234
Share of wrongly classified observations:  0.17645433190548565
## area  234
Share of wrongly classified observations:  0.1764543319054857
## area  48
Share of wrongly classified observations:  0.20643913223711485
## area  48
Share of wrongly classified observations:  0.20643913223711483
## area  186
Share of wrongly classified observations:  0.26986320298466215
## area  186
Share of wrongly classified observations:  0.26986320298466215
## area  142
Share of wrongly classified observations:  0.21086085394500487
## area  142
Share of wrongly classified observations:  0.21086085394500484
## area  107
Share of wrongly classified observations:  0.1800469807931463
## area  107
Share of wrongly classified observations:  0.18004698079314632
## area  163
Shar

## area  236
Share of wrongly classified observations:  0.15987037537132054
## area  236
Share of wrongly classified observations:  0.15987037537132054
## area  162
Share of wrongly classified observations:  0.22427761274642177
## area  162
Share of wrongly classified observations:  0.22427761274642183
## area  170
Share of wrongly classified observations:  0.16905211990278157
## area  170
Share of wrongly classified observations:  0.16905211990278152
## area  234
Share of wrongly classified observations:  0.17674858223062384
## area  234
Share of wrongly classified observations:  0.1767485822306238
## area  48
Share of wrongly classified observations:  0.20537402106400215
## area  48
Share of wrongly classified observations:  0.20537402106400215
## area  186
Share of wrongly classified observations:  0.2688360788549824
## area  186
Share of wrongly classified observations:  0.26883607885498245
## area  142
Share of wrongly classified observations:  0.21050499594923033
## area  142
Sha

## area  79
Share of wrongly classified observations:  0.2418481848184818
## area  79
Share of wrongly classified observations:  0.24184818481848186
## area  236
Share of wrongly classified observations:  0.16264026402640264
## area  236
Share of wrongly classified observations:  0.16264026402640264
## area  162
Share of wrongly classified observations:  0.22640264026402646
## area  162
Share of wrongly classified observations:  0.2264026402640264
## area  170
Share of wrongly classified observations:  0.1712211221122112
## area  170
Share of wrongly classified observations:  0.17122112211221122
## area  234
Share of wrongly classified observations:  0.17980198019801985
## area  234
Share of wrongly classified observations:  0.1798019801980198
## area  48
Share of wrongly classified observations:  0.20871287128712868
## area  48
Share of wrongly classified observations:  0.2087128712871287
## area  186
Share of wrongly classified observations:  0.27036303630363034
## area  186
Share of

## area  161
Share of wrongly classified observations:  0.24987086776859505
## area  230
Share of wrongly classified observations:  0.24948347107438018
## area  230
Share of wrongly classified observations:  0.24948347107438015
## area  79
Share of wrongly classified observations:  0.24108987603305787
## area  79
Share of wrongly classified observations:  0.24108987603305784
## area  236
Share of wrongly classified observations:  0.1628357438016529
## area  236
Share of wrongly classified observations:  0.1628357438016529
## area  162
Share of wrongly classified observations:  0.22804752066115708
## area  162
Share of wrongly classified observations:  0.22804752066115702
## area  170
Share of wrongly classified observations:  0.17135847107438018
## area  170
Share of wrongly classified observations:  0.17135847107438015
## area  234
Share of wrongly classified observations:  0.18001033057851235
## area  234
Share of wrongly classified observations:  0.1800103305785124
## area  48
Share

## area  237
Share of wrongly classified observations:  0.17464931125995198
## area  161
Share of wrongly classified observations:  0.25123214962719576
## area  161
Share of wrongly classified observations:  0.25123214962719576
## area  230
Share of wrongly classified observations:  0.25199039555162395
## area  230
Share of wrongly classified observations:  0.2519903955516239
## area  79
Share of wrongly classified observations:  0.24263869581701003
## area  79
Share of wrongly classified observations:  0.24263869581700998
## area  236
Share of wrongly classified observations:  0.1639074939972197
## area  236
Share of wrongly classified observations:  0.16390749399721977
## area  162
Share of wrongly classified observations:  0.2300012637432074
## area  162
Share of wrongly classified observations:  0.23000126374320737
## area  170
Share of wrongly classified observations:  0.17224819916592948
## area  170
Share of wrongly classified observations:  0.17224819916592948
## area  234
Shar

Shape of org. dataset after shift:  (1, 20)
## Avg. RMSE of recent predictions: 
[68.06127531421508]
## area  237
Share of wrongly classified observations:  0.17446176688938386
## area  237
Share of wrongly classified observations:  0.1744617668893838
## area  161
Share of wrongly classified observations:  0.251051719871319
## area  161
Share of wrongly classified observations:  0.25105171987131897
## area  230
Share of wrongly classified observations:  0.2532788913635239
## area  230
Share of wrongly classified observations:  0.2532788913635239
## area  79
Share of wrongly classified observations:  0.24226676565206628
## area  79
Share of wrongly classified observations:  0.2422667656520663
## area  236
Share of wrongly classified observations:  0.16419203167532792
## area  236
Share of wrongly classified observations:  0.1641920316753279
## area  162
Share of wrongly classified observations:  0.2312546399406088
## area  162
Share of wrongly classified observations:  0.231254639940608

Shape of org. dataset after shift:  (169, 20)
Shape of org. dataset after shift:  (1, 20)
## Avg. RMSE of recent predictions: 
[60.68370074919236]
## area  237
Share of wrongly classified observations:  0.17391831293176585
## area  237
Share of wrongly classified observations:  0.17391831293176585
## area  161
Share of wrongly classified observations:  0.25184826081687073
## area  161
Share of wrongly classified observations:  0.2518482608168707
## area  230
Share of wrongly classified observations:  0.2531814325536299
## area  230
Share of wrongly classified observations:  0.25318143255362985
## area  79
Share of wrongly classified observations:  0.24312204581262875
## area  79
Share of wrongly classified observations:  0.24312204581262878
## area  236
Share of wrongly classified observations:  0.16313174160707788
## area  236
Share of wrongly classified observations:  0.16313174160707794
## area  162
Share of wrongly classified observations:  0.23112350018179617
## area  162
Share of

# adjusted dates..
start_train_year:  2009-12-06 23:00:00
last_train_set_year:  2011-12-06 23:00:00
start_validation_set_year:  2011-12-06 23:00:00
start_test_set_year:  2011-12-06 23:00:00
end_validation_set_year:  2011-12-06 23:00:00
end_test_set_year:  2011-12-06 23:00:00
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data

## area  142
Share of wrongly classified observations:  0.21520544574756006
## area  107
Share of wrongly classified observations:  0.1765767243500369
## area  107
Share of wrongly classified observations:  0.1765767243500369
## area  163
Share of wrongly classified observations:  0.19322562125809895
## area  163
Share of wrongly classified observations:  0.19322562125809892
## area  68
Share of wrongly classified observations:  0.19880259165094727
## area  68
Share of wrongly classified observations:  0.19880259165094727
## area  239
Share of wrongly classified observations:  0.1981464774870828
## area  239
Share of wrongly classified observations:  0.19814647748708275
## area  164
Share of wrongly classified observations:  0.1858443369146231
## area  164
Share of wrongly classified observations:  0.18584433691462315
## area  141
Share of wrongly classified observations:  0.1545968998605758
## area  141
Share of wrongly classified observations:  0.15459689986057573
## area  249
Share 

Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
X_train shape of area237 before concat with other areas:  (16848, 203)
X_valid shape of area237 before concat with other areas:  (1, 203)
X_test shape of area237 before concat with other areas:  (1, 203)
y_train shape of area237 before concat with other areas:  (16848,)
y_valid shape of area237 before concat with other

## area  68
Share of wrongly classified observations:  0.19890230515916574
## area  68
Share of wrongly classified observations:  0.19890230515916577
## area  239
Share of wrongly classified observations:  0.19604829857299666
## area  239
Share of wrongly classified observations:  0.19604829857299672
## area  164
Share of wrongly classified observations:  0.18580314672521037
## area  164
Share of wrongly classified observations:  0.1858031467252104
## area  141
Share of wrongly classified observations:  0.153823637028906
## area  141
Share of wrongly classified observations:  0.15382363702890597
## area  249
Share of wrongly classified observations:  0.17716794731064767
## area  249
Share of wrongly classified observations:  0.17716794731064764
## area  138
Share of wrongly classified observations:  0.3754116355653129
## area  138
Share of wrongly classified observations:  0.3754116355653128
## area  90
Share of wrongly classified observations:  0.17094767654592025
## area  90
Share of

Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
X_train shape of area237 before concat with other areas:  (16872, 203)
X_valid shape of area237 before concat with other areas:  (1, 203)
X_test shape of area237 before concat with other areas:  (1, 203)
y_train shape of area237 before concat with other areas:  (16872,)
y_valid shape of area237 before concat with other areas:  (1,)
y_test shape of area237 before concat with other areas:  (1,)
final concatenated shape of X_train :  (337440, 203)
create MLP Model:
#Dropout applied
#Clipping Norm applied
Train on 337440 samples, validate on 20 samples
Epoch 1/20
#Current LearningRate:  0.001
Epoch 2/20
#Current LearningRate:  0.001
Epoch 3/20
#Current LearningRate:  0.001
Ep

## area  164
Share of wrongly classified observations:  0.18356632298058068
## area  141
Share of wrongly classified observations:  0.1525713201341008
## area  141
Share of wrongly classified observations:  0.15257132013410082
## area  249
Share of wrongly classified observations:  0.17167436270478842
## area  249
Share of wrongly classified observations:  0.17167436270478842
## area  138
Share of wrongly classified observations:  0.37915111645265354
## area  138
Share of wrongly classified observations:  0.37915111645265354
## area  90
Share of wrongly classified observations:  0.16674046429249156
## area  90
Share of wrongly classified observations:  0.16674046429249162
## converted_stream_flag used:  True
## Shape of streaming_df:  (4033, 20)
## Head of streaming_df:                       binary237  binary161  binary230  binary79  binary236  \
date                                                                        
2012-05-05 16:00:00          1          1          1         1  

final concatenated shape of X_train :  (337440, 203)
create MLP Model:
#Dropout applied
#Clipping Norm applied
Train on 337440 samples, validate on 20 samples
Epoch 1/20
#Current LearningRate:  0.001
Epoch 2/20
#Current LearningRate:  0.001
Epoch 3/20
#Current LearningRate:  0.001
Epoch 4/20
#Current LearningRate:  0.001
Epoch 5/20
#Current LearningRate:  0.001
Epoch 6/20
#Current LearningRate:  0.001
Epoch 7/20
#Current LearningRate:  0.001
Epoch 8/20
#Current LearningRate:  0.001
Epoch 9/20
#Current LearningRate:  0.001
Epoch 10/20
#Current LearningRate:  0.001
Epoch 11/20
#Current LearningRate:  0.001
Epoch 12/20
#Current LearningRate:  0.001
Epoch 13/20
#Current LearningRate:  0.001
Epoch 14/20
#Current LearningRate:  0.001
Epoch 15/20
#Current LearningRate:  0.001
Epoch 16/20
#Current LearningRate:  0.001
Epoch 17/20
#Current LearningRate:  0.001
Epoch 18/20
#Current LearningRate:  0.001
Epoch 19/20
#Current LearningRate:  0.001
Epoch 20/20
#Current LearningRate:  0.001
## Only tr

## Change detected in area binary186, index: 959
date: 2012-10-20 15:00:00
Drift detected at:  2012-10-20 15:00:00
>> Current Time:  23/01/2020 11:50:20
## ++ previous detected dates:  [Timestamp('2012-09-10 15:00:00'), Timestamp('2012-10-20 15:00:00')]
## ++ last training dates:  [Timestamp('2009-01-01 00:00:00'), Timestamp('2009-03-09 23:00:00'), Timestamp('2009-04-16 07:00:00'), Timestamp('2009-12-06 23:00:00'), Timestamp('2010-02-06 07:00:00'), Timestamp('2010-05-05 15:00:00'), Timestamp('2010-09-10 15:00:00')]
 ++ Number of days contained in train_set used for scaling/retraining:  731
#### Current dates: 
#### training_start_date:  2010-10-20 15:00:00
#### start_valid_set:  None
#### start_test_set:  None
### ### New Model is trained
selected years for training:  [Timestamp('2010-10-20 15:00:00'), Timestamp('2012-10-20 15:00:00')]
year_list given:  [Timestamp('2010-10-20 15:00:00'), Timestamp('2012-10-20 15:00:00'), None, None]
#### Train model: complex_MLP_2H_128_32_batch512_drop

Shape of org. dataset after shift:  (4033, 20)
Shape of org. dataset after shift:  (1, 20)
## Avg. RMSE of recent predictions: 
[67.57617479809649]
## area  237
Share of wrongly classified observations:  0.17468877576735042
## area  237
Share of wrongly classified observations:  0.17468877576735042
## area  161
Share of wrongly classified observations:  0.24686255733077966
## area  161
Share of wrongly classified observations:  0.2468625573307797
## area  230
Share of wrongly classified observations:  0.25250743410110377
## area  230
Share of wrongly classified observations:  0.25250743410110377
## area  79
Share of wrongly classified observations:  0.23612721133007408
## area  79
Share of wrongly classified observations:  0.23612721133007408
## area  236
Share of wrongly classified observations:  0.16743107706264804
## area  236
Share of wrongly classified observations:  0.16743107706264806
## area  162
Share of wrongly classified observations:  0.23214555717957763
## area  162
Share 

# adjusted dates..
start_train_year:  2010-11-08 07:00:00
last_train_set_year:  2012-11-08 07:00:00
start_validation_set_year:  2012-11-08 07:00:00
start_test_set_year:  2012-11-08 07:00:00
end_validation_set_year:  2012-11-08 07:00:00
end_test_set_year:  2012-11-08 07:00:00
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data

## area  142
Share of wrongly classified observations:  0.21864064271280004
## area  107
Share of wrongly classified observations:  0.17852038050174968
## area  107
Share of wrongly classified observations:  0.1785203805017497
## area  163
Share of wrongly classified observations:  0.19853122381586086
## area  163
Share of wrongly classified observations:  0.1985312238158608
## area  68
Share of wrongly classified observations:  0.20134062792646257
## area  68
Share of wrongly classified observations:  0.20134062792646262
## area  239
Share of wrongly classified observations:  0.1963625609936419
## area  239
Share of wrongly classified observations:  0.19636256099364188
## area  164
Share of wrongly classified observations:  0.18877224111587565
## area  164
Share of wrongly classified observations:  0.1887722411158756
## area  141
Share of wrongly classified observations:  0.15658731332248998
## area  141
Share of wrongly classified observations:  0.15658731332249
## area  249
Share of

Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
X_train shape of area237 before concat with other areas:  (16872, 203)
X_valid shape of area237 before concat with other areas:  (1, 203)
X_test shape of area237 before concat with other areas:  (1, 203)
y_train shape of area237 before concat with other areas:  (16872,)
y_valid shape of area237 before concat with other

## area  68
Share of wrongly classified observations:  0.19967509197763866
## area  68
Share of wrongly classified observations:  0.1996750919776387
## area  239
Share of wrongly classified observations:  0.1959004252472646
## area  239
Share of wrongly classified observations:  0.19590042524726456
## area  164
Share of wrongly classified observations:  0.1881121888288977
## area  164
Share of wrongly classified observations:  0.18811218882889771
## area  141
Share of wrongly classified observations:  0.15600363132495576
## area  141
Share of wrongly classified observations:  0.15600363132495582
## area  249
Share of wrongly classified observations:  0.1762626021310144
## area  249
Share of wrongly classified observations:  0.17626260213101438
## area  138
Share of wrongly classified observations:  0.38162358449997613
## area  138
Share of wrongly classified observations:  0.38162358449997613
## area  90
Share of wrongly classified observations:  0.17162788475321322
## area  90
Share o

Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
X_train shape of area237 before concat with other areas:  (16872, 203)
X_valid shape of area237 before concat with other areas:  (1, 203)
X_test shape of area237 before concat with other areas:  (1, 203)
y_train shape of area237 before concat with other areas:  (16872,)
y_valid shape of area237 before concat with other areas:  (1,)
y_test shape of area237 before concat with other areas:  (1,)
final concatenated shape of X_train :  (337440, 203)
create MLP Model:
#Dropout applied
#Clipping Norm applied
Train on 3374

## area  68
Share of wrongly classified observations:  0.2019519183417648
## area  239
Share of wrongly classified observations:  0.19429645879034785
## area  239
Share of wrongly classified observations:  0.19429645879034785
## area  164
Share of wrongly classified observations:  0.18758114339436804
## area  164
Share of wrongly classified observations:  0.1875811433943681
## area  141
Share of wrongly classified observations:  0.15467609795406723
## area  141
Share of wrongly classified observations:  0.15467609795406725
## area  249
Share of wrongly classified observations:  0.17607556968258942
## area  249
Share of wrongly classified observations:  0.17607556968258942
## area  138
Share of wrongly classified observations:  0.3839817343421229
## area  138
Share of wrongly classified observations:  0.38398173434212296
## area  90
Share of wrongly classified observations:  0.16904687290146392
## area  90
Share of wrongly classified observations:  0.16904687290146395
## converted_strea

## area  163
Share of wrongly classified observations:  0.19865813560828227
## area  163
Share of wrongly classified observations:  0.19865813560828224
## area  68
Share of wrongly classified observations:  0.20167955211943478
## area  68
Share of wrongly classified observations:  0.2016795521194348
## area  239
Share of wrongly classified observations:  0.19412601084155334
## area  239
Share of wrongly classified observations:  0.19412601084155337
## area  164
Share of wrongly classified observations:  0.18737225628721232
## area  164
Share of wrongly classified observations:  0.1873722562872123
## area  141
Share of wrongly classified observations:  0.1543144050475429
## area  141
Share of wrongly classified observations:  0.15431440504754287
## area  249
Share of wrongly classified observations:  0.17595307917888559
## area  249
Share of wrongly classified observations:  0.17595307917888564
## area  138
Share of wrongly classified observations:  0.3845196836399183
## area  138
Share

Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
X_train shape of area237 before concat with other areas:  (16872, 203)
X_valid shape of area237 before concat with other areas:  (1, 203)
X_test shape of area237 before concat with other areas:  (1, 203)
y_train shape of area237 before concat with other areas:  (16872,)
y_valid shape of area237 before concat with other

## area  163
Share of wrongly classified observations:  0.20217457420924578
## area  163
Share of wrongly classified observations:  0.20217457420924576
## area  68
Share of wrongly classified observations:  0.2019084549878345
## area  68
Share of wrongly classified observations:  0.20190845498783455
## area  239
Share of wrongly classified observations:  0.19286040145985406
## area  239
Share of wrongly classified observations:  0.192860401459854
## area  164
Share of wrongly classified observations:  0.18727189781021902
## area  164
Share of wrongly classified observations:  0.18727189781021897
## area  141
Share of wrongly classified observations:  0.15309458637469586
## area  141
Share of wrongly classified observations:  0.15309458637469586
## area  249
Share of wrongly classified observations:  0.1754105839416058
## area  249
Share of wrongly classified observations:  0.17541058394160583
## area  138
Share of wrongly classified observations:  0.3810827250608273
## area  138
Share 

Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
Data is scaled...
Standardizing used...
X_train shape of area237 before concat with other areas:  (16872, 203)
X_valid shape of area237 before concat with other areas:  (1, 203)
X_test shape of area237 before concat with other areas:  (1, 203)
y_train shape of area237 before concat with other areas:  (16872,)
y_valid shape of area237 before concat with other

## area  163
Share of wrongly classified observations:  0.20209854014598538
## area  163
Share of wrongly classified observations:  0.2020985401459854
## area  68
Share of wrongly classified observations:  0.2018324209245742
## area  68
Share of wrongly classified observations:  0.2018324209245742
## area  239
Share of wrongly classified observations:  0.1933546228710462
## area  239
Share of wrongly classified observations:  0.19335462287104624
## area  164
Share of wrongly classified observations:  0.1876140510948905
## area  164
Share of wrongly classified observations:  0.18761405109489052
## area  141
Share of wrongly classified observations:  0.15320863746958635
## area  141
Share of wrongly classified observations:  0.15320863746958638
## area  249
Share of wrongly classified observations:  0.17598083941605835
## area  249
Share of wrongly classified observations:  0.17598083941605838
## area  138
Share of wrongly classified observations:  0.3818810827250608
## area  138
Share o

### Strategy: incremental training/updating of model if drift is detected
- Params for test purpose only: number of epochs = 10 instead of 150, end of dataset: 2011, verbosity = 0

In [19]:
# Strategy of this section (see heading): incremental training/updating of the
# model when a drift is detected, using ADWIN as the detector.
# The run name encodes the setup and is passed to the routine as `model_name`.
model_name = 'complex_MLP_2H_128_32_batch512_drop03_clip_norm_shuffle_scaling_std_W168_20largest_areas_live_drift_w_update_adwin'

# Build a fresh ComplexMLP wrapper and load the pretrained model from disk into it,
# so the run does not reuse the instance of a previous experiment cell.
cplxMLP_model_new = create_model_instance('ComplexMLP')
prediction_model = load_pretrained_model_from_disk('ComplexMLP')
cplxMLP_model_new.load_model(prediction_model)

# Hand over a copy, so the run does not operate on ts_20largest directly.
ts_series_input = ts_20largest.copy()

# Drift detection + weight updating.
# Test-only settings per the markdown above: 10 epochs (instead of 150),
# dataset cut off at the end of 2011, verbosity 0.
retraining_results_tuple_w_update_adwin = dft.drift_detection_retraining(
    model_instance=cplxMLP_model_new,
    org_ts_series=ts_series_input,
    model_name=model_name,
    detector_type='ADWIN',
    update_retrain_switch=False,  # no switching scheme in this run
    first_forecast_range_days=168,
    n_epochs_weight=10,
    overwrite_params=True,
    end_of_dataset_date='2011-12-31 23:00:00',
    update_weights_flag=True,  # update the existing model instead of training a new one (per section heading)
    verbosity=0,
)
                               



# Very first predictions are made for next 168 days..
## Assigned Dates are double checked..
selected years for training:  ['2009', '2010']
year_list given:  ['2009', '2010', '2011-01-01 00:00:00', None]
#### Make predictions model: complex_MLP_2H_128_32_batch512_drop03_clip_norm_shuffle_scaling_std_W168_20largest_areas_live_drift_w_update_adwin ####
Shape of org. dataset after shift:  (4033, 20)
Shape of org. dataset after shift:  (1, 20)
## Start Drift Detection
new detectors are created for each area...
## Change detected in area binary170, index: 1631
date: 2011-03-09 23:00:00
Drift detected at:  2011-03-09 23:00:00
>> Current Time:  23/01/2020 12:23:40
### ### New Model is trained
selected years for training:  [Timestamp('2009-03-09 23:00:00'), Timestamp('2011-03-09 23:00:00')]
year_list given:  [Timestamp('2009-03-09 23:00:00'), Timestamp('2011-03-09 23:00:00'), None, None]
#### Train model: complex_MLP_2H_128_32_batch512_drop03_clip_norm_shuffle_scaling_std_W168_20largest_areas_

Shape of org. dataset after shift:  (169, 20)
Shape of org. dataset after shift:  (1, 20)
## Start Drift Detection
pre-defined detectors are used...
> No drifts detected!
Make preds for next 7 days...
## Assigned Dates are double checked..
selected years for training:  [Timestamp('2009-04-21 15:00:00'), Timestamp('2011-04-21 15:00:00')]
year_list given:  [Timestamp('2009-04-21 15:00:00'), Timestamp('2011-04-21 15:00:00'), Timestamp('2011-10-27 20:00:00'), None]
#### Make predictions model: complex_MLP_2H_128_32_batch512_drop03_clip_norm_shuffle_scaling_std_W168_20largest_areas_live_drift_w_update_adwin ####
Shape of org. dataset after shift:  (169, 20)
Shape of org. dataset after shift:  (1, 20)
## Start Drift Detection
pre-defined detectors are used...
> No drifts detected!
Make preds for next 7 days...
## Assigned Dates are double checked..
selected years for training:  [Timestamp('2009-04-21 15:00:00'), Timestamp('2011-04-21 15:00:00')]
year_list given:  [Timestamp('2009-04-21 15:00

### Strategy: Switching Scheme 
- Params: `weight_update_backshift = 2`, `make_preds_with_weight_range = True`
- Params for test purpose only: number of epochs = 10 instead of 150, end of dataset: 2011

In [20]:
# Switching Scheme: combination of weight updating and training a new model
# after a drift was detected (ADWIN detector).
# The run name encodes the setup and is passed to the routine as `model_name`.
model_name = 'complex_MLP_2H_128_32_batch512_drop03_clip_norm_shuffle_scaling_std_W168_20largest_areas_live_drift_retraining_switch_backshift2_weight_range_adwin'

# Build a fresh ComplexMLP wrapper and load the pretrained model from disk into it.
cplxMLP_model_new = create_model_instance('ComplexMLP')
prediction_model = load_pretrained_model_from_disk('ComplexMLP')
cplxMLP_model_new.load_model(prediction_model)

# Hand over a copy, so the run does not operate on ts_20largest directly.
ts_series_input = ts_20largest.copy()

# Drift detection with the switching scheme enabled.
# Test-only settings per the markdown above: 10 epochs (instead of 150) for both
# retraining and weight updating, dataset cut off at the end of 2011.
retraining_results_tuple_adwin_switch_backshfit2_weight_range = dft.drift_detection_retraining(
    model_instance=cplxMLP_model_new,
    org_ts_series=ts_series_input,
    model_name=model_name,
    detector_type='ADWIN',
    update_retrain_switch=True,  # enables the switching scheme
    first_forecast_range_days=168,
    n_epochs_retrain=10,
    n_epochs_weight=10,
    overwrite_params=True,
    weight_update_backshift=2,
    end_of_dataset_date='2011-12-31 23:00:00',
    make_preds_with_weight_range=True,
    verbosity=2,  # detailed log output for this run
)
                               

                                                                                         


# Very first predictions are made for next 168 days..
## Assigned Dates are double checked..
 ++ Number of days contained in train_set used for scaling:  729
selected years for training:  ['2009', '2010']
year_list given:  ['2009', '2010', '2011-01-01 00:00:00', None]
#### Make predictions model: complex_MLP_2H_128_32_batch512_drop03_clip_norm_shuffle_scaling_std_W168_20largest_areas_live_drift_retraining_switch_backshift2_weight_range_adwin ####
Shape of org. dataset after shift:  (4033, 20)
Shape of org. dataset after shift:  (1, 20)
## Avg. RMSE of recent predictions: 
[63.25613309572718]
## area  237
Share of wrongly classified observations:  0.16265807091495166
## area  237
Share of wrongly classified observations:  0.16265807091495166
## area  161
Share of wrongly classified observations:  0.2392759732209273
## area  161
Share of wrongly classified observations:  0.23927597322092736
## area  230
Share of wrongly classified observations:  0.25663277956855934
## area  230
Share of 

## Change detected in area binary170, index: 1631
date: 2011-03-09 23:00:00
Drift detected at:  2011-03-09 23:00:00
>> Current Time:  23/01/2020 12:31:04
 ->> update_weights_flag set to "True" , delta of drift dates: -68
 >> delta of last start trainset & current drift:  -798
## ++ previous detected dates:  [Timestamp('2011-01-01 00:00:00'), Timestamp('2011-03-09 23:00:00')]
## ++ last training dates:  [Timestamp('2009-01-01 00:00:00')]
 ++ Number of days contained in train_set used for scaling/retraining:  759
#### Current dates: 
#### training_start_date:  2009-02-09 22:00:00
#### start_valid_set:  None
#### start_test_set:  None
### ### Model weights are updated based on Switching Scheme
selected years for training:  [Timestamp('2009-02-09 22:00:00'), Timestamp('2011-03-09 23:00:00')]
year_list given:  [Timestamp('2009-02-09 22:00:00'), Timestamp('2011-03-09 23:00:00'), None, None]
#### Train model: complex_MLP_2H_128_32_batch512_drop03_clip_norm_shuffle_scaling_std_W168_20largest_a

## area  141
Share of wrongly classified observations:  0.14510150044130626
## area  249
Share of wrongly classified observations:  0.16716681376875553
## area  249
Share of wrongly classified observations:  0.16716681376875553
## area  138
Share of wrongly classified observations:  0.3754633715798764
## area  138
Share of wrongly classified observations:  0.3754633715798764
## area  90
Share of wrongly classified observations:  0.15922330097087378
## area  90
Share of wrongly classified observations:  0.15922330097087378
## converted_stream_flag used:  True
## Shape of streaming_df:  (4033, 20)
## Head of streaming_df:                       binary237  binary161  binary230  binary79  binary236  \
date                                                                        
2011-03-10 00:00:00          1          0          0         1          1   
2011-03-10 01:00:00          1          0          0         1          1   
2011-03-10 02:00:00          1          1          1         0 

final concatenated shape of X_train :  (350420, 203)
#Clipping Norm applied
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
## Only training history & model are returned
Training data for weight updating is used to make predictions with updated model 
## Predictions with retrained model are made..
>> Current Number of weight updates based on Switching Scheme:  2
>> Current Number of retrainings:  0
# Very first predictions are made for next 168 days..
## Assigned Dates are double checked..
# >> ceiling_flag_test set to "True", end of dataset is reached with preds of test_set or in next iteration
 ++ Number of days contained in train_set used for scaling:  759
selected years for training:  [Timestamp('2009-04-06 22:00:00'), Timestamp('2011-05-04 23:00:00')]
year_list given:  [Timestamp('2009-04-06 22:00:00'), Timestamp('2011-05-04 23:00:00'), Timestamp('2011-05-05 00:00:00'), None]
#### Make predictions model: complex_MLP_2H

## Change detected in area binary162, index: 3839
date: 2011-10-11 23:00:00
Drift detected at:  2011-10-11 23:00:00
>> Current Time:  23/01/2020 12:34:05
 ->> update_weights_flag set to "True" , delta of drift dates: -160
 >> delta of last start trainset & current drift:  -1014
## ++ previous detected dates:  [Timestamp('2011-05-04 23:00:00'), Timestamp('2011-10-11 23:00:00')]
## ++ last training dates:  [Timestamp('2009-01-01 00:00:00')]
 ++ Number of days contained in train_set used for scaling/retraining:  759
#### Current dates: 
#### training_start_date:  2009-09-13 22:00:00
#### start_valid_set:  None
#### start_test_set:  None
### ### Model weights are updated based on Switching Scheme
selected years for training:  [Timestamp('2009-09-13 22:00:00'), Timestamp('2011-10-11 23:00:00')]
year_list given:  [Timestamp('2009-09-13 22:00:00'), Timestamp('2011-10-11 23:00:00'), None, None]
#### Train model: complex_MLP_2H_128_32_batch512_drop03_clip_norm_shuffle_scaling_std_W168_20largest

## area  164
Share of wrongly classified observations:  0.1882420091324201
## area  141
Share of wrongly classified observations:  0.14737442922374433
## area  141
Share of wrongly classified observations:  0.1473744292237443
## area  249
Share of wrongly classified observations:  0.17500000000000004
## area  249
Share of wrongly classified observations:  0.175
## area  138
Share of wrongly classified observations:  0.37385844748858443
## area  138
Share of wrongly classified observations:  0.3738584474885845
## area  90
Share of wrongly classified observations:  0.1670091324200913
## area  90
Share of wrongly classified observations:  0.16700913242009133
## converted_stream_flag used:  True
## Shape of streaming_df:  (1944, 20)
## Head of streaming_df:                       binary237  binary161  binary230  binary79  binary236  \
date                                                                        
2011-10-12 00:00:00          0          1          0         1          1   
2011

### Strategy: Switching Scheme 
- Params: `weight_update_backshift = 2`, `make_preds_with_weight_range = True`, `weight_update_range = [4]`, `adjust_lags_flag = True`
- Params for test purpose only: number of epochs = 10 instead of 150, end of dataset: 2011

In [24]:
# Switching Scheme (weight updating combined with training a new model) with ADWIN,
# this time with an explicit weight_update_range and adjusted lags.
# The run name encodes the setup and is passed to the routine as `model_name`.
model_name = 'complex_MLP_2H_128_32_batch512_drop03_clip_norm_shuffle_scaling_std_W168_20largest_areas_live_drift_retraining_switch_backshift2_weight_range_w_update_r4_adwin'

# Build a fresh ComplexMLP wrapper and load the pretrained model from disk into it.
cplxMLP_model_new = create_model_instance('ComplexMLP')
prediction_model = load_pretrained_model_from_disk('ComplexMLP')
cplxMLP_model_new.load_model(prediction_model)

# Hand over a copy, so the run does not operate on ts_20largest directly.
ts_series_input = ts_20largest.copy()

# Drift detection with the switching scheme enabled.
# Test-only settings per the markdown above: 10 epochs (instead of 150) for both
# retraining and weight updating, dataset cut off at the end of 2011.
retraining_results_tuple_adwin_switch_backshfit2_weight_range_w_update_r4 = dft.drift_detection_retraining(
    model_instance=cplxMLP_model_new,
    org_ts_series=ts_series_input,
    model_name=model_name,
    detector_type='ADWIN',
    update_retrain_switch=True,  # enables the switching scheme
    first_forecast_range_days=168,
    n_epochs_retrain=10,
    n_epochs_weight=10,
    overwrite_params=True,
    end_of_dataset_date='2011-12-31 23:00:00',
    weight_update_backshift=2,
    weight_update_range=[4],  # passed as a one-element list
    adjust_lags_flag=True,
    make_preds_with_weight_range=True,
    verbosity=0,
)
      

# Very first predictions are made for next 168 days..
## Assigned Dates are double checked..
selected years for training:  ['2009', '2010']
year_list given:  ['2009', '2010', '2011-01-01 00:00:00', None]
#### Make predictions model: complex_MLP_2H_128_32_batch512_drop03_clip_norm_shuffle_scaling_std_W168_20largest_areas_live_drift_retraining_switch_backshift2_weight_range_w_update_r4_adwin ####
Shape of org. dataset after shift:  (4033, 20)
Shape of org. dataset after shift:  (1, 20)
## Start Drift Detection
new detectors are created for each area...
## Change detected in area binary170, index: 1631
date: 2011-03-09 23:00:00
Drift detected at:  2011-03-09 23:00:00
>> Current Time:  23/01/2020 12:46:51
### ### Model weights are updated based on Switching Scheme
selected years for training:  [Timestamp('2009-02-09 22:00:00'), Timestamp('2011-03-09 23:00:00')]
year_list given:  [Timestamp('2009-02-09 22:00:00'), Timestamp('2011-03-09 23:00:00'), None, None]
#### Train model: complex_MLP_2

> No drifts detected!
Make preds for next 7 days...
## Assigned Dates are double checked..
selected years for training:  [Timestamp('2009-04-08 06:00:00'), Timestamp('2011-05-06 07:00:00')]
year_list given:  [Timestamp('2009-04-08 06:00:00'), Timestamp('2011-05-06 07:00:00'), Timestamp('2011-11-04 11:00:00'), None]
#### Make predictions model: complex_MLP_2H_128_32_batch512_drop03_clip_norm_shuffle_scaling_std_W168_20largest_areas_live_drift_retraining_switch_backshift2_weight_range_w_update_r4_adwin ####
Shape of org. dataset after shift:  (169, 20)
Shape of org. dataset after shift:  (1, 20)
## Start Drift Detection
pre-defined detectors are used...
> No drifts detected!
Make preds for next 7 days...
## Assigned Dates are double checked..
selected years for training:  [Timestamp('2009-04-08 06:00:00'), Timestamp('2011-05-06 07:00:00')]
year_list given:  [Timestamp('2009-04-08 06:00:00'), Timestamp('2011-05-06 07:00:00'), Timestamp('2011-11-11 12:00:00'), None]
#### Make predictions m

#### Store results (Switching Scheme run with `weight_update_range = [4]`)

In [28]:
# Target folders for everything written by this cell.
# NOTE(review): both paths are relative ('media/...'), whereas the data-loading cell
# uses '/media/...' -- confirm that this is intended.
df_save_PATH = 'media/...'
model_save_PATH = 'media/...'

# The result tuple is addressed by position. The HDDDM cell of this notebook names the
# positions as [0] models, [1] results, [2] detected dates, [4] retraining dates,
# [5] weight-update dates -- presumably the same layout applies to this run.

# 1) Predictions / results: elements [1] and [0] of the tuple.
_ = sv_files.store_retrained_drift_detection_results(
    retraining_results_tuple_adwin_switch_backshfit2_weight_range_w_update_r4[1],
    retraining_results_tuple_adwin_switch_backshfit2_weight_range_w_update_r4[0],
    df_save_PATH,
)

# 2) Models and their training history: element [0] of the tuple.
_ = sv_files.store_model_and_history_on_disk(
    retraining_results_tuple_adwin_switch_backshfit2_weight_range_w_update_r4[0],
    model_save_PATH,
    df_save_PATH,
)

# 3) Dates at which a change was detected: element [2]; the returned frame is kept.
dates_df = sv_files.store_detected_change_dates(
    retraining_results_tuple_adwin_switch_backshfit2_weight_range_w_update_r4[2],
    df_save_PATH,
)

# 4) Same dates stored with the dates as index, together with elements [4] and [5].
_ = sv_files.store_detected_change_dates_with_index(
    retraining_results_tuple_adwin_switch_backshfit2_weight_range_w_update_r4[2],
    retraining_results_tuple_adwin_switch_backshfit2_weight_range_w_update_r4[4],
    switch_updating_dates_list=retraining_results_tuple_adwin_switch_backshfit2_weight_range_w_update_r4[5],
    df_store_PATH=df_save_PATH,
)



predictions stored on disk!
Saved model to disk
Save history_df on disk done
Saved model to disk
Save history_df on disk done
Saved model to disk
Save history_df on disk done
Saved model to disk
Save history_df on disk done
Saved model to disk
Save history_df on disk done
Saved model to disk
Save history_df on disk done
Saved model to disk
Save history_df on disk done
Saved model to disk
Save history_df on disk done
Saved model to disk
Save history_df on disk done
Saved model to disk
Save history_df on disk done
Saved model to disk
Save history_df on disk done
Saved model to disk
Save history_df on disk done
Saved model to disk
Save history_df on disk done
Saved model to disk
Save history_df on disk done
Saved model to disk
Save history_df on disk done
Saved model to disk
Save history_df on disk done
Saved model to disk
Save history_df on disk done
Saved model to disk
Save history_df on disk done
Saved model to disk
Save history_df on disk done
Saved model to disk
Save history_df on di

## HDDDM Diff

### Strategy: training of new model if drift is detected
- Params for test purpose only: number of epochs = 10 instead of 150, end of dataset: 2011

In [25]:
# Strategy of this section (see heading): a new model is trained when a drift is
# detected; detector is 'HDDDM_diff', run on the differenced time series.
# The run name encodes the setup and is passed to the routine as `model_name`.
model_name = 'complex_MLP_2H_128_32_batch512_drop03_clip_norm_shuffle_scaling_std_W168_20largest_areas_live_drift_retraining_hdddm_diff'

# Build a fresh ComplexMLP wrapper and load the pretrained model from disk into it.
cplxMLP_model_new = create_model_instance('ComplexMLP')
prediction_model = load_pretrained_model_from_disk('ComplexMLP')
cplxMLP_model_new.load_model(prediction_model)

# Hand over a copy, so the run does not operate on ts_20largest directly.
ts_series_input = ts_20largest.copy()

# Drift detection + retraining.
# Test-only settings per the markdown above: 10 epochs (instead of 150),
# dataset cut off at the end of 2011.
retraining_results_tuple_hdddm_diff = dft.drift_detection_retraining(
    model_instance=cplxMLP_model_new,
    org_ts_series=ts_series_input,
    model_name=model_name,
    detector_type='HDDDM_diff',
    use_differenced_ts=True,
    update_retrain_switch=False,  # no switching scheme in this run
    first_forecast_range_days=168,
    n_epochs_retrain=10,
    overwrite_params=True,
    end_of_dataset_date='2011-12-31 23:00:00',
    verbosity=0,
)

# Split the result tuple into named pieces (positions 0..5).
all_MODELS_dict_MLP_hdddm_diff = retraining_results_tuple_hdddm_diff[0]          # models
all_model_RESULTS_dict_MLP_hdddm_diff = retraining_results_tuple_hdddm_diff[1]   # results
all_detected_DATES_dict_MLP_hdddm_diff = retraining_results_tuple_hdddm_diff[2]  # detected drift dates
avg_rmse_resulst_all_hdddm_diff = retraining_results_tuple_hdddm_diff[3]         # avg. RMSE results
all_retraining_dates_hdddm_diff = retraining_results_tuple_hdddm_diff[4]         # retraining dates
all_weight_update_dates_hdddm_diff = retraining_results_tuple_hdddm_diff[5]      # weight-update dates





## >> Streaming data is differenced...
# Very first predictions are made for next 168 days..
## Assigned Dates are double checked..
selected years for training:  ['2009', '2010']
year_list given:  ['2009', '2010', '2011-01-01 00:00:00', None]
#### Make predictions model: complex_MLP_2H_128_32_batch512_drop03_clip_norm_shuffle_scaling_std_W168_20largest_areas_live_drift_retraining_hdddm_diff ####
Shape of org. dataset after shift:  (4033, 20)
Shape of org. dataset after shift:  (1, 20)
## Start Drift Detection
new detectors are created for each area...


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  keepdims=keepdims)
  arrmean, rcount, out=arrmean, casting='unsafe', subok=False)
  ret = ret.dtype.type(ret / rcount)


> No drifts detected!
Make preds for next 7 days...
## Assigned Dates are double checked..
selected years for training:  ['2009', '2010']
year_list given:  ['2009', '2010', Timestamp('2011-06-18 01:00:00'), None]
#### Make predictions model: complex_MLP_2H_128_32_batch512_drop03_clip_norm_shuffle_scaling_std_W168_20largest_areas_live_drift_retraining_hdddm_diff ####
Shape of org. dataset after shift:  (169, 20)
Shape of org. dataset after shift:  (1, 20)
## Start Drift Detection
pre-defined detectors are used...
> No drifts detected!
Make preds for next 7 days...
## Assigned Dates are double checked..
selected years for training:  ['2009', '2010']
year_list given:  ['2009', '2010', Timestamp('2011-06-25 02:00:00'), None]
#### Make predictions model: complex_MLP_2H_128_32_batch512_drop03_clip_norm_shuffle_scaling_std_W168_20largest_areas_live_drift_retraining_hdddm_diff ####
Shape of org. dataset after shift:  (169, 20)
Shape of org. dataset after shift:  (1, 20)
## Start Drift Detectio

create MLP Model:
#Dropout applied
#Clipping Norm applied
Train on 336960 samples, validate on 20 samples
Epoch 1/10
#Current LearningRate:  0.001
Epoch 2/10
#Current LearningRate:  0.001
Epoch 3/10
#Current LearningRate:  0.001
Epoch 4/10
#Current LearningRate:  0.001
Epoch 5/10
#Current LearningRate:  0.001
Epoch 6/10
#Current LearningRate:  0.001
Epoch 7/10
#Current LearningRate:  0.001
Epoch 8/10
#Current LearningRate:  0.001
Epoch 9/10
#Current LearningRate:  0.001
Epoch 10/10
#Current LearningRate:  0.001
## Only training history & model are returned
## Predictions with retrained model are made..
>> Current Number of weight updates based on Switching Scheme:  0
>> Current Number of retrainings:  1
# Very first predictions are made for next 168 days..
## Assigned Dates are double checked..
selected years for training:  [Timestamp('2009-09-09 23:00:00'), Timestamp('2011-09-09 23:00:00')]
year_list given:  [Timestamp('2009-09-09 23:00:00'), Timestamp('2011-09-09 23:00:00'), Timestam

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  keepdims=keepdims)
  arrmean, rcount, out=arrmean, casting='unsafe', subok=False)
  ret = ret.dtype.type(ret / rcount)


### Strategy: incremental training/updating of model if drift is detected
- Params for test purposes only: `n_epochs_weight` = 10 instead of 150, end of dataset: 2011 (`end_of_dataset_date = '2011-12-31 23:00:00'`)

In [26]:
# Identifier of this run; the name encodes the hyper-parameters and the strategy
# (HDDDM_diff detector, incremental weight update when drift is detected).
model_name = 'complex_MLP_2H_128_32_batch512_drop03_clip_norm_shuffle_scaling_std_W168_20largest_areas_live_drift_weight_update_hdddm_diff'

# Build a ComplexMLP instance and hand it the pretrained model loaded from disk.
cplxMLP_model_new = create_model_instance('ComplexMLP')
prediction_model = load_pretrained_model_from_disk('ComplexMLP')
cplxMLP_model_new.load_model(prediction_model)

# Input series for slicing: a copy of ts_20largest.
ts_series_input = ts_20largest.copy()

# Run drift detection with update_weights_flag=True.
# Test settings: 10 weight-update epochs, stream ends at 2011-12-31 23:00.
weight_update_results_tuple_hdddm_diff = dft.drift_detection_retraining(
    model_instance=cplxMLP_model_new,
    org_ts_series=ts_series_input,
    model_name=model_name,
    detector_type='HDDDM_diff',
    use_differenced_ts=True,
    update_weights_flag=True,
    first_forecast_range_days=168,
    n_epochs_weight=10,
    overwrite_params=True,
    end_of_dataset_date='2011-12-31 23:00:00',
    verbosity=0,
)
                               




## >> Streaming data is differenced...
# Very first predictions are made for next 168 days..
## Assigned Dates are double checked..
selected years for training:  ['2009', '2010']
year_list given:  ['2009', '2010', '2011-01-01 00:00:00', None]
#### Make predictions model: complex_MLP_2H_128_32_batch512_drop03_clip_norm_shuffle_scaling_std_W168_20largest_areas_live_drift_weight_update_hdddm_diff ####
Shape of org. dataset after shift:  (4033, 20)
Shape of org. dataset after shift:  (1, 20)
## Start Drift Detection
new detectors are created for each area...


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  keepdims=keepdims)
  arrmean, rcount, out=arrmean, casting='unsafe', subok=False)
  ret = ret.dtype.type(ret / rcount)


> No drifts detected!
Make preds for next 7 days...
## Assigned Dates are double checked..
selected years for training:  ['2009', '2010']
year_list given:  ['2009', '2010', Timestamp('2011-06-18 01:00:00'), None]
#### Make predictions model: complex_MLP_2H_128_32_batch512_drop03_clip_norm_shuffle_scaling_std_W168_20largest_areas_live_drift_weight_update_hdddm_diff ####
Shape of org. dataset after shift:  (169, 20)
Shape of org. dataset after shift:  (1, 20)
## Start Drift Detection
pre-defined detectors are used...
> No drifts detected!
Make preds for next 7 days...
## Assigned Dates are double checked..
selected years for training:  ['2009', '2010']
year_list given:  ['2009', '2010', Timestamp('2011-06-25 02:00:00'), None]
#### Make predictions model: complex_MLP_2H_128_32_batch512_drop03_clip_norm_shuffle_scaling_std_W168_20largest_areas_live_drift_weight_update_hdddm_diff ####
Shape of org. dataset after shift:  (169, 20)
Shape of org. dataset after shift:  (1, 20)
## Start Drift De

#Clipping Norm applied
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
## Only training history & model are returned
## Predictions with retrained model are made..
>> Current Number of weight updates based on Switching Scheme:  0
>> Current Number of retrainings:  1
# Very first predictions are made for next 168 days..
## Assigned Dates are double checked..
selected years for training:  [Timestamp('2009-09-09 23:00:00'), Timestamp('2011-09-09 23:00:00')]
year_list given:  [Timestamp('2009-09-09 23:00:00'), Timestamp('2011-09-09 23:00:00'), Timestamp('2011-09-10 00:00:00'), None]
#### Make predictions model: complex_MLP_2H_128_32_batch512_drop03_clip_norm_shuffle_scaling_std_W168_20largest_areas_live_drift_weight_update_hdddm_diff ####
Shape of org. dataset after shift:  (2712, 20)
Shape of org. dataset after shift:  (1, 20)
## Start Drift Detection
new detectors are created for each area...
> No drifts detected!
Make preds 

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  keepdims=keepdims)
  arrmean, rcount, out=arrmean, casting='unsafe', subok=False)
  ret = ret.dtype.type(ret / rcount)


### Strategy: Switching Scheme 
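- Params: `weight_update_backshift` = 2, `make_preds_with_weight_range` = True, `weight_update_range` = [1, 4.5]
- Params for test purposes only: number of epochs = 10 instead of 150 (retraining and weight update), end of dataset: 2012 (`end_of_dataset_date = '2012-12-31 23:00:00'`)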

In [28]:
# Identifier of this run; the name encodes the hyper-parameters and the strategy
# (switching scheme, backshift 2, weight range, weight update range 1..4.5, HDDDM_diff).
model_name = 'complex_MLP_2H_128_32_batch512_drop03_clip_norm_shuffle_scaling_std_W168_20largest_areas_live_drift_switch_backshift2_weight_range_w_update_r1__4_5_hdddm_diff'

# Build a ComplexMLP instance and hand it the pretrained model loaded from disk.
cplxMLP_model_new = create_model_instance('ComplexMLP')
prediction_model = load_pretrained_model_from_disk('ComplexMLP')
cplxMLP_model_new.load_model(prediction_model)

# Input series for slicing: a copy of ts_20largest.
ts_series_input = ts_20largest.copy()

# Run drift detection with the switching scheme (update_retrain_switch=True).
# Test settings: 10 epochs for both retraining and weight updates.
# This run extends to 2012-12-31 23:00 and uses verbosity=2.
results_tuple_switch_backshift2_weight_range_w_updater1__4_5_hdddm_diff = dft.drift_detection_retraining(
    model_instance=cplxMLP_model_new,
    org_ts_series=ts_series_input,
    model_name=model_name,
    detector_type='HDDDM_diff',
    use_differenced_ts=True,
    update_retrain_switch=True,
    first_forecast_range_days=168,
    make_preds_with_weight_range=True,
    weight_update_backshift=2,
    # Must agree with the `w_update_r1__4_5` tag in model_name and in the name of
    # the result variable; the literal here was [3, 4.5], contradicting both.
    # NOTE(review): if [3, 4.5] was the intended setting, rename the tags instead.
    weight_update_range=[1, 4.5],
    n_epochs_retrain=10,
    n_epochs_weight=10,
    overwrite_params=True,
    end_of_dataset_date='2012-12-31 23:00:00',
    verbosity=2,
)
                               




## >> Streaming data is differenced...
# Very first predictions are made for next 168 days..
## Assigned Dates are double checked..
 ++ Number of days contained in train_set used for scaling:  729
selected years for training:  ['2009', '2010']
year_list given:  ['2009', '2010', '2011-01-01 00:00:00', None]
#### Make predictions model: complex_MLP_2H_128_32_batch512_drop03_clip_norm_shuffle_scaling_std_W168_20largest_areas_live_drift_switch_backshift2_weight_range_w_update_r1__4_5_hdddm_diff ####
Shape of org. dataset after shift:  (4033, 20)
Shape of org. dataset after shift:  (1, 20)
## Avg. RMSE of recent predictions: 
[63.25613309572718]
## converted_stream_flag used:  False
## Shape of streaming_df:  (4033, 20)
## Head of streaming_df:                         237    161    230     79    236    162    170   234  \
date                                                                         
2011-01-01 00:00:00 -106.0 -388.0 -467.0 -438.0  -99.0 -262.0  -81.0  51.0   
2011-01-01 01:0

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  keepdims=keepdims)
  arrmean, rcount, out=arrmean, casting='unsafe', subok=False)
  ret = ret.dtype.type(ret / rcount)


> No drifts detected!
Make preds for next 7 days...
## Assigned Dates are double checked..
 ++ Number of days contained in train_set used for scaling:  729
selected years for training:  ['2009', '2010']
year_list given:  ['2009', '2010', Timestamp('2011-06-18 01:00:00'), None]
#### Make predictions model: complex_MLP_2H_128_32_batch512_drop03_clip_norm_shuffle_scaling_std_W168_20largest_areas_live_drift_switch_backshift2_weight_range_w_update_r1__4_5_hdddm_diff ####
Shape of org. dataset after shift:  (169, 20)
Shape of org. dataset after shift:  (1, 20)
## Avg. RMSE of recent predictions: 
[55.150346178085684]
## converted_stream_flag used:  False
## Shape of streaming_df:  (169, 20)
## Head of streaming_df:                        237   161    230     79   236   162    170    234  \
date                                                                      
2011-06-18 01:00:00 -21.0 -56.0 -269.0 -139.0  17.0 -48.0 -139.0  121.0   
2011-06-18 02:00:00  -4.0  -1.0  -40.0  -81.0  -3.0 -43

Shape of org. dataset after shift:  (169, 20)
Shape of org. dataset after shift:  (1, 20)
## Avg. RMSE of recent predictions: 
[61.881492602237145]
## converted_stream_flag used:  False
## Shape of streaming_df:  (169, 20)
## Head of streaming_df:                         237    161    230     79    236    162    170    234  \
date                                                                          
2011-07-02 03:00:00  -62.0  -48.0 -104.0 -473.0  -27.0  -44.0  -98.0  -17.0   
2011-07-02 04:00:00   -9.0    2.0  -23.0  -83.0   -7.0  -46.0  -20.0  -32.0   
2011-07-02 05:00:00    2.0    5.0   20.0  -27.0   22.0   12.0   82.0   15.0   
2011-07-02 06:00:00  105.0  200.0  164.0  -13.0  176.0  207.0  164.0  122.0   
2011-07-02 07:00:00  262.0  177.0   93.0  -88.0  259.0  121.0  224.0  142.0   

                        48    186    142    107   163     68    239    164  \
date                                                                         
2011-07-02 03:00:00  -78.0  -17.0  -10.0 

Shape of org. dataset after shift:  (169, 20)
Shape of org. dataset after shift:  (1, 20)
## Avg. RMSE of recent predictions: 
[57.588020147887164]
## converted_stream_flag used:  False
## Shape of streaming_df:  (169, 20)
## Head of streaming_df:                         237    161    230     79    236    162    170    234  \
date                                                                          
2011-07-23 06:00:00  -75.0 -144.0  -47.0  -49.0 -177.0  -67.0 -197.0  -33.0   
2011-07-23 07:00:00 -154.0 -144.0 -185.0  165.0    9.0  -80.0 -156.0 -140.0   
2011-07-23 08:00:00 -219.0 -211.0 -122.0  210.0   57.0 -277.0 -294.0 -185.0   
2011-07-23 09:00:00 -243.0 -109.0 -129.0  -42.0 -186.0 -272.0 -246.0 -168.0   
2011-07-23 10:00:00  -27.0 -130.0 -189.0  -62.0 -141.0  -34.0  -77.0 -137.0   

                        48    186   142    107   163    68    239    164  \
date                                                                       
2011-07-23 06:00:00 -127.0 -152.0 -17.0  -47.

Shape of org. dataset after shift:  (169, 20)
Shape of org. dataset after shift:  (1, 20)
## Avg. RMSE of recent predictions: 
[62.62939731528265]
## converted_stream_flag used:  False
## Shape of streaming_df:  (169, 20)
## Head of streaming_df:                         237   161    230    79   236    162    170   234  \
date                                                                      
2011-08-13 09:00:00   54.0  82.0  -32.0 -24.0  75.0  109.0   61.0   2.0   
2011-08-13 10:00:00   81.0  53.0   22.0  64.0  60.0    5.0  -37.0   1.0   
2011-08-13 11:00:00    5.0 -34.0  105.0  58.0  37.0  -60.0   33.0 -46.0   
2011-08-13 12:00:00  151.0  39.0   32.0 -77.0  55.0  117.0   78.0 -71.0   
2011-08-13 13:00:00  -96.0 -20.0 -125.0  -9.0  59.0  108.0  113.0  34.0   

                        48    186   142    107   163    68   239   164   141  \
date                                                                           
2011-08-13 09:00:00  108.0   12.0  38.0  108.0  31.0  -4.0 -51.0  

Shape of org. dataset after shift:  (169, 20)
Shape of org. dataset after shift:  (1, 20)
## Avg. RMSE of recent predictions: 
[74.52488693938635]
## converted_stream_flag used:  False
## Shape of streaming_df:  (169, 20)
## Head of streaming_df:                         237    161    230     79    236    162    170    234  \
date                                                                          
2011-09-03 12:00:00  508.0  605.0  525.0  434.0  339.0  448.0  357.0  514.0   
2011-09-03 13:00:00  594.0  655.0  597.0  458.0  365.0  400.0  376.0  568.0   
2011-09-03 14:00:00  686.0  699.0  507.0  391.0  394.0  356.0  401.0  672.0   
2011-09-03 15:00:00  538.0  689.0  541.0  414.0  271.0  362.0  390.0  515.0   
2011-09-03 16:00:00  674.0  550.0  607.0  464.0  409.0  592.0  451.0  452.0   

                        48    186    142    107    163     68    239    164  \
date                                                                          
2011-09-03 12:00:00  407.0  552.0  429.0

Training data for weight updating is used to make predictions with updated model 
## Predictions with retrained model are made..
>> Current Number of weight updates based on Switching Scheme:  1
>> Current Number of retrainings:  0
# Very first predictions are made for next 168 days..
## Assigned Dates are double checked..
 ++ Number of days contained in train_set used for scaling:  759
selected years for training:  [Timestamp('2009-08-12 22:00:00'), Timestamp('2011-09-09 23:00:00')]
year_list given:  [Timestamp('2009-08-12 22:00:00'), Timestamp('2011-09-09 23:00:00'), Timestamp('2011-09-10 00:00:00'), None]
#### Make predictions model: complex_MLP_2H_128_32_batch512_drop03_clip_norm_shuffle_scaling_std_W168_20largest_areas_live_drift_switch_backshift2_weight_range_w_update_r1__4_5_hdddm_diff ####
Shape of org. dataset after shift:  (4033, 20)
Shape of org. dataset after shift:  (1, 20)
## Avg. RMSE of recent predictions: 
[63.497815252256395]
## converted_stream_flag used:  False
## S

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  keepdims=keepdims)
  arrmean, rcount, out=arrmean, casting='unsafe', subok=False)
  ret = ret.dtype.type(ret / rcount)


> No drifts detected!
Make preds for next 7 days...
## Assigned Dates are double checked..
 ++ Number of days contained in train_set used for scaling:  759
selected years for training:  [Timestamp('2009-08-12 22:00:00'), Timestamp('2011-09-09 23:00:00')]
year_list given:  [Timestamp('2009-08-12 22:00:00'), Timestamp('2011-09-09 23:00:00'), Timestamp('2012-02-25 01:00:00'), None]
#### Make predictions model: complex_MLP_2H_128_32_batch512_drop03_clip_norm_shuffle_scaling_std_W168_20largest_areas_live_drift_switch_backshift2_weight_range_w_update_r1__4_5_hdddm_diff ####
Shape of org. dataset after shift:  (169, 20)
Shape of org. dataset after shift:  (1, 20)
## Avg. RMSE of recent predictions: 
[61.34384444288703]
## converted_stream_flag used:  False
## Shape of streaming_df:  (169, 20)
## Head of streaming_df:                        237   161    230     79   236   162   170   234     48  \
date                                                                           
2012-02-25 01:00:

Shape of org. dataset after shift:  (169, 20)
Shape of org. dataset after shift:  (1, 20)
## Avg. RMSE of recent predictions: 
[77.39820326609942]
## converted_stream_flag used:  False
## Shape of streaming_df:  (169, 20)
## Head of streaming_df:                        237   161   230     79   236   162   170    234    48  \
date                                                                          
2012-03-10 03:00:00  -7.0  14.0  91.0  308.0  10.0  24.0  57.0   86.0  -2.0   
2012-03-10 04:00:00  12.0  -8.0  27.0   39.0  10.0  -2.0  52.0   47.0  99.0   
2012-03-10 05:00:00  14.0  -1.0  83.0  -22.0 -11.0 -14.0 -17.0   -4.0  30.0   
2012-03-10 06:00:00  50.0  -5.0  52.0  -30.0   0.0  87.0  62.0   -1.0  23.0   
2012-03-10 07:00:00 -34.0  14.0 -48.0  127.0  97.0  -5.0 -48.0 -129.0  27.0   

                       186   142   107   163    68   239   164   141   249  \
date                                                                         
2012-03-10 03:00:00   13.0  29.0  38.0  26

Shape of org. dataset after shift:  (169, 20)
Shape of org. dataset after shift:  (1, 20)
## Avg. RMSE of recent predictions: 
[62.65332876508116]
## converted_stream_flag used:  False
## Shape of streaming_df:  (169, 20)
## Head of streaming_df:                         237   161    230     79    236    162    170    234  \
date                                                                         
2012-03-31 06:00:00  -76.0  -8.0   92.0   40.0 -121.0  -12.0  -45.0  -29.0   
2012-03-31 07:00:00  -70.0  14.0   26.0   27.0  -65.0   57.0  -52.0  -75.0   
2012-03-31 08:00:00  -44.0  56.0  -48.0   64.0 -117.0  146.0   92.0   37.0   
2012-03-31 09:00:00   45.0  77.0  136.0  158.0  -77.0  150.0   57.0   80.0   
2012-03-31 10:00:00  136.0   1.0  258.0  107.0   84.0  112.0  108.0  162.0   

                        48   186    142   107    163     68    239    164  \
date                                                                        
2012-03-31 06:00:00  -88.0 -54.0   -6.0  16.0  -70.

Shape of org. dataset after shift:  (1, 20)
## Avg. RMSE of recent predictions: 
[56.217963332648786]
## converted_stream_flag used:  False
## Shape of streaming_df:  (169, 20)
## Head of streaming_df:                         237    161    230     79    236    162    170    234  \
date                                                                          
2012-04-14 08:00:00  114.0  -90.0 -139.0   94.0  139.0   20.0  -72.0  -34.0   
2012-04-14 09:00:00  104.0    5.0   22.0  114.0  347.0   10.0   86.0   26.0   
2012-04-14 10:00:00  147.0 -132.0    9.0  -27.0  301.0  111.0   64.0  153.0   
2012-04-14 11:00:00  132.0  -32.0   29.0  188.0  193.0  -35.0  214.0   56.0   
2012-04-14 12:00:00  153.0  -91.0   17.0  221.0  275.0   29.0  118.0   97.0   

                        48    186    142    107   163     68    239   164  \
date                                                                        
2012-04-14 08:00:00   -4.0    1.0  -60.0  165.0   9.0   37.0  317.0 -40.0   
2012-04-14 0

Shape of org. dataset after shift:  (169, 20)
Shape of org. dataset after shift:  (1, 20)
## Avg. RMSE of recent predictions: 
[56.18890664109949]
## converted_stream_flag used:  False
## Shape of streaming_df:  (169, 20)
## Head of streaming_df:                         237    161    230     79   236    162    170    234  \
date                                                                         
2012-05-05 11:00:00  -77.0    3.0  -99.0  107.0  -1.0  -14.0   27.0  118.0   
2012-05-05 12:00:00 -128.0    1.0  131.0   83.0   1.0  -70.0   62.0   30.0   
2012-05-05 13:00:00 -175.0 -226.0  -32.0   66.0  47.0  -49.0 -127.0   -1.0   
2012-05-05 14:00:00  -13.0  -77.0   19.0   75.0  -8.0 -148.0  179.0  -26.0   
2012-05-05 15:00:00 -102.0   -5.0   13.0  147.0  37.0  -24.0  159.0  -77.0   

                       48   186    142    107   163    68   239   164    141  \
date                                                                           
2012-05-05 11:00:00 -24.0  93.0   16.0  145.0

Shape of org. dataset after shift:  (169, 20)
Shape of org. dataset after shift:  (1, 20)
## Avg. RMSE of recent predictions: 
[60.84799623479]
## converted_stream_flag used:  False
## Shape of streaming_df:  (169, 20)
## Head of streaming_df:                         237    161    230     79    236    162    170    234  \
date                                                                          
2012-05-26 14:00:00 -160.0  -88.0  -12.0   17.0 -136.0 -212.0 -209.0  104.0   
2012-05-26 15:00:00  -80.0  230.0  217.0   85.0  -14.0 -132.0 -135.0   90.0   
2012-05-26 16:00:00 -144.0  149.0  253.0  184.0  126.0 -340.0  -78.0  131.0   
2012-05-26 17:00:00 -159.0 -209.0  280.0    7.0   96.0 -412.0 -164.0  103.0   
2012-05-26 18:00:00   11.0 -257.0  344.0  -43.0  178.0 -341.0   13.0  224.0   

                        48    186    142    107    163     68    239   164  \
date                                                                         
2012-05-26 14:00:00   13.0    9.0 -127.0 -230

## Avg. RMSE of recent predictions: 
[57.31865315065353]
## converted_stream_flag used:  False
## Shape of streaming_df:  (169, 20)
## Head of streaming_df:                         237    161    230     79    236    162    170    234  \
date                                                                          
2012-06-09 16:00:00   78.0  165.0  212.0   68.0  -49.0   94.0  152.0  121.0   
2012-06-09 17:00:00  -14.0  230.0  287.0  -28.0   32.0   98.0  166.0   97.0   
2012-06-09 18:00:00   27.0  108.0  202.0   31.0  -50.0  181.0   13.0   63.0   
2012-06-09 19:00:00  -80.0   49.0  209.0  178.0 -141.0 -109.0   43.0  147.0   
2012-06-09 20:00:00 -372.0  -65.0  114.0 -180.0   59.0  -35.0  -54.0   84.0   

                        48    186    142   107    163     68    239    164  \
date                                                                         
2012-06-09 16:00:00  112.0   18.0   71.0  40.0  182.0   68.0   11.0   60.0   
2012-06-09 17:00:00   90.0  362.0  191.0 -36.0   65.0 

Shape of org. dataset after shift:  (169, 20)
Shape of org. dataset after shift:  (1, 20)
## Avg. RMSE of recent predictions: 
[71.3018936894146]
## converted_stream_flag used:  False
## Shape of streaming_df:  (169, 20)
## Head of streaming_df:                         237    161    230     79    236    162    170    234  \
date                                                                          
2012-06-30 19:00:00  -85.0    4.0  -50.0   -2.0 -145.0 -149.0  -83.0   15.0   
2012-06-30 20:00:00  -12.0 -124.0  102.0  367.0  -18.0   37.0 -154.0  102.0   
2012-06-30 21:00:00 -100.0    4.0  259.0   52.0 -107.0   -7.0 -197.0    0.0   
2012-06-30 22:00:00  -96.0 -105.0   17.0   27.0 -114.0 -118.0 -343.0    8.0   
2012-06-30 23:00:00  -98.0  -95.0   63.0   14.0  -35.0 -181.0 -362.0  -15.0   

                        48    186    142    107   163     68    239    164  \
date                                                                         
2012-06-30 19:00:00 -119.0  -57.0 -139.0   

Shape of org. dataset after shift:  (1, 20)
## Avg. RMSE of recent predictions: 
[61.85375616951668]
## converted_stream_flag used:  False
## Shape of streaming_df:  (169, 20)
## Head of streaming_df:                         237   161    230     79    236    162    170   234  \
date                                                                        
2012-07-14 21:00:00  -34.0 -27.0   -4.0  275.0 -288.0   37.0  -13.0  52.0   
2012-07-14 22:00:00 -118.0 -55.0  -89.0  232.0 -263.0 -112.0 -103.0 -89.0   
2012-07-14 23:00:00   -5.0 -39.0 -203.0  126.0  -82.0 -107.0  -63.0 -30.0   
2012-07-15 00:00:00   27.0 -54.0   58.0  362.0  -16.0 -120.0 -156.0  83.0   
2012-07-15 01:00:00   -1.0  13.0  168.0  318.0   -5.0   44.0   -9.0 -49.0   

                        48    186    142    107    163     68    239    164  \
date                                                                          
2012-07-14 21:00:00  -26.0  -12.0  253.0 -138.0   25.0  252.0 -214.0   45.0   
2012-07-14 22:00:00 -

Shape of org. dataset after shift:  (169, 20)
Shape of org. dataset after shift:  (1, 20)
## Avg. RMSE of recent predictions: 
[51.79386427960477]
## converted_stream_flag used:  False
## Shape of streaming_df:  (169, 20)
## Head of streaming_df:                        237   161   230     79   236   162    170   234     48  \
date                                                                           
2012-08-05 00:00:00  -4.0  55.0   0.0 -179.0  52.0  25.0  192.0  99.0  173.0   
2012-08-05 01:00:00  14.0 -85.0 -33.0 -248.0   2.0 -54.0   26.0  20.0  110.0   
2012-08-05 02:00:00  14.0  -1.0   4.0  -72.0  -7.0  18.0  -22.0  48.0   49.0   
2012-08-05 03:00:00 -23.0  -8.0  76.0   37.0  -8.0  27.0  -23.0   1.0   88.0   
2012-08-05 04:00:00  -7.0 -13.0 -23.0  -65.0  17.0  -7.0  -50.0 -25.0  -11.0   

                      186   142   107   163     68   239   164   141   249  \
date                                                                         
2012-08-05 00:00:00  42.0  50.0  82

#Current LearningRate:  0.001
Epoch 8/10
#Current LearningRate:  0.001
Epoch 9/10
#Current LearningRate:  0.001
Epoch 10/10
#Current LearningRate:  0.001
## Only training history & model are returned
## Predictions with retrained model are made..
>> Current Number of weight updates based on Switching Scheme:  1
>> Current Number of retrainings:  1
# Very first predictions are made for next 168 days..
## Assigned Dates are double checked..
# >> end of dataset is reached with preds of valid_set --> get last predictions with model
current valid date:  2012-08-11 00:00:00
current test date:  None
 ++ Number of days contained in train_set used for scaling:  731
selected years for training:  [Timestamp('2010-08-10 23:00:00'), Timestamp('2012-08-10 23:00:00')]
year_list given:  [Timestamp('2010-08-10 23:00:00'), Timestamp('2012-08-10 23:00:00'), Timestamp('2012-08-11 00:00:00'), None]
#### Make predictions model: complex_MLP_2H_128_32_batch512_drop03_clip_norm_shuffle_scaling_std_W168_20large

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  keepdims=keepdims)
  arrmean, rcount, out=arrmean, casting='unsafe', subok=False)
  ret = ret.dtype.type(ret / rcount)


#### store results

In [23]:
# Target locations for the result frames and for the model files.
# NOTE(review): unlike TRAIN_PATH / Store_PATH at the top of the notebook these
# placeholders have no leading '/', i.e. they are relative paths -- confirm.
df_save_PATH = 'media/...'
model_save_PATH = 'media/...'

# Results of the HDDDM_diff switching-scheme run. The cell previously read
# `..._w_updater5__4_5_hdddm_diff`, a name this notebook does not define in the
# switching cell (it defines `..._w_updater1__4_5_hdddm_diff`), so a fresh
# Restart & Run All would fail with a NameError. Bind the tuple once so that
# every call below is guaranteed to use the same run.
# Indices used below, going by how the retraining cell names the same tuple:
#   [0] models, [1] model results, [2] detected dates,
#   [4] retraining dates, [5] weight-update dates.
switch_results_hdddm_diff = results_tuple_switch_backshift2_weight_range_w_updater1__4_5_hdddm_diff

# Store the prediction results (results first, then models).
_ = sv_files.store_retrained_drift_detection_results(
    switch_results_hdddm_diff[1],
    switch_results_hdddm_diff[0],
    df_save_PATH,
)

# Store the models and their training histories.
_ = sv_files.store_model_and_history_on_disk(
    switch_results_hdddm_diff[0],
    model_save_PATH,
    df_save_PATH,
)

# Store the dates at which a change was detected.
dates_df = sv_files.store_detected_change_dates(
    switch_results_hdddm_diff[2],
    df_save_PATH,
)

# Store the detected dates again, this time with the dates as index, together
# with the retraining dates and the weight-update dates of the switching scheme.
_ = sv_files.store_detected_change_dates_with_index(
    switch_results_hdddm_diff[2],
    switch_results_hdddm_diff[4],
    switch_updating_dates_list=switch_results_hdddm_diff[5],
    df_store_PATH=df_save_PATH,
)



predictions stored on disk!
Saved model to disk
Save history_df on disk done
Saved model to disk
Save history_df on disk done
Saved model to disk
Save history_df on disk done
Saved model to disk
Save history_df on disk done
Saved model to disk
Save history_df on disk done
Saved model to disk
Save history_df on disk done
Saved model to disk
Save history_df on disk done
Saved model to disk
Save history_df on disk done
Saved model to disk
Save history_df on disk done
Saved model to disk
Save history_df on disk done
