In [1]:
import numpy as np
import pandas as pd
from importlib import reload

import munch
import datetime

import shared
import config
import provider_yfinance as provider

# Re-import the project modules (same order as before) so the kernel uses
# their current on-disk source without needing a restart.
for _module in (shared, config, provider):
    reload(_module)

# Build the run configuration for the '^GDAXI' ticker.
cfg = config.get_config('^GDAXI')

# Optional: overwrite download_end_dt so that cached data is used.
#config.overwrite_end_dt(cfg, '2019-12-19')
#config.save_config(cfg)

config> created config from file: './config.json'
config> config
        - base:
            - config_file_path: /mnt/c/notebooks/sandbox/config.json
        - datasets:
            - stocks: 30
            - benchmarks: 69
        - prepare:
            - data_start_dt: 2018-02-13
            - data_end_dt: 2020-01-07
            - cache_dir: /mnt/c/notebooks/sandbox/cache/20200107/
        - train:            
            - window_trading_days: [3, 5, 21, 35, 50]
            - lag_trading_days: [1, 2, 3, 4, 5]
            - label_max_high_weight: 3.0
            - label_max_close_weight: 1.0
            - settings: 12
        - model:
            - max_samples: 40
            - batch_size: 32
            - lstm_hidden_size: 256
            - early_stopping_patience: 10
            - validaion_monitor: val_mean_squared_error
            - max_epochs: 1000
            - base_dir: /mnt/c/notebooks/sandbox/model/20200107/            
        


In [2]:
%%time 
# 344 ms (author's recorded timing; the %%time output below is authoritative)
# Load the stock and benchmark datasets for cfg. Each provider call returns
# two values, unpacked here as a (cfg_*, data_*) pair.
cfg_stocks, data_stocks = provider.load_stocks(cfg)
cfg_benchmarks, data_benchmarks = provider.load_benchmarks(cfg)

CPU times: user 188 ms, sys: 78.1 ms, total: 266 ms
Wall time: 307 ms


In [3]:
# Peek at the first rows of the loaded '^GDAXI' benchmark history.
data_benchmarks.tickers['^GDAXI'].history.head()

Unnamed: 0_level_0,open,high,low,close,volume,dividends,stock_splits
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1987-12-30,1005.19,1005.19,1005.19,1005.19,0,0,0
1988-01-04,956.49,956.49,956.49,956.49,0,0,0
1988-01-05,996.1,996.1,996.1,996.1,0,0,0
1988-01-06,1006.01,1006.01,1006.01,1006.01,0,0,0
1988-01-07,1014.47,1014.47,1014.47,1014.47,0,0,0


In [4]:
%%time 
# total: 2.55 s (author's recorded timing; the %%time output below is authoritative)
# Run the provider's prepare step on the loaded stock and benchmark data.
# The results are indexed by ticker symbol in the following cells.
prep_stocks = provider.prepare_stocks(cfg, data_stocks)
prep_benchmarks = provider.prepare_benchmarks(cfg, data_benchmarks)

CPU times: user 93.8 ms, sys: 93.8 ms, total: 188 ms
Wall time: 182 ms


In [5]:
# Peek at the first rows of the prepared '^GDAXI' benchmark frame.
prep_benchmarks['^GDAXI'].head()

Unnamed: 0_level_0,open,high,low,close,volume,diff_prev,diff_oc,diff_hl,rolling_3d,rolling_5d,rolling_21d,rolling_35d,rolling_50d,lag_1d,lag_2d,lag_3d,lag_4d,lag_5d
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2018-02-07,12478.68,12651.28,12414.84,12590.43,153471100,,111.75,236.44,,,,,,,,,,
2018-02-08,12506.18,12541.32,12187.45,12260.29,153991900,-84.25,-245.89,353.87,,,,,,-0.026571,,,,
2018-02-09,12263.1,12296.18,12003.36,12107.48,175812900,2.81,-155.62,292.82,12319.4,,,,,-0.012542,-0.039114,,,
2018-02-12,12238.63,12379.16,12222.34,12282.77,120024900,131.15,44.14,156.82,12216.846667,,,,,0.014374,0.001832,-0.02474,,
2018-02-13,12282.76,12300.63,12196.22,12196.5,94601100,-0.01,-86.26,104.41,12195.583333,12287.494,,,,-0.007048,0.007326,-0.005217,-0.031788,


In [6]:
%%time
# total: 1.2 s (author's recorded timing; the %%time output below is authoritative)
# Run the provider's encode step on the prepared frames.
# Note that encode_benchmarks takes the prepared stocks as a third argument.
enc_stocks = provider.encode_stocks(cfg, prep_stocks)
enc_benchmarks = provider.encode_benchmarks(cfg, prep_benchmarks, prep_stocks)

CPU times: user 31.2 ms, sys: 62.5 ms, total: 93.8 ms
Wall time: 77.7 ms


In [7]:
# Peek at the first rows of the encoded '^GDAXI' benchmark frame.
enc_benchmarks['^GDAXI'].head()

Unnamed: 0_level_0,open,high,low,close,volume,diff_prev,diff_oc,diff_hl,rolling_3d,rolling_5d,rolling_21d,rolling_35d,rolling_50d,lag_1d,lag_2d,lag_3d,lag_4d,lag_5d,scaled_volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2018-02-07,12478.68,12651.28,12414.84,12590.43,153471100.0,-84.25,111.75,236.44,12319.4,12287.494,12327.277143,12279.01,12285.1056,-0.026571,-0.039114,-0.02474,-0.031788,-0.020159,0.383519
2018-02-08,12506.18,12541.32,12187.45,12260.29,153991900.0,-84.25,-245.89,353.87,12319.4,12287.494,12327.277143,12279.01,12285.1056,-0.026571,-0.039114,-0.02474,-0.031788,-0.020159,0.384821
2018-02-09,12263.1,12296.18,12003.36,12107.48,175812900.0,2.81,-155.62,292.82,12319.4,12287.494,12327.277143,12279.01,12285.1056,-0.012542,-0.039114,-0.02474,-0.031788,-0.020159,0.439351
2018-02-12,12238.63,12379.16,12222.34,12282.77,120024900.0,131.15,44.14,156.82,12216.846667,12287.494,12327.277143,12279.01,12285.1056,0.014374,0.001832,-0.02474,-0.031788,-0.020159,0.299938
2018-02-13,12282.76,12300.63,12196.22,12196.5,94601100.0,-0.01,-86.26,104.41,12195.583333,12287.494,12327.277143,12279.01,12285.1056,-0.007048,0.007326,-0.005217,-0.031788,-0.020159,0.236405


In [8]:
%%time 
# total: 20min - 50min
# NOTE(review): the recorded %%time output of this cell is far shorter than
# the figure above — presumably results were cached on this run; confirm.

# For each configured submodel: build its data set, record the feature count
# on the settings object, and finally persist the (mutated) config.
for submodel_settings in cfg.train.settings:
    print(f"sm-{submodel_settings.id}> preparing submodel data ...")
    model_data = provider.prepare_submodel_data(cfg, submodel_settings, enc_stocks, enc_benchmarks)
    # update num_features setting (informational):
    # length of the innermost sequence of the entry labelled 0 in column X
    submodel_settings.num_features = len(model_data.X[0][0][0][0])
# Runs once, after the loop: writes cfg including the num_features set above.
config.save_config(cfg)

sm-lookback_3-label_1> preparing submodel data ...
sm-lookback_3-label_2> preparing submodel data ...
sm-lookback_5-label_1> preparing submodel data ...
sm-lookback_5-label_3> preparing submodel data ...
sm-lookback_7-label_1> preparing submodel data ...
sm-lookback_7-label_3> preparing submodel data ...
sm-lookback_10-label_3> preparing submodel data ...
sm-lookback_14-label_3> preparing submodel data ...
sm-lookback_21-label_5> preparing submodel data ...
sm-lookback_36-label_7> preparing submodel data ...
sm-lookback_64-label_7> preparing submodel data ...
sm-lookback_100-label_7> preparing submodel data ...
config> saved config to '/mnt/c/notebooks/sandbox/config.json'
CPU times: user 1.72 s, sys: 8.84 s, total: 10.6 s
Wall time: 14.7 s


In [9]:
# Disabled (kept for reference only; this cell currently does nothing):
# rel_benchmarks_data = provider.generate_relative_benchmarks_data(cfg, cfg.train.settings[0], enc_benchmarks)

In [10]:
# Pick the submodel to inspect by its id instead of by list position, so this
# cell keeps pointing at the same submodel if cfg.train.settings is reordered
# or extended. Raises StopIteration if no submodel has this id.
submodel_settings = next(
    settings for settings in cfg.train.settings
    if settings.id == 'lookback_100-label_7'
)

# NOTE(review): called without enc_stocks / enc_benchmarks, unlike the loop
# that prepares all submodels — presumably this reads previously prepared
# data; confirm against provider.prepare_submodel_data.
model_data = provider.prepare_submodel_data(cfg, submodel_settings)
model_data

Unnamed: 0,ticker,date,X,y
0,1COV.DE,2018-12-28,[[[[86.54078674 88.04163361 85.76615906 ... -1...,9.525539
1,1COV.DE,2019-01-02,[[[[87.59746552 89.23001862 86.57407379 ... -2...,10.246101
2,1COV.DE,2019-01-03,[[[[87.76258087 88.10454559 84.68490601 ... -0...,10.515388
3,1COV.DE,2019-01-04,[[[[74.77519226 75.09799194 70.41733551 ... -1...,4.323265
4,1COV.DE,2019-01-07,[[[[71.70030212 75.38854218 71.30596161 ... -0...,4.952447
...,...,...,...,...
3894,WDI.DE,2019-12-20,[[[[42.65335083 43.93723297 41.13171768 ... -3...,9.403233
3895,WDI.DE,2019-12-20,[[[[42.65335083 43.93723297 41.13171768 ... -3...,9.403233
3896,WDI.DE,2019-12-20,[[[[42.65335083 43.93723297 41.13171768 ... -3...,9.403233
3897,WDI.DE,2019-12-20,[[[[42.65335083 43.93723297 41.13171768 ... -3...,9.403233


In [11]:
# Generate the sample descriptors for a single ticker, using the currently
# selected submodel_settings.
ticker_name = '1COV.DE'
ticker_data = enc_stocks[ticker_name] 
samples_iter = provider.generate_samples_iterator(cfg, submodel_settings, ticker_data)
# Despite its name, the result supports slicing; show the first 10 entries.
samples_iter[:10]

[Munch({'seq_nr': 1, 'lookback_start_date': Timestamp('2018-08-06 00:00:00'), 'lookback_end_date': Timestamp('2018-12-28 00:00:00'), 'label_start_date': Timestamp('2019-01-02 00:00:00'), 'label_end_date': Timestamp('2019-01-10 00:00:00')}),
 Munch({'seq_nr': 2, 'lookback_start_date': Timestamp('2018-08-07 00:00:00'), 'lookback_end_date': Timestamp('2019-01-02 00:00:00'), 'label_start_date': Timestamp('2019-01-03 00:00:00'), 'label_end_date': Timestamp('2019-01-11 00:00:00')}),
 Munch({'seq_nr': 3, 'lookback_start_date': Timestamp('2018-08-08 00:00:00'), 'lookback_end_date': Timestamp('2019-01-03 00:00:00'), 'label_start_date': Timestamp('2019-01-04 00:00:00'), 'label_end_date': Timestamp('2019-01-14 00:00:00')}),
 Munch({'seq_nr': 4, 'lookback_start_date': Timestamp('2018-08-09 00:00:00'), 'lookback_end_date': Timestamp('2019-01-04 00:00:00'), 'label_start_date': Timestamp('2019-01-07 00:00:00'), 'label_end_date': Timestamp('2019-01-15 00:00:00')}),
 Munch({'seq_nr': 5, 'lookback_start

In [12]:
# Re-display the current model_data for reference (columns: ticker, date, X, y).
model_data

Unnamed: 0,ticker,date,X,y
0,1COV.DE,2018-12-28,[[[[86.54078674 88.04163361 85.76615906 ... -1...,9.525539
1,1COV.DE,2019-01-02,[[[[87.59746552 89.23001862 86.57407379 ... -2...,10.246101
2,1COV.DE,2019-01-03,[[[[87.76258087 88.10454559 84.68490601 ... -0...,10.515388
3,1COV.DE,2019-01-04,[[[[74.77519226 75.09799194 70.41733551 ... -1...,4.323265
4,1COV.DE,2019-01-07,[[[[71.70030212 75.38854218 71.30596161 ... -0...,4.952447
...,...,...,...,...
3894,WDI.DE,2019-12-20,[[[[42.65335083 43.93723297 41.13171768 ... -3...,9.403233
3895,WDI.DE,2019-12-20,[[[[42.65335083 43.93723297 41.13171768 ... -3...,9.403233
3896,WDI.DE,2019-12-20,[[[[42.65335083 43.93723297 41.13171768 ... -3...,9.403233
3897,WDI.DE,2019-12-20,[[[[42.65335083 43.93723297 41.13171768 ... -3...,9.403233


In [13]:
# Last 10 closing prices from the loaded '1COV.DE' history.
data_stocks.tickers['1COV.DE'].history.tail(10).close

date
2019-12-19    41.85
2019-12-20    41.77
2019-12-23    41.81
2019-12-27    41.82
2019-12-30    41.45
2020-01-02    42.18
2020-01-03    41.90
2020-01-06    39.89
2020-01-07    40.13
2020-01-08    40.80
Name: close, dtype: float64

In [14]:
# Show the settings object of the currently selected submodel.
submodel_settings

Munch({'ensemble_weight': 1.0, 'float_precision': 100.0, 'id': 'lookback_100-label_7', 'label_days': 7, 'lookback_days': 100, 'max_samples': 26, 'prev_year_samples_after': 12, 'prev_year_samples_before': 5, 'sample_manifolds': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 4, 5, 6, 7], 'num_features': 1319})

In [15]:
# Recompute the label y of the first generated sample by hand, step by step,
# so it can be compared with the y column shown for model_data.
si = samples_iter[0]

# Close price on the last lookback day; the label is expressed relative to it.
base_price = ticker_data.loc[si.lookback_end_date].close

# Boolean masks over the date index (both window bounds inclusive). They are
# applied with .loc, the indexer intended for masks derived from labels.
in_label_window = (ticker_data.index >= si.label_start_date) & (ticker_data.index <= si.label_end_date)
in_lookback_window = (ticker_data.index >= si.lookback_start_date) & (ticker_data.index <= si.lookback_end_date)
label_data = ticker_data.loc[in_label_window]
lookback_data = ticker_data.loc[in_lookback_window]

# Weighted average of the label window's highest high and highest close.
high_weight = cfg.train.label_max_high_weight
close_weight = cfg.train.label_max_close_weight
label_price = ((label_data.high.max() * high_weight) + (label_data.close.max() * close_weight)) / (close_weight + high_weight)

# Relative change versus base_price, scaled by the submodel's float_precision.
y = (label_price - base_price) / base_price * submodel_settings.float_precision
y

9.525538610505919

In [16]:
# Weighted average of the label window's highest high and highest close
# (a price level, i.e. before any normalization by base_price).
((label_data.high.max() * cfg.train.label_max_high_weight) + (label_data.close.max() * cfg.train.label_max_close_weight)) / (cfg.train.label_max_close_weight + cfg.train.label_max_high_weight)

45.245

In [17]:
# Highest closing price within the label window.
label_data.close.max()

44.75

In [18]:
# Close price at the sample's lookback_end_date.
base_price

41.31