In [1]:
# set env via .env file
import datetime
import os
import sys
import time

import munch
import pandas as pd

import shared
import config
import model
import provider_yfinance as provider


In [2]:
# Announce start-up, then let the project's config module print its
# environment diagnostics (framework versions, device info, ...).
print('init> initialize environment')
config.print_env()

init> initialize environment
TF version: 2.1.0
Keras version: 2.2.4-tf
Executing op RandomUniform in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op Sub in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op Mul in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op Add in device /job:localhost/replica:0/task:0/device:CPU:0
Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
Is there a GPU available: False
Is the Tensor on GPU #0: False
Device name: /job:localhost/replica:0/task:0/device:CPU:0
Eager Execution enabled: True


In [3]:
# Build the run configuration for the '^GDAXI' symbol, pin the end date of
# the data period, and persist the result.
index_symbol = '^GDAXI'
end_dt = '2020-02-03'

# NOTE(review): the meaning of the positional `True` is not visible from this
# notebook -- confirm against config.get_config and prefer a keyword argument.
cfg = config.get_config(index_symbol, True)
config.overwrite_end_dt(cfg, end_dt)
config.save_config(cfg)

config> current directory:/mnt/d/notebooks/sandbox
config> data period: from '2018-03-07' to '2020-02-05'
config> download period: from '2017-12-20' to '2020-02-05'
shared> parsing stocks from web '^GDAXI' ...
config> benchmarks: '['000001.SS', 'AUDUSD=X', 'BTCUSD=X', 'CNY=X', 'ETHUSD=X', 'EURCAD=X', 'EURCHF=X', 'EURGBP=X', 'EURHUF=X', 'EURJPY=X', 'EURJPY=X', 'EURSEK=X', 'EURUSD=X', 'GBPJPY=X', 'GBPUSD=X', 'GC=F', 'HKD=X', 'IDR=X', 'IMOEX.ME', 'INR=X', 'JPY=X', 'KW=F', 'LC=F', 'LH=F', 'MXN=X', 'MYR=X', 'NZDUSD=X', 'PA=F', 'PHP=X', 'PL=F', 'RUB=X', 'S=F', 'SGD=X', 'SI=F', 'SM=F', 'THB=X', 'ZAR=X', '^AORD', '^AXJO', '^BFX', '^BSESN', '^BUK100P', '^BVSP', '^DJI', '^FCHI', '^FTSE', '^GDAXI', '^GSPC', '^GSPTSE', '^HSI', '^IPSA', '^IXIC', '^JKSE', '^JN0U.JO', '^KLSE', '^KS11', '^MERV', '^MXX', '^N100', '^N225', '^NYA', '^NZ50', '^RUT', '^STI', '^STOXX50E', '^TA125.TA', '^TWII', '^VIX', '^XAX']'
config> stocks: '['1COV.DE', 'ADS.DE', 'ALV.DE', 'BAS.DE', 'BAYN.DE', 'BEI.DE', 'BMW.DE', 'CON.DE'

In [4]:
%%time
# Load stock and benchmark data through the provider in compact mode.
# Approximate runtimes noted by the author:
# total: 1h
# compact: 5m

# Wall-clock start; used below to report the duration of this cell.
start_ts = time.time()
print(f'download> start downloading data {cfg.prepare.data_end_dt} ...')

# Each loader returns a (config, data) pair.
# NOTE(review): the exact effect of compact=True is defined in the provider
# module and is not visible here.
cfg_stocks, data_stocks = provider.load_stocks(cfg, compact=True)
cfg_benchmarks, data_benchmarks = provider.load_benchmarks(cfg, compact=True)

print(f'download> download finished, duration: {time.time() - start_ts:.2f} s')

download> start downloading data 2020-02-03 ...
download> download finished, duration: 1.19 s
CPU times: user 578 ms, sys: 203 ms, total: 781 ms
Wall time: 1.19 s


In [5]:
%%time 
# 5 s

start_ts = time.time()
print(f'prepare> preparing stock and benchmark data ...')

cfg_stocks, data_stocks = provider.load_stocks(cfg)
cfg_benchmarks, data_benchmarks = provider.load_benchmarks(cfg)

prep_stocks = provider.prepare_stocks(cfg, data_stocks)
prep_benchmarks = provider.prepare_benchmarks(cfg, data_benchmarks)

enc_stocks = provider.encode_stocks(cfg, prep_stocks)
enc_benchmarks = provider.encode_benchmarks(cfg, prep_benchmarks, prep_stocks)

prepare> preparing stock and benchmark data ...
shared> prepared stock data loaded from cache: '/mnt/d/notebooks/sandbox/cache/20200203//stocks_prep.pkl'
shared> prepared benchmark data loaded from cache: '/mnt/d/notebooks/sandbox/cache/20200203//benchmarks_prep.pkl'
shared> encoded stock data loaded from cache: '/mnt/d/notebooks/sandbox/cache/20200203//stocks_enc.pkl'
shared> encoded benchmark data loaded from cache: '/mnt/d/notebooks/sandbox/cache/20200203//benchmarks_enc.pkl'
CPU times: user 1.05 s, sys: 219 ms, total: 1.27 s
Wall time: 1.32 s


In [6]:
%%time 
# total: 20min - 50min
# compact: 20min

print(f'prepare> preparing submodel data ...')
for submodel_settings in cfg.train.settings:
    print(f"sm-{submodel_settings.id}> preparing submodel data ...")
    model_data = provider.prepare_submodel_data(cfg, submodel_settings, enc_stocks, enc_benchmarks)
    # update num_features setting (informational)
    submodel_settings.num_features = len(model_data.X[0][0][0][0])
config.save_config(cfg)

prepare> preparing submodel data ...
sm-lookback_3-label_1> preparing submodel data ...
sm-lookback_3-label_1> submodel data loaded from cache: '/mnt/d/notebooks/sandbox/cache/20200203/lookback_3-label_1/submodel_data.pkl'
sm-lookback_3-label_2> preparing submodel data ...
sm-lookback_3-label_2> submodel data loaded from cache: '/mnt/d/notebooks/sandbox/cache/20200203/lookback_3-label_2/submodel_data.pkl'
sm-lookback_5-label_1> preparing submodel data ...
sm-lookback_5-label_1> submodel data loaded from cache: '/mnt/d/notebooks/sandbox/cache/20200203/lookback_5-label_1/submodel_data.pkl'
sm-lookback_21-label_5> preparing submodel data ...
sm-lookback_21-label_5> submodel data loaded from cache: '/mnt/d/notebooks/sandbox/cache/20200203/lookback_21-label_5/submodel_data.pkl'
sm-lookback_100-label_7> preparing submodel data ...
sm-lookback_100-label_7> submodel data loaded from cache: '/mnt/d/notebooks/sandbox/cache/20200203/lookback_100-label_7/submodel_data.pkl'
config> saved config to 

In [7]:
# Report how long preparation took.
# NOTE: `start_ts` is not set in this cell; it must already exist from an
# earlier cell (in the recorded run order, the "prepare>" load/encode cell).
# The duration therefore also includes any idle time between those cells.
duration = time.time() - start_ts
print(f'prepare> preparation finished, duration: {duration:.2f} s')

prepare> preparation finished, duration: 14.35 s


In [None]:
%%time

start_ts = time.time()
print(f'train> training started ...')

print('train> list all submodule settings')
[(i, s.id) for i, s in enumerate(cfg.train.settings)]

model.train_full(cfg, start_settings_idx=0)

duration = time.time() - start_ts
print(f'train> training finished, duration: {duration:.2f} s')

train> training started ...
train> list all submodule settings
sm-lookback_3-label_1> training submodel ...
sm-lookback_3-label_1> submodel data loaded from cache: '/mnt/d/notebooks/sandbox/cache/20200203/lookback_3-label_1/submodel_data.pkl'
Executing op Fill in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op VarIsInitializedOp in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op LogicalNot in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op Assert in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op AssignVariableOp in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op VarHandleOp in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op RandomStandardNormal in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op Qr in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op DiagPart in device /job:l

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
batch_normalization (BatchNo (None, 3, 1319)           5276      
_________________________________________________________________
masking (Masking)            (None, 3, 1319)           0         
_________________________________________________________________
lstm (LSTM)                  (None, 3, 256)            1613824   
_________________________________________________________________
lstm_1 (LSTM)                (None, 3, 256)            525312    
_________________________________________________________________
lstm_2 (LSTM)                (None, 256)               525312    
_________________________________________________________________
dense (Dense)                (None, 1)                 257       
Total params: 2,669,981
Trainable params: 2,667,343
Non-trainable params: 2,638
__________________________________________

In [None]:
# Find the most recently modified sub-directory of the model directory.
# `os` is imported in the notebook's top import cell.

# NOTE(review): absolute, machine-specific path -- consider deriving it from
# the run configuration instead.
model_root = '/mnt/d/notebooks/sandbox/model/'

# Full paths of all non-hidden sub-directories (entries starting with '.' are skipped).
all_subdirs = [
    path
    for path in (
        os.path.join(model_root, name)
        for name in os.listdir(model_root)
        if not name.startswith('.')
    )
    if os.path.isdir(path)
]

# default=None yields None instead of raising ValueError when the directory
# has no matching sub-directories yet.
latest_subdir = max(all_subdirs, key=os.path.getmtime, default=None)
latest_subdir

In [None]:
%%time

start_ts = time.time()
print(f'predict> prediction started ...')

predictions = model.predict(cfg)

duration = time.time() - start_ts
print(f'predict> prediction finished, duration: {duration:.2f} s')

In [None]:
# Display the names of all entries in the model directory.
# NOTE(review): absolute, machine-specific path.
os.listdir('/mnt/d/notebooks/sandbox/model/')