$\textbf{*** This version uses the }\texttt{DataLoader, Asset, Portfolio}\textbf{ classes to load data ***}$

In [1]:
import sys
import importlib
# Add the parent directory to the module search path so the `src.` package
# imports below can be resolved (assumes the notebook is run from a folder
# one level below the project root — TODO confirm).
sys.path.append('../')

from IPython.display import display, Math, Latex

# From data_preparation classes
from src.finance_ml.data_preparation.data_preparation import DataLoader

# Transformer used further down to compute the indicator columns
from src.finance_ml.indicators.indicators import Indicators

In [2]:
# Import required packages
import pandas as pd
import numpy as np

## Reading the Data File

In [3]:
# Configure the loader: `time_index_col` is the timestamp column (it has to be the
# same column in every input), and `keep_cols` lists the columns that are retained
# in the dataset. 'HIGHT' is spelled the way the loaded data names that column.
dataloader = DataLoader(
    time_index_col='DATE',
    keep_cols=[
        'VOLUME',
        'OPEN',
        'HIGHT',
        'LOW',
        'CLOSE',
        'VW',
        'TRANSACTIONS',
    ],
)

Here we will select the first $N=10,000$ records from our database for simplicity. We also select the FX pair $\textbf{USDBRL}$ from the data available.

In [4]:
# Number of records (rows) to keep from the example dataset
N = 10_000

# Example files, given relative to the data directory
fname_AAPL = "equities/AAPL_2020-04-07_2022-04-06.parquet"
fname_USDBRL = "FX/USDBRL_2020-04-07_2022-04-06.parquet"
fname_RUBEUR = "FX/RUBEUR_2020-04-07_2022-04-06.parquet"

In [5]:
# Asset used in this simulation: its ticker code and the matching example file
ticker, fname = 'USDBRL', fname_USDBRL

In [6]:
# Load the selected asset into a single dataframe, then keep only the first N rows
df = (
    dataloader
    .load_dataset({ticker: f'../data/{fname}'})
    .iloc[:N]
)
display(df)

Unnamed: 0_level_0,USDBRL_VOLUME,USDBRL_OPEN,USDBRL_HIGHT,USDBRL_LOW,USDBRL_CLOSE,USDBRL_VW,USDBRL_TRANSACTIONS
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020-04-07 18:54:00,2,5.22120,5.22125,5.22120,5.22125,5.2212,2
2020-04-07 18:55:00,3,5.22115,5.22125,5.22115,5.22125,5.2212,3
2020-04-07 18:59:00,2,5.22110,5.22115,5.22110,5.22115,5.2211,2
2020-04-07 19:02:00,1,5.22120,5.22120,5.22120,5.22120,5.2212,1
2020-04-07 19:03:00,3,5.22135,5.22135,5.22105,5.22105,5.2212,3
...,...,...,...,...,...,...,...
2020-04-27 14:14:00,10,5.69465,5.69465,5.69190,5.69190,5.6937,10
2020-04-27 14:15:00,9,5.69065,5.70145,5.69065,5.70081,5.6986,9
2020-04-27 14:16:00,10,5.70066,5.70295,5.70066,5.70270,5.7019,10
2020-04-27 14:17:00,9,5.70255,5.70255,5.69845,5.69845,5.7003,9


In [7]:
data_cols = [
    'DATE',
    'OPEN',
    'HIGHT',
    'LOW',
    'CLOSE',
    'VW',
    'VOLUME',
]

# Names of the indicator columns produced with default parameters.
# The order below is the order in which the columns are displayed.
var_cols = [
    'CLOSE_returns', 'CLOSE_log_returns', 'CLOSE_price_change', 'CLOSE_pct_change',
    'RA_5', 'RA_10',
    'AMPL', 'OPNCLS',
    'MA_5', 'MA_10',
    'V_MA_5', 'V_MA_10', 'V_MA_20',
    'KAMA_w10p(02,30)',
    'PPO_w(26,12)',
    'PVO_w(26,12)s09', 'PVOH_w(26,12)s09', 'PVOsgn_w(26,12)s09',
    'ROC_w12',
    'RSI_w14',
    'StRSI_w14s(03,03)', 'StRSId_w14s(03,03)', 'StRSIk_w14s(03,03)',
    'SO_w14s03', 'SOsgn_w14s03',
    'AOI_w(05,34)',
    'TSI_w(25,13)',
    'UO_wi(07,14,28)wg(4.0,2.0,1.0)',
    'WRI_14',
    'ADI',
    'CMF_w20',
    'EOM_w14', 'EMV_w14',
    'FI_w13',
    'MFI_w14',
    'NVI', 'OBV', 'VPT',
    'VWAP_w14',
    'ADXP_w14', 'ADXN_w14',
    'AROOND_w25', 'AROON_w25', 'AROONU_w25',
    'CCI_w20',
    'DPO_w20',
    'EMA_w14', 'SMA_w14',
    'ICHIA_w(09,26,52)', 'ICHIB_w(09,26,52)',
    'ICHIBL_w(09,26,52)', 'ICHICL_w(09,26,52)',
    'KST_r(10,15,20,30)_w(10,10,10,15)',
    'KSTD_r(10,15,20,30)_w(10,10,10,15)',
    'KSTS_r(10,15,20,30)_w(10,10,10,15)',
    'MACD_w(26,12,09)', 'MACDD_w(26,12,09)', 'MACDS_w(26,12,09)',
    'MI_w(09,25)',
    'PSAR_s(0.02,0.2)',
    'PSARD_s(0.02,0.2)', 'PSARDI_s(0.02,0.2)',
    'PSARU_s(0.02,0.2)', 'PSARUI_s(0.02,0.2)',
    'STC_w(50,23)c10s(03,03)',
    'TRIX_w15',
    'VI_w14', 'VIN_w14', 'VIP_w14',
    'WMA_w09',
]

# Prefix every calculated column with the ticker code, following the naming
# standardized by the `DataLoader` class; the normalized variant of each column
# carries an extra '_norm' suffix.
var_cols = [f'{ticker}_{name}' for name in var_cols]
norm_cols = [f'{name}_norm' for name in var_cols]

# Indicators

## Calculate Indicators

In [8]:
# Toggle for data normalization; it is forwarded to the transformer and also
# controls whether the normalized frame is read back below.
norm_data = True

# Select the indicator transformer for the chosen ticker. The flag is passed
# through (rather than hard-coded to True) so that changing `norm_data` above
# actually switches normalization on or off.
ind_processor = Indicators(ticker=ticker, norm_data=norm_data)

# Calculate indicators over the input dataframe.
# NOTE: `df` is overwritten here, so re-running this cell without re-running the
# loading cell feeds the already-transformed frame back into the transformer.
df = ind_processor.fit_transform(df)

# The normalized data is read from the transformer's `data_norm` attribute.
# `df_norm` is defined on every run so that a stale value from an earlier
# execution cannot survive when the flag is switched off.
df_norm = ind_processor.data_norm if norm_data else None

In [9]:
# Show the indicator columns, dropping every row that still contains a missing value
display(df.loc[:, var_cols].dropna(axis='index'))

Unnamed: 0_level_0,USDBRL_CLOSE_returns,USDBRL_CLOSE_log_returns,USDBRL_CLOSE_price_change,USDBRL_CLOSE_pct_change,USDBRL_RA_5,USDBRL_RA_10,USDBRL_AMPL,USDBRL_OPNCLS,USDBRL_MA_5,USDBRL_MA_10,...,"USDBRL_PSARD_s(0.02,0.2)","USDBRL_PSARDI_s(0.02,0.2)","USDBRL_PSARU_s(0.02,0.2)","USDBRL_PSARUI_s(0.02,0.2)","USDBRL_STC_w(50,23)c10s(03,03)",USDBRL_TRIX_w15,USDBRL_VI_w14,USDBRL_VIN_w14,USDBRL_VIP_w14,USDBRL_WMA_w09
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-04-07 22:24:00,0.999981,-0.000019,-0.00010,-0.000019,0.000067,0.000091,0.00005,0.00000,5.221720,5.221650,...,5.221724,0.0,5.221347,0.0,98.743281,0.000123,-1.719069e-12,0.935484,0.935484,5.221693
2020-04-07 22:29:00,0.999952,-0.000048,-0.00025,-0.000048,0.000160,0.000118,0.00015,0.00010,5.221670,5.221635,...,5.221900,1.0,5.221347,0.0,55.106291,0.000120,-5.405405e-02,0.918919,0.864865,5.221641
2020-04-07 22:32:00,1.000010,0.000010,0.00005,0.000010,0.000178,0.000130,0.00010,-0.00010,5.221610,5.221625,...,5.221888,0.0,5.221347,0.0,27.553145,0.000110,2.631579e-02,0.868421,0.894737,5.221602
2020-04-07 22:34:00,0.999981,-0.000019,-0.00010,-0.000019,0.000172,0.000156,0.00005,0.00005,5.221520,5.221600,...,5.221876,0.0,5.221347,0.0,13.776573,0.000092,-2.564103e-02,0.923077,0.897436,5.221547
2020-04-07 22:39:00,1.000019,0.000019,0.00010,0.000019,0.000114,0.000163,0.00005,-0.00005,5.221460,5.221590,...,5.221865,0.0,5.221347,0.0,6.888286,0.000073,-5.263158e-02,0.921053,0.868421,5.221516
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-04-27 14:14:00,0.999447,-0.000553,-0.00315,-0.000553,0.001382,0.001399,0.00275,-0.00275,5.694090,5.694032,...,5.695328,0.0,5.691505,0.0,86.086922,0.003425,4.681230e-02,0.948729,0.995542,5.694116
2020-04-27 14:15:00,1.001565,0.001564,0.00891,0.001565,0.003239,0.002403,0.01080,0.01016,5.695542,5.694908,...,5.696850,1.0,5.691505,0.0,87.884251,0.003517,2.865738e-01,0.703927,0.990500,5.695428
2020-04-27 14:16:00,1.000332,0.000331,0.00189,0.000332,0.004477,0.003341,0.00229,0.00204,5.697122,5.695863,...,5.696850,0.0,5.690650,1.0,91.773046,0.003819,6.319056e-01,0.666356,1.298262,5.696947
2020-04-27 14:17:00,0.999255,-0.000746,-0.00425,-0.000745,0.004355,0.003434,0.00410,-0.00410,5.697782,5.696158,...,5.696850,0.0,5.690650,0.0,95.619386,0.004094,4.819960e-01,0.706549,1.188546,5.697456


In [10]:
# Show the normalized indicator columns (only when normalization was requested),
# dropping every row that still contains a missing value
if norm_data:
    display(df_norm.loc[:, norm_cols].dropna(axis='index'))

Unnamed: 0_level_0,USDBRL_CLOSE_returns_norm,USDBRL_CLOSE_log_returns_norm,USDBRL_CLOSE_price_change_norm,USDBRL_CLOSE_pct_change_norm,USDBRL_RA_5_norm,USDBRL_RA_10_norm,USDBRL_AMPL_norm,USDBRL_OPNCLS_norm,USDBRL_MA_5_norm,USDBRL_MA_10_norm,...,"USDBRL_PSARD_s(0.02,0.2)_norm","USDBRL_PSARDI_s(0.02,0.2)_norm","USDBRL_PSARU_s(0.02,0.2)_norm","USDBRL_PSARUI_s(0.02,0.2)_norm","USDBRL_STC_w(50,23)c10s(03,03)_norm",USDBRL_TRIX_w15_norm,USDBRL_VI_w14_norm,USDBRL_VIN_w14_norm,USDBRL_VIP_w14_norm,USDBRL_WMA_w09_norm
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-04-07 22:24:00,0.973025,-0.272904,-0.240658,-0.277836,-0.996515,-0.996279,-0.998575,-0.355998,-0.505602,-0.504227,...,-0.521042,-1.0,-0.497732,-1.0,0.974866,-0.045720,-0.238125,-0.118972,-0.468332,-0.504733
2020-04-07 22:29:00,0.972969,-0.275597,-0.243296,-0.280521,-0.991663,-0.994800,-0.995726,-0.354066,-0.505747,-0.504271,...,-0.520529,1.0,-0.497732,-1.0,0.102126,-0.045782,-0.266820,-0.136003,-0.518898,-0.504885
2020-04-07 22:32:00,0.973082,-0.270211,-0.238020,-0.275151,-0.990743,-0.994154,-0.997151,-0.357929,-0.505921,-0.504300,...,-0.520563,-1.0,-0.497732,-1.0,-0.448937,-0.045988,-0.224154,-0.187923,-0.497509,-0.504998
2020-04-07 22:34:00,0.973025,-0.272904,-0.240658,-0.277836,-0.991077,-0.992673,-0.998575,-0.355032,-0.506182,-0.504373,...,-0.520598,-1.0,-0.497732,-1.0,-0.724469,-0.046363,-0.251737,-0.131728,-0.495576,-0.505159
2020-04-07 22:39:00,0.973101,-0.269313,-0.237141,-0.274256,-0.994076,-0.992307,-0.998575,-0.356963,-0.506356,-0.504403,...,-0.520631,-1.0,-0.497732,-1.0,-0.862234,-0.046762,-0.266065,-0.133810,-0.516352,-0.505250
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-04-27 14:14:00,0.971972,-0.322972,-0.294294,-0.327744,-0.928213,-0.923805,-0.921652,-0.409117,0.863804,0.877409,...,0.857327,-1.0,0.876832,-1.0,0.721738,0.022060,-0.213273,-0.105354,-0.425328,0.869304
2020-04-27 14:15:00,0.976152,-0.124485,-0.082212,-0.129733,-0.831727,-0.868179,-0.692308,-0.159745,0.868014,0.879971,...,0.861757,1.0,0.876832,-1.0,0.757685,0.023967,-0.085992,-0.357050,-0.428938,0.873118
2020-04-27 14:16:00,0.973717,-0.240036,-0.205663,-0.245058,-0.767415,-0.816173,-0.934758,-0.316593,0.872594,0.882764,...,0.861757,-1.0,0.874333,1.0,0.835461,0.030157,0.097334,-0.395678,-0.208567,0.877537
2020-04-27 14:17:00,0.971593,-0.340995,-0.313638,-0.345703,-0.773743,-0.811001,-0.883191,-0.435194,0.874508,0.883627,...,0.861757,-1.0,0.874333,-1.0,0.912388,0.035801,0.017752,-0.354353,-0.287129,0.879019
