In [8]:
import sys
import importlib  # NOTE(review): not referenced in the cells visible here — confirm it is still needed
# Add the parent directory to the module search path so the `src.` imports below resolve
# (assumes the notebook runs from a folder one level below the project root — TODO confirm).
sys.path.append('../')

# Project classes used in this walkthrough: loader, single-asset wrapper, asset collection.
from src.finance_ml.data_preparation.data_preparation import DataLoader
from src.finance_ml.data_preparation.asset_class import Asset
from src.finance_ml.data_preparation.portfolio_class import Portfolio

### 1. Using DataLoader class

In [9]:
# time_index_col names the timestamp column (it must be the same column in every
# input); keep_cols lists the columns that will remain in the loaded dataset.
dataloader = DataLoader(
    time_index_col='DATE',
    keep_cols=['VOLUME', 'OPEN', 'CLOSE', 'LOW', 'TRANSACTIONS'],
)

In [10]:
# Load the three assets into a single DataFrame; the returned frame is the cell output.
# Keys are the ticker labels, values are the parquet files to read.
dataloader.load_dataset(
    {
        'GOLD': '../data/commodities/GLD_2020-04-07_2022-04-06.parquet',
        'BITCOINS': '../data/cryptos/BTCUSD_2020-04-07_2022-04-06.parquet',
        'SRET': '../data/real_estate/SRET_2020-04-07_2022-04-06.parquet',
    }
)

Unnamed: 0_level_0,GOLD_VOLUME,GOLD_OPEN,GOLD_CLOSE,GOLD_LOW,GOLD_TRANSACTIONS,BITCOINS_VOLUME,BITCOINS_OPEN,BITCOINS_CLOSE,BITCOINS_LOW,BITCOINS_TRANSACTIONS,SRET_VOLUME,SRET_OPEN,SRET_CLOSE,SRET_LOW,SRET_TRANSACTIONS
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2020-04-07 15:43:00,21071.0,155.880,155.9260,155.8600,152.0,,,,,,,,,,
2020-04-07 15:44:00,23010.0,155.930,155.9000,155.9000,165.0,,,,,,,,,,
2020-04-07 15:45:00,31544.0,155.920,155.7350,155.7200,292.0,,,,,,,,,,
2020-04-07 15:46:00,42645.0,155.760,155.8400,155.7600,153.0,,,,,,,,,,
2020-04-07 15:47:00,3735.0,155.805,155.8100,155.7950,47.0,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-04-06 23:54:00,204.0,179.540,179.5399,179.5399,3.0,37.006803,43146.71,43144.08,43123.70,683.0,,,,,
2022-04-06 23:55:00,,,,,,65.685080,43144.08,43188.59,43138.37,1232.0,,,,,
2022-04-06 23:56:00,,,,,,25.763792,43188.44,43111.80,43111.80,797.0,,,,,
2022-04-06 23:57:00,,,,,,21.241159,43122.93,43149.90,43115.26,647.0,,,,,


In [11]:
# Descriptive summary per ticker: in the output below each calendar date shows the
# min time, max time and row count for every loaded ticker.
dataloader.tablefreq

Unnamed: 0_level_0,GOLD_time,GOLD_time,GOLD_time,BITCOINS_time,BITCOINS_time,BITCOINS_time,SRET_time,SRET_time,SRET_time
Unnamed: 0_level_1,GOLD_min,GOLD_max,GOLD_count,BITCOINS_min,BITCOINS_max,BITCOINS_count,SRET_min,SRET_max,SRET_count
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
2020-04-07,15:43:00,23:37:00,298.0,17:48:00,23:59:00,372.0,17:25:00,19:59:00,100.0
2020-04-08,09:36:00,23:58:00,514.0,00:00:00,23:59:00,1440.0,13:30:00,21:56:00,293.0
2020-04-09,08:00:00,23:58:00,591.0,00:00:00,23:59:00,1440.0,12:58:00,19:59:00,344.0
2020-04-10,,,,00:00:00,23:59:00,1440.0,,,
2020-04-11,,,,00:00:00,23:59:00,1440.0,,,
...,...,...,...,...,...,...,...,...,...
2022-04-02,,,,00:00:00,23:59:00,1440.0,,,
2022-04-03,,,,00:00:00,23:59:00,1440.0,,,
2022-04-04,08:01:00,23:59:00,513.0,00:00:00,23:59:00,1440.0,13:30:00,19:59:00,161.0
2022-04-05,08:00:00,23:58:00,489.0,00:00:00,23:58:00,1439.0,13:30:00,21:25:00,170.0


### 2. Using Asset and Portfolio class


In [12]:
# Load GOLD on its own, then wrap the resulting frame in an Asset that shares
# the loader's time-index column name.
data = dataloader.load_dataset(
    {'GOLD': '../data/commodities/GLD_2020-04-07_2022-04-06.parquet'}
)
asset1 = Asset(
    ticker="GOLD",
    data=data,
    index_name=dataloader.time_index_col,
)
asset1

Asset(ticker='GOLD', index_name='DATE')

In [13]:
# Preview the first rows of the frame held by the GOLD asset
asset1.data.head()

Unnamed: 0_level_0,GOLD_VOLUME,GOLD_OPEN,GOLD_CLOSE,GOLD_LOW,GOLD_TRANSACTIONS
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-04-07 15:43:00,21071.0,155.88,155.926,155.86,152.0
2020-04-07 15:44:00,23010.0,155.93,155.9,155.9,165.0
2020-04-07 15:45:00,31544.0,155.92,155.735,155.72,292.0
2020-04-07 15:46:00,42645.0,155.76,155.84,155.76,153.0
2020-04-07 15:47:00,3735.0,155.805,155.81,155.795,47.0


In [14]:
# Summary statistics for each GOLD column.
# NOTE(review): in the saved output GOLD_TRANSACTIONS has a lower count than the
# other columns, i.e. a few rows have no value there — confirm this is expected.
asset1.data.describe()

Unnamed: 0,GOLD_VOLUME,GOLD_OPEN,GOLD_CLOSE,GOLD_LOW,GOLD_TRANSACTIONS
count,279478.0,279478.0,279478.0,279478.0,279472.0
mean,17791.8,171.28384,171.283486,171.256049,134.606773
std,34896.77,7.415213,7.414995,7.411597,193.52685
min,0.0,154.575,154.5,154.5,1.0
25%,2184.0,166.13,166.13,166.1,17.0
50%,8332.0,169.82,169.815,169.7933,80.0
75%,20459.0,177.048125,177.045,177.02,173.0
max,2766136.0,194.74,194.88,194.74,9355.0


In [15]:
# Same steps for a second asset: load BITCOINS alone and wrap it in an Asset.
data = dataloader.load_dataset(
    {'BITCOINS': '../data/cryptos/BTCUSD_2020-04-07_2022-04-06.parquet'}
)
asset2 = Asset(
    ticker="BITCOINS",
    data=data,
    index_name=dataloader.time_index_col,
)
asset2

Asset(ticker='BITCOINS', index_name='DATE')

In [16]:
# Build a Portfolio from the two assets created above.
# NOTE(review): `porfolio1` looks like a misspelling of `portfolio1`; the name is
# left as is because the following cells refer to it.
porfolio1 = Portfolio([asset1, asset2])

In [17]:
# Display the portfolio repr to check which assets it currently holds
porfolio1

Portfolio(assets=[Asset(ticker='GOLD', index_name='DATE'), Asset(ticker='BITCOINS', index_name='DATE')])

In [18]:
# Load SRET on its own, wrap it in an Asset and add it to the existing portfolio.
# NOTE(review): re-running this cell calls add_asset again — confirm how Portfolio
# treats an asset that was already added.
data = dataloader.load_dataset(
    {'SRET': '../data/real_estate/SRET_2020-04-07_2022-04-06.parquet'}
)
asset3 = Asset(
    ticker="SRET",
    data=data,
    index_name=dataloader.time_index_col,
)
porfolio1.add_asset(asset3)


In [19]:
# Display the portfolio again; the output below now lists SRET as a third asset
porfolio1

Portfolio(assets=[Asset(ticker='GOLD', index_name='DATE'), Asset(ticker='BITCOINS', index_name='DATE'), Asset(ticker='SRET', index_name='DATE')])

In [20]:
# Combine the data of the listed tickers from the portfolio into one DataFrame
unified_df_porfolio1 = porfolio1.join_data(
    ['GOLD', 'BITCOINS', 'SRET']
)

In [21]:
# Preview the joined frame; in the output below, columns of assets with no row at a
# given timestamp appear as NaN.
unified_df_porfolio1.head()

Unnamed: 0_level_0,GOLD_VOLUME,GOLD_OPEN,GOLD_CLOSE,GOLD_LOW,GOLD_TRANSACTIONS,BITCOINS_VOLUME,BITCOINS_OPEN,BITCOINS_CLOSE,BITCOINS_LOW,BITCOINS_TRANSACTIONS,SRET_VOLUME,SRET_OPEN,SRET_CLOSE,SRET_LOW,SRET_TRANSACTIONS
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2020-04-07 15:43:00,21071.0,155.88,155.926,155.86,152.0,,,,,,,,,,
2020-04-07 15:44:00,23010.0,155.93,155.9,155.9,165.0,,,,,,,,,,
2020-04-07 15:45:00,31544.0,155.92,155.735,155.72,292.0,,,,,,,,,,
2020-04-07 15:46:00,42645.0,155.76,155.84,155.76,153.0,,,,,,,,,,
2020-04-07 15:47:00,3735.0,155.805,155.81,155.795,47.0,,,,,,,,,,
