In [42]:
#
import os
import copy
import sys
import warnings
import matplotlib
from pathlib import Path
from typing import Dict

# Make project-local packages importable: extend the module search path with
# the local ``src`` directory, the working directory and three parent levels.
sys.path.extend(["./src/", "./", "../", "../../", "../../../"])

# FinRL
from finrl.config_tickers import DOW_30_TICKER

# FinRL-Meta
from meta.data_processors.yahoofinance import Yahoofinance

#
import yfinance as yf

In [43]:
def config():
    """Apply notebook-wide settings: silence warning categories and pick the plotting backend."""
    # Categories are registered in this exact order.
    # TODO: zipline problem (reason UserWarning is silenced)
    silenced_categories = (
        UserWarning,
        DeprecationWarning,
        FutureWarning,
        RuntimeWarning,
    )
    for warning_category in silenced_categories:
        warnings.filterwarnings("ignore", category=warning_category)

    # Switch matplotlib to the "Agg" backend.
    matplotlib.use("Agg")


# Apply the settings once for the rest of the notebook.
config()

In [44]:
#
import numpy as np
import pandas as pd
import tqdm

#
from common.utils import now_time
from configuration.settings import ProjectDir
from rl.data.CompanyInfo import CompanyInfo

In [45]:
# Project root. Defaults to the original author's local checkout; set the
# AI_INVESTING_ROOT environment variable to run the notebook from another
# location without editing this cell. (Built once - the original cell
# constructed the very same ProjectDir twice.)
prj_dir = ProjectDir(
    root=Path(
        os.environ.get(
            "AI_INVESTING_ROOT",
            "/Users/zlapik/my-drive-zlapik/0-todo/ai-investing",
        )
    )
)

# Train / test date boundaries (ISO date strings).
_TRAIN_DATA_START = "2000-01-01"
_TRAIN_DATA_END = "2015-12-31"
_TEST_DATA_START = "2016-01-01"
_TEST_DATA_END = "2021-12-31"

# Ticker universe: a fresh list built from DOW_30_TICKER without "DOW".
# Filtering (instead of copy + remove) leaves the imported list untouched and
# does not raise if "DOW" is not present.
# TODO: I don't have all necessary data
TICKERS = [ticker for ticker in DOW_30_TICKER if ticker != "DOW"]

In [46]:
# Display the ticker universe used below (DOW_30_TICKER with "DOW" removed).
TICKERS

['AXP',
 'AMGN',
 'AAPL',
 'BA',
 'CAT',
 'CSCO',
 'CVX',
 'GS',
 'HD',
 'HON',
 'IBM',
 'INTC',
 'JNJ',
 'KO',
 'JPM',
 'MCD',
 'MMM',
 'MRK',
 'MSFT',
 'NKE',
 'PG',
 'TRV',
 'UNH',
 'CRM',
 'VZ',
 'V',
 'WBA',
 'WMT',
 'DIS']

In [47]:
# Display the project root directory that ProjectDir was configured with.
prj_dir.root

PosixPath('/Users/zlapik/my-drive-zlapik/0-todo/ai-investing')

In [64]:
# Identifier columns: observation date and ticker symbol.
base_cols = ["date", "tic"]

# Price / volume columns.
data_cols = ["open", "high", "low", "close", "volume"]

# Financial-ratio labels selected from each company's ratio table.
ratios_cols = [
    "operatingProfitMargin",
    "netProfitMargin",
    "returnOnAssets",
    "returnOnEquity",
    "currentRatio",
    "quickRatio",
    "cashRatio",
    "inventoryTurnover",
    "receivablesTurnover",
    "payablesTurnover",
    "debtRatio",
    "debtEquityRatio",
    "priceEarningsRatio",
    "priceBookValueRatio",
    "dividendYield",
]

# One list per output line.
print(base_cols, data_cols, ratios_cols, sep="\n")

['date', 'tic']
['open', 'high', 'low', 'close', 'volume']
['operatingProfitMargin', 'netProfitMargin', 'returnOnAssets', 'returnOnEquity', 'currentRatio', 'quickRatio', 'cashRatio', 'inventoryTurnover', 'receivablesTurnover', 'payablesTurnover', 'debtRatio', 'debtEquityRatio', 'priceEarningsRatio', 'priceBookValueRatio', 'dividendYield']


In [65]:
# Load All Initial Tickers Data
# For every ticker, read each CSV named after an entry of CompanyInfo.Names
# from that ticker's dataset folder (files that do not exist are skipped) and
# wrap the resulting frames in a CompanyInfo.
tickers_data: Dict[str, CompanyInfo] = {}
for tic in TICKERS:
    tic_dir = prj_dir.dataset.tickers.joinpath(tic)
    csv_paths = {
        name: tic_dir.joinpath(name + ".csv") for name in CompanyInfo.Names.list()
    }
    frames = {
        name: pd.read_csv(csv_path, index_col=0)
        for name, csv_path in csv_paths.items()
        if csv_path.exists()
    }
    # "symbol" goes first; loaded frames follow (and win on a name clash).
    tickers_data[tic] = CompanyInfo(**{"symbol": tic, **frames})

In [56]:
# List the symbols that were loaded, one per line.
for symbol in tickers_data.keys():
    print(symbol)

AXP
AMGN
AAPL
BA
CAT
CSCO
CVX
GS
HD
HON
IBM
INTC
JNJ
KO
JPM
MCD
MMM
MRK
MSFT
NKE
PG
TRV
UNH
CRM
VZ
V
WBA
WMT
DIS


In [70]:
# Pick the CompanyInfo loaded for "AAPL"; the following cells inspect it.
AAPL = tickers_data["AAPL"]

In [74]:
# Keep only the price / volume columns (data_cols) of AAPL's detailed table.
# The displayed index holds one "YYYY-MM-DD" label per row.
data = AAPL.data_detailed[data_cols]
data

Unnamed: 0,open,high,low,close,volume
2022-12-16,136.685000,137.650000,133.730000,134.510000,159808331.0
2022-12-15,141.110000,141.800000,136.030000,136.500000,98827150.0
2022-12-14,145.350000,146.660000,141.160000,143.210000,82234160.0
2022-12-13,149.500000,149.970000,144.240000,145.470000,93831797.0
2022-12-12,142.700000,144.500000,141.060000,144.490000,70422099.0
...,...,...,...,...,...
1980-12-18,0.118862,0.119420,0.118862,0.118862,73449600.0
1980-12-17,0.115513,0.116071,0.115513,0.115513,86441600.0
1980-12-16,0.113281,0.113281,0.112723,0.112723,105728000.0
1980-12-15,0.122210,0.122210,0.121652,0.121652,175884800.0


In [75]:
# Select the ratio rows listed in ratios_cols by label, then transpose so that
# each ratio becomes a column. The displayed index holds "YYYY-MM" labels.
ratios = AAPL.financial_ratios.loc[ratios_cols].transpose()
ratios

Unnamed: 0,operatingProfitMargin,netProfitMargin,returnOnAssets,returnOnEquity,currentRatio,quickRatio,cashRatio,inventoryTurnover,receivablesTurnover,payablesTurnover,debtRatio,debtEquityRatio,priceEarningsRatio,priceBookValueRatio,dividendYield
2022-09,0.27615202005635303,0.22986044860559537,0.05874048560615725,0.40892406062519737,0.8793560286267226,0.7094075930952969,0.15356340351469652,10.523857662757784,1.4794525044311693,0.8118381034079388,0.8563535598361469,5.961536943479634,29.094279320957142,47.58940336355801,0.0015355904623124512
2022-06,0.2781615014645789,0.2343567304331055,0.057809930748210725,0.3345896363605073,0.8646292916926536,0.6966267045498294,0.21176072008808605,8.664457942205043,1.963898489654846,0.9737500775706928,0.8272213946103137,4.7877536269296295,29.44196647082215,39.40390742084252,0.001664451729880866
2022-03,0.30817862209338187,0.25709821336787353,0.07132224193097626,0.37107375480348376,0.9268438058788468,0.7600385858142235,0.22036264391253882,10.021794871794873,2.1426872246696034,1.0386659580122244,0.807794970655503,4.2027774892802565,28.628169101859946,42.49264880710499,0.0012552540688802
2021-12,0.3347291137197951,0.2793981201339304,0.09084684580695766,0.4814269031863427,1.0378115386179136,0.875262580129291,0.251528046945939,11.86215112321307,1.8994528987173003,0.9373335843576021,0.811296699030145,4.299324361897347,21.33929842928893,41.09324943592703,0.0012625512469071768
2021-09,0.28534069097888676,0.2465331094049904,0.05854952393433656,0.32574100491361546,1.0745531195957954,0.9096596297447422,0.2784485300563432,7.323100303951367,1.618452219158933,0.8799006628563081,0.8202574344305731,4.563512442542399,29.466787353938763,38.39416369699132,0.0015027116177395326
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1990-12,0.136317517159057,0.0898239331542823,0.04766731067684414,0.09740469872500161,2.3724703907488576,1.4854052037676024,0.6894525785694302,1.8793145654834762,1.9630931458699472,1.8396836808051762,0.5106261679282932,1.0434276098634392,8.398446875000001,3.2721927504692254,0.0027492822590102694
1990-09,0.10383280407650838,0.07274204268517835,0.033101455119803745,0.06808128283107548,2.340116845180136,1.1067185978578384,0.3648490749756573,1.6635724331926864,1.777267357920987,1.736347621843805,0.5137950734281009,1.0567459220348354,9.101845605964465,2.4786612999377935,0.003597195356710257
1990-06,0.13357268464243846,0.08777842907385698,0.041460460287246925,0.07950623838598354,2.637275904837073,1.5452700417183447,0.749802683504341,1.9811579307982186,1.9345145287030474,2.0720171981368685,0.47852569648728155,0.917640031855588,11.653658333611267,3.706154150163604,0.002399527368976417
1990-03,0.1479720695290447,0.09790521467835389,0.04748351767121807,0.08498291314720485,2.9312927805760043,1.6848766627921994,0.8340436523311378,1.80896,2.0434122647237403,2.397370653095844,0.4412580610296502,0.7897349925849507,9.67548076933564,3.2890021675116063,0.002646584793418943


In [89]:
# Outer-join prices and ratios on their index labels: every label from either
# frame is kept, and columns missing on one side are left as NaN.
merged_aapl = data.merge(
    ratios,
    how="outer",
    left_index=True,
    right_index=True,
)

In [104]:
# Fill the gaps left by the outer merge: back-fill down the rows first, then
# forward-fill whatever is still missing at the end of the frame.
# NOTE(review): back-filling copies each value onto the rows that precede it
# in index order - confirm this does not leak later-period ratios into
# earlier dates.
_d = merged_aapl.bfill(axis="rows").ffill(axis="rows")

# Keep only the rows whose index label contains a full YYYY-MM-DD date; labels
# without one (e.g. the "YYYY-MM" ratio rows) are discarded.
# The pattern is a raw string: "\d" inside a normal string literal is an
# invalid escape sequence and triggers a warning on current Python versions.
clean_d = _d[_d.index.str.contains(r"\d{4}-\d{2}-\d{2}")]