In [33]:
#
import os
import copy
import sys
import warnings
import matplotlib
from pathlib import Path
from typing import Dict

#
# Make project-local packages importable: the relative entries below are
# appended to the module search path in this order, before the project imports run.
sys.path.extend(
    [
        "./src/",
        "./",
        "../",
        "../../",
        "../../../",
    ]
)

# FinRL
from finrl.config_tickers import DOW_30_TICKER

# FinRL-Meta
from meta.data_processors.yahoofinance import Yahoofinance

#
import yfinance as yf

In [34]:
def config():
    """Apply notebook-wide settings: ignore noisy warning categories and pick the matplotlib backend."""
    # Categories are registered in this fixed order.
    ignored_categories = (
        UserWarning,  # TODO: zipline problem
        DeprecationWarning,
        FutureWarning,
        RuntimeWarning,
    )
    for category in ignored_categories:
        warnings.filterwarnings("ignore", category=category)

    # Switch matplotlib to the "Agg" backend.
    matplotlib.use("Agg")


config()

In [35]:
#
import numpy as np
import pandas as pd
import tqdm

#
from common.utils import now_time
from configuration.settings import ProjectDir
from rl.data.CompanyInfo import CompanyInfo

In [36]:
#
# Project root: defaults to the author's local checkout, but can be overridden
# through the AI_INVESTING_ROOT environment variable so this cell does not have
# to be edited on another machine. Built once (it was previously built twice).
_DEFAULT_PROJECT_ROOT = "/Users/zlapik/my-drive-zlapik/0-todo/ai-investing"
prj_dir = ProjectDir(root=Path(os.environ.get("AI_INVESTING_ROOT", _DEFAULT_PROJECT_ROOT)))

# Train/test date boundaries as "YYYY-MM-DD" strings.
_TRAIN_DATA_START = "2000-01-01"
_TRAIN_DATA_END = "2015-12-31"
_TEST_DATA_START = "2016-01-01"
_TEST_DATA_END = "2021-12-31"

# Ticker universe: DOW_30_TICKER without "DOW".
# TODO: I don't have all necessary data
# A filtered copy leaves DOW_30_TICKER untouched and, unlike list.remove(),
# does not raise ValueError when "DOW" is not present in the upstream list.
TICKERS = [ticker for ticker in DOW_30_TICKER if ticker != "DOW"]

In [37]:
# Display the ticker list used by the cells below.
TICKERS

['AXP',
 'AMGN',
 'AAPL',
 'BA',
 'CAT',
 'CSCO',
 'CVX',
 'GS',
 'HD',
 'HON',
 'IBM',
 'INTC',
 'JNJ',
 'KO',
 'JPM',
 'MCD',
 'MMM',
 'MRK',
 'MSFT',
 'NKE',
 'PG',
 'TRV',
 'UNH',
 'CRM',
 'VZ',
 'V',
 'WBA',
 'WMT',
 'DIS']

In [38]:
# Display the project root that the dataset paths are resolved against.
prj_dir.root

PosixPath('/Users/zlapik/my-drive-zlapik/0-todo/ai-investing')

In [39]:
# Identifier column names ("tic" is the per-row ticker column added during the merge).
base_cols = ["date", "tic"]

# Price/volume columns selected from each ticker's detailed data.
data_cols = ["open", "high", "low", "close", "volume"]

# Row labels selected from each ticker's financial-ratios table.
ratios_cols = [
    "operatingProfitMargin",
    "netProfitMargin",
    "returnOnAssets",
    "returnOnEquity",
    "currentRatio",
    "quickRatio",
    "cashRatio",
    "inventoryTurnover",
    "receivablesTurnover",
    "payablesTurnover",
    "debtRatio",
    "debtEquityRatio",
    "priceEarningsRatio",
    "priceBookValueRatio",
    "dividendYield",
]

# One list per output line.
print(base_cols, data_cols, ratios_cols, sep="\n")

['date', 'tic']
['open', 'high', 'low', 'close', 'volume']
['operatingProfitMargin', 'netProfitMargin', 'returnOnAssets', 'returnOnEquity', 'currentRatio', 'quickRatio', 'cashRatio', 'inventoryTurnover', 'receivablesTurnover', 'payablesTurnover', 'debtRatio', 'debtEquityRatio', 'priceEarningsRatio', 'priceBookValueRatio', 'dividendYield']


In [40]:
# Build one CompanyInfo per ticker from the CSV files present in its dataset folder.
tickers_data: Dict[str, CompanyInfo] = {}
for tic in TICKERS:
    ticker_dir = prj_dir.dataset.tickers.joinpath(tic)

    # Candidate "<name>.csv" path for every name CompanyInfo.Names lists.
    csv_paths = {
        name: ticker_dir.joinpath(name + ".csv")
        for name in CompanyInfo.Names.list()
    }

    # Only files that exist are read; missing ones are simply not passed on.
    frames = {
        name: pd.read_csv(path, index_col=0)
        for name, path in csv_paths.items()
        if path.exists()
    }

    tickers_data[tic] = CompanyInfo(**{"symbol": tic, **frames})

In [41]:
# Merge every ticker's prices and financial ratios into a single DataFrame.
# Per-ticker frames are collected in a list and concatenated once at the end,
# rather than re-copying a growing frame on every iteration.
ticker_frames = []
for symbol, info in tickers_data.items():
    # Prices: selected columns with the ticker symbol inserted as the first column.
    prices = info.data_detailed[data_cols]
    prices.insert(0, "tic", symbol)

    # Fill gaps backward first, then forward for anything still missing.
    # (.bfill()/.ffill() replace the deprecated fillna(method=...) spelling.)
    prices = prices.bfill().ffill()

    # Ratios: pick the wanted rows and transpose so ratio names become columns.
    ratios = info.financial_ratios.loc[ratios_cols].transpose()

    # Zero out NaN and BOTH infinities (previously only +inf was replaced,
    # so -inf could leak into the saved dataset).
    ratios = ratios.fillna(0).replace([np.inf, -np.inf], 0)

    # Outer-join on the index, then fill the holes the join leaves behind.
    # NOTE(review): the backward fill copies values from later rows into earlier
    # ones; if the index is date-ordered this propagates later ratios back in
    # time (look-ahead) - confirm this is intended.
    merged = pd.merge(prices, ratios, how="outer", left_index=True, right_index=True)
    merged = merged.bfill().ffill()

    # Keep only rows whose index label contains a YYYY-MM-DD pattern.
    # Raw string: "\d" in a plain literal is an invalid escape sequence.
    is_date_row = merged.index.str.contains(r"\d{4}-\d{2}-\d{2}")
    ticker_frames.append(merged[is_date_row])

# Reverse iteration order preserves the row order produced by prepending each
# ticker in turn; an empty input still yields an empty DataFrame.
dataset = pd.concat(ticker_frames[::-1]) if ticker_frames else pd.DataFrame()

In [45]:
# Sanity check: the dataset must contain neither NaN nor +/-inf values.
# isna() alone does not flag infinities, so numeric columns are checked separately.
_has_nan = dataset.isna().any().any()
_has_inf = np.isinf(dataset.select_dtypes(include="number")).any().any()
not (_has_nan or _has_inf)  # displays True when the dataset is clean

True

In [47]:
# Write the merged dataset as CSV into the project's experiments directory.
experiment_csv = prj_dir.dataset.experiments.joinpath("experiment_same_bigger_fundamental.csv")
dataset.to_csv(experiment_csv)