# Feature Database

In [1]:
import numpy as np
import pandas as pd
from datetime import datetime
import yfinance as yf
from fredapi import Fred

## Common Features

In [2]:
start_date = datetime(2020, 1, 1)
end_date = datetime(2024, 8, 31)
NFLX_STOCK = yf.download("NFLX", start_date, end_date)
NFLX_STOCK.columns = NFLX_STOCK.columns.droplevel(1)
NFLX_STOCK.head()

[*********************100%***********************]  1 of 1 completed


Price,Adj Close,Close,High,Low,Open,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-02,329.809998,329.809998,329.980011,324.779999,326.100006,4485800
2020-01-03,325.899994,325.899994,329.859985,325.529999,326.779999,3806900
2020-01-06,335.829987,335.829987,336.359985,321.200012,323.119995,5663100
2020-01-07,330.75,330.75,336.700012,330.299988,336.470001,4703200
2020-01-08,339.26001,339.26001,342.700012,331.049988,331.48999,7104500


## FRED

In [3]:
fred = Fred(api_key='7b9f7e9c637e4f731da1f888d14b6037')
varList = ['T10Y3M', 'DGS10', 'OBMMIJUMBO30YF',  # term premium 10yr-3mon, 30 yr mortgage jumbo loan
           'DEXUSEU', 'DEXJPUS', 'DEXUSUK', # spot exchange rates to EUR, JPY, GBP 
           'CBBTCUSD', 'CBETHUSD',  # cryptocurrencies
               'T10YIE', 'DCOILBRENTEU', # breakeven inflation + brent oil price 
               'VIXCLS', # implied volatilities
               'DAAA', 'DBAA', # corporate bond yield
              'AMERIBOR', 'T5YIE', 'BAMLH0A0HYM2','BAMLH0A0HYM2EY', 'DGS1', 'DCOILWTICO', 
                              'DHHNGSP'] 

SP500 = fred.get_series('SP500')
SP500.name = 'SP500'
df_fred = SP500

# merge data series
for i in range(0, len(varList)):
    data = fred.get_series(varList[i])
    data.name = varList[i]
    df_fred = pd.merge(df_fred, data, left_index=True, right_index=True)
    
df_fred.index.name = "Date"
df_fred = df_fred[(df_fred.index >= start_date) & (df_fred.index <= end_date)]

In [4]:
df_fred.head()

Unnamed: 0_level_0,SP500,T10Y3M,DGS10,OBMMIJUMBO30YF,DEXUSEU,DEXJPUS,DEXUSUK,CBBTCUSD,CBETHUSD,T10YIE,...,VIXCLS,DAAA,DBAA,AMERIBOR,T5YIE,BAMLH0A0HYM2,BAMLH0A0HYM2EY,DGS1,DCOILWTICO,DHHNGSP
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-01,,,,,,,,7174.32,130.05,,...,,,,1.604667,,,,,,
2020-01-02,3257.85,0.34,1.88,3.843,1.1166,108.43,1.3128,6945.01,126.8,1.8,...,12.47,3.0,3.86,1.603451,1.72,3.56,5.28,1.56,61.17,2.05
2020-01-03,3234.85,0.28,1.8,3.908,1.1173,107.94,1.3091,7335.54,134.2,1.77,...,14.02,2.94,3.79,1.599492,1.69,3.61,5.25,1.55,63.0,2.06
2020-01-06,3246.28,0.25,1.81,3.944,1.1187,108.36,1.3163,7773.48,144.28,1.75,...,13.85,3.01,3.83,1.599941,1.69,3.57,5.23,1.54,63.27,2.1
2020-01-07,3237.18,0.29,1.83,4.012,1.1138,108.53,1.3127,8169.95,143.13,1.74,...,13.79,3.02,3.86,1.597702,1.67,3.54,5.21,1.53,62.7,2.17


In [5]:
NFLX_STOCK = pd.concat([NFLX_STOCK, df_fred], axis=1)
NFLX_STOCK.head()

Unnamed: 0_level_0,Adj Close,Close,High,Low,Open,Volume,SP500,T10Y3M,DGS10,OBMMIJUMBO30YF,...,VIXCLS,DAAA,DBAA,AMERIBOR,T5YIE,BAMLH0A0HYM2,BAMLH0A0HYM2EY,DGS1,DCOILWTICO,DHHNGSP
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-01,,,,,,,,,,,...,,,,1.604667,,,,,,
2020-01-02,329.809998,329.809998,329.980011,324.779999,326.100006,4485800.0,3257.85,0.34,1.88,3.843,...,12.47,3.0,3.86,1.603451,1.72,3.56,5.28,1.56,61.17,2.05
2020-01-03,325.899994,325.899994,329.859985,325.529999,326.779999,3806900.0,3234.85,0.28,1.8,3.908,...,14.02,2.94,3.79,1.599492,1.69,3.61,5.25,1.55,63.0,2.06
2020-01-06,335.829987,335.829987,336.359985,321.200012,323.119995,5663100.0,3246.28,0.25,1.81,3.944,...,13.85,3.01,3.83,1.599941,1.69,3.57,5.23,1.54,63.27,2.1
2020-01-07,330.75,330.75,336.700012,330.299988,336.470001,4703200.0,3237.18,0.29,1.83,4.012,...,13.79,3.02,3.86,1.597702,1.67,3.54,5.21,1.53,62.7,2.17


## Fama-French

In [6]:
data_ff5 = pd.read_csv('resources/F-F_Research_Data_5_Factors_2x3_daily.csv')
data_ff5.rename(columns={"Unnamed: 0": "Date"}, inplace=True)
data_ff5["Date"] = pd.to_datetime(data_ff5["Date"], format="%Y%m%d")
data_ff5 = data_ff5[(data_ff5["Date"] >= start_date) & (data_ff5["Date"] <= end_date)]
data_ff5 = data_ff5.set_index('Date')
data_ff5

Unnamed: 0_level_0,Mkt-RF,SMB,HML,RMW,CMA,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-02,0.86,-0.97,-0.33,0.24,-0.22,0.006
2020-01-03,-0.67,0.30,0.00,-0.13,-0.11,0.006
2020-01-06,0.36,-0.21,-0.55,-0.17,-0.26,0.006
2020-01-07,-0.19,-0.03,-0.25,-0.13,-0.24,0.006
2020-01-08,0.47,-0.16,-0.66,-0.16,-0.18,0.006
...,...,...,...,...,...,...
2024-08-26,-0.34,0.33,0.17,0.13,-0.06,0.022
2024-08-27,0.05,-0.90,0.02,0.27,0.23,0.022
2024-08-28,-0.67,-0.22,1.14,0.55,-0.16,0.022
2024-08-29,0.08,0.67,0.28,-0.15,-1.22,0.022


In [7]:
NFLX_STOCK = pd.concat([NFLX_STOCK, data_ff5], axis=1)
NFLX_STOCK.head()

Unnamed: 0_level_0,Adj Close,Close,High,Low,Open,Volume,SP500,T10Y3M,DGS10,OBMMIJUMBO30YF,...,BAMLH0A0HYM2EY,DGS1,DCOILWTICO,DHHNGSP,Mkt-RF,SMB,HML,RMW,CMA,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-01,,,,,,,,,,,...,,,,,,,,,,
2020-01-02,329.809998,329.809998,329.980011,324.779999,326.100006,4485800.0,3257.85,0.34,1.88,3.843,...,5.28,1.56,61.17,2.05,0.86,-0.97,-0.33,0.24,-0.22,0.006
2020-01-03,325.899994,325.899994,329.859985,325.529999,326.779999,3806900.0,3234.85,0.28,1.8,3.908,...,5.25,1.55,63.0,2.06,-0.67,0.3,0.0,-0.13,-0.11,0.006
2020-01-06,335.829987,335.829987,336.359985,321.200012,323.119995,5663100.0,3246.28,0.25,1.81,3.944,...,5.23,1.54,63.27,2.1,0.36,-0.21,-0.55,-0.17,-0.26,0.006
2020-01-07,330.75,330.75,336.700012,330.299988,336.470001,4703200.0,3237.18,0.29,1.83,4.012,...,5.21,1.53,62.7,2.17,-0.19,-0.03,-0.25,-0.13,-0.24,0.006


## ADS

In [8]:
ads = pd.read_excel("resources/ADS_Index_Most_Current_Vintage.xlsx")
ads.rename(columns={"Unnamed: 0": "Date"}, inplace=True)
ads["Date"] = pd.to_datetime(ads["Date"], format="%Y:%m:%d")
ads = ads[(ads["Date"] >= start_date) & (ads["Date"] <= end_date)]
ads = ads.set_index("Date")
ads

Unnamed: 0_level_0,ADS_Index
Date,Unnamed: 1_level_1
2020-01-01,-0.301644
2020-01-02,-0.289801
2020-01-03,-0.278009
2020-01-04,-0.266263
2020-01-05,-0.254558
...,...
2024-08-27,-0.114812
2024-08-28,-0.119569
2024-08-29,-0.125109
2024-08-30,-0.131435


In [9]:
NFLX_STOCK = pd.concat([NFLX_STOCK, ads], axis=1)
NFLX_STOCK.head()

Unnamed: 0_level_0,Adj Close,Close,High,Low,Open,Volume,SP500,T10Y3M,DGS10,OBMMIJUMBO30YF,...,DGS1,DCOILWTICO,DHHNGSP,Mkt-RF,SMB,HML,RMW,CMA,RF,ADS_Index
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-01,,,,,,,,,,,...,,,,,,,,,,-0.301644
2020-01-02,329.809998,329.809998,329.980011,324.779999,326.100006,4485800.0,3257.85,0.34,1.88,3.843,...,1.56,61.17,2.05,0.86,-0.97,-0.33,0.24,-0.22,0.006,-0.289801
2020-01-03,325.899994,325.899994,329.859985,325.529999,326.779999,3806900.0,3234.85,0.28,1.8,3.908,...,1.55,63.0,2.06,-0.67,0.3,0.0,-0.13,-0.11,0.006,-0.278009
2020-01-04,,,,,,,,,,,...,,,,,,,,,,-0.266263
2020-01-05,,,,,,,,,,,...,,,,,,,,,,-0.254558


## Returns (Y target)

In [10]:
NFLX_STOCK["Returns"] = NFLX_STOCK["Adj Close"] - NFLX_STOCK["Adj Close"].shift(1) # difference
NFLX_STOCK["Daily_Return"] = np.log(NFLX_STOCK["Adj Close"]) - np.log(NFLX_STOCK["Adj Close"].shift(1)) # percentage
NFLX_STOCK.head()

Unnamed: 0_level_0,Adj Close,Close,High,Low,Open,Volume,SP500,T10Y3M,DGS10,OBMMIJUMBO30YF,...,DHHNGSP,Mkt-RF,SMB,HML,RMW,CMA,RF,ADS_Index,Returns,Daily_Return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-01,,,,,,,,,,,...,,,,,,,,-0.301644,,
2020-01-02,329.809998,329.809998,329.980011,324.779999,326.100006,4485800.0,3257.85,0.34,1.88,3.843,...,2.05,0.86,-0.97,-0.33,0.24,-0.22,0.006,-0.289801,,
2020-01-03,325.899994,325.899994,329.859985,325.529999,326.779999,3806900.0,3234.85,0.28,1.8,3.908,...,2.06,-0.67,0.3,0.0,-0.13,-0.11,0.006,-0.278009,-3.910004,-0.011926
2020-01-04,,,,,,,,,,,...,,,,,,,,-0.266263,,
2020-01-05,,,,,,,,,,,...,,,,,,,,-0.254558,,


## Generate Dataset

In [11]:
filename = f"NFLX_feature_mart.csv"
NFLX_STOCK.dropna(inplace=True)
NFLX_STOCK.head()

Unnamed: 0_level_0,Adj Close,Close,High,Low,Open,Volume,SP500,T10Y3M,DGS10,OBMMIJUMBO30YF,...,DHHNGSP,Mkt-RF,SMB,HML,RMW,CMA,RF,ADS_Index,Returns,Daily_Return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-01-03,325.899994,325.899994,329.859985,325.529999,326.779999,3806900.0,3234.85,0.28,1.8,3.908,...,2.06,-0.67,0.3,0.0,-0.13,-0.11,0.006,-0.278009,-3.910004,-0.011926
2020-01-07,330.75,330.75,336.700012,330.299988,336.470001,4703200.0,3237.18,0.29,1.83,4.012,...,2.17,-0.19,-0.03,-0.25,-0.13,-0.24,0.006,-0.232068,-5.079987,-0.015242
2020-01-08,339.26001,339.26001,342.700012,331.049988,331.48999,7104500.0,3253.05,0.33,1.87,3.889,...,2.09,0.47,-0.16,-0.66,-0.16,-0.18,0.006,-0.221273,8.51001,0.025404
2020-01-09,335.660004,335.660004,343.420013,334.609985,342.0,4709300.0,3274.7,0.31,1.85,4.054,...,2.05,0.65,-0.71,-0.48,-0.14,0.04,0.006,-0.210773,-3.600006,-0.010668
2020-01-10,329.049988,329.049988,338.5,327.269989,337.130005,4718300.0,3265.35,0.29,1.83,3.927,...,2.05,-0.34,-0.27,-0.33,0.04,-0.08,0.006,-0.200564,-6.610016,-0.019889


In [12]:
NFLX_STOCK.to_csv(filename, index=True)