In [3]:
"""
import sys
import os

sys.path.append(os.path.abspath(".."))
"""

In [4]:
from pathlib import Path

import pandas as pd

from scripts.features.compute_returns import compute_log_returns
from scripts.features.compute_volatility import rolling_volatility
from scripts.features.technical_indicators import momentum

In [5]:
# Load the KO/PEP price file; the first CSV column becomes the index and is
# parsed as dates.
prices = pd.read_csv("../data/raw/ko_pep_prices.csv", index_col=0, parse_dates=True)

# Build each feature frame with the project helpers, using their default
# settings: returns and momentum come from prices, volatility from returns.
returns = compute_log_returns(prices)
volatility = rolling_volatility(returns)
mom = momentum(prices)

In [7]:
# Preview the first five rows returned by compute_log_returns.
returns.head(n=5)

Unnamed: 0_level_0,KO,PEP
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2015-01-05,0.0,-0.007546
2015-01-06,0.007565,-0.007604
2015-01-07,0.012405,0.028822
2015-01-08,0.012023,0.018011
2015-01-09,-0.011093,-0.006794


In [8]:
# Preview the last five rows returned by rolling_volatility.
volatility.tail(n=5)

Unnamed: 0_level_0,KO,PEP
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-12-24,0.009263,0.00967
2024-12-26,0.009256,0.00965
2024-12-27,0.009256,0.009633
2024-12-30,0.009302,0.009541
2024-12-31,0.009293,0.00962


In [9]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column
# of the momentum frame.
mom.describe()

Unnamed: 0,KO,PEP
count,2496.0,2496.0
mean,0.006725,0.007154
std,0.045272,0.043738
min,-0.357461,-0.282607
25%,-0.015864,-0.016788
50%,0.010197,0.009321
75%,0.034029,0.034789
max,0.214883,0.294621


In [11]:
# Tag every column with its feature family so the ticker names stay unique
# once the three frames sit side by side.
suffixed_features = [
    returns.add_suffix("_ret"),
    volatility.add_suffix("_vol"),
    mom.add_suffix("_mom"),
]

# Join column-wise on the shared index, then keep only rows in which every
# feature is present.
feature_matrix = pd.concat(suffixed_features, axis="columns").dropna(how="any")

In [12]:
# Preview the first five rows of the combined feature matrix.
feature_matrix.head(n=5)

Unnamed: 0_level_0,KO_ret,PEP_ret,KO_vol,PEP_vol,KO_mom,PEP_mom
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2015-02-02,0.01015,0.016393,0.010346,0.012552,-0.013052,0.009424
2015-02-03,0.000961,0.009085,0.010351,0.012544,-0.012103,0.026352
2015-02-04,0.0012,0.004149,0.010183,0.012379,-0.01837,0.038486
2015-02-05,0.002636,0.000103,0.009734,0.010633,-0.027914,0.009087
2015-02-06,-0.008169,0.001035,0.009305,0.009803,-0.047345,-0.007899


In [13]:
# Summary statistics (count, mean, std, min, quartiles, max) for every column
# of the combined feature matrix.
feature_matrix.describe()

Unnamed: 0,KO_ret,PEP_ret,KO_vol,PEP_vol,KO_mom,PEP_mom
count,2496.0,2496.0,2496.0,2496.0,2496.0,2496.0
mean,0.000293,0.00031,0.009793,0.010071,0.006725,0.007154
std,0.011262,0.011759,0.005664,0.006261,0.045272,0.043738
min,-0.101728,-0.121358,0.003598,0.003318,-0.357461,-0.282607
25%,-0.004585,-0.00498,0.006753,0.007296,-0.015864,-0.016788
50%,0.000634,0.000476,0.008548,0.009015,0.010197,0.009321
75%,0.005725,0.006039,0.010791,0.011404,0.034029,0.034789
max,0.062783,0.121656,0.054467,0.07009,0.214883,0.294621


In [14]:
# Persist the combined feature matrix for downstream use.
# DataFrame.to_csv does not create missing parent directories, so make sure
# the output folder exists first; otherwise a fresh checkout without
# ../data/processed fails here with an OSError.
feature_matrix_path = Path("../data/processed/feature_matrix.csv")
feature_matrix_path.parent.mkdir(parents=True, exist_ok=True)
feature_matrix.to_csv(feature_matrix_path)