In [12]:
import pandas as pd
import glob
import os
import re

# Folder that holds the per-asset "hmm_<asset>.csv" files
folder = "../data/processed"

# Pattern to match 'hmm_<asset>' at the start of the filename
pattern = re.compile(r'hmm_([A-Za-z0-9]+)')


def load_hmm_frames(files, pattern=pattern):
    """Read each matching CSV into a DataFrame keyed by its asset name.

    Parameters
    ----------
    files : iterable of str
        Paths of candidate CSV files. They are processed in sorted order so
        the result does not depend on the order the filesystem lists them.
    pattern : re.Pattern
        Compiled regex matched against the start of each base filename;
        capture group 1 becomes the asset key. Non-matching files are ignored.

    Returns
    -------
    dict
        Maps asset name -> DataFrame read with the first CSV column as the
        index (parsed as dates where pandas can do so).

    Notes
    -----
    If several files resolve to the same asset name (for example
    ``hmm_KO.csv`` and ``hmm_KO_old.csv``), only the first one in sorted
    order is loaded and a notice is printed for each skipped file, instead
    of one silently overwriting the other.
    """
    frames = {}
    for path in sorted(files):
        match = pattern.match(os.path.basename(path))
        if not match:
            continue
        asset = match.group(1)
        if asset in frames:
            # Same asset key from a second file: keep the first, but say so.
            print(f"Skipping {path}: asset {asset!r} was already loaded")
            continue
        frames[asset] = pd.read_csv(
            path,
            index_col=0,       # use first column as index
            parse_dates=True   # parse index as datetime if possible
        )
    return frames


# Find all CSV files starting with "hmm_" (sorted for a reproducible order)
files = sorted(glob.glob(os.path.join(folder, "hmm_*.csv")))

# Dictionary of asset name -> DataFrame
hmm_data = load_hmm_frames(files)

# Check loaded assets
print("Loaded assets:", list(hmm_data.keys()))


Loaded assets: ['PEP', 'KO']


In [13]:
# Display the frame loaded for the "KO" asset (raises KeyError if it was not loaded)
hmm_data["KO"]

Unnamed: 0_level_0,KO_ret,KO_vol,regime,prob_regime_0,prob_regime_1,p_high_vol,risk_scale
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2015-02-02,0.010150,0.010346,1,1.135441e-21,1.000000,1.135441e-21,1.000000
2015-02-03,0.000961,0.010351,1,4.511391e-05,0.999955,4.511391e-05,0.999955
2015-02-04,0.001200,0.010183,1,4.700527e-05,0.999953,4.700527e-05,0.999953
2015-02-05,0.002636,0.009734,1,5.649322e-05,0.999944,5.649322e-05,0.999944
2015-02-06,-0.008169,0.009305,1,2.571059e-04,0.999743,2.571059e-04,0.999743
...,...,...,...,...,...,...,...
2024-12-24,0.007347,0.009263,1,3.215712e-05,0.999968,3.215712e-05,0.999968
2024-12-26,-0.004306,0.009256,1,2.935911e-05,0.999971,2.935911e-05,0.999971
2024-12-27,-0.001920,0.009256,1,3.219457e-05,0.999968,3.219457e-05,0.999968
2024-12-30,-0.006748,0.009302,1,1.148743e-04,0.999885,1.148743e-04,0.999885


In [14]:
# Collect one return series per asset, plus its risk-scaled counterpart.
# An asset contributes to the raw leg when it has an "<asset>_ret" column,
# and to the scaled leg only when it also has a "risk_scale" column.
raw_legs = []
scaled_legs = []
for ticker, frame in hmm_data.items():
    ret_col = f"{ticker}_ret"
    if ret_col not in frame.columns:
        continue
    raw_legs.append(frame[ret_col])
    if "risk_scale" in frame.columns:
        scaled_legs.append(frame["risk_scale"] * frame[ret_col])

# Row-wise mean across the per-asset series gives the two portfolio columns.
portfolio_returns = pd.DataFrame({
    "raw_returns": pd.concat(raw_legs, axis=1).mean(axis=1),
    "scaled_returns": pd.concat(scaled_legs, axis=1).mean(axis=1),
})

print(portfolio_returns.head())


            raw_returns  scaled_returns
Date                                   
2015-02-02     0.013271        0.013271
2015-02-03     0.005023        0.005022
2015-02-04     0.002675        0.002675
2015-02-05     0.001370        0.001369
2015-02-06    -0.003567       -0.003566


In [15]:
# Write the portfolio returns frame (index included) to CSV.
output_path = "../data/processed/portfolio_returns.csv"
portfolio_returns.to_csv(output_path)