In [205]:
import pandas as pd

# Example Input DataFrame of strategies
df = pd.DataFrame(columns=["strategy_a", "strategy_b", "strategy_c", "strategy_d", "strategy_e"])

# Example dictionary mapping strategy names to products
product_dict = {
    "strategy_a": "AAPL",
    "strategy_b": "AAPL",
    "strategy_c": "GOOGL",
    "strategy_d": "GOOGL",
    "strategy_e": "MSFT",
}


def generate_group_list(df, product_dict):
    groups = []
    product_counts = {}

    for strategy in df.columns:
        product = product_dict[strategy]
        if product not in product_counts:
            product_counts[product] = 0
        
        # Increment the count and create incremented names like "aapl1"
        product_counts[product] += 1
        strategy_name = f"{product.lower()}{product_counts[product]}"
        groups.append(strategy_name)
    
    return groups


groups = generate_group_list(df, product_dict)
print("Groups:", groups)


def generate_conditional_constraints(groups):
    constraints = []
    # Track products and their corresponding strategies
    product_strategies = {}
    
    # Build a dictionary of strategies by product using the generated groups
    for strategy in groups:
        # Extract the base product name (e.g., "aapl" from "aapl1")
        product = strategy.rstrip('0123456789')  # Remove numeric suffix to get product name
        if product not in product_strategies:
            product_strategies[product] = []
        product_strategies[product].append(strategy)

    # Generate conditional constraints for each product group
    for strategies in product_strategies.values():
        if len(strategies) > 1:
            # If there are multiple strategies, generate the "if active" constraint
            constraint_str = f"{strategies[0]} + {' + '.join(strategies[1:])} > 0 => "
            equal_parts = " = ".join(strategies)
            constraints.append(constraint_str + equal_parts)

    return constraints

# Generate constraints
constraints = generate_conditional_constraints(groups)
print("Constraints:", constraints)


# NOW -- replace this with HASHES, and then fit!

Groups: ['aapl1', 'aapl2', 'googl1', 'googl2', 'msft1']
Constraints: ['aapl1 + aapl2 > 0 => aapl1 = aapl2', 'googl1 + googl2 > 0 => googl1 = googl2']


In [206]:
prod  = pd.read_csv('prod.csv', parse_dates=True, index_col=0)
re = pd.read_csv('incubating_raw.csv', parse_dates=True, index_col=0)
rdf = re.dropna(thresh=int(re.shape[1] * .5)).fillna(0.0)
rdf.head()

Unnamed: 0_level_0,cl_gapcont2020,ec_gcz,ec_ngc,ec_ngf,mbt_ma_dvg_so,mbt_mema3_1,mbt_ngc_1,mbt_ngf_1,mbt_ngf_2,ng_gapcont2020,ng_ngf,ng_ngf_2,zs_ngc_1,zs_ngf_1
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2024-04-01,0.0,0.0,0.0,18.75,127.5,0.0,0.0,-50.0,107.5,0.0,0.0,0.0,0.0,0.0
2024-04-02,20.0,0.0,0.0,0.0,-80.0,0.0,0.0,0.0,120.0,0.0,0.0,0.0,0.0,-187.5
2024-04-03,0.0,0.0,-12.5,0.0,-29.0,-39.5,-0.5,3.5,0.0,230.0,340.0,0.0,0.0,287.5
2024-04-04,-800.0,0.0,0.0,400.0,-1.0,24.5,0.0,-50.0,-49.5,260.0,0.0,0.0,-387.5,-187.5
2024-04-05,0.0,0.0,0.0,400.0,-139.5,2.5,0.0,4.5,116.0,-330.0,0.0,0.0,0.0,287.5


In [207]:
def try_product_dict(df, products=['cl', 'ec', 'mbt','ng','zs']):
    product_dict = {}
    not_found = []
    for strat_name in df.columns:
        found = False
        for product in products:
            if f'{product.lower()}_' in strat_name.lower():
                product_dict[strat_name] = product.lower()
                found = True
                break # break inner loop, done w products
        
        if not found:  # If no product was found, do something here
            print('Not found...', strat_name, products)
            not_found.append(strat_name)  # Example: Append to not_found list for tracking
    return product_dict, not_found

product_dict, unmatched = try_product_dict(rdf)
if unmatched: print(f'FUCK! -- need to match by hand: {unmatched}')

In [208]:

def generate_group_list(df, product_dict):
    groups = []
    product_counts = {}

    for strategy in df.columns:
        product = product_dict[strategy]
        if product not in product_counts:
            product_counts[product] = 0
        
        # Increment the count and create incremented names like cl1, cl2 ... cl<n>
        product_counts[product] += 1
        strategy_name = f"{product.lower()}{product_counts[product]}"
        groups.append(strategy_name)
    
    return groups

groups = generate_group_list(rdf, product_dict)
print("Groups:", groups)

Groups: ['cl1', 'ec1', 'ec2', 'ec3', 'mbt1', 'mbt2', 'mbt3', 'mbt4', 'mbt5', 'ng1', 'ng2', 'ng3', 'zs1', 'zs2']


In [209]:
generic_product_groupings = [i.rstrip('0123456789') for i in groups]
generic_product_groupings

['cl',
 'ec',
 'ec',
 'ec',
 'mbt',
 'mbt',
 'mbt',
 'mbt',
 'mbt',
 'ng',
 'ng',
 'ng',
 'zs',
 'zs']

In [210]:
# def generate_conditional_constraints(groups):
#     constraints = []
#     # Track products and their corresponding strategies
#     product_strategies = {}
#     
#     # Build a dictionary of strategies by product using the generated groups
#     for strategy in groups:
#         # Extract the base product name (e.g., "cl" from "cl1")
#         product = strategy.rstrip('0123456789')  
#         if product not in product_strategies:
#             product_strategies[product] = []
#         product_strategies[product].append(strategy)
# 
#     # Generate conditional constraints for each product group
#     for strategies in product_strategies.values():
#         if len(strategies) > 1:
#             # If there are multiple strategies, generate the "if active" constraint
#             constraint_str = f"{strategies[0]} + {' + '.join(strategies[1:])} > 0 => "
#             equal_parts = " = ".join(strategies)
#             constraints.append(constraint_str + equal_parts)
# 
#     return constraints

def generate_conditional_constraints(groups):
    constraints = []
    # Track products and their corresponding strategies
    product_strategies = {}
    
    # Build a dictionary of strategies by product using the generated groups
    for strategy in groups:
        # Extract the base product name (e.g., "cl" from "cl1")
        product = strategy.rstrip('0123456789')  
        if product not in product_strategies:
            product_strategies[product] = []
        product_strategies[product].append(strategy)

    # Generate conditional constraints for each product group
    for strategies in product_strategies.values():
        if len(strategies) > 1:
            # Ensure sum of weights is non-negative (not strictly needed if always non-negative)
            constraints.append(f"{' + '.join(strategies)} >= 0")

            # Generate pairwise equal constraints to ensure equal weights or zero
            for i in range(len(strategies) - 1):
                constraints.append(f"{strategies[i]} - {strategies[i + 1]} == 0")

    return constraints


def generate_constraints_for_equal_weights(groups):
    constraints = []
    # Track products and their corresponding strategies
    product_strategies = {}
    
    # Build a dictionary of strategies by product using the generated groups
    for strategy in groups:
        # Extract the base product name (e.g., "cl" from "cl1")
        product = strategy.rstrip('0123456789')  
        if product not in product_strategies:
            product_strategies[product] = []
        product_strategies[product].append(strategy)

    # Generate constraints for strict equal weights for each product group
    for strategies in product_strategies.values():
        if len(strategies) > 1:
            # Use the first strategy in the list as the reference
            reference = strategies[0]
            # Generate constraints to make all other strategies equal to the reference
            for i in range(1, len(strategies)):
                constraints.append(f"{reference} - {strategies[i]} == 0")
    
    return constraints

# Example usage:
# groups = ['cl1', 'cl2', 'ec1', 'ec2', 'ec3', 'mbt1', 'ng1', 'zs1', 'zs2']
# constraints = generate_conditional_constraints(groups)
# print("Generated Constraints:", constraints)

equal_wt_or_off = generate_conditional_constraints(groups)


equal_wt = generate_constraints_for_equal_weights(groups)

In [211]:
groups

['cl1',
 'ec1',
 'ec2',
 'ec3',
 'mbt1',
 'mbt2',
 'mbt3',
 'mbt4',
 'mbt5',
 'ng1',
 'ng2',
 'ng3',
 'zs1',
 'zs2']

In [212]:


# $ per Unit Fit

from skfolio import RiskMeasure, Population
from sklearn.model_selection import train_test_split
from skfolio.optimization import MeanRisk, InverseVolatility, EqualWeighted, HierarchicalRiskParity
from utilities import convert_summary_percentage

X_train, X_test = train_test_split(rdf, test_size=.33, shuffle=False)

ewp = EqualWeighted(portfolio_params=dict(name="Equal Weight"))
ewp.fit(X_train)
ewp_pf = ewp.predict(X_test)

hrp = HierarchicalRiskParity()
hrp.fit(X_train)
hrp_pf = hrp.predict(X_test)

ivp = InverseVolatility(portfolio_params=dict(name="Inverse Vola"))
ivp.fit(X_train)
ivp_pf = ivp.predict(X_test)

mvo = MeanRisk(
    portfolio_params=dict(name="L2 MVO"),
    # l2_coef=100,
    min_weights = 0.0,
    max_weights = 10.0,
    
    # can be groups = {"SX5E": ["Equity", "Europe"], "SPX": ["Equity", "US"], "TLT": ["Bond", "US"]}
    # OR:    groups = [["Equity", "Equity", "Bond"], ["Europe", "US", "US"]]
    # we chose the latter.
    groups=[
        groups,
    ],
    
    # linear_constraints=equal_wt_or_off
    # linear_constraints=ewc,
    
    # TODO: NOTE: this is a BUG! cannot solve with fully statics (maybe it doesnt exist)
    linear_constraints=[
        'mbt1 = mbt2',
        'mbt2 = mbt3',
        'mbt3 = mbt4',
        'mbt4 = mbt5',
        'mbt1 = mbt5',
        'mbt2 = mbt4',
        'mbt1 - mbt2 == 0',
        'mbt1 - mbt3 == 0',
        'mbt1 - mbt4 == 0',
        'mbt1 - mbt5 == 0',
    ]
)
mvo.fit(X_train)
mvo_pf = mvo.predict(X_test)

print(f'mvo sortino', mvo_pf.annualized_sortino_ratio)

population = Population([mvo_pf, ivp_pf, ewp_pf, hrp_pf])
unit_mvo = mvo_pf
unit_summary_df = population.summary()
unit_population = population

unit_summary_df.map(convert_summary_percentage)

mvo sortino 17.854169535848357


Unnamed: 0,L2 MVO,Inverse Vola,Equal Weight,HierarchicalRiskParity
Mean,27.8695,26.7775,36.2871,22.1433
Annualized Mean,7023.1043,6747.9382,9144.3553,5580.1035
Variance,1523.7716,1327.3075,3304.7388,1021.258
Annualized Variance,383990.4307,334481.5008,832794.1696,257357.0193
Semi-Variance,614.0142,464.7417,1313.8894,387.0517
Annualized Semi-Variance,154731.568,117114.9208,331100.1203,97537.0204
Standard Deviation,39.0355,36.4322,57.4869,31.9571
Annualized Standard Deviation,619.6696,578.3438,912.5756,507.3037
Semi-Deviation,24.7793,21.5579,36.2476,19.6736
Annualized Semi-Deviation,393.3593,342.2206,575.413,312.3092


In [213]:
population.plot_composition()

In [214]:
contracts_total = 1000
population[0].composition * contracts_total 

Unnamed: 0_level_0,L2 MVO
asset,Unnamed: 1_level_1
mbt_ngc_1,182.045426
mbt_ngf_2,182.045426
mbt_ngf_1,182.045426
mbt_mema3_1,92.693358
mbt_ma_dvg_so,92.693358
ec_gcz,59.743397
zs_ngf_1,59.503003
ec_ngc,56.701092
zs_ngc_1,43.552836
ec_ngf,28.812829


In [215]:
population[1].composition # Inverse Vola almost fits to product basis, on its own (naturally)

Unnamed: 0_level_0,Inverse Vola
asset,Unnamed: 1_level_1
mbt_ngc_1,0.184944
mbt_ngf_2,0.115366
mbt_ngf_1,0.100033
mbt_mema3_1,0.094039
mbt_ma_dvg_so,0.093502
ec_gcz,0.082722
ec_ngc,0.082312
ec_ngf,0.054289
ng_ngf_2,0.048749
zs_ngf_1,0.036987


In [144]:
def generate_constraints_for_equal_weights(groups):
    constraints = []
    product_strategies = {}
    
    for strategy in groups:
        # Extract the base product name (e.g., "cl" from "cl1")
        product = strategy.rstrip('0123456789')  
        if product not in product_strategies:
            product_strategies[product] = []
        product_strategies[product].append(strategy)

    # Generate constraints for strict equal weights for each product group
    for strategies in product_strategies.values():
        if len(strategies) > 1:
            # Use the first strategy in the list as the reference
            reference = strategies[0]
            # Generate constraints to make all other strategies equal to the reference
            for i in range(1, len(strategies)):
                constraints.append(f"{reference} - {strategies[i]} == 0")
                constraints.append(f'{reference} = {strategies[i]}')
    
    return constraints

# This looks right, exhaustive -- but it does not work? strangely. nothing does...
ewc = generate_constraints_for_equal_weights(groups)
ewc

['ec1 - ec2 == 0',
 'ec1 = ec2',
 'ec1 - ec3 == 0',
 'ec1 = ec3',
 'mbt1 - mbt2 == 0',
 'mbt1 = mbt2',
 'mbt1 - mbt3 == 0',
 'mbt1 = mbt3',
 'mbt1 - mbt4 == 0',
 'mbt1 = mbt4',
 'mbt1 - mbt5 == 0',
 'mbt1 = mbt5',
 'ng1 - ng2 == 0',
 'ng1 = ng2',
 'ng1 - ng3 == 0',
 'ng1 = ng3',
 'zs1 - zs2 == 0',
 'zs1 = zs2']

# Regime Sketch...

In [152]:
import numpy as np
import pandas as pd
from hmmlearn.hmm import GaussianHMM
from sklearn.preprocessing import StandardScaler

# Load your DataFrame
# Assuming 'rdf' is your DataFrame of signals by date

# Step 1: Prepare the Data
# Replace NaN with 0 (or another suitable value based on your domain knowledge)
rdf.fillna(0, inplace=True)

# Convert DataFrame to numpy array for HMM input
X = rdf.values

# Step 2: Normalize or Standardize the Data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 3: Fit the HMM
# Initialize HMM with 2 states (for "on" and "off")
model = GaussianHMM(n_components=2, covariance_type="full", n_iter=100)

# Fit the model to the data
model.fit(X_scaled)

# Step 4: Predict the hidden states
hidden_states = model.predict(X_scaled)

# Add the predicted states to the original DataFrame
rdf['Hidden_State'] = hidden_states

# Display the DataFrame with hidden states
print(rdf.head())


Model is not converging.  Current: -959.3618951936475 is not greater than -550.726743316741. Delta is -408.63515187690643


            cl_gapcont2020  ec_gcz  ec_ngc  ec_ngf  mbt_ma_dvg_so  \
date                                                                
2024-04-01             0.0     0.0     0.0   18.75          127.5   
2024-04-02            20.0     0.0     0.0    0.00          -80.0   
2024-04-03             0.0     0.0   -12.5    0.00          -29.0   
2024-04-04          -800.0     0.0     0.0  400.00           -1.0   
2024-04-05             0.0     0.0     0.0  400.00         -139.5   

            mbt_mema3_1  mbt_ngc_1  mbt_ngf_1  mbt_ngf_2  ng_gapcont2020  \
date                                                                       
2024-04-01          0.0        0.0      -50.0      107.5             0.0   
2024-04-02          0.0        0.0        0.0      120.0             0.0   
2024-04-03        -39.5       -0.5        3.5        0.0           230.0   
2024-04-04         24.5        0.0      -50.0      -49.5           260.0   
2024-04-05          2.5        0.0        4.5      116.0    

In [158]:
regime_0 = rdf[rdf.Hidden_State == 0]
regime_1 = rdf[rdf.Hidden_State == 1]

r0_sr = (regime_0.mean() / regime_0.std()) * 252 ** .5
r1_sr = (regime_1.mean() / regime_1.std()) * 252 ** .5
r1_sr / r0_sr

cl_gapcont2020    13.963264
ec_gcz             1.095047
ec_ngc             1.787682
ec_ngf             1.179883
mbt_ma_dvg_so      1.492269
mbt_mema3_1        1.091021
mbt_ngc_1               NaN
mbt_ngf_1         -1.244460
mbt_ngf_2         -0.674409
ng_gapcont2020     4.139737
ng_ngf            -2.804986
ng_ngf_2                NaN
zs_ngc_1           1.251141
zs_ngf_1           0.603327
Hidden_State            NaN
dtype: float64

In [163]:
r0_sr

cl_gapcont2020    0.642540
ec_gcz            1.456764
ec_ngc            2.831405
ec_ngf            4.836151
mbt_ma_dvg_so     4.629288
mbt_mema3_1       2.594877
mbt_ngc_1              NaN
mbt_ngf_1         5.499579
mbt_ngf_2         6.163750
ng_gapcont2020    1.746593
ng_ngf           -2.121445
ng_ngf_2               NaN
zs_ngc_1          2.656953
zs_ngf_1          4.155349
Hidden_State           NaN
dtype: float64

In [164]:
r1_sr

cl_gapcont2020    8.971953
ec_gcz            1.595226
ec_ngc            5.061653
ec_ngf            5.706091
mbt_ma_dvg_so     6.908142
mbt_mema3_1       2.831066
mbt_ngc_1         7.229793
mbt_ngf_1        -6.844006
mbt_ngf_2        -4.156888
ng_gapcont2020    7.230438
ng_ngf            5.950623
ng_ngf_2          4.804168
zs_ngc_1          3.324222
zs_ngf_1          2.507032
Hidden_State           inf
dtype: float64

In [165]:
base_sr = (rdf.mean() / rdf.std()) * 252 ** .5
base_sr

cl_gapcont2020     3.458964
ec_gcz             1.506740
ec_ngc             3.323324
ec_ngf             5.036646
mbt_ma_dvg_so      5.345650
mbt_mema3_1        2.685453
mbt_ngc_1          3.869059
mbt_ngf_1          2.423847
mbt_ngf_2          3.647542
ng_gapcont2020     3.901271
ng_ngf             2.931246
ng_ngf_2           2.666760
zs_ngc_1           2.899734
zs_ngf_1           3.649013
Hidden_State      10.885724
dtype: float64

In [166]:
import numpy as np
import pandas as pd
from hmmlearn.hmm import GaussianHMM
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Assuming 'rdf' is your DataFrame of signals by date
# Ensure the DataFrame has numeric data only and no missing values
rdf.fillna(0, inplace=True)  # Fill missing values if any, modify as needed

# Step 1: Prepare the Data
X = rdf.values  # Convert DataFrame to numpy array for HMM input

# Step 2: Normalize or Standardize the Data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 3: Split the Data into Training and Test Sets
# Use time-based splitting (e.g., 70% for training, 30% for testing)
train_size = int(len(X_scaled) * 0.7)
X_train = X_scaled[:train_size]
X_test = X_scaled[train_size:]

# Step 4: Fit the HMM on the Training Set
# Initialize HMM with 2 states (for "on" and "off")
model = GaussianHMM(n_components=2, covariance_type="full", n_iter=100, random_state=42)
model.fit(X_train)

# Step 5: Predict the hidden states on the Test Set
hidden_states_test = model.predict(X_test)

# Step 6: Evaluate the Test Set Predictions
# Add the predicted states to the original DataFrame for the test period
rdf_test = rdf.iloc[train_size:].copy()  # Create a copy for test set period
rdf_test['Hidden_State'] = hidden_states_test

# Step 7: Calculate the average of signals by regime for the Test Set
regime_averages_test = rdf_test.groupby('Hidden_State').mean()

print("Out-of-Sample Average Signals by Regime (Test Set):")
print(regime_averages_test)

Model is not converging.  Current: 88.98463529925762 is not greater than 111.3062470210108. Delta is -22.321611721753186


Out-of-Sample Average Signals by Regime (Test Set):
              cl_gapcont2020     ec_gcz     ec_ngc     ec_ngf  mbt_ma_dvg_so  \
Hidden_State                                                                   
0                 346.666667  26.041667  61.458333   1.562500      10.583333   
1                  15.217391 -14.945652   1.086957  50.271739      36.782609   

              mbt_mema3_1  mbt_ngc_1  mbt_ngf_1  mbt_ngf_2  ng_gapcont2020  \
Hidden_State                                                                 
0               16.833333     31.875 -33.916667  -8.125000      107.500000   
1                4.304348      0.000  79.021739  65.326087       10.434783   

                  ng_ngf   ng_ngf_2    zs_ngc_1   zs_ngf_1  
Hidden_State                                                
0             128.333333  83.333333  180.208333  57.291667  
1             -13.043478   0.000000   40.217391  72.282609  


In [199]:
import numpy as np
import pandas as pd
from hmmlearn.hmm import GaussianHMM
from sklearn.preprocessing import StandardScaler

# Assuming 'rdf' is your DataFrame of signals by date
rdf.fillna(0, inplace=True)  # Fill missing values if any, modify as needed

# Step 1: Prepare the Data
X = rdf.values  # Convert DataFrame to numpy array for HMM input

# Step 2: Normalize or Standardize the Data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 3: Split the Data into Training and Test Sets
train_size = int(len(X_scaled) * 0.7)
X_train = X_scaled[:train_size]
X_test = X_scaled[train_size:]

# Step 4: Fit the HMM on the Training Set
model = GaussianHMM(n_components=2, covariance_type="full", n_iter=100, tol=1e10, random_state=42)
model.fit(X_train)
hidden_states_train = model.predict(X_train)


rdf_train = rdf.iloc[:train_size].copy()
rdf_train['Hidden_State'] = hidden_states_train

# Calculate the average value of each signal by regime
regime_averages_train = rdf_train.groupby('Hidden_State').mean()

# Vectorized approach to determine the "best" regime for each signal
best_regimes = regime_averages_train.idxmax(axis=0).to_dict()

print("Best Regime for Each Signal based on Training Set:")
print(best_regimes)

# Step 6: Predict the hidden states for the Test Set
hidden_states_test = model.predict(X_test)

# Add the predicted states to the original DataFrame for the test period
rdf_test = rdf.iloc[train_size:].copy()  # Create a copy for test set period
rdf_test['Hidden_State'] = hidden_states_test

# Step 7: Evaluate the performance of each signal in its "best" regime on the Test Set
performance_by_regime = {}

for column in rdf.columns:
    if column == 'Hidden_State': continue
    # Mask the DataFrame to select only rows in the "best" regime for the current signal
    best_regime_mask = rdf_test['Hidden_State'] == best_regimes[column]
    # Calculate the mean performance of the signal in its "best" regime
    sliced = rdf_test.loc[best_regime_mask, column]
    
    mean_performance = (sliced.mean() / sliced.std())
    
    
    baseline_mean = rdf_test[column].mean() / rdf_test[column].std()
    performance_by_regime[column] = mean_performance / baseline_mean

print("\nTest Set Mean Performance of Each Signal in Its Best Regime / Baseline (ALL) performance")
print(performance_by_regime)

Best Regime for Each Signal based on Training Set:
{'cl_gapcont2020': 0, 'ec_gcz': 1, 'ec_ngc': 0, 'ec_ngf': 1, 'mbt_ma_dvg_so': 0, 'mbt_mema3_1': 0, 'mbt_ngc_1': 0, 'mbt_ngf_1': 1, 'mbt_ngf_2': 1, 'ng_gapcont2020': 0, 'ng_ngf': 0, 'ng_ngf_2': 0, 'zs_ngc_1': 0, 'zs_ngf_1': 1}

Test Set Mean Performance of Each Signal in Its Best Regime / Baseline (ALL) performance
{'cl_gapcont2020': 2.092948986532784, 'ec_gcz': 14.886722663425061, 'ec_ngc': 1.9055616055651519, 'ec_ngf': 1.2858064278094812, 'mbt_ma_dvg_so': 0.3123199529535943, 'mbt_mema3_1': 0.40933622681830395, 'mbt_ngc_1': 1.6910323522087174, 'mbt_ngf_1': 1.8988733492547787, 'mbt_ngf_2': 1.416707407993053, 'ng_gapcont2020': 2.3125457368981994, 'ng_ngf': 2.189484606080892, 'ng_ngf_2': 1.640825308284734, 'zs_ngc_1': 1.910889129656046, 'zs_ngf_1': 0.9115568514791065}


In [201]:
# pd.DataFrame(performance_by_regime, index=None)
improvement = pd.DataFrame().from_dict(performance_by_regime, orient='index', columns=['Improvement'])
improvement

Unnamed: 0,Improvement
cl_gapcont2020,2.092949
ec_gcz,14.886723
ec_ngc,1.905562
ec_ngf,1.285806
mbt_ma_dvg_so,0.31232
mbt_mema3_1,0.409336
mbt_ngc_1,1.691032
mbt_ngf_1,1.898873
mbt_ngf_2,1.416707
ng_gapcont2020,2.312546


In [202]:
improvement.mean()

Improvement    2.490329
dtype: float64