In [1]:
"""
Script to render the asset pricing table
"""

import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
from scipy.stats import ttest_1samp
from regtabletotext import prettify_result
import warnings
warnings.filterwarnings("ignore")

from environ.constants import (
    DEPENDENT_VARIABLES,
    DEPENDENT_VARIABLES_ASSETPRICING,
    PROCESSED_DATA_PATH,
    STABLE_DICT,
    ALL_NAMING_DICT,
    TABLE_PATH,
)
from environ.process.asset_pricing.double_sorting import calculate_period_return

from environ.process.asset_pricing.assetpricing_functions import (
    reg_fama_macbeth, clean_weekly_panel, univariate_sort, univariate_sort_table, double_sort, double_sort_table, get_dominance_portfolios, significance_stars
    )
                                                                  

In [3]:
PROCESSED_DATA_PATH

WindowsPath('C:/Users/chenb/Desktop/defi-econ/processed_data')

In [4]:
df1 = pd.read_pickle("C:\\Users\\chenb\\Desktop\\defi-econ\\processed_data\\defi-currency-data\\panel_main.pickle.zip", compression="zip"
    )
df2 = pd.read_pickle(
        PROCESSED_DATA_PATH / "panel_main.pickle.zip", compression="zip"
    )

In [6]:
df1.describe()

Unnamed: 0,Volume,Date,Inflow_centrality,Outflow_centrality,TVL,volume_in,volume_out,borrow_rate,Supply_share,Borrow_share,...,vol_in_full_len_share,vol_out_full_len_share,vol_inter_full_len_share,volume_ultimate_share,mcap_share,dollar_exchange_rate_log_return_1,dollar_exchange_rate_log_return_vol_1_30,corr_gas,corr_eth,corr_sp
count,270921.0,270921,59975.0,59975.0,270921.0,270921.0,270921.0,270921.0,270921.0,12915.0,...,270921.0,270921.0,270921.0,270921.0,270921.0,264268.0,259201.0,250631.0,250631.0,250631.0
mean,7435637.0,2021-12-21 14:46:42.520292096,0.03620079,0.03613452,17551440.0,3714018.0,3714018.0,2.973474e+228,0.003488,0.073171,...,0.003488,0.003488,0.003488,0.003488,0.003488,-0.000702,0.128053,-0.012643,0.470409,0.366386
min,0.0,2020-07-01 00:00:00,-1.387779e-16,-1.110223e-16,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,-91.14005,0.0,-0.750179,-0.72227,-0.793529
25%,0.0,2021-06-19 00:00:00,0.0005248727,0.0005691961,166168.3,0.0,0.0,0.0,0.0,0.000171,...,0.0,0.0,0.0,0.0,0.0,-0.041798,0.051096,-0.151499,0.256397,0.171054
50%,0.0,2022-01-10 00:00:00,0.00233611,0.002386071,734014.6,0.0,0.0,0.0,0.0,0.002732,...,0.0,0.0,0.0,0.0,4.4e-05,-0.001605,0.075006,-0.016323,0.514685,0.39641
75%,0.0,2022-07-15 00:00:00,0.01052981,0.01053827,2664867.0,0.0,0.0,0.0,0.0,0.029206,...,0.0,0.0,0.0,0.0,0.000289,0.031225,0.111107,0.124685,0.717577,0.5871
max,4744965000.0,2023-01-31 00:00:00,0.9365245,0.8835949,4162345000.0,2235346000.0,2509619000.0,8.055765e+233,1.0,1.0,...,0.861772,0.562207,0.999657,0.48201,0.702048,90.916136,23.905279,0.749242,0.999667,0.953076
std,84879090.0,,0.1201234,0.1201434,142219400.0,42445240.0,42517990.0,inf,0.030092,0.160596,...,0.030753,0.030819,0.043661,0.030702,0.034371,0.488559,0.483993,0.199472,0.309345,0.281059


In [7]:
df2.describe()

Unnamed: 0,Date,Volume,Inflow_centrality,Outflow_centrality,TVL,volume_in,volume_out,borrow_rate,supply_rates,Borrow_share,...,vol_in_full_len_share,vol_out_full_len_share,vol_inter_full_len_share,volume_ultimate_share,mcap_share,dollar_exchange_rate_log_return_1,dollar_exchange_rate_log_return_vol_1_30,corr_gas,corr_eth,corr_sp
count,270921,270921.0,59975.0,59975.0,270921.0,270921.0,270921.0,270921.0,270921.0,12915.0,...,270921.0,270921.0,270921.0,270921.0,270921.0,264268.0,259201.0,250631.0,250631.0,250631.0
mean,2021-12-21 14:46:42.520292096,7435637.0,0.03620079,0.03613452,17551440.0,3714018.0,3714018.0,2.973474e+228,0.000603,0.073171,...,0.003488,0.003488,0.003488,0.003488,0.003488,-0.000702,0.128053,-0.012643,0.470409,0.366386
min,2020-07-01 00:00:00,0.0,-1.387779e-16,-1.110223e-16,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,-91.14005,0.0,-0.750179,-0.72227,-0.793529
25%,2021-06-19 00:00:00,0.0,0.0005248727,0.0005691961,166168.3,0.0,0.0,0.0,0.0,0.000171,...,0.0,0.0,0.0,0.0,0.0,-0.041798,0.051096,-0.151499,0.256397,0.171054
50%,2022-01-10 00:00:00,0.0,0.00233611,0.002386071,734014.6,0.0,0.0,0.0,0.0,0.002732,...,0.0,0.0,0.0,0.0,4.4e-05,-0.001605,0.075006,-0.016323,0.514685,0.39641
75%,2022-07-15 00:00:00,0.0,0.01052981,0.01053827,2664867.0,0.0,0.0,0.0,0.0,0.029206,...,0.0,0.0,0.0,0.0,0.000289,0.031225,0.111107,0.124685,0.717577,0.5871
max,2023-01-31 00:00:00,4744965000.0,0.9365245,0.8835949,4162345000.0,2235346000.0,2509619000.0,8.055765e+233,0.633147,1.0,...,0.861772,0.562207,0.999657,0.48201,0.702048,90.916136,23.905279,0.749242,0.999667,0.953076
std,,84879090.0,0.1201234,0.1201434,142219400.0,42445240.0,42517990.0,inf,0.006166,0.160596,...,0.030753,0.030819,0.043661,0.030702,0.034371,0.488559,0.483993,0.199472,0.309345,0.281059


In [2]:
import os
os.listdir("C:/Users/chenb/Desktop/data/data_network/v2")

['.DS_Store',
 'betweenness',
 'clustering_ind',
 'eigen_centrality_pool',
 'eigen_centrality_swap',
 'eigen_centrality_undirected',
 'eigen_centrality_undirected_multi',
 'inflow_centrality',
 'inout_flow',
 'network_graph',
 'outflow_centrality',
 'primary_tokens',
 'sankey',
 'total_eigen_centrality_undirected',
 'tvl',
 'tvl_old',
 'tvl_share',
 'tvl_share_old',
 'volume',
 'volume_in',
 'volume_in_share',
 'volume_out',
 'volume_out_share',
 'volume_share',
 'volume_total',
 'vol_inter_full_len',
 'vol_in_full_len',
 'vol_out_full_len']

In [29]:
# load factors
ff3 = pd.read_csv(PROCESSED_DATA_PATH/"FF3.csv") 
ltw3 = pd.read_csv(PROCESSED_DATA_PATH/"LTW3.csv")

# load the regression panel dataset
reg_panel = pd.read_pickle(
    PROCESSED_DATA_PATH / "panel_main.pickle.zip", compression="zip"
)

# stable non-stable info dict
stable_nonstable_info = {
    "stablecoin": reg_panel[reg_panel["Token"].isin(STABLE_DICT.keys())],
    "non-stablecoin": reg_panel[~reg_panel["Token"].isin(STABLE_DICT.keys())],
    "all": reg_panel,
}

# How are returns aggregated for each portfolio
Q = [0,0.33,0.66,1] # [0,0.2,0.4,0.6,0.8,1]# [0,0.25,0.5,0.75,1]#[0,0.2,0.4,0.6,0.8,1]# 
ret_agg = 'value_weight' 
DEPENDENT_VARIABLES_ASSETPRICING = DEPENDENT_VARIABLES_ASSETPRICING[:1] #['volume_ultimate_share']  #,'volume_in_share' , 'volume_out_share']
# ,'eigen_centrality_undirected','total_eigen_centrality_undirected','Volume_share']

### Univariate sorting

In [32]:
for dom_variable in DEPENDENT_VARIABLES_ASSETPRICING[:]:
    for is_boom in [-1]:
        quantiles = Q  
        separate_zero_value=True
        df_panel = clean_weekly_panel(reg_panel, is_stablecoin = 0, is_boom = is_boom)
        df_panel = df_panel[df_panel[dom_variable]>0]
        # Substract risk free rate
        df_panel = pd.merge(df_panel,ff3, on='WeekYear')
        df_panel['ret_lead_1'] = df_panel['ret_lead_1']-df_panel['RF']
        
        df_panel = univariate_sort(df_panel, dom_variable, quantiles=quantiles, separate_zero_value=separate_zero_value)
        summary_table = univariate_sort_table(df_panel, ret_agg = ret_agg)
    
        if is_boom == 1:
            boom_str = " boom"
        elif is_boom == 0:
            boom_str = " bust"
        else:
            boom_str = " alltime"
        summary_table = summary_table.style.set_caption(dom_variable+' '+boom_str)
        display(summary_table)

Unnamed: 0,P1,P2,P3,P3-P1
E[R]--Rf,0.033775,0.02816,0.022073,-0.012149
t,2.350435,2.114748,2.008379,-1.235158
Std,0.164469,0.152989,0.127226,0.112578
SR,1.482894,1.329135,1.252825,-0.779263


In [33]:
df_panel.portfolio.value_counts()

portfolio
P3    2784
P1    2702
P2    2621
Name: count, dtype: int64

In [34]:
df_panel.groupby('portfolio')['mcap'].median()

portfolio
P1    3.509627e+07
P2    7.407528e+07
P3    2.498114e+08
Name: mcap, dtype: float64

In [35]:
reg_panel.columns

Index(['Token', 'Volume', 'Date', 'Inflow_centrality', 'Outflow_centrality',
       'TVL', 'volume_in', 'volume_out', 'borrow_rate', 'Supply_share',
       'Borrow_share', 'supply_rates', 'betweenness_centrality_volume',
       'betweenness_centrality_count', 'vol_in_full_len', 'vol_out_full_len',
       'vol_inter_full_len', 'eigen_centrality_undirected',
       'total_eigen_centrality_undirected', 'volume_ultimate',
       'dollar_exchange_rate', 'stableshare', 'mcap', 'S&P', 'timestamp',
       'gas_price_wei', 'ether_price_usd', 'gas_price_usd', 'S&P_log_return_1',
       'S&P_log_return_vol_1_30', 'ether_price_usd_log_return_1',
       'ether_price_usd_log_return_vol_1_30', 'gas_price_usd_log_return_1',
       'gas_price_usd_log_return_vol_1_30', 'Volume_share', 'TVL_share',
       'volume_in_share', 'volume_out_share', 'vol_in_full_len_share',
       'vol_out_full_len_share', 'vol_inter_full_len_share',
       'volume_ultimate_share', 'mcap_share',
       'dollar_exchange_rate_lo

In [36]:
# def assign_zv_portfolio(x, quantiles, prefix="P", separate_zero_value=True):
#     # Create an empty result Series with the same index as x.
#     result = pd.Series(index=x.index, dtype=object)

#     # combine zero values with bottom portfolio
#     # Identify rows where the value is 0.
#     zero_mask = x == 0
#     if zero_mask.sum() == 0:
#         result = pd.qcut(
#             x,
#             q=quantiles,
#             labels=[f"{prefix}{i}" for i in range(1, len(quantiles))],
#         )
#     else:
#         if separate_zero_value:
#             result[zero_mask] = f"{prefix}0"
#             result[~zero_mask] = pd.qcut(
#                 x[~zero_mask],
#                 q=quantiles,
#                 labels=[f"{prefix}{i}" for i in range(1, len(quantiles))],
#             )
#         else:
#             result[zero_mask] = f"{prefix}1"
#             result[~zero_mask] = pd.qcut(
#                 x[~zero_mask],
#                 q=quantiles,
#                 labels=[f"{prefix}{i}" for i in range(1, len(quantiles))],
#             )
#     return result

# def univariate_zv_sort(
#     df_panel, dom_variable, quantiles=[0, 0.33, 0.67, 1], separate_zero_value=True
# ) -> pd.DataFrame:
#     # Assign portfolio for each WeekYear group.
#     df_panel["portfolio"] = df_panel.groupby("WeekYear")[dom_variable].transform(
#         lambda x: assign_zv_portfolio(
#             x, quantiles=quantiles, prefix="P", separate_zero_value=separate_zero_value
#         )
#     )
#     return df_panel
# def weighted_average_return(group):
#     """
#     Compute the value-weighted return for a group using the token market capitalization.
#     The weighted return is defined as: sum(ret * mcap) / sum(mcap)
#     """
#     return np.average(group["ret_lead_1"], weights=group["mcap"])

# def univariate_zv_sort_table(
#     df_panel, ret_agg="value_weight", annualized=False
# ) -> pd.DataFrame:
#     """
#     Compute the time-series of aggregated portfolio returns for each WeekYear.

#     Parameters:
#     - ret_agg: choose among "mean", "median", or "value_weight" (for value-weighted returns).
#     - annualized: if True, annualize the average return.
#     """
#     if ret_agg == "mean":
#         portfolio_ts = (
#             df_panel.groupby(["WeekYear", "portfolio"])["ret_lead_1"].mean().unstack()
#         )
#     elif ret_agg == "median":
#         portfolio_ts = (
#             df_panel.groupby(["WeekYear", "portfolio"])["ret_lead_1"].median().unstack()
#         )
#     elif ret_agg == "value_weight":
#         portfolio_ts = (
#             df_panel.groupby(["WeekYear", "portfolio"])
#             .apply(weighted_average_return)
#             .unstack()
#         )
#     else:
#         raise ValueError("ret_agg must be one of 'mean', 'median', or 'value_weight'")

#     results = {}

#     # Loop through each portfolio's time series and compute overall statistics across time.
#     for port in portfolio_ts.columns:
#         ret_ts = portfolio_ts[port].dropna()  # drop missing values if any
#         mean_return = ret_ts.mean()
#         std_return = ret_ts.std(ddof=1)
#         t_stat, _ = ttest_1samp(ret_ts, popmean=0)
#         sharpe = (
#             np.sqrt(365 / 7) * mean_return / std_return if std_return != 0 else np.nan
#         )

#         results[port] = {
#             "E[R]--Rf": mean_return * 52 if annualized else mean_return,
#             "t": t_stat,
#             "Std": std_return,
#             "SR": sharpe,
#         }

#     # Determine the number of portfolios (assumes portfolios are labeled like P1, P2, ..., Pn)
#     n_quantiles = portfolio_ts.shape[1]

#     # Compute the spread portfolio as the time series difference: P{n_quantiles} - P1.
#     high_port = portfolio_ts[f"P{n_quantiles}"]
#     low_port = portfolio_ts["P0"]
#     spread_ts = high_port - low_port
#     mean_diff = spread_ts.mean()
#     std_diff = spread_ts.std(ddof=1)
#     t_stat_diff, _ = ttest_1samp(spread_ts.dropna(), popmean=0)
#     sharpe_diff = np.sqrt(365 / 7) * mean_diff / std_diff if std_diff != 0 else np.nan

#     results[f"P{n_quantiles}-P0"] = {
#         "E[R]--Rf": mean_diff,
#         "t": t_stat_diff,
#         "Std": std_diff,
#         "SR": sharpe_diff,
#     }

#     summary_table = pd.DataFrame(results)
#     return summary_table

# for dom_variable in ['betweenness_centrality_volume']:
#     for is_boom in [-1]:
#         quantiles = [0]
#         separate_zero_value=True
#         df_panel = clean_weekly_panel(reg_panel, is_stablecoin = 0, is_boom = is_boom)
#         # df_panel = df_panel[df_panel[dom_variable]>0]
#         df_panel = pd.merge(df_panel,ff3, on='WeekYear')
#         df_panel['ret_lead_1'] = df_panel['ret_lead_1']-df_panel['RF']
        
#         df_panel = univariate_zv_sort(df_panel, dom_variable, quantiles=quantiles, separate_zero_value=separate_zero_value)
#         summary_table = univariate_zv_sort_table(df_panel, ret_agg = ret_agg)
    
#         if is_boom == 1:
#             boom_str = " boom"
#         elif is_boom == 0:
#             boom_str = " bust"
#         else:
#             boom_str = " alltime"
#         summary_table = summary_table.style.set_caption(dom_variable+' '+boom_str)
#         display(summary_table)

In [37]:
df_panel

Unnamed: 0,Token,WeekYear,ret,volatility,mcap,mcap_share,amihud,is_boom,is_stablecoin,gas_price_usd,...,ret_lead_1,ret_rolling_4,Date,MKT,SMB,HML,RF,Week,Year,portfolio
0,$AKC,2022-13,2.837605,74.413035,0.000000e+00,0.000000,2.759577e-07,False,0,0.000209,...,-0.331418,2.981673,2022-04-01,0.0015,0.0101,-0.0415,0.00003,13,2022,P1
1,$AKC,2022-14,-0.331388,1.283833,0.000000e+00,0.000000,2.154088e-07,False,0,0.000212,...,-0.202849,1.662195,2022-04-08,-0.0192,-0.0330,0.0220,0.00003,14,2022,P1
2,$AKC,2022-15,-0.202819,0.920389,0.000000e+00,0.000000,2.229143e-07,False,0,0.000138,...,-0.372408,1.122250,2022-04-14,-0.0172,0.0178,0.0224,0.00003,15,2022,P2
3,$AKC,2022-16,-0.372378,0.445062,0.000000e+00,0.000000,1.675409e-07,False,0,0.000153,...,-0.435399,0.283776,2022-04-22,-0.0323,-0.0039,0.0232,0.00003,16,2022,P1
4,$AKC,2022-17,-0.435369,0.771352,0.000000e+00,0.000000,5.528750e-07,False,0,0.000327,...,-0.468173,-0.811116,2022-04-29,-0.0335,-0.0026,0.0054,0.00003,17,2022,P1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8102,⚗️,2021-45,-0.022914,0.570483,1.079282e+08,0.000119,2.533261e-08,False,0,0.000714,...,-0.368249,-0.250154,2021-11-12,-0.0026,-0.0053,0.0083,0.00001,45,2021,P2
8103,⚗️,2021-46,-0.368239,0.848700,8.796850e+07,0.000103,5.236743e-08,False,0,0.000580,...,-0.017683,-0.503924,2021-11-19,-0.0014,-0.0176,-0.0160,0.00001,46,2021,P2
8104,⚗️,2021-47,-0.017673,0.547065,6.951797e+07,0.000082,5.845556e-08,False,0,0.000560,...,-0.140675,-0.403373,2021-11-26,-0.0256,-0.0226,0.0263,0.00001,47,2021,P1
8105,⚗️,2021-48,-0.140665,0.539667,6.975927e+07,0.000080,5.331698e-08,False,0,0.000575,...,-0.092045,-0.478920,2021-12-03,-0.0218,-0.0168,0.0145,0.00002,48,2021,P1


### Double sort

In [38]:
for secondary_variable in ['mcap']:
    for dom_variable in DEPENDENT_VARIABLES_ASSETPRICING[:]:
        for is_boom in [-1]:
            quantiles = Q
            secondary_quantiles=[0,0.3,0.7,1]
            seprarate_zero_value=True
            df_panel = clean_weekly_panel(reg_panel, is_stablecoin = 0, is_boom = is_boom)
            df_panel = df_panel[df_panel[dom_variable]>0]
            df_panel = pd.merge(df_panel,ff3, on='WeekYear')
            df_panel= double_sort(df_panel, dom_variable, secondary_variable=secondary_variable, quantiles=quantiles, secondary_quantiles=secondary_quantiles, separate_zero_value=separate_zero_value)
            summary_table = double_sort_table(df_panel, ret_agg=ret_agg)
            if is_boom == 1:
                boom_str = " boom"
            elif is_boom == 0:
                boom_str = " bust"
            else:
                boom_str = "alltime"
            summary_table = summary_table.style.set_caption(dom_variable +' '+ boom_str)
            display(summary_table)

primary_portfolio,P1,P2,P3
secondary_portfolio,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Q1,,,
Q2,,,
Q3,,,


In [39]:
summary_table.columns

CategoricalIndex(['P1', 'P2', 'P3'], categories=['P1', 'P2', 'P3'], ordered=True, dtype='category', name='primary_portfolio')

# Factor testing

In [40]:

factor_models = ["MKT", "CMKT", "CMKT+CMOM+CSIZE"]
is_boom = -1

for factor_model in factor_models[2:3]:
    for dom_variable in DEPENDENT_VARIABLES_ASSETPRICING[:2]:
        for is_boom in [-1]:
            # 1. Prepare your data
            quantiles = Q #[0,0.2,0.4,0.6,0.8,1] #[0, 0.33, 0.66, 1] 
            separate_zero_value = False
            df_panel = clean_weekly_panel(reg_panel, is_stablecoin=0, is_boom=is_boom)
            df_panel = df_panel[df_panel[dom_variable] > 0]

            df_panel = univariate_sort(
                df_panel, dom_variable, quantiles, separate_zero_value=separate_zero_value
            )
            dominance_portfolios = get_dominance_portfolios(df_panel, ret_agg=ret_agg)
            dominance_portfolios.rename(columns={dominance_portfolios.columns[-1]: "CDOM"}, inplace=True)
            portfolios = list(dominance_portfolios.columns)

            # 2. Merge all factors into a single DataFrame
            factors_data = pd.merge(dominance_portfolios, ff3, on=["WeekYear"], how="left")
            factors_data = pd.merge(factors_data, ltw3, on=["WeekYear"], how="left")
            for p in portfolios:
                factors_data[p] = factors_data[p]-factors_data['RF']
            # 3. Build a list of factor names from the formula (plus "alpha")
            #    Example: factor_model="MKT + SMB + HML" => ["MKT", "SMB", "HML"]
            #    We'll store "alpha" and then each factor, plus a matching "_t" row for t-stats
            raw_factors = factor_model.replace(" ", "").split("+")
            factor_names = ["alpha"] + raw_factors  # "alpha" is the renamed Intercept
            row_list = []
            for f in factor_names:
                row_list.append(f)      # e.g. "alpha", "MKT", "SMB", ...
                row_list.append(f"{f}_t")  # e.g. "alpha_t", "MKT_t", ...

            # Finally, add R-squared and N at the bottom
            row_list += ["R-squared", "N"]
            final_table = pd.DataFrame(index=row_list, columns=portfolios)

            # 4. Run a separate regression for each portfolio
            for p in portfolios:
                formula = f"{p} ~ {factor_model}"

                # Use Newey–West (HAC) standard errors
                model = smf.ols(formula=formula, data=factors_data).fit(
                    cov_type="HAC", cov_kwds={"maxlags": 4}
                )

                # Extract estimates, t-stats, p-values
                coefs = model.params.copy()
                tvals = model.tvalues.copy()
                pvals = model.pvalues.copy()

                # Rename "Intercept" to "alpha"
                if "Intercept" in coefs.index:
                    coefs.rename({"Intercept": "alpha"}, inplace=True)
                    tvals.rename({"Intercept": "alpha"}, inplace=True)
                    pvals.rename({"Intercept": "alpha"}, inplace=True)

                # Fill each factor row with the coefficient and the next row with the t-stat
                for f in factor_names:
                    if f in coefs.index:
                        star = significance_stars(pvals[f])
                        
                        # Row for coefficient (with stars)
                        final_table.loc[f, p] = f"{coefs[f]:.4f}{star}"

                        # Row for p-value
                        # final_table.loc["p", p] = f"({pvals[f]:.2f})"
                        
                        # Row for t-stat
                        final_table.loc[f"{f}_t", p] = f"({tvals[f]:.2f})"
                    else:
                        # If factor not found in the regression, fill with blanks or zeros
                        final_table.loc[f, p] = ""
                        final_table.loc[f"{f}_t", p] = ""

                # Fill in R-squared and # obs
                final_table.loc["R-squared", p] = f"{model.rsquared:.3f}"
                final_table.loc["N", p]         = f"{int(model.nobs)}"

            # 5. Print or export the final table
            print(f"== Results for {dom_variable} | Model: {factor_model} ")
            print(final_table)
            # final_table.to_latex('panelA.tex', index=True, header=False, column_format='lrrrr', 
            # bold_rows=True).replace('\\toprule\n', '').replace('\\bottomrule\n', '')



== Results for volume_ultimate_share | Model: CMKT+CMOM+CSIZE 
                 P1       P2        P3     CDOM
alpha      0.0341**  0.0260*   0.0239*  -0.0109
alpha_t      (1.99)   (1.91)    (1.94)  (-1.05)
CMKT        -0.1259  -0.1499   -0.0338   0.0906
CMKT_t      (-0.83)  (-0.80)   (-0.30)   (0.91)
CMOM         0.1784   0.3778    0.2221   0.0567
CMOM_t       (0.94)   (1.57)    (1.49)   (0.49)
CSIZE       -0.1244  -0.1982  -0.3445*  -0.2165
CSIZE_t     (-0.63)  (-0.67)   (-1.75)  (-1.60)
R-squared     0.014    0.046     0.047    0.023
N               131      132       134      131


### FAMA MCBETH 

In [41]:
"""
Script to render the table of Fama Macbeth.
"""

from pathlib import Path
import pandas as pd
import numpy as np
from scipy.stats import ttest_1samp
from environ.constants import (
    ALL_NAMING_DICT,
    DEPENDENT_VARIABLES_ASSETPRICING,
    PROCESSED_DATA_PATH,
    TABLE_PATH,
)
from environ.process.asset_pricing.assetpricing_functions import (
    clean_weekly_panel,
    univariate_sort,
    get_dominance_portfolios,
    reg_fama_macbeth,
)


if __name__ == "__main__":
    # compute means for portfolio returns (can change to median)
    ret_agg = "value_weight"
    is_boom = -1
    # load the regression panel dataset
    reg_panel = pd.read_pickle(
        PROCESSED_DATA_PATH / "panel_main.pickle.zip", compression="zip"
    )
    # load factors
    ff3 = pd.read_csv(PROCESSED_DATA_PATH / "FF3.csv")
    ltw3 = pd.read_csv(PROCESSED_DATA_PATH / "LTW3.csv")
    for dom_variable in DEPENDENT_VARIABLES_ASSETPRICING[:1]:
        quantiles = Q
        separate_zero_value = False 
        df_panel = clean_weekly_panel(reg_panel, is_stablecoin=0, is_boom=is_boom)
        df_panel = df_panel[df_panel[dom_variable] > 0]
        df_panel = univariate_sort(
            df_panel, dom_variable, quantiles, separate_zero_value=separate_zero_value
        )
        dominance_factor = get_dominance_portfolios(df_panel)
        dominance_factor.rename(
            columns={dominance_factor.columns[-1]: "CDOM"}, inplace=True
        )
        # Get the test assets
        assets_panel = clean_weekly_panel(reg_panel, is_stablecoin=0, is_boom=-1)

        # Calculate the mean market cap for each token
        mean_market_cap = assets_panel.groupby('Token')['mcap'].mean()

        # Identify tokens with an average market cap above 1 million
        tokens_above_1m = mean_market_cap[mean_market_cap > 1e6].index

        # Filter the original DataFrame to keep only these tokens
        assets_panel = assets_panel[assets_panel['Token'].isin(tokens_above_1m)]

        # Merge all factors
        data_fama_macbeth = pd.merge(dominance_factor, ff3, on=["WeekYear"], how="left")
        data_fama_macbeth = pd.merge(
            data_fama_macbeth, ltw3, on=["WeekYear"], how="left"
        )
        # Merge factors with returns
        data_fama_macbeth = pd.merge(
            data_fama_macbeth, assets_panel, on=["WeekYear"], how="left"
        )
        data_fama_macbeth = data_fama_macbeth.dropna()

        # Run the Fama–MacBeth regression
        data_fama_macbeth["excess_ret"] = (
            data_fama_macbeth["ret"] - data_fama_macbeth["RF"]
        )
        fama_macbeth_results = reg_fama_macbeth(
            data_fama_macbeth, formula="excess_ret ~ CMKT + CMOM + CSIZE + CDOM"
        )
        fama_macbeth_results = fama_macbeth_results.round(3)
        fama_macbeth_results.drop("t_stat", axis=1, inplace=True)
        fama_macbeth_results.rename(
            columns={"factor":"Factor", "risk_premium":"Risk Premium", "t_stat_NW":r"\emph{t}"}, inplace=True
        )
        print(fama_macbeth_results)
        # file_name = (
        #     TABLE_PATH / "assetpricing" / f"assetpricing_famamacbeth_{dom_variable}"
        # )

        # fama_macbeth_results.to_latex(
        #     f"{file_name}.tex",
        #     index=True,
        #     escape=False,
        # )


      Factor  Risk Premium  \emph{t}
0       CDOM        -0.253    -1.629
1       CMKT         1.175     5.724
2       CMOM         0.084     0.686
3      CSIZE         0.384     3.150
4  Intercept         7.602     3.751


### Panel regression for returns

In [42]:
# def clean_weekly_betweenness(reg_panel):
#     # add supply rates
#     reg_panel["daily_supply_return"] = reg_panel["supply_rates"] / 365.2425
#     reg_panel.sort_values(by=["Token", "Date"], ascending=True, inplace=True)

#     # calculate daily returns

#     reg_panel["ret"] = reg_panel.groupby("Token")["dollar_exchange_rate"].pct_change(
#         fill_method=None
#     )
#     reg_panel["ret"] = (1+reg_panel["ret"]) * (1+reg_panel["daily_supply_return"]) - 1
#     # compute amihud illiquidity measure
#     reg_panel["amihud"] = np.where(
#         reg_panel["Volume"] == 0, np.nan, reg_panel["ret"].abs() / reg_panel["Volume"]
#     )
#     reg_panel["is_stablecoin"] = (
#         reg_panel.groupby("Token")["stableshare"].transform("max") > 0
#     ).astype(int)

#     # Add columns for the week and year
#     reg_panel["Week"] = reg_panel["Date"].dt.isocalendar().week.replace(53, 52)
#     reg_panel["Year"] = reg_panel["Date"].dt.isocalendar().year
#     reg_panel["WeekYear"] = (
#         reg_panel["Year"].astype(str) + "-" + reg_panel["Week"].astype(str)
#     )

#     agg_dict = {
#         "ret": ("ret", lambda x: (1 + x).prod() - 1),
#         "mcap": ("mcap", "mean"),
#         "amihud": ("amihud", "mean"),
#         "is_boom": ("is_boom", "last"),
#         "is_stablecoin": ("is_stablecoin", "last"),
#         "gas_price_usd": ("gas_price_usd", "mean"),
#         "stableshare": ("stableshare", "mean"),
#         "gas_price_usd_log_return_vol_1_30": ("gas_price_usd_log_return_vol_1_30", "mean"),
#         "ether_price_usd_log_return_vol_1_30": ("ether_price_usd_log_return_vol_1_30", "mean"),
#         "Supply_share": ("Supply_share", "mean"),
#         "supply_rates": ("supply_rates", "mean"),
#     }
#     for col in DEPENDENT_VARIABLES:
#         agg_dict[col] = (col, "mean")

#     reg_panel = reg_panel.groupby(["Token", "WeekYear"]).agg(**agg_dict).reset_index()

#     # Ensure the DataFrame is sorted by Token and WeekYear
#     reg_panel = reg_panel.sort_values(["Token", "WeekYear"])

#     # Create the lead returns, i.e. returns one week ahead
#     reg_panel["ret_lead_1"] = reg_panel.groupby("Token")["ret"].shift(-1)
#     reg_panel = reg_panel.dropna(subset=["ret_lead_1"])

#     #Winsorize
#     # reg_panel["ret_lead_1"] = reg_panel.groupby(["WeekYear"])["ret_lead_1"].transform(
#     #     lambda x: x.clip(lower=x.quantile(0.005), upper=x.quantile(0.995))
#     # )
#     return reg_panel

In [43]:
# def clean_daily_betweenness(reg_panel):
#     # add supply rates
#     reg_panel["daily_supply_return"] = reg_panel["supply_rates"] / 365.2425
#     reg_panel.sort_values(by=["Token", "Date"], ascending=True, inplace=True)

#     # calculate daily returns

#     reg_panel["ret"] = reg_panel.groupby("Token")["dollar_exchange_rate"].pct_change(
#         fill_method=None
#     )
#     reg_panel["ret"] = (1+reg_panel["ret"]) * (1+reg_panel["daily_supply_return"]) - 1
#     # compute amihud illiquidity measure
#     reg_panel["amihud"] = np.where(
#         reg_panel["Volume"] == 0, np.nan, reg_panel["ret"].abs() / reg_panel["Volume"]
#     )
#     reg_panel["is_stablecoin"] = (
#         reg_panel.groupby("Token")["stableshare"].transform("max") > 0
#     ).astype(int)

#     agg_dict = {
#         "ret": ("ret", lambda x: (1 + x).prod() - 1),
#         "is_boom": ("is_boom", "last"),
#         "is_stablecoin": ("is_stablecoin", "last"),
#     }
#     for col in DEPENDENT_VARIABLES + ["mcap", "amihud", "stableshare", "gas_price_usd","gas_price_usd_log_return_vol_1_30", "ether_price_usd_log_return_vol_1_30"]:
#         agg_dict[col] = (col, "mean")

#     reg_panel = reg_panel.groupby(["Token", "Date"]).agg(**agg_dict).reset_index()

#     # Ensure the DataFrame is sorted by Token and WeekYear
#     reg_panel = reg_panel.sort_values(["Token", "Date"])

#     # Create the lead returns, i.e. returns one week ahead
#     reg_panel["ret_lead_1"] = reg_panel.groupby("Token")["ret"].shift(-1)
#     reg_panel = reg_panel.dropna(subset=["ret_lead_1"])

#     #Winsorize
#     # reg_panel["ret_lead_1"] = reg_panel.groupby(["WeekYear"])["ret_lead_1"].transform(
#     #     lambda x: x.clip(lower=x.quantile(0.005), upper=x.quantile(0.995))
#     # )
#     return reg_panel

# # reg_panel['is_stable'] = (reg_panel.groupby('Token')['stableshare'].transform('max') > 0).astype(int)
# # reg_panel = reg_panel.sort_values(["Token", "Date"])
# # reg_panel["ret"] = reg_panel.groupby("Token")["dollar_exchange_rate"].pct_change(
# # fill_method=None
# # )
# # reg_panel["log_mcap"] = np.log(reg_panel["mcap"])
# from linearmodels.panel import PanelOLS
# stablecoins_list = ["DAI", "USDC", "USDT", "FEI", "FRAX", "PAX"]
# df_panel = clean_daily_betweenness(reg_panel)
# df_panel = df_panel[df_panel["Token"].isin(stablecoins_list)]
# df_panel = df_panel.groupby("Token").filter(
#     lambda group: group["betweenness_centrality_volume"].max() > 0
# )
# df_panel = df_panel[(df_panel['mcap'] > 0)]
# df_panel['log_mcap'] = np.log(df_panel['mcap'])

# # Set a multi-index with the security identifier and date.
# df_panel = df_panel.set_index(['Token', 'Date'])
# model = PanelOLS.from_formula(
#     "ret_lead_1 ~ betweenness_centrality_volume + log_mcap + stableshare + is_boom +gas_price_usd_log_return_vol_1_30+ gas_price_usd",
#     data=df_panel,
# )

# results = model.fit()
# print(results)  

In [44]:
reg_panel.columns

Index(['Token', 'Volume', 'Date', 'Inflow_centrality', 'Outflow_centrality',
       'TVL', 'volume_in', 'volume_out', 'borrow_rate', 'Supply_share',
       'Borrow_share', 'supply_rates', 'betweenness_centrality_volume',
       'betweenness_centrality_count', 'vol_in_full_len', 'vol_out_full_len',
       'vol_inter_full_len', 'eigen_centrality_undirected',
       'total_eigen_centrality_undirected', 'volume_ultimate',
       'dollar_exchange_rate', 'stableshare', 'mcap', 'S&P', 'timestamp',
       'gas_price_wei', 'ether_price_usd', 'gas_price_usd', 'S&P_log_return_1',
       'S&P_log_return_vol_1_30', 'ether_price_usd_log_return_1',
       'ether_price_usd_log_return_vol_1_30', 'gas_price_usd_log_return_1',
       'gas_price_usd_log_return_vol_1_30', 'Volume_share', 'TVL_share',
       'volume_in_share', 'volume_out_share', 'vol_in_full_len_share',
       'vol_out_full_len_share', 'vol_inter_full_len_share',
       'volume_ultimate_share', 'mcap_share',
       'dollar_exchange_rate_lo

In [45]:
# def clean_monthly_panel(reg_panel, is_stablecoin=0, is_boom=-1):

#     # Filter for stablecoins
#     if is_stablecoin == 1:
#         reg_panel = reg_panel[reg_panel["stableshare"] > 0]
#     elif is_stablecoin == 0:
#         reg_panel = reg_panel[reg_panel["stableshare"] == 0]
#     else:
#         pass

#     ## Filter out tokens that existed for less than 2 months
#     # reg_panel = reg_panel[
#     #     reg_panel["Token"].map(reg_panel["Token"].value_counts()) >= 60
#     # ]

#     ## Filter out tokens with low peak market capitalization
#     # reg_panel = reg_panel.groupby("Token").filter(
#     #     lambda group: group["mcap"].max() >= 50e6
#     # )

#     # Add supply rates
#     reg_panel["daily_supply_return"] = reg_panel["supply_rates"] / 365.2425
#     reg_panel.sort_values(by=["Token", "Date"], ascending=True, inplace=True)

#     # Calculate daily returns
#     reg_panel["ret"] = reg_panel.groupby("Token")["dollar_exchange_rate"].pct_change(
#         fill_method=None
#     )

#     # Compute Amihud illiquidity measure
#     reg_panel["amihud"] = np.where(
#         reg_panel["Volume"] == 0, np.nan, reg_panel["ret"].abs() / reg_panel["Volume"]
#     )
#     reg_panel["is_stablecoin"] = (
#         reg_panel.groupby("Token")["stableshare"].transform("max") > 0
#     ).astype(int)

#     # Instead of week, extract month and year for monthly aggregation
#     reg_panel["Month"] = reg_panel["Date"].dt.month
#     reg_panel["Year"] = reg_panel["Date"].dt.year
#     reg_panel["YearMonth"] = reg_panel["Year"].astype(str) + "-" + reg_panel["Month"].astype(str)

#     # Define the aggregation dictionary, unchanged except grouping key is now YearMonth
#     agg_dict = {
#         "ret": ("ret", lambda x: (1 + x).prod() - 1),
#         "mcap": ("mcap", "mean"),
#         "mcap_share": ("mcap_share", "mean"),
#         "amihud": ("amihud", "mean"),
#         "is_boom": ("is_boom", "last"),
#         "is_stablecoin": ("is_stablecoin", "last"),
#         "gas_price_usd": ("gas_price_usd", "mean"),
#         "stableshare": ("stableshare", "mean"),
#         "gas_price_usd_log_return_vol_1_30": (
#             "gas_price_usd_log_return_vol_1_30",
#             "mean",
#         ),
#         "ether_price_usd_log_return_1": ("ether_price_usd_log_return_1", "mean"),
#         "ether_price_usd_log_return_vol_1_30": (
#             "ether_price_usd_log_return_vol_1_30",
#             "mean",
#         ),
#         "S&P_log_return_vol_1_30": ("S&P_log_return_vol_1_30", "mean"),
#         "Supply_share": ("Supply_share", "mean"),
#         "supply_rates": ("supply_rates", "mean"),
#     }
#     for col in DEPENDENT_VARIABLES:
#         agg_dict[col] = (col, "mean")

#     # Group by Token and YearMonth for monthly panel
#     reg_panel = reg_panel.groupby(["Token", "YearMonth"]).agg(**agg_dict).reset_index()

#     # Ensure the DataFrame is sorted by Token and YearMonth
#     reg_panel = reg_panel.sort_values(["Token", "YearMonth"])

#     # Winsorize returns by YearMonth to reduce the impact of extreme values
#     reg_panel["ret"] = reg_panel.groupby(["YearMonth"])["ret"].transform(
#         lambda x: x.clip(lower=x.quantile(0.01), upper=x.quantile(0.99))
#     )

#     # Create lead returns (i.e., one month ahead)
#     reg_panel["ret_lead_1"] = reg_panel.groupby("Token")["ret"].shift(-1)
#     reg_panel = reg_panel.dropna(subset=["ret_lead_1"])

#     # Compute rolling 4-month returns (including current month)
#     # For a given month, ret_rolling_4 = (1+ret[t-3])*(1+ret[t-2])*(1+ret[t-1])*(1+ret[t]) - 1
#     reg_panel["ret_rolling_4"] = reg_panel.groupby("Token")["ret"].transform(
#         lambda x: (1 + x).rolling(window=4, min_periods=1).apply(np.prod, raw=True) - 1
#     )

#     # Boom and bust filtering needs to be done at the end, to prevent wrong shifting in returns
#     if is_boom == 1:
#         reg_panel = reg_panel[reg_panel["is_boom"] == 1]
#     elif is_boom == 0:
#         reg_panel = reg_panel[reg_panel["is_boom"] == 0]
#     else:
#         pass

#     return reg_panel
# import numpy as np
# import pandas as pd
# from linearmodels.panel import PanelOLS

# # -------------------------------
# # 1. Data Cleaning and Preparation
# # -------------------------------
# # (Uncomment or adjust the following lines as needed)
# # reg_panel['is_stable'] = (reg_panel.groupby('Token')['stableshare'].transform('max') > 0).astype(int)
# # reg_panel = reg_panel.sort_values(["Token", "Date"])
# # reg_panel["ret"] = reg_panel.groupby("Token")["dollar_exchange_rate"].pct_change(fill_method=None)
# # reg_panel["log_mcap"] = np.log(reg_panel["mcap"])

# # Clean your panel data using your helper function
# df_panel = clean_monthly_panel(reg_panel, is_stablecoin=-1, is_boom=-1)

# df_panel = df_panel.rename(columns={'S&P_log_return_vol_1_30': 'SP_vol'})
# # (Optional) filter for a specific list of tokens:
# # stablecoins_list = ["DAI", "USDC", "USDT", "FEI", "FRAX", "PAX"]
# # df_panel = df_panel[df_panel["Token"].isin(stablecoins_list)]

# # (Optional) you can filter tokens based on stableshare here if needed
# # df_panel = df_panel.groupby("Token").filter(lambda group: group["stableshare"].max() == 0)

# # Keep observations with positive market cap and create log_mcap
# df_panel = df_panel[(df_panel['mcap'] > 0)].copy()
# df_panel['log_mcap'] = np.log(df_panel['mcap'])




# # Create a date variable from 'WeekYear' and set a multi-index with [Token, YearWeekDay]
# df_panel['YearMonth'] = pd.to_datetime(df_panel['YearMonth'], format='%Y-%M')
# df_panel = df_panel.set_index(['Token', 'YearMonth'])

# # -------------------------------
# # 2. Define Regression Specifications
# # -------------------------------
# # Note: We use "TimeEffects" and "EntityEffects" as markers in the formula.
# # Later, we will replace them with C(YearWeekDay) and C(Token) respectively.
# dom_variable = "eigen_centrality_undirected"  # Change this to your desired variable
# all_reg_specs = [
#     "ret_lead_1 ~ {dom_variable} + log_mcap + is_stablecoin",
#     "ret_lead_1 ~ {dom_variable} + log_mcap + gas_price_usd + is_stablecoin + ether_price_usd_log_return_1",
#     "ret_lead_1 ~ {dom_variable} + log_mcap + gas_price_usd + is_stablecoin + ret + ret_rolling_4",
#     "ret_lead_1 ~ {dom_variable} + log_mcap + gas_price_usd + is_stablecoin + supply_rates + is_boom",
#     "ret_lead_1 ~ {dom_variable} + log_mcap + gas_price_usd + is_stablecoin + supply_rates + is_boom:{dom_variable}",
#     "ret_lead_1 ~ {dom_variable} + log_mcap + gas_price_usd + is_stablecoin + ether_price_usd_log_return_1 + supply_rates + is_boom + ret + ret_rolling_4",
#     "ret_lead_1 ~ {dom_variable} + log_mcap + gas_price_usd + is_stablecoin + TimeEffects + ret + ret_rolling_4",
#     "ret_lead_1 ~ {dom_variable} + log_mcap + gas_price_usd + TimeEffects + EntityEffects + ret + ret_rolling_4"
# ]

# # -------------------------------
# # 3. Run Regressions and Collect Results
# # -------------------------------
# reg_results = []

# for spec in all_reg_specs:
#     # Determine if the specification contains Time and/or Entity effects
#     has_time = "TimeEffects" in spec
#     has_entity = "EntityEffects" in spec

#     if has_time and has_entity:
#         spec_label = "Entity & Time FE"
#     elif has_time:
#         spec_label = "Time FE"
#     elif has_entity:
#         spec_label = "Entity FE"
#     else:
#         spec_label = "No FE"

#     model = PanelOLS.from_formula(spec, data=df_panel)
#     results = model.fit()

#     # Store the fitted results along with the fixed effect flags
#     reg_results.append({
#         "spec_label": spec_label,
#         "results": results,
#         "has_time": has_time,
#         "has_entity": has_entity
#     })

# def produce_latex_table(reg_results, 
#                         table_caption="OLS Regression Results",
#                         table_label="tab:ols_results"):
#     """
#     Produce a LaTeX-formatted table string from a list of regression results.
    
#     Parameters
#     ----------
#     reg_results : list of dict
#         Each element should be a dictionary with:
#           - "spec_label": a string describing the specification (e.g., "Time FE")
#           - "results": the fitted PanelOLSResults object
#           - "has_time": boolean, True if Time Fixed Effects are used.
#           - "has_entity": boolean, True if Entity Fixed Effects are used.
#     table_caption : str
#         The caption for the LaTeX table.
#     table_label : str
#         The label used for referencing the LaTeX table.
        
#     Returns
#     -------
#     latex_str : str
#         A string containing LaTeX code for the regression table.
#     """
    
#     # Gather all variable names that appear in any regression
#     all_vars = set()
#     for item in reg_results:
#         for var in item["results"].params.index:
#             # Skip intercept if you don't want it in the table
#             # if var.lower() in ["intercept", "const"]:
#             #     continue
#             all_vars.add(var)
#     all_vars = list(all_vars)
    
#     # Helper function to assign significance stars based on p-value thresholds
#     def significance_stars(p):
#         if p < 0.01:
#             return "***"
#         elif p < 0.05:
#             return "**"
#         elif p < 0.10:
#             return "*"
#         else:
#             return ""
    
#     n_regs = len(reg_results)
    
#     lines = []
#     lines.append(r"\begin{table}[ht]")
#     lines.append(r"\centering")
#     lines.append(fr"\caption{{{table_caption}}}")
#     lines.append(fr"\label{{{table_label}}}")
#     lines.append(r"\begin{tabular}{l" + "c" * n_regs + "}")
#     lines.append(r"\toprule")
    
#     # Column headers: (1), (2), (3), etc.
#     col_header = " & " + " & ".join([f"({i+1})" for i in range(n_regs)]) + r" \\"
#     lines.append(col_header)
#     lines.append(r"\midrule")
    
#     # For each variable, add a row for coefficients and a row for t-statistics.
#     # Replace underscores in variable names with spaces.
#     for var in all_vars:
#         var_label = var.replace('_', ' ')
#         coef_row = [var_label]
#         tstat_row = [""]
        
#         for item in reg_results:
#             res = item["results"]
#             if var in res.params.index:
#                 coef_val = res.params[var]
#                 p_val    = res.pvalues[var]
#                 t_stat   = res.tstats[var]
                
#                 star_str = significance_stars(p_val)
#                 coef_str = f"{coef_val:.2f}{star_str}"
#                 t_str    = f"[{t_stat:.2f}]"
#             else:
#                 coef_str = ""
#                 t_str    = ""
            
#             coef_row.append(coef_str)
#             tstat_row.append(t_str)
        
#         lines.append(" & ".join(coef_row) + r" \\")
#         lines.append(" & ".join(tstat_row) + r" \\")
    
#     # Additional regression statistics: R-squared row
#     r2_row = ["R-squared"]
#     for item in reg_results:
#         r2_val = item["results"].rsquared
#         r2_row.append(f"{r2_val:.3f}")
#     lines.append(" & ".join(r2_row) + r" \\")
    
#     # Observations row
#     nobs_row = ["Observations"]
#     for item in reg_results:
#         nobs_val = item["results"].nobs
#         nobs_row.append(f"{nobs_val:d}")
#     lines.append(" & ".join(nobs_row) + r" \\")
    
#     # --- Modified Fixed Effects Rows ---
#     # Entity Fixed Effects row
#     entity_fe_row = ["Entity FE"]
#     # Time Fixed Effects row
#     time_fe_row   = ["Time FE"]
    
#     for item in reg_results:
#         entity_fe_value = "YES" if item.get("has_entity") else "NO"
#         time_fe_value   = "YES" if item.get("has_time") else "NO"
#         entity_fe_row.append(entity_fe_value)
#         time_fe_row.append(time_fe_value)
    
#     lines.append(" & ".join(entity_fe_row) + r" \\")
#     lines.append(" & ".join(time_fe_row) + r" \\")
    
#     lines.append(r"\bottomrule")
#     lines.append(r"\end{tabular}")
#     lines.append(r"\end{table}")
    
#     return "\n".join(lines)

# # Generate and print the LaTeX table code
# table_code = produce_latex_table(reg_results)
# print(table_code)


# Weekly

In [None]:
# ---------------------------------
# 0. Imports
# ---------------------------------
import numpy as np
import pandas as pd
from linearmodels.panel import PanelOLS

# ---------------------------------
# 1. Data prep
# ---------------------------------
dom_variable = "betweenness_centrality_volume"   # default; loop will override

# Clean your panel data using your helper function
df_panel = clean_weekly_panel(reg_panel, is_stablecoin=-1, is_boom=-1)
df_panel = df_panel[df_panel[dom_variable] > 0]
df_panel = df_panel.rename(columns={'S&P_log_return_vol_1_30': 'SP_vol'})

# (Optional) filter for a specific list of tokens:
# stablecoins_list = ["DAI", "USDC", "USDT", "FEI", "FRAX", "PAX"]
# df_panel = df_panel[df_panel["Token"].isin(stablecoins_list)]

# (Optional) filter tokens on stableshare
# df_panel = df_panel.groupby("Token").filter(lambda g: g["stableshare"].max() == 0)

# Keep observations with positive market cap and create log_mcap
df_panel = df_panel[df_panel["mcap"] > 0].copy()
df_panel["log_mcap"] = np.log(df_panel["mcap"])

# Construct Δ-log-change columns for every dependent variable
for dom_variable in DEPENDENT_VARIABLES:
    df_panel[f"{dom_variable}_logchange"] = (
        np.log(df_panel[dom_variable]) - np.log(df_panel[dom_variable].shift(1))
    )

# TVL log-change (specific example that doesn’t sit in DEPENDENT_VARIABLES)
df_panel["tvl_logchange"] = np.log(df_panel["TVL"]) - np.log(df_panel["TVL"].shift(1))

# Indexing
df_panel["YearWeekDay"] = pd.to_datetime(df_panel["WeekYear"] + "-1", format="%Y-%W-%w")
df_panel = df_panel.set_index(["Token", "YearWeekDay"])
df_panel = df_panel.dropna(subset=["ret_lead_1"])

# ---------------------------------
# 2. Regression specs
# ---------------------------------
all_reg_specs = []
for dom_variable in DEPENDENT_VARIABLES:
    indepvar = f"{dom_variable}_logchange"
    all_reg_specs.append(
        f"tvl_logchange ~ {indepvar} + log_mcap + is_stablecoin + gas_price_usd + is_boom + volatility"
    )

# ---------------------------------
# 3. Run regressions
# ---------------------------------
reg_results = []

for spec in all_reg_specs:
    has_time   = "TimeEffects"   in spec
    has_entity = "EntityEffects" in spec

    spec_label = (
        "Entity & Time FE" if (has_time and has_entity)
        else "Time FE"     if has_time
        else "Entity FE"   if has_entity
        else "No FE"
    )

    model   = PanelOLS.from_formula(spec, data=df_panel)
    results = model.fit()

    reg_results.append(
        {
            "spec_label": spec_label,
            "results": results,
            "has_time": has_time,
            "has_entity": has_entity,
        }
    )

# ---------------------------------
# 4. Helpers for LaTeX output
# ---------------------------------
def _latex_friendly_name(var_name: str) -> str:
    """
    Turn an internal regression variable name into a LaTeX-ready label.

    • Any variable that ends with '_logchange' becomes
      '$\\Delta$ <pretty base name>'.
    • Otherwise, underscores are simply replaced by spaces.
    """
    if var_name.endswith("_logchange"):
        base = var_name[:-10]                   # drop '_logchange'
        return r"$\Delta$ " + base.replace("_", " ")
    else:
        return var_name.replace("_", " ")


def produce_latex_table(
    reg_results,
    table_caption="TVL change on dominance change",
    table_label="tab:ols_results",
):
    """
    Build a LaTeX table from a list of PanelOLS regression results.
    All *_logchange regressors are printed as 'Δ <variable>'.
    """

    # Collect every parameter name that appears in any model
    all_vars = set()
    for item in reg_results:
        all_vars.update(item["results"].params.index)
    all_vars = list(all_vars)

    # Significance symbols
    def stars(p):
        return "***" if p < 0.01 else "**" if p < 0.05 else "*" if p < 0.10 else ""

    n_regs = len(reg_results)
    lines = []
    lines.append(r"\begin{table}[ht]")
    lines.append(r"\centering")
    lines.append(fr"\caption{{{table_caption}}}")
    lines.append(fr"\label{{{table_label}}}")
    lines.append(r"\begin{tabular}{l" + "c" * n_regs + "}")
    lines.append(r"\toprule")
    lines.append(" & " + " & ".join(f"({i+1})" for i in range(n_regs)) + r" \\")
    lines.append(r"\midrule")

    # Coefficients and t-statistics
    for var in all_vars:
        pretty = _latex_friendly_name(var)
        coef_row, t_row = [pretty], [""]

        for item in reg_results:
            res = item["results"]
            if var in res.params.index:
                coef = res.params[var]
                pval = res.pvalues[var]
                tval = res.tstats[var]
                coef_row.append(f"{coef:.2f}{stars(pval)}")
                t_row.append(f"[{tval:.2f}]")
            else:
                coef_row.append("")
                t_row.append("")

        lines.append(" & ".join(coef_row) + r" \\")
        lines.append(" & ".join(t_row)   + r" \\")

    # Additional statistics
    r2_row   = ["R-squared"]    + [f"{x['results'].rsquared:.3f}" for x in reg_results]
    nobs_row = ["Observations"] + [f"{x['results'].nobs:d}"      for x in reg_results]
    ent_row  = ["Entity FE"]    + ["YES" if x["has_entity"] else "NO" for x in reg_results]
    time_row = ["Time FE"]      + ["YES" if x["has_time"]   else "NO" for x in reg_results]

    for row in (r2_row, nobs_row, ent_row, time_row):
        lines.append(" & ".join(row) + r" \\")

    lines.append(r"\bottomrule")
    lines.append(r"\end{tabular}")
    lines.append(r"\end{table}")

    return "\n".join(lines)


# ---------------------------------
# 5. Generate & display table code
# ---------------------------------
table_code = produce_latex_table(reg_results)
print(table_code)


\begin{table}[ht]
\centering
\caption{OLS Regression Results}
\label{tab:ols_results}
\begin{tabular}{lccccccccc}
\toprule
 & (1) & (2) & (3) & (4) & (5) & (6) & (7) & (8) & (9) \\
\midrule
log mcap & -0.00 & -0.00 & -0.00 & -0.00 & -0.00 & -0.00 & -0.00 & -0.00 & -0.00 \\
 & [-0.64] & [-0.53] & [-1.00] & [-0.86] & [-1.05] & [-0.52] & [-0.64] & [-0.69] & [-0.62] \\
volatility & -0.02 & -0.02 & 0.06 & 0.05 & 0.06 & -0.03 & -0.03 & -0.02 & -0.03 \\
 & [-0.40] & [-0.35] & [0.97] & [0.90] & [1.02] & [-0.47] & [-0.49] & [-0.39] & [-0.52] \\
$\Delta$ eigen centrality undirected &  & 0.36*** &  &  &  &  &  &  &  \\
 &  & [12.40] &  &  &  &  &  &  &  \\
$\Delta$ total eigen centrality undirected &  &  &  &  &  & 0.41*** &  &  &  \\
 &  &  &  &  &  & [13.40] &  &  &  \\
is boom & -0.03 & -0.04 & -0.02 & -0.02 & -0.03 & -0.04 & -0.02 & -0.02 & -0.02 \\
 & [-0.80] & [-1.12] & [-0.52] & [-0.57] & [-0.58] & [-0.95] & [-0.56] & [-0.55] & [-0.58] \\
$\Delta$ volume out share &  &  &  &  &  &  &  &  &