In [1]:
"""
Script to render the asset pricing table
"""

import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
from scipy.stats import ttest_1samp
from regtabletotext import prettify_result
import warnings
warnings.filterwarnings("ignore")

from environ.constants import (
    DEPENDENT_VARIABLES,
    DEPENDENT_VARIABLES_ASSETPRICING,
    PROCESSED_DATA_PATH,
    STABLE_DICT,
    ALL_NAMING_DICT,
    TABLE_PATH,
)
from environ.process.asset_pricing.double_sorting import calculate_period_return
from environ.process.asset_pricing.assetpricing_functions import (
    reg_fama_macbeth, clean_weekly_panel, univariate_sort, univariate_sort_table, double_sort, double_sort_table, get_dominance_portfolios, significance_stars
    )
                                                                  

In [2]:
# load factors
ff3 = pd.read_csv(PROCESSED_DATA_PATH/"FF3.csv") 
ltw3 = pd.read_csv(PROCESSED_DATA_PATH/"LTW3.csv")

# load the regression panel dataset
reg_panel = pd.read_pickle(
    PROCESSED_DATA_PATH / "panel_main.pickle.zip", compression="zip"
)

# stable non-stable info dict
stable_nonstable_info = {
    "stablecoin": reg_panel[reg_panel["Token"].isin(STABLE_DICT.keys())],
    "non-stablecoin": reg_panel[~reg_panel["Token"].isin(STABLE_DICT.keys())],
    "all": reg_panel,
}

# How are returns aggregated for each portfolio
ret_agg = 'mean'

DEPENDENT_VARIABLES_ASSETPRICING =['volume_ultimate_share']  #,'volume_in_share' , 'volume_out_share']
# ,'eigen_centrality_undirected','total_eigen_centrality_undirected','Volume_share']

### Betweeness charts

In [3]:
# import pandas as pd
# import numpy as np
# import matplotlib.pyplot as plt

# dom_variable = 'betweenness_centrality_count'

# df_panel = calculate_period_return(reg_panel[reg_panel.Token.isin(['USDT','USDC','DAI','FEI', 'RAI', 'FRAX', 'PAX'])],
#     freq=1,
#     date_col= "Date",
#     daily_supply_rate_col= "supply_rates",
#     simple_dollar_ret= -1)

# df_panel["ret"] = df_panel.groupby(["Date"])['ret'].transform(lambda x: x.clip(lower=x.quantile(0.01), upper=x.quantile(0.99)))

# # Compute average statistics by currency (used in multiple charts)
# avg_stats = df_panel.groupby('Token').agg({'ret': 'mean', dom_variable: 'mean'}).reset_index()

# # Sorted Bar Chart: Average Return by Sorted Dominance ------------------
# # Sort currencies by average dominance (highest first)
# avg_stats_sorted = avg_stats.sort_values(dom_variable, ascending=False)

# plt.figure(figsize=(10,6))
# plt.bar(avg_stats_sorted['Token'], avg_stats_sorted['ret'])
# plt.xlabel('Token (sorted by factor)')
# plt.ylabel('Average Return')
# plt.title('Bar Chart: Average Return by Sorted Betweeness Cenrality')
# plt.xticks(rotation=45)
# plt.show()



### Univariate sorting

In [4]:
for dom_variable in DEPENDENT_VARIABLES_ASSETPRICING:
    for is_boom in [-1]:
        quantiles, separate_zero_value = [0,0.3,0.7,1], False #[0,0.9,0.95,1] #
        df_panel = clean_weekly_panel(reg_panel, is_stablecoin = 0, is_boom = is_boom)

        # Substract risk free rate
        df_panel = pd.merge(df_panel,ff3, on='WeekYear')
        df_panel['ret'] = df_panel['ret']-df_panel['RF']

        df_panel = univariate_sort(df_panel, dom_variable, quantiles=quantiles, separate_zero_value=separate_zero_value)
        summary_table = univariate_sort_table(df_panel, ret_agg = ret_agg)
    
        if is_boom == 1:
            boom_str = " boom"
        elif is_boom == 0:
            boom_str = " bust"
        else:
            boom_str = " alltime"
        summary_table = summary_table.style.set_caption(dom_variable+' '+boom_str)
        display(summary_table)

Unnamed: 0,P1,P2,P3,P3-P1
Mean,0.019116,0.00881,0.005646,-0.01347
t-Stat,1.609453,0.792199,0.60453,-2.033038
StdDev,0.137491,0.128728,0.10812,0.076695
Sharpe,1.003976,0.494173,0.377105,-1.268208


In [5]:
df_panel.portfolio.value_counts()

portfolio
P1    26517
P2     2940
P3     2260
Name: count, dtype: int64

In [6]:
test = clean_weekly_panel(reg_panel, is_stablecoin = 0, is_boom = -1)
test.describe()

Unnamed: 0,ret,volume_ultimate_share,eigen_centrality_undirected,vol_inter_full_len_share,betweenness_centrality_volume,betweenness_centrality_count,total_eigen_centrality_undirected,Volume_share,volume_in_share,volume_out_share,mcap,amihud,ret_lead_1
count,31717.0,31717.0,31717.0,31717.0,31717.0,31717.0,31717.0,31717.0,31717.0,31717.0,31717.0,7451.0,31717.0
mean,0.002189,0.004126,0.00953,0.004207,0.001994,0.002726,0.009403,0.004133,0.004133,0.004133,1789099000.0,9.693595e-06,0.004515
std,0.235466,0.03364,0.063393,0.047683,0.033558,0.041681,0.063652,0.0343,0.034246,0.034371,18233030000.0,0.0002443119,0.238711
min,-0.800279,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.800279
25%,-0.11499,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6019431.0,8.541736e-09,-0.114536
50%,-0.011518,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,40425720.0,3.372519e-08,-0.011403
75%,0.070594,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,192412100.0,1.145171e-07,0.07244
max,4.410082,0.467854,0.705255,0.993955,0.979208,0.97726,0.706829,0.495497,0.50516,0.514906,556027500000.0,0.0128355,4.410082


In [7]:
df_panel.groupby('portfolio')['mcap'].median()

portfolio
P1    2.733091e+07
P2    1.427323e+08
P3    1.431262e+09
Name: mcap, dtype: float64

### Double sort

In [8]:
for secondary_variable in ['mcap']:
    for dom_variable in DEPENDENT_VARIABLES_ASSETPRICING:
        for is_boom in [-1]:
            quantiles, separate_zero_value = [0,0.3,0.7,1], False #[0,0.25,0.5,0.75,1] 
            df_panel = clean_weekly_panel(reg_panel, is_stablecoin = 0, is_boom = is_boom)
            df_panel = pd.merge(df_panel,ff3, on='WeekYear')
            df_panel= double_sort(df_panel, dom_variable, secondary_variable=secondary_variable, quantiles=quantiles, separate_zero_value=separate_zero_value)
            summary_table = double_sort_table(df_panel, ret_agg="mean")
            if is_boom == 1:
                boom_str = " boom"
            elif is_boom == 0:
                boom_str = " bust"
            else:
                boom_str = "alltime"
            summary_table = summary_table.style.set_caption(dom_variable +' '+ boom_str)
            display(summary_table)

primary_portfolio,P1,P2,P3
secondary_portfolio,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Q1,0.011587,0.01004,-0.004693
Q2,-0.000819,-0.005455,0.003763
Q3,0.002562,0.016821,0.011601


# Factor testing

In [9]:

factor_models = ["MKT + SMB + HML", "CMKT", "CMKT + CMOM + CSIZE"]
is_boom = -1

for factor_model in factor_models:
    for dom_variable in DEPENDENT_VARIABLES_ASSETPRICING:
        for is_boom in [-1]:
            # 1. Prepare your data
            quantiles, separate_zero_value = [0, 0.3, 0.7, 1], False
            df_panel = clean_weekly_panel(reg_panel, is_stablecoin=0, is_boom=is_boom)
            df_panel = univariate_sort(
                df_panel, dom_variable, quantiles, separate_zero_value=separate_zero_value
            )
            dominance_portfolios = get_dominance_portfolios(df_panel)
            portfolios = list(dominance_portfolios.columns)

            # 2. Merge all factors into a single DataFrame
            factors_data = pd.merge(dominance_portfolios, ff3, on=["WeekYear"], how="left")
            factors_data = pd.merge(factors_data, ltw3, on=["WeekYear"], how="left")
            factors_data = factors_data.dropna()

            # 3. Build a list of factor names from the formula (plus "alpha")
            #    Example: factor_model="MKT + SMB + HML" => ["MKT", "SMB", "HML"]
            #    We'll store "alpha" and then each factor, plus a matching "_t" row for t-stats
            raw_factors = factor_model.replace(" ", "").split("+")
            factor_names = ["alpha"] + raw_factors  # "alpha" is the renamed Intercept
            row_list = []
            for f in factor_names:
                row_list.append(f)      # e.g. "alpha", "MKT", "SMB", ...
                row_list.append(" ")  # e.g. "alpha_t", "MKT_t", ...

            # Finally, add R-squared and N at the bottom
            row_list += ["R-squared", "N"]
            final_table = pd.DataFrame(index=row_list, columns=portfolios)

            # 4. Run a separate regression for each portfolio
            for p in portfolios:
                formula = f"{p} ~ {factor_model}"

                # Use Newey–West (HAC) standard errors
                model = smf.ols(formula=formula, data=factors_data).fit(
                    cov_type="HAC", cov_kwds={"maxlags": 4}
                )

                # Extract estimates, t-stats, p-values
                coefs = model.params.copy()
                tvals = model.tvalues.copy()
                pvals = model.pvalues.copy()

                # Rename "Intercept" to "alpha"
                if "Intercept" in coefs.index:
                    coefs.rename({"Intercept": "alpha"}, inplace=True)
                    tvals.rename({"Intercept": "alpha"}, inplace=True)
                    pvals.rename({"Intercept": "alpha"}, inplace=True)

                # Fill each factor row with the coefficient and the next row with the t-stat
                for f in factor_names:
                    # If the factor is in the model (sometimes a factor might be missing),
                    # then fill the table; otherwise leave as NaN
                    if f in coefs.index:
                        star = significance_stars(pvals[f])
                        # Row for coefficient (with stars)
                        final_table.loc[f, p] = f"{coefs[f]:.4f}{star}"
                        # Row for t-stat
                        final_table.loc[" ", p] = f"({tvals[f]:.2f})"
                    else:
                        # If factor not found in the regression, fill with blanks or zeros
                        final_table.loc[f, p] = ""
                        final_table.loc[" ", p] = ""

                # Fill in R-squared and # obs
                final_table.loc["R-squared", p] = f"{model.rsquared:.3f}"
                final_table.loc["N", p]         = f"{int(model.nobs)}"

            # 5. Print or export the final table
            print(f"== Results for {dom_variable} | Model: {factor_model} ")
            print(final_table.to_latex())


== Results for volume_ultimate_share | Model: MKT + SMB + HML 
\begin{tabular}{lllll}
\toprule
 & P1 & P2 & P3 & P3-P1 \\
\midrule
alpha & 0.0191 & 0.0104 & 0.0061 & 0.0061 \\
  & (-0.84) & (-1.18) & (-0.70) & (-0.70) \\
MKT & 0.4225 & 0.1291 & 0.2958 & 0.2958 \\
  & (-0.84) & (-1.18) & (-0.70) & (-0.70) \\
SMB & 0.3808 & -0.0246 & -0.3336 & -0.3336 \\
  & (-0.84) & (-1.18) & (-0.70) & (-0.70) \\
HML & -0.3910 & -0.5681 & -0.3066 & -0.3066 \\
  & (-0.84) & (-1.18) & (-0.70) & (-0.70) \\
R-squared & 0.022 & 0.017 & 0.014 & 0.014 \\
N & 134 & 134 & 134 & 134 \\
\bottomrule
\end{tabular}

== Results for volume_ultimate_share | Model: CMKT 
\begin{tabular}{lllll}
\toprule
 & P1 & P2 & P3 & P3-P1 \\
\midrule
alpha & 0.0188 & 0.0090 & 0.0066 & 0.0066 \\
  & (0.21) & (-0.14) & (-0.97) & (-0.97) \\
CMKT & 0.0267 & -0.0154 & -0.0772 & -0.0772 \\
  & (0.21) & (-0.14) & (-0.97) & (-0.97) \\
R-squared & 0.000 & 0.000 & 0.005 & 0.005 \\
N & 134 & 134 & 134 & 134 \\
\bottomrule
\end{tabular}

== Res

### FAMA MCBETH 

In [None]:
factor_models = ["CMKT+CMOM+CSIZE"]
is_boom = -1

for factor_model in factor_models:
    for dom_variable in DEPENDENT_VARIABLES_ASSETPRICING:
        quantiles, separate_zero_value = [0, 0.3, 0.7, 1], False
        df_panel = clean_weekly_panel(reg_panel, is_stablecoin=0, is_boom=is_boom)
        df_panel = univariate_sort(df_panel, dom_variable, quantiles, separate_zero_value=separate_zero_value)
        dominance_factor = get_dominance_portfolios(df_panel)
        dominance_factor.rename(columns={dominance_factor.columns[-1]: "CDOM"}, inplace=True)
        # Get the test assets
        assets_panel = clean_weekly_panel(reg_panel, is_stablecoin=0, is_boom=-1)
        # Merge all factors
        data_fama_macbeth = pd.merge(dominance_factor, ff3, on=["WeekYear"], how="left")
        data_fama_macbeth = pd.merge(data_fama_macbeth, ltw3, on=["WeekYear"], how="left")
        # Merge factors with returns
        data_fama_macbeth = pd.merge(data_fama_macbeth, assets_panel, on=["WeekYear"], how="left")
        data_fama_macbeth = data_fama_macbeth.dropna()

        # Run the Fama–MacBeth regression
        data_fama_macbeth['excess_ret'] = data_fama_macbeth['ret'] - data_fama_macbeth['RF']
        fama_macbeth_results = reg_fama_macbeth(data_fama_macbeth, formula="excess_ret ~ CMKT + CMOM + CSIZE + CDOM")

        # Set caption based on is_boom value
        if is_boom == 1:
            boom_str = " boom"
        elif is_boom == 0:
            boom_str = " bust"
        else:
            boom_str = " alltime"
        caption_str = dom_variable + boom_str

        # Convert regression results DataFrame to a LaTeX table string
        # latex_table = fama_macbeth_results.to_latex(index=False)

        # # Wrap the table with a caption and table environment
        # latex_table = (
        #     "\\begin{table}[ht]\n"
        #     "\\centering\n"
        #     "\\caption{" + caption_str + "}\n"
        #     + latex_table +
        #     "\n\\end{table}"
        # )

        # # Print the LaTeX table code
        # print(latex_table)
        file_name = (
                TABLE_PATH
                / "assetpricing"
                / f"assetpricing_famamacbeth_{dom_variable}_{factor_model}"
            )
        fama_macbeth_results.to_latex(
            f"{file_name}.tex",
            index=True,
            escape=False,
        )
