In [158]:
import pandas as pd
import numpy as np
import seaborn as sns
from pandasql import sqldf
from sklearn import linear_model
import statsmodels.api as sm
from scipy import stats
from finance_byu.fama_macbeth import fama_macbeth, fm_summary

In [159]:
CRSP = pd.read_sas('crsp.sas7bdat', encoding='latin-1') #fp3v1
COMP = pd.read_sas('comp.sas7bdat', encoding='latin-1') #fp3v2
FF4 = pd.read_sas('ff4data.sas7bdat', encoding='latin-1')
COMP_ANNUAL = pd.read_sas('comp_annual.sas7bdat', encoding='latin-1') # fp3v3
INDUSTRY =  pd.read_csv('industries.csv')

In [None]:
def fill_zero(df, var):
    temp = df[var].isna().sum()
    df[var].fillna(0, inplace=True)
    print("NAN values for " + str(var) + ": " + str(temp))
    temp = df[var].isna().sum()
    print("NAN values for " + str(var) + " filled: " + str(temp) + " remaining")

def calculate_credit_metrics(compustat_data):
    credit_metrics_df = pd.DataFrame()

    # Rename 
    credit_metrics_df["Total_Assets"] = compustat_data["AT"]
    credit_metrics_df["Current_Liabilities"] = compustat_data["LCO"]
    credit_metrics_df["Long_Term_Debt"] = compustat_data["DLTT"]
    credit_metrics_df["Total_Liabilities"] = compustat_data["LT"]
    credit_metrics_df["Preferred_Stock"] = compustat_data["PSTK"]
    credit_metrics_df["Interest_Expense"] = compustat_data["XINT"]
    credit_metrics_df["Cash_Short_Term_Investments"] = compustat_data["CHE"]
    credit_metrics_df["Net_Income"] = compustat_data["NI"]
    credit_metrics_df["Inventory"] = compustat_data["INVT"]
    credit_metrics_df["Total_Equity"] = compustat_data["CEQ"]

    # Calculate financial ratios
    credit_metrics_df["Debt_Equity_Ratio"] = credit_metrics_df["Total_Liabilities"] / credit_metrics_df["Total_Equity"]
    credit_metrics_df["Debt_Ratio"] = credit_metrics_df["Total_Liabilities"] / credit_metrics_df["Total_Assets"]
    credit_metrics_df["Current_Ratio"] = credit_metrics_df["Current_Liabilities"] / credit_metrics_df["Total_Assets"]
    credit_metrics_df["Quick_Ratio"] = (credit_metrics_df["Current_Liabilities"] - credit_metrics_df["Inventory"]) / credit_metrics_df["Total_Assets"]
    credit_metrics_df["Interest_Coverage_Ratio"] = credit_metrics_df["Net_Income"] / credit_metrics_df["Interest_Expense"]
    credit_metrics_df["Return_On_Assets"] = credit_metrics_df["Net_Income"] / credit_metrics_df["Total_Assets"]
    credit_metrics_df["Return_On_Equity"] = credit_metrics_df["Net_Income"] / credit_metrics_df["Total_Equity"]
    credit_metrics_df["Asset_Turnover_Ratio"] = credit_metrics_df["Net_Income"] / credit_metrics_df["Total_Assets"]
    credit_metrics_df["Inventory_Turnover_Ratio"] = credit_metrics_df["Net_Income"] / credit_metrics_df["Inventory"]

    return credit_metrics_df

def calculate_credit_score(credit_metrics_df):
    # Define weights for each financial metric
    weights = {
        "Debt_Equity_Ratio": 0.2,
        "Debt_Ratio": 0.1,
        "Current_Ratio": 0.1,
        "Quick_Ratio": 0.1,
        "Interest_Coverage_Ratio": 0.1,
        "Return_On_Assets": 0.1,
        "Return_On_Equity": 0.1,
        "Asset_Turnover_Ratio": 0.1,
        "Inventory_Turnover_Ratio": 0.1
    }

    # Calculate the weighted sum of normalized metrics
    credit_score = (credit_metrics_df * pd.Series(weights)).sum(axis=1)

    return credit_score

In [None]:
class Factors:
  @staticmethod
  def _create_hedge_portfolio(data: pd.DataFrame, factor_col: str, q=10, direction=1) -> pd.DataFrame:
    """
    Creates a hedge portfolio, using D10 - D1 returns by default.

    Args:
      data (pd.DataFrame): Data, must contain columns ['monthid', 'RET', factor_col]
      factor_col (str): Name of factor column, i.e. ep1
      q (int, optional): Defaults to deciles (10).
      direction (int, optional): Either 1 or -1. If 1, we do High - Low. Otherwise, Low - High.

    Returns: Return of hedge portfolio at each monthid (i.e. the factor)
    """
    factor = []
    index = []  # monthid - 1
    mth_grp = data.groupby('monthid')
    data['rank'] = mth_grp[factor_col].transform(lambda x: pd.qcut(x.rank(method="first"), 10, labels=False) if not np.isnan(x).all() else x)  # if statement in case all NaN
    for monthid, mdata in mth_grp:
      # TODO: can add value weighting of returns here if anyone needs it
      d1 = mdata[mdata['rank'] == 0]['RET'].mean()
      d10 = mdata[mdata['rank'] == q-1]['RET'].mean()
      factor.append(direction * (d10 - d1))
      index.append(monthid - 1)
    return pd.DataFrame(factor, index=index)
  
  @staticmethod
  def dNoa(data):
    data = data.copy()

    factor = []
    annual_data = sqldf("SELECT a.*, b.* \
                            FROM COMP_ANNUAL as a \
                            INNER JOIN INDUSTRY as b \
                            ON a.SIC >= b.lhs and a.SIC <= b.rhs")
    annual_data['year'] = annual_data['FYEAR'].astype(int)
    annual_data['LPERMNO'] = annual_data['LPERMNO'].astype(int)
    annual_data['XSGA'].fillna(0, inplace=True)
    annual_data.sort_values(by=['LPERMNO', 'year', 'XSGA'], inplace=True)
    annual_data.drop_duplicates(subset=['LPERMNO', 'year'], keep='last', inplace=True)
    
    # annual_data = COMP_ANNUAL
    fill_zero(annual_data, 'DLC')
    fill_zero(annual_data, 'DLTT')
    fill_zero(annual_data, 'MIB')
    fill_zero(annual_data, 'PSTK')

    operating_assets = annual_data["AT"] - annual_data["CHE"]
    operating_liabilities = annual_data["AT"] - annual_data["DLC"] - annual_data["DLTT"] - annual_data["MIB"] - annual_data["PSTK"] - annual_data["CEQ"]

    Noa = operating_assets - operating_liabilities

    lagged_total_assets = annual_data["AT"].shift(1)  # 1-year-lagged total assets
    dNoa = (Noa - Noa.shift(1)) / lagged_total_assets

    annual_data["dNoa"] = dNoa
    annual_data["dNoa"].fillna(0, inplace=True)

    data = sqldf("SELECT a.*, b.dNoa \
                  FROM data as a \
                  INNER JOIN annual_data as b \
                  ON a.PERMNO = b.LPERMNO and \
                  ((a.year = b.year and a.month >= 7) or \
                    (a.year = b.year + 1 and a.month <= 6))")
    mth_grp = data.groupby('monthid')
    data['rank'] = mth_grp['dNoa'].transform(lambda x: pd.qcut(x.rank(method='first'), 10, labels=False))
    for _, mdata in mth_grp:
      d1 = mdata[mdata['rank'] == 0]['RET'].mean()
      d10 = mdata[mdata['rank'] == 9]['RET'].mean()
      factor.append(d10 - d1)
    return factor

  @staticmethod
  def Nsi(data):
    data = data.copy()

    factor = []
    annual_data = sqldf("SELECT a.*, b.* \
                            FROM COMP_ANNUAL as a \
                            INNER JOIN INDUSTRY as b \
                            ON a.SIC >= b.lhs and a.SIC <= b.rhs")
    annual_data['year'] = annual_data['FYEAR'].astype(int)
    annual_data['LPERMNO'] = annual_data['LPERMNO'].astype(int)
    annual_data['XSGA'].fillna(0, inplace=True)
    annual_data.sort_values(by=['LPERMNO', 'year', 'XSGA'], inplace=True)
    annual_data.drop_duplicates(subset=['LPERMNO', 'year'], keep='last', inplace=True)

    annual_data["CSHO_t_minus_1_adjusted"] = annual_data["CSHO"] * annual_data["AJEX"]

    test_shifted = annual_data.shift(periods=1)
    annual_data["CSHO_t_minus_2_adjusted"] = test_shifted["CSHO"] * test_shifted["AJEX"]

    annual_data["Nsi_u"] = np.log(annual_data["CSHO_t_minus_1_adjusted"] / annual_data["CSHO_t_minus_2_adjusted"])
    annual_data["Nsi_u"].fillna(0, inplace=True)

    annual_data["Nsi"] = np.where(annual_data["Nsi_u"] < 0, np.where(annual_data["Nsi_u"] < annual_data["Nsi_u"].quantile(0.5), 1, 2),
                             np.where(annual_data["Nsi_u"] == 0, 3,
                                      np.where(annual_data["Nsi_u"] <= annual_data["Nsi_u"].quantile(0.1), 4,
                                               np.where(annual_data["Nsi_u"] <= annual_data["Nsi_u"].quantile(0.2), 5,
                                                        np.where(annual_data["Nsi_u"] <= annual_data["Nsi_u"].quantile(0.3), 6,
                                                                 np.where(annual_data["Nsi_u"] <= annual_data["Nsi_u"].quantile(0.4), 7,
                                                                          np.where(annual_data["Nsi_u"] <= annual_data["Nsi_u"].quantile(0.5), 8,
                                                                                   np.where(annual_data["Nsi_u"] <= annual_data["Nsi_u"].quantile(0.6), 9, 10))))))))
    annual_data["Nsi"]

    data = sqldf("SELECT a.*, b.Nsi \
                  FROM data as a \
                  INNER JOIN annual_data as b \
                  ON a.PERMNO = b.LPERMNO and \
                  ((a.year = b.year and a.month >= 7) or \
                    (a.year = b.year + 1 and a.month <= 6))")
    mth_grp = data.groupby('monthid')
    data['rank'] = mth_grp['Nsi'].transform(lambda x: pd.qcut(x.rank(method='first'), 10, labels=False))
    for _, mdata in mth_grp:
      d1 = mdata[mdata['rank'] == 0]['RET'].mean()
      d10 = mdata[mdata['rank'] == 9]['RET'].mean()
      factor.append(d10 - d1)
    return factor

  @staticmethod
  def dNca(data):
    data = data.copy()

    factor = []
    annual_data = sqldf("SELECT a.*, b.* \
                            FROM COMP_ANNUAL as a \
                            INNER JOIN INDUSTRY as b \
                            ON a.SIC >= b.lhs and a.SIC <= b.rhs")
    annual_data['year'] = annual_data['FYEAR'].astype(int)
    annual_data['LPERMNO'] = annual_data['LPERMNO'].astype(int)
    annual_data['XSGA'].fillna(0, inplace=True)
    annual_data.sort_values(by=['LPERMNO', 'year', 'XSGA'], inplace=True)
    annual_data.drop_duplicates(subset=['LPERMNO', 'year'], keep='last', inplace=True)
    
    fill_zero(annual_data, "IVAO")

    annual_data["Nca"] = annual_data["AT"] - annual_data["ACT"] - annual_data["IVAO"]
    annual_data["dNca_o"] = annual_data["Nca"].diff()
    annual_data["dNca_o"].fillna(0, inplace=True)

    total_assets_t_minus_2 = annual_data['AT'].iloc[-3]

    annual_data["dNca"] = annual_data["dNca_o"] / total_assets_t_minus_2

    data = sqldf("SELECT a.*, b.dNca \
                  FROM data as a \
                  INNER JOIN annual_data as b \
                  ON a.PERMNO = b.LPERMNO and \
                  ((a.year = b.year and a.month >= 7) or \
                    (a.year = b.year + 1 and a.month <= 6))")
    mth_grp = data.groupby('monthid')
    data['rank'] = mth_grp['dNca'].transform(lambda x: pd.qcut(x.rank(method='first'), 10, labels=False))
    for _, mdata in mth_grp:
      d1 = mdata[mdata['rank'] == 0]['RET'].mean()
      d10 = mdata[mdata['rank'] == 9]['RET'].mean()
      factor.append(d10 - d1)
    return factor

  @staticmethod
  def dFnl(data):
    data = data.copy()

    factor = []
    annual_data = sqldf("SELECT a.*, b.* \
                            FROM COMP_ANNUAL as a \
                            INNER JOIN INDUSTRY as b \
                            ON a.SIC >= b.lhs and a.SIC <= b.rhs")
    annual_data['year'] = annual_data['FYEAR'].astype(int)
    annual_data['LPERMNO'] = annual_data['LPERMNO'].astype(int)
    annual_data['XSGA'].fillna(0, inplace=True)
    annual_data.sort_values(by=['LPERMNO', 'year', 'XSGA'], inplace=True)
    annual_data.drop_duplicates(subset=['LPERMNO', 'year'], keep='last', inplace=True)
    
    fill_zero(annual_data, "DLTT")
    fill_zero(annual_data, "DLC")
    fill_zero(annual_data, "PSTK")

    annual_data["Fnl"] = annual_data["DLTT"] + annual_data["DLC"] + annual_data["PSTK"]
    annual_data["dFnl_o"] = annual_data["Fnl"].diff()
    annual_data["dFnl_o"].fillna(0, inplace=True)
    total_assets_t_minus_2 = annual_data['AT'].iloc[-3]

    annual_data["dFnl"] = annual_data["dFnl_o"] / total_assets_t_minus_2

    data = sqldf("SELECT a.*, b.dFnl \
                  FROM data as a \
                  INNER JOIN annual_data as b \
                  ON a.PERMNO = b.LPERMNO and \
                  ((a.year = b.year and a.month >= 7) or \
                    (a.year = b.year + 1 and a.month <= 6))")
    mth_grp = data.groupby('monthid')
    data['rank'] = mth_grp['dFnl'].transform(lambda x: pd.qcut(x.rank(method='first'), 10, labels=False))
    for _, mdata in mth_grp:
      d1 = mdata[mdata['rank'] == 0]['RET'].mean()
      d10 = mdata[mdata['rank'] == 9]['RET'].mean()
      factor.append(d10 - d1)
    return factor

  @staticmethod
  def creditrisk(data):
    data = data.copy()

    factor = []
    annual_data = sqldf("SELECT a.*, b.* \
                            FROM COMP_ANNUAL as a \
                            INNER JOIN INDUSTRY as b \
                            ON a.SIC >= b.lhs and a.SIC <= b.rhs")
    annual_data['year'] = annual_data['FYEAR'].astype(int)
    annual_data['LPERMNO'] = annual_data['LPERMNO'].astype(int)
    annual_data['XSGA'].fillna(0, inplace=True)
    annual_data.sort_values(by=['LPERMNO', 'year', 'XSGA'], inplace=True)
    annual_data.drop_duplicates(subset=['LPERMNO', 'year'], keep='last', inplace=True)
    
    credit_metrics = calculate_credit_metrics(annual_data)
    credit_score = calculate_credit_score(credit_metrics)
    annual_data["cdrk"] = credit_score

    data = sqldf("SELECT a.*, b.cdrk \
                  FROM data as a \
                  INNER JOIN annual_data as b \
                  ON a.PERMNO = b.LPERMNO and \
                  ((a.year = b.year and a.month >= 7) or \
                    (a.year = b.year + 1 and a.month <= 6))")
    mth_grp = data.groupby('monthid')
    data['rank'] = mth_grp['cdrk'].transform(lambda x: pd.qcut(x.rank(method='first'), 10, labels=False))
    for _, mdata in mth_grp:
      d1 = mdata[mdata['rank'] == 0]['RET'].mean()
      d10 = mdata[mdata['rank'] == 9]['RET'].mean()
      factor.append(d10 - d1)
    return factor


  @staticmethod
  def epq1(data):
    factor = []
    data = data.copy()
    data = data[data['IBQ'] >= 0]
    data['Epq1'] = data['IBQ'] * 1000000 / data['MKTCAP']
    mth_grp = data.groupby('monthid')
    data['rank'] = mth_grp['Epq1'].transform(lambda x: pd.qcut(x, 10, labels=False))
    for _, mdata in mth_grp:
      d1 = mdata[mdata['rank'] == 0]['RET'].mean()
      d10 = mdata[mdata['rank'] == 9]['RET'].mean()
      factor.append(d1 - d10)
    return factor
  
  @staticmethod
  def ioca(data):
    CPI = pd.read_csv('cpi.csv')
    g = .10
    d = .15
    factor = []
    annual_data = sqldf("SELECT a.*, b.* \
                            FROM COMP_ANNUAL as a \
                            INNER JOIN INDUSTRY as b \
                            ON a.SIC >= b.lhs and a.SIC <= b.rhs")
    annual_data['year'] = annual_data['FYEAR'].astype(int)
    annual_data['LPERMNO'] = annual_data['LPERMNO'].astype(int)
    annual_data['XSGA'].fillna(0, inplace=True)
    annual_data = pd.merge(annual_data, CPI, on='year')
    annual_data.sort_values(by=['LPERMNO', 'year', 'XSGA'], inplace=True)
    annual_data.drop_duplicates(subset=['LPERMNO', 'year'], keep='last', inplace=True)
    grouped = annual_data.groupby('LPERMNO')
    oc = []
    for _, grp in grouped:
      prev = None
      for _, row in grp.iterrows():
        if prev == None:
          prev = row['XSGA'] / (g+d)
        else:
          prev = (1 - d) * prev + row['XSGA'] / row['CPI']
        oc.append(prev)
    annual_data['oc'] = oc
    annual_data = annual_data[(annual_data['oc'] > 0) & (annual_data['AT'] > 0)]
    annual_data['oca'] = annual_data['oc'] / annual_data['AT']
    def yearly_winsorize(srs):
      p1, p99 = np.nanpercentile(srs, [1, 99])
      return np.clip(srs, p1, p99)
    annual_data['ioca'] = annual_data.groupby('year')['oca'].transform(yearly_winsorize)
    annual_data['ioca'] = annual_data.groupby('industry')['ioca'].transform(lambda x: (x - x.mean()) / x.std())
    data = sqldf("SELECT a.*, b.ioca \
                  FROM data as a \
                  INNER JOIN annual_data as b \
                  ON a.PERMNO = b.LPERMNO and \
                  ((a.year = b.year and a.month >= 7) or \
                    (a.year = b.year + 1 and a.month <= 6))")
    mth_grp = data.groupby('monthid')
    data['rank'] = mth_grp['ioca'].transform(lambda x: pd.qcut(x.rank(method='first'), 10, labels=False))
    for _, mdata in mth_grp:
      d1 = mdata[mdata['rank'] == 0]['RET'].mean()
      d10 = mdata[mdata['rank'] == 9]['RET'].mean()
      factor.append(d10 - d1)
    return factor
  
  @staticmethod
  def ra26(data):
    factor = []
    data = data.sort_values(['PERMNO','monthid'])
    datagrp = data.groupby('PERMNO')
    for _, grp in datagrp:
      for mid in grp['monthid']:
        tm = grp[grp['monthid'].isin([mid - 24, mid - 36, mid - 48, mid - 60])]
        if (len(tm) < 4):
          factor.append(np.NaN)
        else:
          factor.append(tm['RET'].mean())
    data['ra26'] = factor
    return Factors._create_hedge_portfolio(data, 'ra26')

  @staticmethod
  def Abr1(data):
    factor = []
    data = data.copy()

    # RET_DAILY = pd.read_sas('ret_daily.sas7bdat', encoding='latin-1')  sas is SLOW
    ret_daily = pd.read_parquet("ret_daily.parquet")
    vwret = pd.read_sas('us_vwret.sas7bdat', encoding='latin-1').set_index("date")

    ret_daily["monthid"] = (ret_daily.DATE.dt.year-1975)*12 + ret_daily.DATE.dt.month

    stock_returns = ret_daily.set_index(["PERMNO", "DATE"]).sort_index()
    del ret_daily  # save memory
    # faster to pivot first and then do the operations, rather than doing a groupby
    stock_returns = stock_returns.pivot_table(values="RET", columns="PERMNO", index="DATE")
    stock_returns = stock_returns.sub(vwret.VWRETD, axis=0)  # return over market

    # get all 4-period returns from T-3 to T+1
    stock_returns = stock_returns.rolling(4).sum().shift(-1)
    stock_returns = pd.DataFrame(stock_returns.stack())
    stock_returns = stock_returns.rename(columns={0:"Abr"})

    # Merge on RDQ date so we only have the returns around earnings date
    rdqs = data[["PERMNO", "RDQ"]].dropna()
    rdqs.RDQ = pd.to_datetime(rdqs.RDQ)
    rdqs = rdqs.drop_duplicates()
    rdqs = rdqs.merge(stock_returns, left_on=["RDQ", "PERMNO"], right_index=True, how="left")
    del stock_returns  # save memory
    # This is the date we use to calculate the monthid to avoid lookahead bias,
    # since Abr is calculated using RDQ-3 to RDQ+1 returns.
    rdqs["RDQ+1"] = rdqs.RDQ + pd.offsets.BDay(1)
    rdqs["monthid"] = (rdqs["RDQ+1"].dt.year-1975)*12 + rdqs["RDQ+1"].dt.month

    rdqs = rdqs.set_index(["PERMNO", "monthid"]).sort_index()

    # duplicates can occur where RDQ is in the same month. We drop the first month-PERMNO duplicate
    rdqs = rdqs[~rdqs.index.duplicated(keep="last")]

    final = pd.DataFrame(index=data.set_index(["PERMNO", "monthid"]).index).sort_index()
    final["Abr1"] = rdqs.Abr
    # forward fill only 6 months to avoid stale data
    # TODO: could possibly be problems if data skips some monthids, but we still have no lookahead bias, just possible stale data
    final = final.groupby(level=0).ffill(limit=6)

    data = data.merge(final, how="left", left_on=["PERMNO", "monthid"], right_index=True).sort_values(by=['PERMNO', 'monthid'])

    return Factors._create_hedge_portfolio(data, "Abr1", q=10, direction=1)
    # return data

  @staticmethod
  def e11(data):
    """
    Calculate ε11 factor (Residual momentum, prior 11-month returns) with 1 month holding period.
    """
    data = data.copy()
    
    # Get a series of rf indexed by date
    rf = data.set_index("monthid")[["RF"]].reset_index().drop_duplicates(subset="monthid")
    rf = rf.set_index("monthid").sort_index().RF

    # Get xret with each permno in a column
    xret = data.loc[:,~data.columns.duplicated()].pivot_table(index="monthid", columns="PERMNO", values="RET").sort_index()
    xret = xret.sub(rf, axis=0)

    # Get ff data with constant
    ff_3 = data[["monthid", "SMB", "HML", "MKTRF"]].drop_duplicates(subset="monthid").set_index("monthid").sort_index()
    ff_3 = sm.add_constant(ff_3)

    from numpy.linalg import pinv

    def last_ff_residual(series: pd.Series, ff: pd.DataFrame) -> float:
      """Computes FF residuals for a series of excess returns.

      Args:
        series (pd.DataFrame): rolling excess returns. Must have no null values.
        ff (pd.DataFrame): factors, must have overlapping index with `series`. All columns are used as factors. Must have constant column added.

      Returns: residual on last date T
      """
      # y = series
      x = ff.loc[series.index]
      
      # ffmodel = sm.OLS(y, x).fit()
      # residual_values = ffmodel.resid

      # sm.OLS is too slow, do it with linear algebra instead
      params = pinv(x).dot(series)

      # calculate last residual
      t_residual = series.iloc[-1] - x.iloc[-1].dot(params)
      
      return t_residual

    residuals = xret.rolling(window=36, min_periods=36).apply(
      lambda series: last_ff_residual(series, ff_3)
    )
    scaled_residuals = residuals / residuals.rolling(36, min_periods=12).std()

    # The time T residual momentum is the sum of residual returns for T-12, T-11, ... T-1
    # We do this with a rolling 11 period sum, and then shift down by 1
    e11 = scaled_residuals.rolling(11).sum().shift()

    # Reshape so the columns are [monthid, PERMNO, e11] and merge into data
    e11 = pd.DataFrame(e11.stack()).rename(columns={0: "e11"})
    data = data.merge(e11, how="left", left_on=["monthid", "PERMNO"], right_index=True)

    return Factors._create_hedge_portfolio(data, "e11", q=10, direction=1)

  @staticmethod
  def Re1(data):
    data = data.copy()

    ibes_link = pd.read_csv("crsp_ibes_link.csv")  # ibes ticker to PERMNO map  https://wrds-www.wharton.upenn.edu/pages/get-data/linking-suite-wrds/ibes-crsp-link/
    ibes_eps = pd.read_csv("ibes_eps_estimate.csv")  # Mean EPS estimates  https://wrds-www.wharton.upenn.edu/pages/get-data/ibes-thomson-reuters/ibes-academic/summary-history/summary-statistics/

    ibes_link = ibes_link.dropna()

    # Only want USD currency
    ibes_eps = ibes_eps[ibes_eps.CURCODE == "USD"]

    # Get the estimates with the date and permno
    eps_est = sqldf(
      """
      select l.permno, e.STATPERS as DATE, e.MEANEST
      from ibes_eps e
      left join ibes_link l
      on e.ticker = l.TICKER and e.STATPERS >= l.sdate and e.STATPERS <= l.edate 
      """
    )

    # Add monthid
    eps_est = eps_est.drop_duplicates(subset=["DATE", "PERMNO"])
    eps_est.DATE = pd.to_datetime(eps_est.DATE)
    eps_est['monthid'] = (eps_est.DATE.dt.year-1975)*12 + eps_est.DATE.dt.month

    # Merge close price
    eps_est = eps_est.merge(data.set_index(["monthid", "PERMNO"]).PRC, on=["monthid", "PERMNO"])
    eps_est = eps_est.set_index(["PERMNO", "monthid"]).sort_index()

    # Calculate factor value for each stock

    # TODO: we should make PRC the split adjusted price as our EPS values are split adjusted
    re = eps_est.reset_index().set_index("monthid").sort_index().groupby(["PERMNO"]).apply(
        # 6 period rolling sum of (f_t - f_t-1) / p_t-1, requiring at least 4 consecutive observations
        lambda df: ((df.MEANEST - df.MEANEST.shift()) / df.PRC.shift()).rolling(window=6, min_periods=4).sum()
    )
    eps_est["Re1"] = re
    
    data = data.merge(eps_est[["Re1"]], how="left", on=["monthid", "PERMNO"])
    
    return Factors._create_hedge_portfolio(data, "Re1", q=10, direction=1)
  

  # D. Investment ==========================================
  @staticmethod 
  def _compute_shifted(data, factor_col, input_col1, input_col2, shift_amt, op='-'):
    """
    for factor calculations that require a shift of data values to avoid
    lookahead bias:
    Input:
      data: DataFrame
      factor_col: name of factor to be computed
      input_col: name of column used in the factor computation
      shift_amt: number of months to lag by for input_col
    Return:
      a dataframe with cols ['monthid', 'PERMNO', 'factor'] for merge
      into the overall data
    """
    permno_grp = data.groupby('PERMNO')
    # construct a mini dataframe of shifted values -> (monthid, permno, dRoa1 computed for the relevant month)
    factor_values = {'monthid': [], 'PERMNO': [], factor_col:[]}

    for _, pdata in permno_grp:
      # sort by monthid
      pdata = pdata.sort_values('monthid') 
      # lag Roa to avoid lookahead bias
      if (op == '-'):
        pdata[factor_col] = pdata[input_col1] - pdata[input_col2].shift(shift_amt)
      else:
        pdata[factor_col] = pdata[input_col1] / pdata[input_col2].shift(shift_amt)
      # add to dataframe
      
      factor_values['PERMNO'].extend(pdata['PERMNO'].tolist())
      factor_values['monthid'].extend(pdata['monthid'].tolist())
      factor_values[factor_col].extend(pdata[factor_col].tolist())

    factor_values = pd.DataFrame(factor_values)
    factor_values['monthid'] = factor_values['monthid'].astype(np.int64)
    factor_values['PERMNO'] = factor_values['PERMNO'].astype(np.int64)

    return pd.DataFrame(factor_values)
    
  @staticmethod
  def droe1(data):
    """dRoe: return on equity minus its value from four quarters ago """
    #   for each month t, sort all stocks into deciles based on their most recent past dRoe
    #   monthly decile returns calculated for the current month t

    # ROE = NET (RDQ) / Equity (CEQQ)   
    data = data.copy()
    data['Roe'] = data['NIQ'] / data['CEQQ']

    # require that earnings announcement date is after the current fiscal quarter end
    data = data[~(data['DATADATE'] > data['RDQ'])]

    # compute factor values while accounting for lagged inputs
    factor_values = Factors._compute_shifted(data, factor_col='dRoe1', input_col1='Roe', input_col2='Roe', shift_amt=12, op='-')
    # merge factor values back into the overall data frame
    data = data.merge(factor_values, on=["monthid", "PERMNO"])
    # then construct decile portfolios
    return Factors._create_hedge_portfolio(data, "dRoe1", q=10)
  
  @staticmethod
  def droa1(data):
    """ droa1 "is return on assets minus its value from four quarters ago" """
    factor = []
    data = data.copy()
    data['Roa'] = data['NIQ'] / data['ATQ']

    # require that earnings announcement date is after the current fiscal quarter end
    data = data[~(data['DATADATE'] > data['RDQ'])]
    # compute factor values while accounting for lagged inputs
    factor_values = Factors._compute_shifted(data, factor_col='dRoa1', input_col1='Roa', input_col2='Roa', shift_amt=12, op='-')
    # merge factor values back into the overall data frame
    data = data.merge(factor_values, on=["monthid", "PERMNO"])
    # then construct decile portfolios
    return Factors._create_hedge_portfolio(data, "dRoa1", q=10)

  @staticmethod
  def rnaq1(data):
    """ Rnaq1: Quarterly return on net operating assets """

    data = data.copy()
    # zero out NAs as specified in the paper
    fill_na_cols = ['IVAOQ', 'DLCQ', 'DLTTQ', 'MIBQ', 'PSTKQ'] 
    data[fill_na_cols] = data[fill_na_cols].fillna(0)
    # operating assets = ATQ - CHEQ - IVAOQ
    data['oa'] = data['ATQ'] - data['CHEQ'] - data['IVAOQ']
    # operating liabilities = ATQ - DLCQ - DLTTQ - MIBQ - PSTKQ - CEQQ
    data['ol'] = data['ATQ'] - data['DLCQ'] - data['DLTTQ'] - data['MIBQ'] - data['PSTKQ'] - data['CEQQ']
    # net operating assets = OA - OL
    data['noa'] = data['oa'] - data['ol']

    # compute factor values while accounting for lagged inputs - 1 quarter lagged noa
    factor_values = Factors._compute_shifted(data, factor_col='rnaq1', input_col1='OIADPQ', input_col2='noa', shift_amt=4, op='/')
    # merge factor values back into the overall data frame
    data = data.merge(factor_values, on=["monthid", "PERMNO"])

    return Factors._create_hedge_portfolio(data, 'rnaq1')

  @staticmethod
  def atoq1(data):
    """ atoq1: quarterly sales divided by 1-quarter-lagged Noa """
    data = data.copy()
    # zero out NAs as specified in the paper
    fill_na_cols = ['IVAOQ', 'DLCQ', 'DLTTQ', 'MIBQ', 'PSTKQ'] 
    data[fill_na_cols] = data[fill_na_cols].fillna(0)
    # operating assets = ATQ - CHEQ - IVAOQ
    data['oa'] = data['ATQ'] - data['CHEQ'] - data['IVAOQ']
    # operating liabilities = ATQ - DLCQ - DLTTQ - MIBQ - PSTKQ - CEQQ
    data['ol'] = data['ATQ'] - data['DLCQ'] - data['DLTTQ'] - data['MIBQ'] - data['PSTKQ'] - data['CEQQ']
    # net operating assets = OA - OL
    data['noa'] = data['oa'] - data['ol']

    # compute factor values while accounting for lagged inputs - 1 quarter lagged noa
    factor_values = Factors._compute_shifted(data, factor_col='atoq1', input_col1='SALEQ', input_col2='noa', shift_amt=4, op='/')
    # merge factor values back into the overall data frame
    data = data.merge(factor_values, on=["monthid", "PERMNO"])

    return Factors._create_hedge_portfolio(data, 'atoq1')
    

In [None]:
class Assets:
  crsp = CRSP.copy()
  comp = COMP.copy()
  ff4 = FF4.copy()
  fact = pd.DataFrame()
  data = pd.DataFrame()
  # train_start = '1975-01-01'
  # train_end = '2005-12-31'
  # test_start = '2006-01-01'
  # test_end = '2020-12-31'
  factors = {
    'epq1': Factors.epq1,
    'ioca': Factors.ioca,
    'ra26': Factors.ra26,
    'e11': Factors.e11,  # comment out for now as this takes ~12min to run
    'Re1': Factors.Re1,
    'Abr1': Factors.Abr,

    # D. Investment =====================================
    'dRoe1': Factors.droe1,
    'dRoa1': Factors.droa1,
    'rnaq1': Factors.rnaq1,
    'atoq1': Factors.atoq1,

    'dNoa': Factors.dNoa,
    'Nsi': Factors.Nsi,
    'dNca': Factors.dNca,
    'dFnl' : Factors.dFnl
    # 'creditrisk': Factors.creditrisk
  }
  factor_t = {}
  
  def __init__(self, start_date=None):

    # Make testing faster by optiSonally limiting dates
    if start_date is not None:
      self.crsp = self.crsp[self.crsp.DATE >= start_date]
      self.comp = self.comp[self.comp.DATADATE >= start_date]

    self.clean_crsp()
    self.clean_comp()
    self.clean_ff4()
    self.illiquidity_filter()
    
    self.merge_data()
    self.gen_factors()
    self.fama_macbeth()
    
  def clean_crsp(self):
    self.crsp['PERMNO'] = self.crsp['PERMNO'].astype(int)
    self.crsp['year'] = self.crsp['DATE'].dt.year
    self.crsp['month'] = self.crsp['DATE'].dt.month
    self.crsp['monthid'] = (self.crsp['year']-1975)*12 + self.crsp['month']
    self.crsp['PRC'] = self.crsp['PRC'].apply(lambda x: x if x > 0 else x * -1)
    
  def clean_comp(self):
    self.comp['qtrid'] = (self.comp['DATADATE'].dt.year-1975)*12 + self.comp['DATADATE'].dt.month
    self.comp['LPERMNO'] = self.comp['LPERMNO'].astype(int)
    self.comp.drop(columns=['CONSOL', 'INDFMT', 'DATAFMT', 'POPSRC', 'DATAFQTR', 'DATACQTR', 'CURCDQ', 'COSTAT'], 
                  inplace=True)
    
  def clean_ff4(self):
    self.ff4['monthid'] = (self.ff4['DATEFF'].dt.year-1975)*12 + self.ff4['DATEFF'].dt.month
    self.fact = self.ff4.copy()
    
  def illiquidity_filter(self):
    self.crsp = self.crsp[self.crsp['PRC'] >= 5]
    self.crsp['MKTCAP'] = self.crsp['PRC'] * self.crsp['SHROUT'] * 1000
    tmp = {}
    grp = self.crsp[(self.crsp['month'] == 1) & (self.crsp['MKTCAP'] >= 100000000)].groupby('year')
    for yr, group in grp:
      tmp[yr] = list(group['PERMNO'])
    liquidity = self.crsp.groupby(['year'])['PERMNO'].transform(lambda x: x.isin(tmp[x.name]))
    self.crsp = self.crsp[liquidity]
    
  def merge_data(self):
    lhs = self.crsp
    rhs = self.comp
    self.data = sqldf("SELECT a.*, b.* \
                       FROM lhs as a \
                       INNER JOIN rhs as b \
                       ON a.PERMNO = b.LPERMNO and a.monthid >= b.qtrid + 4 and a.monthid <= b.qtrid + 6")
    self.data.drop_duplicates(subset=['PERMNO', 'monthid'], keep='last', inplace=True)
    self.data = self.data.loc[:,~self.data.columns.duplicated()].copy()  # remove duplicate col
    self.data = pd.merge(self.data, self.ff4, on='monthid')
  
  def gen_factors(self):
    for factor, func in self.factors.items():
      try:
        self.fact[factor] = func(self.data)
      except Exception as e:
        print(f"Error generating {factor=}: {e}")
        print(f"SKIPPING THIS FACTOR")
    self.fact = pd.merge(self.fact, self.data[['monthid','RET']], on='monthid')
  
  def fama_macbeth(self):
    for factor in self.factors:
      try:
        fmb = self.fact[['monthid','RET',factor,'SMB','HML','MKTRF','UMD']].copy()
        fmb.dropna(inplace=True)
        result = fama_macbeth(fmb,'monthid','RET',[factor,'SMB','HML','MKTRF','UMD'],intercept=True)
        self.factor_t[factor] = fm_summary(result).loc[factor, 'tstat']
      except Exception as e:
        print(f"Error running Fama Macbeth on {factor=}: {e}")

In [None]:
assets = Assets()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.crsp['MKTCAP'] = self.crsp['PRC'] * self.crsp['SHROUT'] * 1000


StatementError: (builtins.MemoryError) 
[SQL: INSERT INTO lhs ("PERMNO", "DATE", "SHRCD", "EXCHCD", "PRC", "RET", "SHROUT", year, month, monthid, "MKTCAP") VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)]
[parameters: [{'PERMNO': 10002, 'DATE': datetime.datetime(1998, 1, 30, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 25.0, 'RET': 0.020408162847161293, 'SHROUT': 4246.0, 'year': 1998, 'month': 1, 'monthid': 277, 'MKTCAP': 106150000.0}, {'PERMNO': 10002, 'DATE': datetime.datetime(1998, 2, 27, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 26.5, 'RET': 0.05999999865889549, 'SHROUT': 4246.0, 'year': 1998, 'month': 2, 'monthid': 278, 'MKTCAP': 112519000.0}, {'PERMNO': 10002, 'DATE': datetime.datetime(1998, 3, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 25.875, 'RET': -0.019433962181210518, 'SHROUT': 4248.0, 'year': 1998, 'month': 3, 'monthid': 279, 'MKTCAP': 109917000.0}, {'PERMNO': 10002, 'DATE': datetime.datetime(1998, 4, 30, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 24.5, 'RET': -0.05314009636640549, 'SHROUT': 4248.0, 'year': 1998, 'month': 4, 'monthid': 280, 'MKTCAP': 104076000.0}, {'PERMNO': 10002, 'DATE': datetime.datetime(1998, 5, 29, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 27.5, 'RET': 0.12244898080825806, 'SHROUT': 4248.0, 'year': 1998, 'month': 5, 'monthid': 281, 'MKTCAP': 116820000.0}, {'PERMNO': 10002, 'DATE': datetime.datetime(1998, 6, 30, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 30.0, 'RET': 0.0955454558134079, 'SHROUT': 4248.0, 'year': 1998, 'month': 6, 'monthid': 282, 'MKTCAP': 127440000.0}, {'PERMNO': 10002, 'DATE': datetime.datetime(1998, 7, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 19.75, 'RET': -0.012500000186264515, 'SHROUT': 6372.0, 'year': 1998, 'month': 7, 'monthid': 283, 'MKTCAP': 125847000.0}, {'PERMNO': 10002, 'DATE': datetime.datetime(1998, 8, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 16.5, 'RET': -0.16455696523189545, 'SHROUT': 6372.0, 'year': 1998, 'month': 8, 'monthid': 284, 'MKTCAP': 105138000.0}, {'PERMNO': 10002, 'DATE': datetime.datetime(1998, 9, 30, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 16.25, 'RET': -0.009999999776482582, 'SHROUT': 6526.0, 'year': 1998, 'month': 9, 'monthid': 285, 'MKTCAP': 106047500.0}, {'PERMNO': 10002, 'DATE': datetime.datetime(1998, 10, 30, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 15.0, 'RET': -0.07692307978868484, 'SHROUT': 6526.0, 'year': 1998, 'month': 10, 'monthid': 286, 'MKTCAP': 97890000.0}, {'PERMNO': 10002, 'DATE': datetime.datetime(1998, 11, 30, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 15.25, 'RET': 0.01666666753590107, 'SHROUT': 6526.0, 'year': 1998, 'month': 11, 'monthid': 287, 'MKTCAP': 99521500.0}, {'PERMNO': 10002, 'DATE': datetime.datetime(1998, 12, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 15.25, 'RET': 0.005573770496994257, 'SHROUT': 7729.0, 'year': 1998, 'month': 12, 'monthid': 288, 'MKTCAP': 117867250.0}, {'PERMNO': 10002, 'DATE': datetime.datetime(1999, 1, 29, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 15.5, 'RET': 0.016393441706895828, 'SHROUT': 7729.0, 'year': 1999, 'month': 1, 'monthid': 289, 'MKTCAP': 119799500.0}, {'PERMNO': 10002, 'DATE': datetime.datetime(1999, 2, 26, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 14.0, 'RET': -0.09677419066429138, 'SHROUT': 7729.0, 'year': 1999, 'month': 2, 'monthid': 290, 'MKTCAP': 108206000.0}, {'PERMNO': 10002, 'DATE': datetime.datetime(1999, 3, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 13.75, 'RET': -0.011785713955760002, 'SHROUT': 7729.0, 'year': 1999, 'month': 3, 'monthid': 291, 'MKTCAP': 106273750.0}, {'PERMNO': 10002, 'DATE': datetime.datetime(1999, 4, 30, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 13.0, 'RET': -0.05454545468091965, 'SHROUT': 7729.0, 'year': 1999, 'month': 4, 'monthid': 292, 'MKTCAP': 100477000.0}, {'PERMNO': 10002, 'DATE': datetime.datetime(1999, 5, 28, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 13.1875, 'RET': 0.014423076994717121, 'SHROUT': 7729.0, 'year': 1999, 'month': 5, 'monthid': 293, 'MKTCAP': 101926187.5}, {'PERMNO': 10002, 'DATE': datetime.datetime(1999, 6, 30, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 13.4375, 'RET': 0.025781990960240364, 'SHROUT': 7729.0, 'year': 1999, 'month': 6, 'monthid': 294, 'MKTCAP': 103858437.5}, {'PERMNO': 10002, 'DATE': datetime.datetime(1999, 7, 30, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 14.0, 'RET': 0.041860464960336685, 'SHROUT': 7729.0, 'year': 1999, 'month': 7, 'monthid': 295, 'MKTCAP': 108206000.0}, {'PERMNO': 10002, 'DATE': datetime.datetime(1999, 8, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 13.0, 'RET': -0.0714285746216774, 'SHROUT': 7729.0, 'year': 1999, 'month': 8, 'monthid': 296, 'MKTCAP': 100477000.0}, {'PERMNO': 10002, 'DATE': datetime.datetime(1999, 9, 30, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 14.90625, 'RET': 0.153557687997818, 'SHROUT': 7729.0, 'year': 1999, 'month': 9, 'monthid': 297, 'MKTCAP': 115210406.25}, {'PERMNO': 10002, 'DATE': datetime.datetime(1999, 10, 29, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 13.25, 'RET': -0.1111111119389534, 'SHROUT': 7729.0, 'year': 1999, 'month': 10, 'monthid': 298, 'MKTCAP': 102409250.0}, {'PERMNO': 10002, 'DATE': datetime.datetime(1999, 11, 30, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 14.0, 'RET': 0.056603774428367615, 'SHROUT': 7729.0, 'year': 1999, 'month': 11, 'monthid': 299, 'MKTCAP': 108206000.0}, {'PERMNO': 10002, 'DATE': datetime.datetime(1999, 12, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 12.1875, 'RET': -0.12303571403026581, 'SHROUT': 7729.0, 'year': 1999, 'month': 12, 'monthid': 300, 'MKTCAP': 94197187.5}, {'PERMNO': 10002, 'DATE': datetime.datetime(2003, 1, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 11.8100004196167, 'RET': 0.06396396458148956, 'SHROUT': 8729.0, 'year': 2003, 'month': 1, 'monthid': 337, 'MKTCAP': 103089493.66283417}, {'PERMNO': 10002, 'DATE': datetime.datetime(2003, 2, 28, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 13.25, 'RET': 0.12193052470684052, 'SHROUT': 8729.0, 'year': 2003, 'month': 2, 'monthid': 338, 'MKTCAP': 115659250.0}, {'PERMNO': 10002, 'DATE': datetime.datetime(2003, 3, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 14.399999618530273, 'RET': 0.09660374373197556, 'SHROUT': 8740.0, 'year': 2003, 'month': 3, 'monthid': 339, 'MKTCAP': 125855996.66595459}, {'PERMNO': 10002, 'DATE': datetime.datetime(2003, 4, 30, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 15.901000022888184, 'RET': 0.10423614084720612, 'SHROUT': 8740.0, 'year': 2003, 'month': 4, 'monthid': 340, 'MKTCAP': 138974740.20004272}, {'PERMNO': 10002, 'DATE': datetime.datetime(2003, 5, 30, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 15.550000190734863, 'RET': -0.022074073553085327, 'SHROUT': 8741.0, 'year': 2003, 'month': 5, 'monthid': 341, 'MKTCAP': 135922551.66721344}, {'PERMNO': 10002, 'DATE': datetime.datetime(2003, 6, 30, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 16.059999465942383, 'RET': 0.04115751013159752, 'SHROUT': 8740.0, 'year': 2003, 'month': 6, 'monthid': 342, 'MKTCAP': 140364395.33233643}, {'PERMNO': 10002, 'DATE': datetime.datetime(2003, 7, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 16.049999237060547, 'RET': -0.0006226793047972023, 'SHROUT': 8740.0, 'year': 2003, 'month': 7, 'monthid': 343, 'MKTCAP': 140276993.33190918}, {'PERMNO': 10002, 'DATE': datetime.datetime(2003, 8, 29, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 16.09000015258789, 'RET': 0.0024922690354287624, 'SHROUT': 8741.0, 'year': 2003, 'month': 8, 'monthid': 344, 'MKTCAP': 140642691.33377075}, {'PERMNO': 10002, 'DATE': datetime.datetime(2003, 9, 30, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 15.100000381469727, 'RET': -0.05344933271408081, 'SHROUT': 8744.0, 'year': 2003, 'month': 9, 'monthid': 345, 'MKTCAP': 132034403.33557129}, {'PERMNO': 10002, 'DATE': datetime.datetime(2003, 10, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 15.430000305175781, 'RET': 0.021854298189282417, 'SHROUT': 8744.0, 'year': 2003, 'month': 10, 'monthid': 346, 'MKTCAP': 134919922.66845703}, {'PERMNO': 10002, 'DATE': datetime.datetime(2003, 11, 28, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 15.779999732971191, 'RET': 0.022683046758174896, 'SHROUT': 8745.0, 'year': 2003, 'month': 11, 'monthid': 347, 'MKTCAP': 137996097.66483307}, {'PERMNO': 10002, 'DATE': datetime.datetime(2003, 12, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 16.020000457763672, 'RET': 0.023447448387742043, 'SHROUT': 8745.0, 'year': 2003, 'month': 12, 'monthid': 348, 'MKTCAP': 140094904.0031433}, {'PERMNO': 10002, 'DATE': datetime.datetime(2004, 1, 30, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 16.110000610351562, 'RET': 0.005617986898869276, 'SHROUT': 8745.0, 'year': 2004, 'month': 1, 'monthid': 349, 'MKTCAP': 140881955.3375244}, {'PERMNO': 10002, 'DATE': datetime.datetime(2004, 2, 27, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 18.049999237060547, 'RET': 0.12042200565338135, 'SHROUT': 8754.0, 'year': 2004, 'month': 2, 'monthid': 350, 'MKTCAP': 158009693.32122803}, {'PERMNO': 10002, 'DATE': datetime.datetime(2004, 3, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 17.489999771118164, 'RET': -0.023822685703635216, 'SHROUT': 10968.0, 'year': 2004, 'month': 3, 'monthid': 351, 'MKTCAP': 191830317.48962402}, {'PERMNO': 10002, 'DATE': datetime.datetime(2004, 4, 30, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 16.850000381469727, 'RET': -0.036592304706573486, 'SHROUT': 10968.0, 'year': 2004, 'month': 4, 'monthid': 352, 'MKTCAP': 184810804.18395996}, {'PERMNO': 10002, 'DATE': datetime.datetime(2004, 5, 28, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 17.1200008392334, 'RET': 0.01602376624941826, 'SHROUT': 10968.0, 'year': 2004, 'month': 5, 'monthid': 353, 'MKTCAP': 187772169.2047119}, {'PERMNO': 10002, 'DATE': datetime.datetime(2004, 6, 30, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 17.520000457763672, 'RET': 0.030957920476794243, 'SHROUT': 10969.0, 'year': 2004, 'month': 6, 'monthid': 354, 'MKTCAP': 192176885.02120972}, {'PERMNO': 10002, 'DATE': datetime.datetime(2004, 7, 30, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 17.579999923706055, 'RET': 0.0034246270079165697, 'SHROUT': 10980.0, 'year': 2004, 'month': 7, 'monthid': 355, 'MKTCAP': 193028399.16229248}, {'PERMNO': 10002, 'DATE': datetime.datetime(2004, 8, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 17.979999542236328, 'RET': 0.02275310643017292, 'SHROUT': 10980.0, 'year': 2004, 'month': 8, 'monthid': 356, 'MKTCAP': 197420394.97375488}, {'PERMNO': 10002, 'DATE': datetime.datetime(2004, 9, 30, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 18.799999237060547, 'RET': 0.05283647030591965, 'SHROUT': 10988.0, 'year': 2004, 'month': 9, 'monthid': 357, 'MKTCAP': 206574391.6168213}, {'PERMNO': 10002, 'DATE': datetime.datetime(2004, 10, 29, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 19.0, 'RET': 0.010638338513672352, 'SHROUT': 11009.0, 'year': 2004, 'month': 10, 'monthid': 358, 'MKTCAP': 209171000.0}, {'PERMNO': 10002, 'DATE': datetime.datetime(2004, 11, 30, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 21.299999237060547, 'RET': 0.12105259299278259, 'SHROUT': 11009.0, 'year': 2004, 'month': 11, 'monthid': 359, 'MKTCAP': 234491691.60079956}, {'PERMNO': 10002, 'DATE': datetime.datetime(2004, 12, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 24.610000610351562, 'RET': 0.1615024209022522, 'SHROUT': 11021.0, 'year': 2004, 'month': 12, 'monthid': 360, 'MKTCAP': 271226816.7266846}, {'PERMNO': 10002, 'DATE': datetime.datetime(2005, 1, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 21.350000381469727, 'RET': -0.13246648013591766, 'SHROUT': 11021.0, 'year': 2005, 'month': 1, 'monthid': 361, 'MKTCAP': 235298354.20417786}, {'PERMNO': 10002, 'DATE': datetime.datetime(2005, 2, 28, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 20.639999389648438, 'RET': -0.03325531631708145, 'SHROUT': 11021.0, 'year': 2005, 'month': 2, 'monthid': 362, 'MKTCAP': 227473433.27331543} ... 1282463 parameters truncated ... {'PERMNO': 93436, 'DATE': datetime.datetime(2016, 11, 30, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 189.39999389648438, 'RET': -0.04212816432118416, 'SHROUT': 149964.0, 'year': 2016, 'month': 11, 'monthid': 503, 'MKTCAP': 28403180684.692383}, {'PERMNO': 93436, 'DATE': datetime.datetime(2016, 12, 30, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 213.69000244140625, 'RET': 0.12824714183807373, 'SHROUT': 161561.0, 'year': 2016, 'month': 12, 'monthid': 504, 'MKTCAP': 34523970484.436035}, {'PERMNO': 93436, 'DATE': datetime.datetime(2017, 1, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 251.92999267578125, 'RET': 0.17895077168941498, 'SHROUT': 161670.0, 'year': 2017, 'month': 1, 'monthid': 505, 'MKTCAP': 40729521915.893555}, {'PERMNO': 93436, 'DATE': datetime.datetime(2017, 2, 28, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 249.99000549316406, 'RET': -0.007700501009821892, 'SHROUT': 161670.0, 'year': 2017, 'month': 2, 'monthid': 506, 'MKTCAP': 40415884188.079834}, {'PERMNO': 93436, 'DATE': datetime.datetime(2017, 3, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 278.29998779296875, 'RET': 0.11324445903301239, 'SHROUT': 164164.0, 'year': 2017, 'month': 3, 'monthid': 507, 'MKTCAP': 45686839196.04492}, {'PERMNO': 93436, 'DATE': datetime.datetime(2017, 4, 28, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 314.07000732421875, 'RET': 0.12853044271469116, 'SHROUT': 164260.0, 'year': 2017, 'month': 4, 'monthid': 508, 'MKTCAP': 51589139403.07617}, {'PERMNO': 93436, 'DATE': datetime.datetime(2017, 5, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 341.010009765625, 'RET': 0.08577705919742584, 'SHROUT': 164260.0, 'year': 2017, 'month': 5, 'monthid': 509, 'MKTCAP': 56014304204.10156}, {'PERMNO': 93436, 'DATE': datetime.datetime(2017, 6, 30, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 361.6099853515625, 'RET': 0.060408711433410645, 'SHROUT': 166863.0, 'year': 2017, 'month': 6, 'monthid': 510, 'MKTCAP': 60339326985.71777}, {'PERMNO': 93436, 'DATE': datetime.datetime(2017, 7, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 323.4700012207031, 'RET': -0.10547270625829697, 'SHROUT': 166887.0, 'year': 2017, 'month': 7, 'monthid': 511, 'MKTCAP': 53982938093.71948}, {'PERMNO': 93436, 'DATE': datetime.datetime(2017, 8, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 355.8999938964844, 'RET': 0.10025656968355179, 'SHROUT': 166887.0, 'year': 2017, 'month': 8, 'monthid': 512, 'MKTCAP': 59395082281.40259}, {'PERMNO': 93436, 'DATE': datetime.datetime(2017, 9, 29, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 341.1000061035156, 'RET': -0.04158468171954155, 'SHROUT': 168017.0, 'year': 2017, 'month': 9, 'monthid': 513, 'MKTCAP': 57310599725.494385}, {'PERMNO': 93436, 'DATE': datetime.datetime(2017, 10, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 331.5299987792969, 'RET': -0.0280563086271286, 'SHROUT': 168067.0, 'year': 2017, 'month': 10, 'monthid': 514, 'MKTCAP': 55719252304.84009}, {'PERMNO': 93436, 'DATE': datetime.datetime(2017, 11, 30, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 308.8500061035156, 'RET': -0.06841007620096207, 'SHROUT': 168067.0, 'year': 2017, 'month': 11, 'monthid': 515, 'MKTCAP': 51907493975.79956}, {'PERMNO': 93436, 'DATE': datetime.datetime(2017, 12, 29, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 311.3500061035156, 'RET': 0.008094544522464275, 'SHROUT': 168797.0, 'year': 2017, 'month': 12, 'monthid': 516, 'MKTCAP': 52554946980.25513}, {'PERMNO': 93436, 'DATE': datetime.datetime(2018, 1, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 354.30999755859375, 'RET': 0.13797973096370697, 'SHROUT': 168797.0, 'year': 2018, 'month': 1, 'monthid': 517, 'MKTCAP': 59806464657.89795}, {'PERMNO': 93436, 'DATE': datetime.datetime(2018, 2, 28, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 343.05999755859375, 'RET': -0.031751856207847595, 'SHROUT': 168920.0, 'year': 2018, 'month': 2, 'monthid': 518, 'MKTCAP': 57949694787.59766}, {'PERMNO': 93436, 'DATE': datetime.datetime(2018, 3, 29, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 266.1300048828125, 'RET': -0.22424647212028503, 'SHROUT': 169750.0, 'year': 2018, 'month': 3, 'monthid': 519, 'MKTCAP': 45175568328.85742}, {'PERMNO': 93436, 'DATE': datetime.datetime(2018, 4, 30, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 293.8999938964844, 'RET': 0.10434745252132416, 'SHROUT': 169794.0, 'year': 2018, 'month': 4, 'monthid': 520, 'MKTCAP': 49902455563.65967}, {'PERMNO': 93436, 'DATE': datetime.datetime(2018, 5, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 284.7300109863281, 'RET': -0.031201031059026718, 'SHROUT': 169794.0, 'year': 2018, 'month': 5, 'monthid': 521, 'MKTCAP': 48345447485.4126}, {'PERMNO': 93436, 'DATE': datetime.datetime(2018, 6, 29, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 342.95001220703125, 'RET': 0.20447440445423126, 'SHROUT': 170516.0, 'year': 2018, 'month': 6, 'monthid': 522, 'MKTCAP': 58478464281.49414}, {'PERMNO': 93436, 'DATE': datetime.datetime(2018, 7, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 298.1400146484375, 'RET': -0.13066042959690094, 'SHROUT': 170593.0, 'year': 2018, 'month': 7, 'monthid': 523, 'MKTCAP': 50860599518.9209}, {'PERMNO': 93436, 'DATE': datetime.datetime(2018, 8, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 301.6600036621094, 'RET': 0.011806496419012547, 'SHROUT': 170593.0, 'year': 2018, 'month': 8, 'monthid': 524, 'MKTCAP': 51461085004.730225}, {'PERMNO': 93436, 'DATE': datetime.datetime(2018, 9, 28, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 264.7699890136719, 'RET': -0.12229004502296448, 'SHROUT': 171578.0, 'year': 2018, 'month': 9, 'monthid': 525, 'MKTCAP': 45428705174.98779}, {'PERMNO': 93436, 'DATE': datetime.datetime(2018, 10, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 337.32000732421875, 'RET': 0.274011492729187, 'SHROUT': 171733.0, 'year': 2018, 'month': 10, 'monthid': 526, 'MKTCAP': 57928976817.81006}, {'PERMNO': 93436, 'DATE': datetime.datetime(2018, 11, 30, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 350.4800109863281, 'RET': 0.03901340812444687, 'SHROUT': 171733.0, 'year': 2018, 'month': 11, 'monthid': 527, 'MKTCAP': 60188983726.71509}, {'PERMNO': 93436, 'DATE': datetime.datetime(2018, 12, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 332.79998779296875, 'RET': -0.05044516921043396, 'SHROUT': 172602.0, 'year': 2018, 'month': 12, 'monthid': 528, 'MKTCAP': 57441943493.04199}, {'PERMNO': 93436, 'DATE': datetime.datetime(2019, 1, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 307.0199890136719, 'RET': -0.07746393978595734, 'SHROUT': 172602.0, 'year': 2019, 'month': 1, 'monthid': 529, 'MKTCAP': 52992264143.73779}, {'PERMNO': 93436, 'DATE': datetime.datetime(2019, 2, 28, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 319.8800048828125, 'RET': 0.041886575520038605, 'SHROUT': 172721.0, 'year': 2019, 'month': 2, 'monthid': 530, 'MKTCAP': 55249994323.36426}, {'PERMNO': 93436, 'DATE': datetime.datetime(2019, 3, 29, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 279.8599853515625, 'RET': -0.1251094788312912, 'SHROUT': 173682.0, 'year': 2019, 'month': 3, 'monthid': 531, 'MKTCAP': 48606641975.83008}, {'PERMNO': 93436, 'DATE': datetime.datetime(2019, 4, 30, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 238.69000244140625, 'RET': -0.14710921049118042, 'SHROUT': 173721.0, 'year': 2019, 'month': 4, 'monthid': 532, 'MKTCAP': 41465465914.123535}, {'PERMNO': 93436, 'DATE': datetime.datetime(2019, 5, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 185.16000366210938, 'RET': -0.22426578402519226, 'SHROUT': 177270.0, 'year': 2019, 'month': 5, 'monthid': 533, 'MKTCAP': 32823313849.18213}, {'PERMNO': 93436, 'DATE': datetime.datetime(2019, 6, 28, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 223.4600067138672, 'RET': 0.20684814453125, 'SHROUT': 179118.0, 'year': 2019, 'month': 6, 'monthid': 534, 'MKTCAP': 40025709482.57446}, {'PERMNO': 93436, 'DATE': datetime.datetime(2019, 7, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 241.61000061035156, 'RET': 0.08122256398200989, 'SHROUT': 179127.0, 'year': 2019, 'month': 7, 'monthid': 535, 'MKTCAP': 43278874579.330444}, {'PERMNO': 93436, 'DATE': datetime.datetime(2019, 8, 30, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 225.61000061035156, 'RET': -0.0662224218249321, 'SHROUT': 179127.0, 'year': 2019, 'month': 8, 'monthid': 536, 'MKTCAP': 40412842579.330444}, {'PERMNO': 93436, 'DATE': datetime.datetime(2019, 9, 30, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 240.8699951171875, 'RET': 0.0676388218998909, 'SHROUT': 180000.0, 'year': 2019, 'month': 9, 'monthid': 537, 'MKTCAP': 43356599121.09375}, {'PERMNO': 93436, 'DATE': datetime.datetime(2019, 10, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 314.9200134277344, 'RET': 0.307427316904068, 'SHROUT': 180245.0, 'year': 2019, 'month': 10, 'monthid': 538, 'MKTCAP': 56762757820.28198}, {'PERMNO': 93436, 'DATE': datetime.datetime(2019, 11, 29, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 329.94000244140625, 'RET': 0.0476946160197258, 'SHROUT': 180245.0, 'year': 2019, 'month': 11, 'monthid': 539, 'MKTCAP': 59470035740.05127}, {'PERMNO': 93436, 'DATE': datetime.datetime(2019, 12, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 418.3299865722656, 'RET': 0.2678971290588379, 'SHROUT': 181062.0, 'year': 2019, 'month': 12, 'monthid': 540, 'MKTCAP': 75743664028.74756}, {'PERMNO': 93436, 'DATE': datetime.datetime(2020, 1, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 650.5700073242188, 'RET': 0.555159866809845, 'SHROUT': 181062.0, 'year': 2020, 'month': 1, 'monthid': 541, 'MKTCAP': 117793506666.1377}, {'PERMNO': 93436, 'DATE': datetime.datetime(2020, 2, 28, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 667.989990234375, 'RET': 0.026776492595672607, 'SHROUT': 184110.0, 'year': 2020, 'month': 2, 'monthid': 542, 'MKTCAP': 122983637102.05078}, {'PERMNO': 93436, 'DATE': datetime.datetime(2020, 3, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 524.0, 'RET': -0.21555711328983307, 'SHROUT': 185000.0, 'year': 2020, 'month': 3, 'monthid': 543, 'MKTCAP': 96940000000.0}, {'PERMNO': 93436, 'DATE': datetime.datetime(2020, 4, 30, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 781.8800048828125, 'RET': 0.4921374022960663, 'SHROUT': 185371.0, 'year': 2020, 'month': 4, 'monthid': 544, 'MKTCAP': 144937878385.13184}, {'PERMNO': 93436, 'DATE': datetime.datetime(2020, 5, 29, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 835.0, 'RET': 0.06793880462646484, 'SHROUT': 185371.0, 'year': 2020, 'month': 5, 'monthid': 545, 'MKTCAP': 154784785000.0}, {'PERMNO': 93436, 'DATE': datetime.datetime(2020, 6, 30, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 1079.81005859375, 'RET': 0.2931857109069824, 'SHROUT': 186000.0, 'year': 2020, 'month': 6, 'monthid': 546, 'MKTCAP': 200844670898.4375}, {'PERMNO': 93436, 'DATE': datetime.datetime(2020, 7, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 1430.760009765625, 'RET': 0.3250108063220978, 'SHROUT': 186362.0, 'year': 2020, 'month': 7, 'monthid': 547, 'MKTCAP': 266639296939.9414}, {'PERMNO': 93436, 'DATE': datetime.datetime(2020, 8, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 498.32000732421875, 'RET': 0.7414520978927612, 'SHROUT': 931809.0, 'year': 2020, 'month': 8, 'monthid': 548, 'MKTCAP': 464339067704.77295}, {'PERMNO': 93436, 'DATE': datetime.datetime(2020, 9, 30, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 429.010009765625, 'RET': -0.13908731937408447, 'SHROUT': 948000.0, 'year': 2020, 'month': 9, 'monthid': 549, 'MKTCAP': 406701489257.8125}, {'PERMNO': 93436, 'DATE': datetime.datetime(2020, 10, 30, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 388.0400085449219, 'RET': -0.0954989418387413, 'SHROUT': 947901.0, 'year': 2020, 'month': 10, 'monthid': 550, 'MKTCAP': 367823512139.74}, {'PERMNO': 93436, 'DATE': datetime.datetime(2020, 11, 30, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 567.5999755859375, 'RET': 0.46273571252822876, 'SHROUT': 947901.0, 'year': 2020, 'month': 11, 'monthid': 551, 'MKTCAP': 538028584457.88574}, {'PERMNO': 93436, 'DATE': datetime.datetime(2020, 12, 31, 0, 0), 'SHRCD': 11.0, 'EXCHCD': 3.0, 'PRC': 705.6699829101562, 'RET': 0.24325230717658997, 'SHROUT': 959854.0, 'year': 2020, 'month': 12, 'monthid': 552, 'MKTCAP': 677340155776.2451}]]

In [None]:
assets.data

Unnamed: 0,PERMNO,DATE,SHRCD,EXCHCD,PRC,RET,SHROUT,year,month,monthid,...,OIADPQ,PSTKQ,SALEQ,qtrid,DATEFF,SMB,HML,MKTRF,RF,UMD
0,10002,1998-01-30 00:00:00.000000,11.0,3.0,25.000,0.020408,4246.0,1998,1,277,...,2.154,0.00,7.250,273,1998-01-30,-0.0107,-0.0163,0.0015,0.0043,0.0014
1,10016,1998-01-30 00:00:00.000000,11.0,3.0,13.250,0.009524,13729.0,1998,1,277,...,5.733,0.00,38.084,273,1998-01-30,-0.0107,-0.0163,0.0015,0.0043,0.0014
2,10019,1998-01-30 00:00:00.000000,11.0,3.0,16.125,0.040323,8205.0,1998,1,277,...,3.179,0.00,25.521,273,1998-01-30,-0.0107,-0.0163,0.0015,0.0043,0.0014
3,10025,1998-01-30 00:00:00.000000,11.0,3.0,33.500,0.085020,7219.0,1998,1,277,...,10.677,0.00,198.031,271,1998-01-30,-0.0107,-0.0163,0.0015,0.0043,0.0014
4,10026,1998-01-30 00:00:00.000000,11.0,3.0,14.250,-0.129771,8872.0,1998,1,277,...,5.293,0.00,62.964,273,1998-01-30,-0.0107,-0.0163,0.0015,0.0043,0.0014
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1242503,92284,1986-06-30 00:00:00.000000,11.0,3.0,16.000,0.049180,9064.0,1986,6,138,...,1.836,1.01,20.347,132,1986-06-30,-0.0096,0.0128,0.0103,0.0052,0.0507
1242504,92567,1986-06-30 00:00:00.000000,11.0,3.0,14.375,0.017699,24677.0,1986,6,138,...,2.257,0.00,84.400,132,1986-06-30,-0.0096,0.0128,0.0103,0.0052,0.0507
1242505,92639,1986-06-30 00:00:00.000000,10.0,3.0,40.875,-0.130319,5486.0,1986,6,138,...,5.085,0.00,14.231,132,1986-06-30,-0.0096,0.0128,0.0103,0.0052,0.0507
1242506,92655,1986-06-30 00:00:00.000000,11.0,3.0,12.625,-0.114035,15167.0,1986,6,138,...,2.016,0.00,32.164,132,1986-06-30,-0.0096,0.0128,0.0103,0.0052,0.0507


In [None]:
assets.fact

Unnamed: 0,DATEFF,SMB,HML,MKTRF,RF,UMD,monthid,epq1,ioca,ra26,...,Re1,dRoe1,dRoa1,rnaq1,atoq1,dNoa,Nsi,dNca,dFnl,RET
0,1975-01-31,0.1114,0.0828,0.1366,0.0058,-0.1382,1,-0.026479,-0.039104,,...,,,,,,-0.109014,-0.018458,-0.090733,-0.033594,0.231061
1,1975-01-31,0.1114,0.0828,0.1366,0.0058,-0.1382,1,-0.026479,-0.039104,,...,,,,,,-0.109014,-0.018458,-0.090733,-0.033594,0.337349
2,1975-01-31,0.1114,0.0828,0.1366,0.0058,-0.1382,1,-0.026479,-0.039104,,...,,,,,,-0.109014,-0.018458,-0.090733,-0.033594,0.333333
3,1975-01-31,0.1114,0.0828,0.1366,0.0058,-0.1382,1,-0.026479,-0.039104,,...,,,,,,-0.109014,-0.018458,-0.090733,-0.033594,0.136564
4,1975-01-31,0.1114,0.0828,0.1366,0.0058,-0.1382,1,-0.026479,-0.039104,,...,,,,,,-0.109014,-0.018458,-0.090733,-0.033594,0.215447
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1242503,2020-12-31,0.0489,-0.0151,0.0463,0.0001,-0.0232,552,0.032500,0.046925,-0.016938,...,-0.026332,-0.019447,-0.00936,0.001423,-0.018967,0.006284,0.026051,-0.034905,-0.019750,0.143199
1242504,2020-12-31,0.0489,-0.0151,0.0463,0.0001,-0.0232,552,0.032500,0.046925,-0.016938,...,-0.026332,-0.019447,-0.00936,0.001423,-0.018967,0.006284,0.026051,-0.034905,-0.019750,0.109665
1242505,2020-12-31,0.0489,-0.0151,0.0463,0.0001,-0.0232,552,0.032500,0.046925,-0.016938,...,-0.026332,-0.019447,-0.00936,0.001423,-0.018967,0.006284,0.026051,-0.034905,-0.019750,0.076239
1242506,2020-12-31,0.0489,-0.0151,0.0463,0.0001,-0.0232,552,0.032500,0.046925,-0.016938,...,-0.026332,-0.019447,-0.00936,0.001423,-0.018967,0.006284,0.026051,-0.034905,-0.019750,0.135851


In [None]:
assets.factor_t

{'epq1': 4.354950149287537,
 'ioca': 6.725643404952385,
 'ra26': 2.2393226794162655,
 'e11': -3.832979499651745,
 'Re1': -0.37614028760115015,
 'dRoe1': -2.3516788105980737,
 'dRoa1': -2.6461568816234426,
 'rnaq1': -4.831482227198306,
 'atoq1': 5.357203182042189,
 'dNoa': 0.616490622745105,
 'Nsi': 7.897841745737835,
 'dNca': -7.197752976581749,
 'dFnl': -4.05335643742095}

In [None]:
assets

<__main__.Assets at 0x23df4807090>

In [None]:
assets.fact.to_csv('assets.csv', index=False)
