# FF3 Stock Factors

In [1]:
import dai
import pandas as pd

In [2]:
sd = '2023-01-01'
ed = '2024-01-01'

## 1. FF3 Stock Factors SQL

In [3]:
def get_ff3_sql(sql_base):

    sql = f"""--sql
    WITH 
    data1 AS (
        {sql_base}
    ),
    data2 AS ( 
        SELECT DISTINCT
            date, 
            c_sum(float_market_cap * change_ratio) / c_sum(float_market_cap) AS MKT
        FROM data1 
    ), 
    data3 AS (
        WITH 
        data3_0 AS (
            SELECT
                date,
                instrument,
                change_ratio,
                float_market_cap,
                c_pct_rank(float_market_cap) AS rank_sb,
                c_pct_rank(bp_ratio)         AS rank_lmh,
                CASE
                    WHEN rank_sb  < 0.5 THEN 1
                    ELSE 2
                END AS group_sb,
                CASE
                    WHEN rank_lmh < 0.3 THEN 1
                    WHEN rank_lmh > 0.7 THEN 3
                    ELSE 2
                END AS group_lmh,
            FROM data1
        ),
        data3_sl AS (
            SELECT DISTINCT
                date,
                c_sum(float_market_cap * change_ratio) / c_sum(float_market_cap) AS SL
            FROM data3_0
            WHERE group_sb = 1 AND group_lmh = 1
        ),
        data3_sm AS (
            SELECT DISTINCT
                date,
                c_sum(float_market_cap * change_ratio) / c_sum(float_market_cap) AS SM
            FROM data3_0
            WHERE group_sb = 1 AND group_lmh = 2
        ),
        data3_sh AS (
            SELECT DISTINCT
                date,
                c_sum(float_market_cap * change_ratio) / c_sum(float_market_cap) AS SH
            FROM data3_0
            WHERE group_sb = 1 AND group_lmh = 3
        ),
        data3_bl AS (
            SELECT DISTINCT
                date,
                c_sum(float_market_cap * change_ratio) / c_sum(float_market_cap) AS BL
            FROM data3_0
            WHERE group_sb = 2 AND group_lmh = 1
        ),
        data3_bm AS (
            SELECT DISTINCT
                date,
                c_sum(float_market_cap * change_ratio) / c_sum(float_market_cap) AS BM
            FROM data3_0
            WHERE group_sb = 2 AND group_lmh = 2
        ),
        data3_bh AS (
            SELECT DISTINCT
                date,
                c_sum(float_market_cap * change_ratio) / c_sum(float_market_cap) AS BH
            FROM data3_0
            WHERE group_sb = 2 AND group_lmh = 3
        ),
        data3_merge AS (
            SELECT 
                date,
                (1/3) * (SL + SM + SH) - (1/3) * (BL + BM + BH) AS SMB,
                (1/2) * (SH + BH)      - (1/2) * (SL + BL)      AS HML,
            FROM data3_sl
            JOIN data3_sm USING (date)
            JOIN data3_sh USING (date)
            JOIN data3_bl USING (date)
            JOIN data3_bm USING (date)
            JOIN data3_bh USING (date)
        )
        SELECT * 
        FROM data3_merge
    ),
    data4 AS ( 
        SELECT 
            date, 
            MKT,
            SMB, 
            HML, 
        FROM data2 JOIN data3 USING (date)
        QUALIFY COLUMNS(*) IS NOT NULL
    )
    SELECT *
    FROM data4
    ORDER BY date
    """

    return sql

## 2. FF3 Stock Factors

### 2.1 FF3 Daily

In [4]:
sql_ff3_base_daily = f"""--sql
SELECT
    date,
    instrument,
    change_ratio,
    float_market_cap,
    1 / pb AS bp_ratio,
FROM cn_stock_prefactors
WHERE instrument NOT LIKE '%BJ%'
"""

sql_ff3_daily = get_ff3_sql(sql_ff3_base_daily)

In [5]:
df_ff3_daily = dai.query(sql_ff3_daily, filters={"date":[sd, ed]}).df()
df_ff3_daily

Unnamed: 0,date,MKT,SMB,HML
0,2023-01-03,0.010998,0.015822,-0.004603
1,2023-01-04,0.002176,0.005147,0.008139
2,2023-01-05,0.013000,-0.010329,-0.014943
3,2023-01-06,0.001170,-0.004941,-0.006305
4,2023-01-09,0.006535,-0.000106,-0.004306
...,...,...,...,...
237,2023-12-25,0.002003,-0.010062,-0.005550
238,2023-12-26,-0.008222,-0.005551,0.005306
239,2023-12-27,0.005632,0.002678,-0.001444
240,2023-12-28,0.017699,-0.000411,-0.008186


### 2.2 FF3 Weekly

In [6]:
sql_ff3_base_weekly = """
WITH 
data_orgn AS (
    SELECT
        date,
        instrument,
        close,
        float_market_cap,
        total_market_cap,
        total_market_cap / pb AS total_book_value
    FROM cn_stock_prefactors
),
data_freq AS (
    WITH 
    data_temp_1 AS (
        SELECT
            instrument,
            CONCAT(CAST(year(date) AS CHAR), CAST(week(date) AS CHAR)) AS period,
            FIRST_VALUE(date)            OVER (PARTITION BY period, instrument ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS date,
            LAST_VALUE(close)            OVER (PARTITION BY period, instrument ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS close,
            LAST_VALUE(float_market_cap) OVER (PARTITION BY period, instrument ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS float_market_cap,
            LAST_VALUE(total_market_cap) OVER (PARTITION BY period, instrument ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS total_market_cap,
            LAST_VALUE(total_book_value) OVER (PARTITION BY period, instrument ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS total_book_value,
            ROW_NUMBER()                 OVER (PARTITION BY period, instrument ORDER BY date DESC) AS rn
        FROM data_orgn
    ),
    data_temp_2 AS (
        SELECT
            date,
            instrument,
            close,
            LAG(close) OVER (PARTITION BY instrument ORDER BY period) AS close_m_lag_1,
            float_market_cap,
            total_market_cap,
            total_book_value
        FROM data_temp_1
        WHERE rn = 1 
    )
    SELECT 
        date,
        instrument,
        close / close_m_lag_1 - 1 AS change_ratio,
        float_market_cap,
        total_book_value / total_market_cap AS bp_ratio
    FROM data_temp_2
)
SELECT
    date,
    instrument,
    change_ratio,
    float_market_cap,
    bp_ratio,
FROM data_freq
WHERE instrument NOT LIKE '%BJ%'
ORDER BY date, instrument
"""

sql_ff3_weekly = get_ff3_sql(sql_ff3_base_weekly)

In [9]:
df_ff3_weekly = dai.query(sql_ff3_weekly, filters={"date":[sd, ed]}).df()
df_ff3_weekly

Unnamed: 0,date,MKT,SMB,HML
0,2023-01-09,0.035558,-0.024881,-0.155243
1,2023-01-16,0.079507,-0.043128,-0.200385
2,2023-01-30,0.200125,-0.151399,-0.271747
3,2023-02-06,0.19279,-0.131306,-0.248397
4,2023-02-13,-0.012301,-0.003407,0.004109
5,2023-02-20,0.01199,-0.002198,0.009568
6,2023-02-27,0.015465,-0.010742,0.013716
7,2023-03-06,0.035022,0.033158,-0.010232
8,2023-03-13,0.002497,-0.016451,0.012719
9,2023-03-20,0.017927,0.000426,-0.021531


### 2.3 FF3 Monthly

In [10]:
sql_ff3_base_monthly = """
WITH 
data_orgn AS (
    SELECT
        date,
        instrument,
        close,
        float_market_cap,
        total_market_cap,
        total_market_cap / pb AS total_book_value
    FROM cn_stock_prefactors
),
data_freq AS (
    WITH 
    data1 AS (
        SELECT
            instrument,
            CONCAT(CAST(year(date) AS CHAR), CAST(month(date) AS CHAR)) AS period,
            FIRST_VALUE(date)            OVER (PARTITION BY period, instrument ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS date,
            LAST_VALUE(close)            OVER (PARTITION BY period, instrument ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS close,
            LAST_VALUE(float_market_cap) OVER (PARTITION BY period, instrument ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS float_market_cap,
            LAST_VALUE(total_market_cap) OVER (PARTITION BY period, instrument ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS total_market_cap,
            LAST_VALUE(total_book_value) OVER (PARTITION BY period, instrument ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS total_book_value,
            ROW_NUMBER()                 OVER (PARTITION BY period, instrument ORDER BY date DESC) AS rn
        FROM data_orgn
    ),
    data2 AS (
        SELECT
            date,
            instrument,
            close,
            LAG(close) OVER (PARTITION BY instrument ORDER BY period) AS close_m_lag_1,
            float_market_cap,
            total_market_cap,
            total_book_value
        FROM data1
        WHERE rn = 1 
    )
    SELECT 
        date,
        instrument,
        close / close_m_lag_1 - 1 AS change_ratio,
        float_market_cap,
        total_book_value / total_market_cap AS bp_ratio
    FROM data2
)
SELECT
    date,
    instrument,
    change_ratio,
    float_market_cap,
    bp_ratio,
FROM data_freq
WHERE instrument NOT LIKE '%BJ%'
ORDER BY date, instrument
"""

sql_ff3_monthly = get_ff3_sql(sql_ff3_base_monthly)

In [11]:
df_ff3_monthly = dai.query(sql_ff3_monthly, filters={"date":[sd, ed]}).df()
df_ff3_monthly

Unnamed: 0,date,MKT,SMB,HML
0,2023-02-01,0.184054,-0.133435,-0.215686
1,2023-03-01,0.010762,-0.034983,-0.029034
2,2023-04-03,0.012244,-0.055814,0.023377
3,2023-05-04,-0.023972,0.039218,-0.021567
4,2023-06-01,0.022841,0.007243,-0.038238
5,2023-07-03,0.040959,-0.035713,0.040181
6,2023-08-01,-0.049022,0.017738,-0.035505
7,2023-09-01,-0.002098,0.002422,0.012825
8,2023-10-09,-0.007133,-0.013165,-0.027054
9,2023-11-01,0.00918,0.040048,-0.031618


## 3. FF3 Analysis

In [None]:
def ff3_analysis(df, n):
    
    from sklearn.linear_model import LinearRegression

    results = []

    for instrument, group in df.groupby('instrument'):

        group = group.sort_values('date') 

        beta_ICP  = []
        beta_MKT  = []
        beta_SMB  = []
        beta_HML  = []
        alpha     = []
        regr_resd = []
        regr_pred = []
        regr_r2   = []


        if len(group) < n:
            group['beta_ICP']  = [None] * len(group)
            group['beta_MKT']  = [None] * len(group)
            group['beta_SMB']  = [None] * len(group)
            group['beta_HML']  = [None] * len(group)
            group['alpha']     = [None] * len(group)
            group['regr_resd'] = [None] * len(group)
            group['regr_pred'] = [None] * len(group)
            group['regr_r2']   = [None] * len(group)
        
        else:

            for i in range(len(group) - n + 1):

                window = group.iloc[i:i+n]
                y = window['Return'].values
                X = window[['MKT', 'SMB', 'HML']].values

                model = LinearRegression()
                model.fit(X, y)

                beta_ICP.append(model.intercept_)
                beta_MKT.append(model.coef_[0])
                beta_SMB.append(model.coef_[1])
                beta_HML.append(model.coef_[2])
                alpha.append(model.intercept_ + y[-1] - model.predict(X)[-1])
                regr_pred.append(model.predict(X)[-1]) 
                regr_resd.append(y[-1] - model.predict(X)[-1]) 
                regr_r2.append(model.score(X, y))

            beta_ICP  = [None] * (n - 1) + beta_ICP
            beta_MKT  = [None] * (n - 1) + beta_MKT
            beta_SMB  = [None] * (n - 1) + beta_SMB
            beta_HML  = [None] * (n - 1) + beta_HML
            alpha     = [None] * (n - 1) + alpha
            regr_resd = [None] * (n - 1) + regr_resd
            regr_pred = [None] * (n - 1) + regr_pred
            regr_r2   = [None] * (n - 1) + regr_r2

            group['beta_ICP']  = beta_ICP
            group['beta_MKT']  = beta_MKT
            group['beta_SMB']  = beta_SMB
            group['beta_HML']  = beta_HML
            group['alpha']     = alpha
            group['regr_resd'] = regr_resd
            group['regr_pred'] = regr_pred
            group['regr_r2']   = regr_r2

        results.append(group)

    result_df = pd.concat(results).sort_index()
    return result_df

In [None]:
df_ff3_derived_daily = ff3_analysis(df_ff3_daily, 20)
df_ff3_derived_daily

Unnamed: 0,date,instrument,Return,MKT,SMB,HML,beta_ICP,beta_MKT,beta_SMB,beta_HML,alpha,regr_resd,regr_pred,regr_r2
0,2024-07-01,000001.SZ,0.019704,0.009363,-0.002061,0.009897,,,,,,,,
1,2024-07-01,000002.SZ,0.051948,0.009363,-0.002061,0.009897,,,,,,,,
2,2024-07-01,000004.SZ,-0.067010,0.009363,-0.002061,0.009897,,,,,,,,
3,2024-07-01,000006.SZ,0.005155,0.009363,-0.002061,0.009897,,,,,,,,
4,2024-07-01,000007.SZ,0.000000,0.009363,-0.002061,0.009897,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
505372,2024-11-25,688799.SH,0.014184,0.000948,0.023147,0.002566,-0.005781,1.029895,0.848759,0.269813,-0.007130,-0.001349,0.015534,0.623833
505373,2024-11-25,688800.SH,0.005558,0.000948,0.023147,0.002566,-0.009455,2.034651,0.650783,-1.089811,-0.008637,0.000817,0.004741,0.705752
505374,2024-11-25,688819.SH,0.020329,0.000948,0.023147,0.002566,-0.006382,1.128446,0.324784,0.061114,0.011585,0.017968,0.002362,0.374600
505375,2024-11-25,688981.SH,-0.029402,0.000948,0.023147,0.002566,-0.002912,2.022417,-1.065748,-1.100290,-0.003827,-0.000916,-0.028486,0.701021
