## CN Stock Factors FF3

### 1. Import Packages

In [8]:
import dai
import pandas as pd
import numpy as np

In [9]:
import sys
sys.path.append("/home/aiuser/work/userlib/BigQuant_Resources_Collection/BigQuant_Resources_Collection/00_General_Resources/General_Tool")
from cn_general_tool import *

In [10]:
sd, ed = get_sd_ed('2024-07-01', '2024-11-25')

### 2. FF3 Factors

#### 2.1 FF3 General SQL

In [11]:
def ff3_get_sql_str(sql_base):
    sql = f"""
    WITH 
    data_base AS (
        {sql_base}
    ),
    data1 AS ( 
        SELECT 
            date, 
            instrument, 
            change_ratio, 
        FROM data_base 
    ), 
    data2 AS ( 
        SELECT DISTINCT
            date, 
            c_sum(float_market_cap * change_ratio) / c_sum(float_market_cap) AS MKT
        FROM data_base 
    ), 
    data3 AS (
        WITH 
        data3_0 AS (
            SELECT
                date,
                instrument,
                change_ratio,
                float_market_cap,
                c_pct_rank(float_market_cap) AS rank_sb,
                c_pct_rank(bp_ratio)         AS rank_lmh,
                CASE
                    WHEN rank_sb  < 0.5 THEN 1
                    ELSE 2
                END AS group_sb,
                CASE
                    WHEN rank_lmh < 0.3 THEN 1
                    WHEN rank_lmh > 0.7 THEN 3
                    ELSE 2
                END AS group_lmh,
            FROM data_base
        ),
        data3_sl AS (
            SELECT DISTINCT
                date,
                c_sum(float_market_cap * change_ratio) / c_sum(float_market_cap) AS SL
            FROM data3_0
            WHERE group_sb = 1 AND group_lmh = 1
        ),
        data3_sm AS (
            SELECT DISTINCT
                date,
                c_sum(float_market_cap * change_ratio) / c_sum(float_market_cap) AS SM
            FROM data3_0
            WHERE group_sb = 1 AND group_lmh = 2
        ),
        data3_sh AS (
            SELECT DISTINCT
                date,
                c_sum(float_market_cap * change_ratio) / c_sum(float_market_cap) AS SH
            FROM data3_0
            WHERE group_sb = 1 AND group_lmh = 3
        ),
        data3_bl AS (
            SELECT DISTINCT
                date,
                c_sum(float_market_cap * change_ratio) / c_sum(float_market_cap) AS BL
            FROM data3_0
            WHERE group_sb = 2 AND group_lmh = 1
        ),
        data3_bm AS (
            SELECT DISTINCT
                date,
                c_sum(float_market_cap * change_ratio) / c_sum(float_market_cap) AS BM
            FROM data3_0
            WHERE group_sb = 2 AND group_lmh = 2
        ),
        data3_bh AS (
            SELECT DISTINCT
                date,
                c_sum(float_market_cap * change_ratio) / c_sum(float_market_cap) AS BH
            FROM data3_0
            WHERE group_sb = 2 AND group_lmh = 3
        ),
        data3_merge AS (
            SELECT 
                date,
                (1/3) * (SL + SM + SH) - (1/3) * (BL + BM + BH) AS SMB,
                (1/2) * (SH + BH)      - (1/2) * (SL + BL)      AS HML,
            FROM data3_sl
            JOIN data3_sm USING (date)
            JOIN data3_sh USING (date)
            JOIN data3_bl USING (date)
            JOIN data3_bm USING (date)
            JOIN data3_bh USING (date)
        )
        SELECT * 
        FROM data3_merge
    ),
    data_merge AS ( 
        SELECT 
            date, 
            instrument, 
            change_ratio AS Return,
            MKT,
            SMB, 
            HML, 
        FROM data1 JOIN data2 USING (date) JOIN data3 USING (date)
        QUALIFY COLUMNS(*) IS NOT NULL
    )
    SELECT *
    FROM data_merge 
    ORDER BY date, instrument
    """
    return sql

#### 2.2 FF3 Daily

In [12]:
def ff3_factors_1d():
    sql_base = """
    SELECT
        date,
        instrument,
        change_ratio,
        float_market_cap,
        1 / pb AS bp_ratio,
    FROM cn_stock_prefactors
    WHERE instrument NOT LIKE '%BJ%'
    """
    return ff3_get_sql_str(sql_base)

In [13]:
sql_ff3_1d = ff3_factors_1d()

In [14]:
df_ff3_1d = dai.query(sql_ff3_1d, filters={'date':[sd, ed]}).df()
df_ff3_1d

Unnamed: 0,date,instrument,Return,MKT,SMB,HML
0,2024-07-01,000001.SZ,0.019704,0.009363,-0.002061,0.009897
1,2024-07-01,000002.SZ,0.051948,0.009363,-0.002061,0.009897
2,2024-07-01,000004.SZ,-0.067010,0.009363,-0.002061,0.009897
3,2024-07-01,000006.SZ,0.005155,0.009363,-0.002061,0.009897
4,2024-07-01,000007.SZ,0.000000,0.009363,-0.002061,0.009897
...,...,...,...,...,...,...
505372,2024-11-25,688799.SH,0.014184,0.000948,0.023147,0.002566
505373,2024-11-25,688800.SH,0.005558,0.000948,0.023147,0.002566
505374,2024-11-25,688819.SH,0.020329,0.000948,0.023147,0.002566
505375,2024-11-25,688981.SH,-0.029402,0.000948,0.023147,0.002566


#### 2.3 FF3 Weekly

In [None]:
def ff3_factors_1w():
    sql_base = """
    WITH 
    data_orgn AS (
        SELECT
            date,
            instrument,
            close,
            float_market_cap,
            total_market_cap,
            total_market_cap / pb AS total_book_value
        FROM cn_stock_prefactors
    ),
    data_freq AS (
        WITH 
        data1 AS (
            SELECT
                instrument,
                CONCAT(CAST(year(date) AS CHAR), CAST(week(date) AS CHAR)) AS period,
                FIRST_VALUE(date)            OVER (PARTITION BY period, instrument ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS date,
                LAST_VALUE(close)            OVER (PARTITION BY period, instrument ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS close,
                LAST_VALUE(float_market_cap) OVER (PARTITION BY period, instrument ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS float_market_cap,
                LAST_VALUE(total_market_cap) OVER (PARTITION BY period, instrument ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS total_market_cap,
                LAST_VALUE(total_book_value) OVER (PARTITION BY period, instrument ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS total_book_value,
                ROW_NUMBER()                 OVER (PARTITION BY period, instrument ORDER BY date DESC) AS rn
            FROM data_orgn
        ),
        data2 AS (
            SELECT
                date,
                instrument,
                close,
                LAG(close) OVER (PARTITION BY instrument ORDER BY period) AS close_m_lag_1,
                float_market_cap,
                total_market_cap,
                total_book_value
            FROM data1
            WHERE rn = 1 
        )
        SELECT 
            date,
            instrument,
            close / close_m_lag_1 - 1 AS change_ratio,
            float_market_cap,
            total_book_value / total_market_cap AS bp_ratio
        FROM data2
    )
    SELECT
        date,
        instrument,
        change_ratio,
        float_market_cap,
        bp_ratio,
    FROM data_freq
    WHERE instrument NOT LIKE '%BJ%'
    ORDER BY date, instrument
    """
    return ff3_get_sql_str(sql_base)

In [None]:
sql_ff3_1w = ff3_factors_1w()

In [None]:
df_ff3_1w = dai.query(sql_ff3_1w, filters={'date':[sd, ed]}).df()
df_ff3_1w

Unnamed: 0,date,instrument,Return,MKT,SMB,HML
0,2024-07-08,000001.SZ,0.034102,0.016211,-0.007951,-0.012633
1,2024-07-08,000002.SZ,0.010174,0.016211,-0.007951,-0.012633
2,2024-07-08,000004.SZ,-0.041386,0.016211,-0.007951,-0.012633
3,2024-07-08,000006.SZ,0.013477,0.016211,-0.007951,-0.012633
4,2024-07-08,000007.SZ,0.023077,0.016211,-0.007951,-0.012633
...,...,...,...,...,...,...
106816,2024-11-25,688799.SH,0.014184,0.000944,0.023096,0.002642
106817,2024-11-25,688800.SH,0.005558,0.000944,0.023096,0.002642
106818,2024-11-25,688819.SH,0.020329,0.000944,0.023096,0.002642
106819,2024-11-25,688981.SH,-0.029402,0.000944,0.023096,0.002642


#### 2.4 FF3 Monthly

In [None]:
def ff3_factors_1m():
    sql_base = """
    WITH 
    data_orgn AS (
        SELECT
            date,
            instrument,
            close,
            float_market_cap,
            total_market_cap,
            total_market_cap / pb AS total_book_value
        FROM cn_stock_prefactors
    ),
    data_freq AS (
        WITH 
        data1 AS (
            SELECT
                instrument,
                CONCAT(CAST(year(date) AS CHAR), CAST(month(date) AS CHAR)) AS period,
                FIRST_VALUE(date)            OVER (PARTITION BY period, instrument ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS date,
                LAST_VALUE(close)            OVER (PARTITION BY period, instrument ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS close,
                LAST_VALUE(float_market_cap) OVER (PARTITION BY period, instrument ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS float_market_cap,
                LAST_VALUE(total_market_cap) OVER (PARTITION BY period, instrument ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS total_market_cap,
                LAST_VALUE(total_book_value) OVER (PARTITION BY period, instrument ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS total_book_value,
                ROW_NUMBER()                 OVER (PARTITION BY period, instrument ORDER BY date DESC) AS rn
            FROM data_orgn
        ),
        data2 AS (
            SELECT
                date,
                instrument,
                close,
                LAG(close) OVER (PARTITION BY instrument ORDER BY period) AS close_m_lag_1,
                float_market_cap,
                total_market_cap,
                total_book_value
            FROM data1
            WHERE rn = 1 
        )
        SELECT 
            date,
            instrument,
            close / close_m_lag_1 - 1 AS change_ratio,
            float_market_cap,
            total_book_value / total_market_cap AS bp_ratio
        FROM data2
    )
    SELECT
        date,
        instrument,
        change_ratio,
        float_market_cap,
        bp_ratio,
    FROM data_freq
    WHERE instrument NOT LIKE '%BJ%'
    ORDER BY date, instrument
    """
    return ff3_get_sql_str(sql_base)

In [None]:
sql_ff3_1m = ff3_factors_1m()

In [None]:
df_ff3_1m = dai.query(sql_ff3_1m, filters={'date':[sd, ed]}).df()
df_ff3_1m

Unnamed: 0,date,instrument,Return,MKT,SMB,HML
0,2024-07-01,000001.SZ,-0.101057,-0.102712,-0.134107,0.006398
1,2024-07-01,000002.SZ,-0.153938,-0.102712,-0.134107,0.006398
2,2024-07-01,000004.SZ,-0.332209,-0.102712,-0.134107,0.006398
3,2024-07-01,000006.SZ,-0.513995,-0.102712,-0.134107,0.006398
4,2024-07-01,000007.SZ,-0.279037,-0.102712,-0.134107,0.006398
...,...,...,...,...,...,...
20299,2024-11-01,688799.SH,-0.001496,0.003465,0.030298,-0.005812
20300,2024-11-01,688800.SH,-0.048823,0.003465,0.030298,-0.005812
20301,2024-11-01,688819.SH,-0.012216,0.003465,0.030298,-0.005812
20302,2024-11-01,688981.SH,-0.008858,0.003465,0.030298,-0.005812


### 3. FF3 Analysis

In [None]:
def ff3_analysis(df, n):
    
    from sklearn.linear_model import LinearRegression

    results = []

    for instrument, group in df.groupby('instrument'):

        group = group.sort_values('date') 

        beta_ICP  = []
        beta_MKT  = []
        beta_SMB  = []
        beta_HML  = []
        alpha     = []
        regr_resd = []
        regr_pred = []
        regr_r2   = []


        if len(group) < n:
            group['beta_ICP']  = [None] * len(group)
            group['beta_MKT']  = [None] * len(group)
            group['beta_SMB']  = [None] * len(group)
            group['beta_HML']  = [None] * len(group)
            group['alpha']     = [None] * len(group)
            group['regr_resd'] = [None] * len(group)
            group['regr_pred'] = [None] * len(group)
            group['regr_r2']   = [None] * len(group)
        
        else:

            for i in range(len(group) - n + 1):

                window = group.iloc[i:i+n]
                y = window['Return'].values
                X = window[['MKT', 'SMB', 'HML']].values

                model = LinearRegression()
                model.fit(X, y)

                beta_ICP.append(model.intercept_)
                beta_MKT.append(model.coef_[0])
                beta_SMB.append(model.coef_[1])
                beta_HML.append(model.coef_[2])
                alpha.append(model.intercept_ + y[-1] - model.predict(X)[-1])
                regr_pred.append(model.predict(X)[-1]) 
                regr_resd.append(y[-1] - model.predict(X)[-1]) 
                regr_r2.append(model.score(X, y))

            beta_ICP  = [None] * (n - 1) + beta_ICP
            beta_MKT  = [None] * (n - 1) + beta_MKT
            beta_SMB  = [None] * (n - 1) + beta_SMB
            beta_HML  = [None] * (n - 1) + beta_HML
            alpha     = [None] * (n - 1) + alpha
            regr_resd = [None] * (n - 1) + regr_resd
            regr_pred = [None] * (n - 1) + regr_pred
            regr_r2   = [None] * (n - 1) + regr_r2

            group['beta_ICP']  = beta_ICP
            group['beta_MKT']  = beta_MKT
            group['beta_SMB']  = beta_SMB
            group['beta_HML']  = beta_HML
            group['alpha']     = alpha
            group['regr_resd'] = regr_resd
            group['regr_pred'] = regr_pred
            group['regr_r2']   = regr_r2

        results.append(group)

    result_df = pd.concat(results).sort_index()
    return result_df

In [None]:
df_ff3_derived_1d = ff3_analysis(df_ff3_1d, 20)
df_ff3_derived_1d

Unnamed: 0,date,instrument,Return,MKT,SMB,HML,beta_ICP,beta_MKT,beta_SMB,beta_HML,alpha,regr_resd,regr_pred,regr_r2
0,2024-07-01,000001.SZ,0.019704,0.009363,-0.002061,0.009897,,,,,,,,
1,2024-07-01,000002.SZ,0.051948,0.009363,-0.002061,0.009897,,,,,,,,
2,2024-07-01,000004.SZ,-0.067010,0.009363,-0.002061,0.009897,,,,,,,,
3,2024-07-01,000006.SZ,0.005155,0.009363,-0.002061,0.009897,,,,,,,,
4,2024-07-01,000007.SZ,0.000000,0.009363,-0.002061,0.009897,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
505372,2024-11-25,688799.SH,0.014184,0.000948,0.023147,0.002566,-0.005781,1.029895,0.848759,0.269813,-0.007130,-0.001349,0.015534,0.623833
505373,2024-11-25,688800.SH,0.005558,0.000948,0.023147,0.002566,-0.009455,2.034651,0.650783,-1.089811,-0.008637,0.000817,0.004741,0.705752
505374,2024-11-25,688819.SH,0.020329,0.000948,0.023147,0.002566,-0.006382,1.128446,0.324784,0.061114,0.011585,0.017968,0.002362,0.374600
505375,2024-11-25,688981.SH,-0.029402,0.000948,0.023147,0.002566,-0.002912,2.022417,-1.065748,-1.100290,-0.003827,-0.000916,-0.028486,0.701021
