In [2]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

In [33]:
# === 參數設定 ===
estimation_window_days = 90
event_window = [-1, 0, 1]

# === 讀取資料 ===
# 以 EVA 為例，可重複使用於其他公司
eva_df = pd.read_csv('./stock/return/6757_return.csv', parse_dates=['Date'])  # 包含 R_i,t
taiex_df = pd.read_csv('./stock/return/taiex_return.csv', parse_dates=['Date'])  # 包含 R_m,t
event_dates = pd.read_csv('./final/tiger.csv', parse_dates=['Event Date'])  # 僅含 Event Date 欄位

event_dates

Unnamed: 0,airline,Event Date,Label,Count,Avg Sentiment,Avg Positive Sentiment,Avg Negative Sentiment,Positive Count,Negative Count,Count[-1:+1],Avg Sentiment[-1:+1],Avg Positive Sentiment[-1:+1],Avg Negative Sentiment[-1:+1],Positive Count[-1:+1],Negative Count[-1:+1],Corporation size,LEV,Current ratio,EPS
0,tiger,2023-12-10,Label_2,6.0,-0.290922,0.0,-0.290922,0.0,6.0,6.0,-0.290922,0.0,-0.290922,0.0,6.0,810.0,0.7656,0.9534,4.27
1,tiger,2023-12-10,Label_4,5.0,-0.112486,0.056978,-0.154852,1.0,4.0,5.0,-0.112486,0.056978,-0.154852,1.0,4.0,810.0,0.7656,0.9534,4.27
2,tiger,2023-12-10,Label_5,4.0,-0.30907,0.27654,-0.504274,1.0,3.0,4.0,-0.30907,0.27654,-0.504274,1.0,3.0,810.0,0.7656,0.9534,4.27
3,tiger,2024-01-02,Label_2,5.0,-0.514286,0.0,-0.514286,0.0,5.0,5.0,-0.514286,0.0,-0.514286,0.0,5.0,795.0,0.6591,1.6242,6.16
4,tiger,2024-04-29,Label_2,4.0,-0.100835,0.026291,-0.227962,2.0,2.0,4.0,-0.100835,0.026291,-0.227962,2.0,2.0,795.0,0.6591,1.6242,6.16
5,tiger,2024-05-07,Label_4,5.0,-0.261078,0.056261,-0.340412,1.0,4.0,5.0,-0.261078,0.056261,-0.340412,1.0,4.0,795.0,0.6591,1.6242,6.16
6,tiger,2024-07-15,Label_5,4.0,-0.25945,0.233168,-0.423656,1.0,3.0,4.0,-0.25945,0.233168,-0.423656,1.0,3.0,795.0,0.6591,1.6242,6.16
7,tiger,2024-07-26,Label_1,3.0,-0.013849,0.413278,-0.227413,1.0,2.0,3.0,-0.013849,0.413278,-0.227413,1.0,2.0,795.0,0.6591,1.6242,6.16
8,tiger,2024-07-26,Label_3,4.0,-0.242732,0.017657,-0.329528,1.0,3.0,4.0,-0.242732,0.017657,-0.329528,1.0,3.0,795.0,0.6591,1.6242,6.16
9,tiger,2024-07-26,Label_7,3.0,-0.23667,0.086007,-0.398008,1.0,2.0,3.0,-0.23667,0.086007,-0.398008,1.0,2.0,795.0,0.6591,1.6242,6.16


In [34]:
# 合併市場報酬
merged = pd.merge(eva_df, taiex_df, on='Date', suffixes=('_i', '_m'))
merged.sort_values('Date', inplace=True)
trading_days = merged['Date'].reset_index(drop=True)

# === 計算 SAAR 與 SCAAR ===
results = []

for idx, row in event_dates.iterrows():
    original_event_date = row['Event Date']

    # 如果事件日不是交易日，調整為最近往後的交易日
    if original_event_date not in trading_days.values:
        future_dates = trading_days[trading_days > original_event_date]
        if future_dates.empty:
            continue
        event_date = future_dates.iloc[0]
    else:
        event_date = original_event_date

    # 計算估計期
    estimation_start = event_date - pd.Timedelta(days=estimation_window_days)
    estimation_end = event_date - pd.Timedelta(days=2)
    est_window = merged[(merged['Date'] >= estimation_start) & (merged['Date'] <= estimation_end)]

    if len(est_window) < 30:
        continue

    # 市場模型
    X = sm.add_constant(est_window['Return_m'])
    y = est_window['Return_i']
    model = sm.OLS(y, X).fit()
    alpha, beta = model.params

    # 計算 AR, SAR
    ar_list = []
    sar_list = []

    for offset in event_window:
        target_date = event_date + pd.Timedelta(days=offset)

        # 調整非交易日：0 與 +1 用未來最近交易日，-1 用過去最近交易日
        if target_date not in trading_days.values:
            if offset < 0:
                past_dates = trading_days[trading_days < target_date]
                if past_dates.empty:
                    ar_list.append(np.nan)
                    sar_list.append(np.nan)
                    continue
                target_date = past_dates.iloc[-1]
            else:
                future_dates = trading_days[trading_days > target_date]
                if future_dates.empty:
                    ar_list.append(np.nan)
                    sar_list.append(np.nan)
                    continue
                target_date = future_dates.iloc[0]

        row_t = merged[merged['Date'] == target_date]
        if row_t.empty:
            ar_list.append(np.nan)
            sar_list.append(np.nan)
            continue

        r_i = row_t['Return_i'].values[0]
        r_m = row_t['Return_m'].values[0]
        ar = r_i - (alpha + beta * r_m)
        ar_list.append(ar)

        sigma = np.std(y - model.predict(X), ddof=1)
        sar = ar / sigma if sigma != 0 else np.nan
        sar_list.append(sar)

    saar_dict = {f'SAAR_t{offset}': sar_list[i] for i, offset in enumerate(event_window)}
    scaar_n1_0 = np.nansum(sar_list[0:2])
    scaar_n1_p1 = np.nansum(sar_list)

    result = {
        'Original Event Date': original_event_date,
        'Adjusted Event Date': event_date,
        'SCAAR[-1,0]': scaar_n1_0,
        'SCAAR[-1,+1]': scaar_n1_p1
    }
    result.update(saar_dict)
    results.append(result)

# === 輸出 ===
df_result = pd.DataFrame(results)
df_result.to_csv('tiger_saar_scaar.csv', index=False)


In [35]:
df_result

Unnamed: 0,Original Event Date,Adjusted Event Date,"SCAAR[-1,0]","SCAAR[-1,+1]",SAAR_t-1,SAAR_t0,SAAR_t1
0,2023-12-10,2023-12-11,0.599635,0.71841,1.228425,-0.628791,0.118775
1,2023-12-10,2023-12-11,0.599635,0.71841,1.228425,-0.628791,0.118775
2,2023-12-10,2023-12-11,0.599635,0.71841,1.228425,-0.628791,0.118775
3,2024-01-02,2024-01-02,-0.29975,-0.283694,-0.786979,0.487228,0.016056
4,2024-04-29,2024-04-29,2.600922,2.087166,1.948928,0.651994,-0.513756
5,2024-05-07,2024-05-07,5.032724,7.490714,5.408064,-0.37534,2.45799
6,2024-07-15,2024-07-15,-3.885956,-5.518335,-1.671216,-2.214741,-1.632379
7,2024-07-26,2024-07-26,-2.429058,-3.567496,-0.152894,-2.276164,-1.138438
8,2024-07-26,2024-07-26,-2.429058,-3.567496,-0.152894,-2.276164,-1.138438
9,2024-07-26,2024-07-26,-2.429058,-3.567496,-0.152894,-2.276164,-1.138438
