In [20]:
import pandas as pd
import numpy as np
import sys
sys.path.append('..')

from data_center.binance_handler import BinanceHandHandler
from backtest.performance_generater import *
from backtest.factor_analysis_tool import FactorAnalysisTool
from backtest.operators import *

import alphalens
import quantstats as qs
import matplotlib.pyplot as plt
import plotly.graph_objects as go

### 在不考慮成本的情況下觀察 period 變化：績效與摩擦成本的抵換

In [21]:
def _load_field(field_name):
    """Fetch one factor table by name, using a fresh handler for each request."""
    return BinanceHandHandler().get_factor_data(field_name)


# Raw per-symbol tables returned by the data handler.
Open = _load_field('open')
High = _load_field('high')
Low = _load_field('low')
Close = _load_field('close')
Volume = _load_field('volume')
Volvalue = _load_field('volvalue')
Takerbuy = _load_field('takerbuy')
Takerbuyvalue = _load_field('takerbuyvalue')

# Row-over-row percentage change of the close.
returns = Close.pct_change()
# The same percentage changes shifted up two rows: row t holds the change observed at row t+2.
expreturn = returns.shift(-2)
# BTC's shifted return series serves as the benchmark for every backtest.
Benchmark = expreturn['BTCUSDT']

In [22]:
### Remove symbols whose history is too short or whose data looks suspicious

raw_factor_list = [Open, High, Low, Close, Volume, Volvalue, Takerbuy, Takerbuyvalue, returns, expreturn]

comparison_date = pd.to_datetime('2022-01-01')

# Columns flagged by any table are removed from every table.
flagged_columns = set()

for raw_factor in raw_factor_list:
    n_rows = len(raw_factor)
    for column in raw_factor.columns:
        series = raw_factor[column]

        # History check: flag columns whose first non-null entry falls on or
        # after comparison_date. first_valid_index() returns None for an
        # all-NaN (or empty) column, which is flagged as well instead of
        # raising IndexError the way `.dropna().index[0]` would.
        first_valid = series.first_valid_index()
        if first_valid is None or first_valid >= comparison_date:
            flagged_columns.add(column)
            continue

        # Uniqueness check: flag columns whose share of distinct values is
        # at most 35 % of the rows. n_rows is >= 1 here because the column
        # has at least one valid entry.
        unique_ratio = series.nunique() / n_rows
        if unique_ratio <= 0.35:
            flagged_columns.add(column)

columns_to_delete = list(flagged_columns)
for raw_factor in raw_factor_list:
    raw_factor.drop(columns=columns_to_delete, inplace=True)

# Drop every row that still contains a NaN, independently for each table.
# The tables are mutated in place so the names bound earlier stay valid.
for raw_factor in raw_factor_list:
    raw_factor.dropna(inplace=True)

In [23]:
# Remove the ETF effect: exclude BTC and ETH from every table.
# errors='ignore' keeps the cell re-runnable: a second execution (or a table
# that already lost one of these columns) no longer raises KeyError.
for raw_factor in raw_factor_list:
    raw_factor.drop(columns=['BTCUSDT', 'ETHUSDT'], errors='ignore', inplace=True)

In [24]:
# Date window passed to every backtest and quantile analysis in this notebook.
start_time, end_time = '2020-01-01', '2024-04-05'

In [25]:
# Rolling Sharpe-style factor: rolling mean of returns over rolling standard deviation.
period = 21
rolling_returns = returns.rolling(window=period).mean()
rolling_std_dev = returns.rolling(window=period).std()
# Rows containing any NaN are dropped.
# NOTE(review): a zero rolling std would produce inf, which dropna() keeps - confirm this cannot occur.
sharpe_factor = (rolling_returns / rolling_std_dev).dropna()

# Arguments shared by the fee-inclusive backtest and the fee-free quantile analysis.
common_kwargs = dict(
    factor=sharpe_factor,
    expreturn=expreturn,
    strategy='LS',
    start_time=start_time,
    end_time=end_time,
    period_of_year=365,
    benchmark=Benchmark,
)

# Backtest with 0.04 % charged on both buys and sells.
sharpe_returns_by_period, _ = PerformanceGenerator(
    buy_fee=0.04/100,
    sell_fee=0.04/100,
    **common_kwargs
).backtest()

# Five-bucket quantile analysis without fees.
FactorAnalysisTool(buy_fee=0, sell_fee=0, **common_kwargs).quantile_analysis(5)

starttime: 2021-03-24 00:00:00 endtime:  2024-04-05 00:00:00
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+
|             | Cumprod Total Returns | Cumsum Total Returns | Sharpe Ratio | Annualized Ret | Max Drawdown | Volatility |  STD   | Turnover |
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+
| Performance |       141.47 %        |       97.59 %        |     1.07     |    22.18 %     |   22.84 %    |  20.76 %   | 1.31 % | 32.67 %  |
|  Benchmark  |        35.14 %        |       87.89 %        |     0.39     |     7.08 %     |   76.67 %    |  51.15 %   | 3.22 % |   nan    |
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+


Quantile 5 represents the highest factor value


### Momentum Factor

In [26]:
# Momentum factor: 20-row rolling sum of returns, with incomplete rows removed.
factor = sum_ret_factor = returns.rolling(window=20).sum().dropna()

# Arguments shared by the backtest and the quantile analysis.
common_kwargs = dict(
    factor=factor,
    expreturn=expreturn,
    strategy='LS',
    start_time=start_time,
    end_time=end_time,
    period_of_year=365,
    benchmark=Benchmark,
)

# Backtest with 0.04 % charged on both buys and sells.
sum_ret_returns_by_period, _ = PerformanceGenerator(
    buy_fee=0.04/100,
    sell_fee=0.04/100,
    **common_kwargs
).backtest()

# Five-bucket quantile analysis without fees.
FactorAnalysisTool(buy_fee=0, sell_fee=0, **common_kwargs).quantile_analysis(5)

starttime: 2021-03-23 00:00:00 endtime:  2024-04-05 00:00:00
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+
|             | Cumprod Total Returns | Cumsum Total Returns | Sharpe Ratio | Annualized Ret | Max Drawdown | Volatility |  STD   | Turnover |
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+
| Performance |       235.85 %        |       135.79 %       |     1.19     |    31.66 %     |   26.39 %    |  25.91 %   | 1.63 % | 30.29 %  |
|  Benchmark  |        32.60 %        |       86.01 %        |     0.38     |     6.61 %     |   76.67 %    |  51.14 %   | 3.22 % |   nan    |
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+


Quantile 5 represents the highest factor value


### 價量背離因子

In [27]:
# Price-volume factor built from returns and Volume with a window argument of 30.
# NOTE(review): ts_corr comes from backtest.operators; presumably a rolling correlation - confirm.
pv_factor = ts_corr(returns, Volume, 30)
factor = pv_factor

# Arguments shared by the backtest and the quantile analysis.
common_kwargs = dict(
    factor=factor,
    expreturn=expreturn,
    strategy='LS',
    start_time=start_time,
    end_time=end_time,
    period_of_year=365,
    benchmark=Benchmark,
)

# Backtest with 0.04 % charged on both buys and sells.
pv_returns_by_period, _ = PerformanceGenerator(
    buy_fee=0.04/100,
    sell_fee=0.04/100,
    **common_kwargs
).backtest()

# Five-bucket quantile analysis without fees.
FactorAnalysisTool(buy_fee=0, sell_fee=0, **common_kwargs).quantile_analysis(5)

starttime: 2021-03-03 00:00:00 endtime:  2024-04-05 00:00:00
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+
|             | Cumprod Total Returns | Cumsum Total Returns | Sharpe Ratio | Annualized Ret | Max Drawdown | Volatility |  STD   | Turnover |
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+
| Performance |       107.55 %        |       81.78 %        |     0.92     |    17.69 %     |   23.56 %    |  19.84 %   | 1.25 % | 22.04 %  |
|  Benchmark  |        43.35 %        |       94.96 %        |     0.41     |     8.36 %     |   76.67 %    |  51.19 %   | 3.22 % |   nan    |
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+


Quantile 5 represents the highest factor value


### Factor_bias

In [28]:
# Bias factor computed from Close with d=27 and the 'sma' method.
# NOTE(review): ts_bias comes from backtest.operators; exact definition not visible here.
bias_factor = ts_bias(df=Close, d=27, method='sma')
factor = bias_factor

# Arguments shared by the backtest and the quantile analysis.
common_kwargs = dict(
    factor=factor,
    expreturn=expreturn,
    strategy='LS',
    start_time=start_time,
    end_time=end_time,
    period_of_year=365,
    benchmark=Benchmark,
)

# Backtest with 0.04 % charged on both buys and sells.
bias_returns_by_period, _ = PerformanceGenerator(
    buy_fee=0.04/100,
    sell_fee=0.04/100,
    **common_kwargs
).backtest()

# Five-bucket quantile analysis without fees.
FactorAnalysisTool(buy_fee=0, sell_fee=0, **common_kwargs).quantile_analysis(5)

starttime: 2021-03-03 00:00:00 endtime:  2024-04-05 00:00:00
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+
|             | Cumprod Total Returns | Cumsum Total Returns | Sharpe Ratio | Annualized Ret | Max Drawdown | Volatility |  STD   | Turnover |
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+
| Performance |       179.16 %        |       121.14 %       |     0.93     |    25.73 %     |   29.17 %    |  28.95 %   | 1.82 % | 30.95 %  |
|  Benchmark  |        43.35 %        |       94.96 %        |     0.41     |     8.36 %     |   76.67 %    |  51.19 %   | 3.22 % |   nan    |
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+


Quantile 5 represents the highest factor value


### factor CMO

In [29]:
# CMO factor computed from Close with a window argument of 20.
# NOTE(review): ts_cmo comes from backtest.operators; exact definition not visible here.
cmo_factor = ts_cmo(Close, 20)
# The unfiltered cmo_factor is kept; only the backtest input has NaN rows removed.
factor = cmo_factor.dropna()

# Arguments shared by the backtest and the quantile analysis.
common_kwargs = dict(
    factor=factor,
    expreturn=expreturn,
    strategy='LS',
    start_time=start_time,
    end_time=end_time,
    period_of_year=365,
    benchmark=Benchmark,
)

# Backtest with 0.04 % charged on both buys and sells.
cmo_returns_by_period, _ = PerformanceGenerator(
    buy_fee=0.04/100,
    sell_fee=0.04/100,
    **common_kwargs
).backtest()

# Five-bucket quantile analysis without fees.
FactorAnalysisTool(buy_fee=0, sell_fee=0, **common_kwargs).quantile_analysis(5)

starttime: 2021-03-23 00:00:00 endtime:  2024-04-05 00:00:00
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+
|             | Cumprod Total Returns | Cumsum Total Returns | Sharpe Ratio | Annualized Ret | Max Drawdown | Volatility |  STD   | Turnover |
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+
| Performance |       194.78 %        |       118.72 %       |     1.22     |    27.82 %     |   22.36 %    |  22.05 %   | 1.39 % | 34.34 %  |
|  Benchmark  |        32.60 %        |       86.01 %        |     0.38     |     6.61 %     |   76.67 %    |  51.14 %   | 3.22 % |   nan    |
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+


Quantile 5 represents the highest factor value


### Factor COPP

In [30]:
# COPP factor computed from Close with all three period arguments set to 15.
# NOTE(review): ts_copp comes from backtest.operators; exact definition not visible here.
copp_factor = ts_copp(Close, roc1_period=15, roc2_period=15, wma_period=15)
# The unfiltered copp_factor is kept; only the backtest input has NaN rows removed.
factor = copp_factor.dropna()

# Run the backtest (PerformanceGenerator is assumed to be defined already),
# charging 0.04 % on both buys and sells.
copp_backtest_kwargs = dict(
    factor=factor,
    expreturn=expreturn,
    strategy='LS',
    buy_fee=0.04/100,
    sell_fee=0.04/100,
    start_time=start_time,
    end_time=end_time,
    period_of_year=365,
    benchmark=Benchmark,
)
copp_returns_by_period, _ = PerformanceGenerator(**copp_backtest_kwargs).backtest()

starttime: 2021-04-01 00:00:00 endtime:  2024-04-05 00:00:00
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+
|             | Cumprod Total Returns | Cumsum Total Returns | Sharpe Ratio | Annualized Ret | Max Drawdown | Volatility |  STD   | Turnover |
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+
| Performance |       132.01 %        |       96.06 %        |     0.94     |    21.24 %     |   23.47 %    |  23.46 %   | 1.48 % | 12.43 %  |
|  Benchmark  |        17.50 %        |       73.57 %        |     0.33     |     3.76 %     |   76.67 %    |  51.19 %   | 3.22 % |   nan    |
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+


In [31]:
# Pair each return series with the label it gets in the correlation table.
labelled_returns = [
    ('sharpe', sharpe_returns_by_period),
    ('sum_ret', sum_ret_returns_by_period),
    ('PV', pv_returns_by_period),
    ('bias', bias_returns_by_period),
    ('cmo', cmo_returns_by_period),
    ('copp', copp_returns_by_period),
    ('BTC', Benchmark),
]

# Align the series side by side and keep only rows where every series has a value.
df = pd.concat([series for _label, series in labelled_returns], axis=1).dropna()
df.columns = [label for label, _series in labelled_returns]

# Compute the pairwise correlation coefficients.
correlation_matrix = df.corr()

print(correlation_matrix)

           sharpe   sum_ret        PV      bias       cmo      copp       BTC
sharpe   1.000000  0.895007  0.582606  0.836579  0.945584  0.692836 -0.032687
sum_ret  0.895007  1.000000  0.655169  0.927127  0.913207  0.720955 -0.014047
PV       0.582606  0.655169  1.000000  0.660140  0.667319  0.434121 -0.002344
bias     0.836579  0.927127  0.660140  1.000000  0.893626  0.522732 -0.060454
cmo      0.945584  0.913207  0.667319  0.893626  1.000000  0.600613 -0.048079
copp     0.692836  0.720955  0.434121  0.522732  0.600613  1.000000  0.001489
BTC     -0.032687 -0.014047 -0.002344 -0.060454 -0.048079  0.001489  1.000000


In [32]:
# Composite factor: element-wise sum of the six single factors, added left to right.
# NOTE(review): the components are summed without normalisation - confirm their scales are comparable.
Multi_alpha = sharpe_factor
for component in (sum_ret_factor, bias_factor, cmo_factor, copp_factor, pv_factor):
    Multi_alpha = Multi_alpha + component

# Rows with any NaN are removed before backtesting.
factor = Multi_alpha.dropna()

# Arguments shared by the backtest and the quantile analysis.
common_kwargs = dict(
    factor=factor,
    expreturn=expreturn,
    strategy='LS',
    start_time=start_time,
    end_time=end_time,
    period_of_year=365,
    benchmark=Benchmark,
)

# Backtest with 0.04 % charged on both buys and sells.
Multi_returns_by_period, _ = PerformanceGenerator(
    buy_fee=0.04/100,
    sell_fee=0.04/100,
    **common_kwargs
).backtest()

# Five-bucket quantile analysis without fees.
FactorAnalysisTool(buy_fee=0, sell_fee=0, **common_kwargs).quantile_analysis(5)

starttime: 2021-04-02 00:00:00 endtime:  2024-04-05 00:00:00
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+
|             | Cumprod Total Returns | Cumsum Total Returns | Sharpe Ratio | Annualized Ret | Max Drawdown | Volatility |  STD   | Turnover |
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+
| Performance |       187.17 %        |       118.39 %       |     1.11     |    27.34 %     |   23.84 %    |  24.44 %   | 1.54 % | 21.18 %  |
|  Benchmark  |        21.40 %        |       76.78 %        |     0.34     |     4.54 %     |   76.67 %    |  51.19 %   | 3.22 % |   nan    |
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+


Quantile 5 represents the highest factor value


In [33]:
# Show the symbols that remain in the composite factor's columns.
Multi_alpha.columns.tolist()

['BNBUSDT',
 'AAVEUSDT',
 'AVAXUSDT',
 'ETCUSDT',
 'SOLUSDT',
 'ADAUSDT',
 'LINKUSDT',
 'BCHUSDT',
 'DOTUSDT',
 'LTCUSDT',
 'UNIUSDT',
 'XRPUSDT',
 'XLMUSDT',
 'MATICUSDT',
 'TRXUSDT',
 'DOGEUSDT',
 'DASHUSDT',
 'XEMUSDT',
 'ZECUSDT']

In [60]:
# Composite of four factors (copp_factor and pv_factor are intentionally left out here).
Multi_alpha = (
    sharpe_factor
    + sum_ret_factor
    + bias_factor
    + cmo_factor
)

# Cross-sectional percentile rank of each row.
ranked_df = Multi_alpha.dropna().rank(axis=1, pct=True)

# Blank out the middle of the distribution: ranks strictly between 0.2 and 0.8 become NaN.
in_middle_band = (ranked_df > 0.2) & (ranked_df < 0.8)
filtered_df = ranked_df.mask(in_middle_band, np.nan)
factor = filtered_df['2021-01-01':]

# Backtest with 0.04 % charged on both buys and sells.
quantile_backtest_kwargs = dict(
    factor=factor,
    expreturn=expreturn,
    strategy='LS',
    buy_fee=0.04/100,
    sell_fee=0.04/100,
    start_time=start_time,
    end_time=end_time,
    period_of_year=365,
    benchmark=Benchmark,
)
Multi_quantile_returns_by_period, _ = PerformanceGenerator(**quantile_backtest_kwargs).backtest()

starttime: 2021-03-29 00:00:00 endtime:  2024-04-05 00:00:00
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+
|             | Cumprod Total Returns | Cumsum Total Returns | Sharpe Ratio | Annualized Ret | Max Drawdown | Volatility |  STD   | Turnover |
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+
| Performance |       281.80 %        |       149.20 %       |     1.29     |    35.77 %     |   23.32 %    |  26.39 %   | 1.66 % |  5.19 %  |
|  Benchmark  |        17.98 %        |       73.97 %        |     0.33     |     3.85 %     |   76.67 %    |  51.12 %   | 3.22 % |   nan    |
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+


In [61]:
import pandas as pd
from scipy.stats import spearmanr
from scipy import stats
# Factor evaluated in the IC analysis of this cell: bias_factor.
factor_df = bias_factor

# 计算每个日期的IC值
def calculate_ic(factor_df, returns_df):
    """Compute the per-date rank information coefficient (IC) of a factor.

    For every date in ``factor_df.index`` that also appears in
    ``returns_df.index``, the Spearman rank correlation is taken between that
    date's factor row and return row, restricted to the columns that are
    non-null in both.

    Parameters
    ----------
    factor_df : pandas.DataFrame
        Factor values, one row per date and one column per asset.
    returns_df : pandas.DataFrame
        Returns to correlate against, laid out like ``factor_df``.

    Returns
    -------
    pandas.Series
        float64 series indexed like ``factor_df``. Dates missing from
        ``returns_df`` or with fewer than two usable assets stay NaN.
    """
    # An explicit float dtype avoids the object-dtype default of an empty
    # Series, so mean()/std() downstream operate on real floats.
    ic_series = pd.Series(index=factor_df.index, dtype=float)
    for date in factor_df.index:
        # Skip dates that have no matching row in returns_df.
        if date not in returns_df.index:
            continue
        factor_values = factor_df.loc[date].dropna()
        return_values = returns_df.loc[date].dropna()
        # Only assets with data in both frames take part in the correlation.
        valid_index = factor_values.index.intersection(return_values.index)
        # A rank correlation needs at least two observations; leave NaN otherwise.
        if len(valid_index) < 2:
            continue
        ic, _ = spearmanr(factor_values[valid_index], return_values[valid_index])
        ic_series[date] = ic
    return ic_series

# Per-date IC of the selected factor against expreturn (the shifted return table).
ic_series = calculate_ic(factor_df, expreturn)

In [62]:
# Summary statistics of the IC series, each computed once and reused.
n = ic_series.count()  # number of valid (non-NaN) IC values
mean_ic = ic_series.mean()
std_ic = ic_series.std()

ic_mean = mean_ic
print("Mean IC:", ic_mean)

# Information ratio: mean IC divided by its standard deviation.
ir = mean_ic / std_ic
print("IR:", ir)

# t-test of the mean IC against zero.
stderr = std_ic / (n ** 0.5)
t_stat = mean_ic / stderr
p_value = 2 * stats.t.sf(abs(t_stat), df=n-1)  # two-tailed test
print('T stat:', t_stat )
print('p value:', p_value)

Mean IC: -0.03316842058841176
IR: -0.11172967582398388
T stat: -3.7191048209728694
p value: 0.0002099533215656203


In [63]:
# Standard deviation of each return series, printed next to its label.
for label, series in (
    ('BTC:', Benchmark),
    ('Multi:', Multi_returns_by_period),
    ('Multi_quantile:', Multi_quantile_returns_by_period),
):
    print(label, series.std())


BTC: 0.03509821768679622
Multi: 0.01539570966306669
Multi_quantile: 0.01662141084247306


### 給ARAS

In [64]:
import pandas as pd
import matplotlib.pyplot as plt



# Blend: 70 % quantile-filtered composite strategy returns plus 30 % benchmark returns.
profolio_manipulate = 0.7 * Multi_quantile_returns_by_period + 0.3 * Benchmark

# Performance report of the blend against the benchmark, with 365 periods per year.
summary_df = get_performance_report(
    profolio_manipulate,
    benchmark=Benchmark,
    period_of_year=365,
)

starttime: 2021-03-29 00:00:00 endtime:  2024-04-05 00:00:00
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+
|             | Cumprod Total Returns | Cumsum Total Returns | Sharpe Ratio | Annualized Ret | Max Drawdown | Volatility |  STD   |
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+
| Performance |       214.87 %        |       126.63 %       |     1.49     |    46.11 %     |   25.43 %    |  28.09 %   | 1.47 % |
|  Benchmark  |        17.98 %        |       73.97 %        |     0.40     |     5.62 %     |   76.67 %    |  61.52 %   | 3.22 % |
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+
