In [1]:
import pandas as pd
import numpy as np
import sys
sys.path.append('..')

from data_center.binance_handler import BinanceHandHandler
from backtest.performance_generater import *
from backtest.factor_analysis_tool import FactorAnalysisTool
from backtest.operators import *

import alphalens
import quantstats as qs
import matplotlib.pyplot as plt
import plotly.graph_objects as go

In [2]:
# Load every raw market-data field, each through its own handler instance.
# The frames hold one column per symbol (e.g. 'BTCUSDT').
(
    Open,
    High,
    Low,
    Close,
    Volume,
    Volvalue,
    Takerbuy,
    Takerbuyvalue,
) = (
    BinanceHandHandler().get_factor_data(field_name)
    for field_name in (
        'open',
        'high',
        'low',
        'close',
        'volume',
        'volvalue',
        'takerbuy',
        'takerbuyvalue',
    )
)

# Row-over-row percentage change of the close price.
returns = Close.pct_change()
# The same percentage change moved two rows earlier, so the value stored on a
# given row is a return realised two rows later.
expreturn = returns.shift(-2)
# The benchmark is the BTCUSDT column of that forward-shifted return.
Benchmark = expreturn['BTCUSDT']

In [3]:
### Remove symbols whose history is too short or whose data looks suspicious.
#
# The frames are edited in place on purpose: later cells keep using the
# original names (Close, Volume, returns, expreturn, ...), which refer to the
# very same objects that are stored in raw_factor_list.

raw_factor_list = [Open, High, Low, Close, Volume, Volvalue, Takerbuy, Takerbuyvalue, returns, expreturn]

# A symbol must have its first non-null observation strictly before this date.
comparison_date = pd.to_datetime('2022-01-01')
# A symbol is flagged when unique values / row count is at or below this ratio.
min_unique_ratio = 0.35

# Collected as a set so a symbol flagged by several frames is stored once.
columns_to_delete = set()

for raw_factor in raw_factor_list:
    n_rows = len(raw_factor)
    for column in raw_factor.columns:
        series = raw_factor[column]

        # Timestamp check. first_valid_index() returns None for a column that
        # is entirely NaN (or for an empty frame); such a column is flagged
        # instead of raising IndexError as `dropna().index[0]` would.
        first_valid = series.first_valid_index()
        if first_valid is None or first_valid >= comparison_date:
            columns_to_delete.add(column)
            continue

        # Unique-value check. n_rows cannot be 0 here because the column has
        # at least one valid observation.
        if series.nunique() / n_rows <= min_unique_ratio:
            columns_to_delete.add(column)

columns_to_delete = list(columns_to_delete)

for raw_factor in raw_factor_list:
    # errors='ignore': a symbol flagged in one frame may be absent from another.
    raw_factor.drop(columns=columns_to_delete, errors='ignore', inplace=True)
    # Then discard every row that still contains a NaN in any remaining column.
    raw_factor.dropna(inplace=True)

In [20]:
# Remove the ETF effect: exclude BTCUSDT and ETHUSDT from every factor frame.
# errors='ignore' keeps the cell safe to re-run once the columns are already
# gone (a bare `del` raises KeyError on the second execution).
for raw_factor in raw_factor_list:
    raw_factor.drop(columns=['BTCUSDT', 'ETHUSDT'], errors='ignore', inplace=True)

In [21]:
# Backtest window handed to the PerformanceGenerator / FactorAnalysisTool calls.
start_time, end_time = '2020-01-01', '2024-04-05'

In [22]:
# Rolling Sharpe-style factor: rolling mean of returns over `period` rows
# divided by the rolling standard deviation over the same window.
# Note: a zero rolling std yields +/-inf, which dropna() does not remove.
period = 21
returns_window = returns.rolling(window=period)
rolling_returns = returns_window.mean()
rolling_std_dev = returns_window.std()
sharpe_factor = (rolling_returns / rolling_std_dev).dropna()

# Arguments shared by the backtest and the quantile analysis.
shared_kwargs = dict(
    factor=sharpe_factor,
    expreturn=expreturn,
    strategy='LS',
    start_time=start_time,
    end_time=end_time,
    period_of_year=365,
    benchmark=Benchmark,
)

# Backtest with a 0.04% fee on each side.
sharpe_returns_by_period, _ = PerformanceGenerator(
    buy_fee=0.04/100,
    sell_fee=0.04/100,
    **shared_kwargs,
).backtest()

# Quantile analysis (5 buckets) is run without fees.
FactorAnalysisTool(buy_fee=0, sell_fee=0, **shared_kwargs).quantile_analysis(5)

starttime: 2021-03-24 00:00:00 endtime:  2024-04-05 00:00:00
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+
|             | Cumprod Total Returns | Cumsum Total Returns | Sharpe Ratio | Annualized Ret | Max Drawdown | Volatility |  STD   | Turnover |
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+
| Performance |       139.91 %        |       96.93 %        |     1.06     |    22.00 %     |   22.84 %    |  20.75 %   | 1.31 % | 32.67 %  |
|  Benchmark  |         nan %         |        nan %         |     0.38     |     nan %      |   76.67 %    |  51.19 %   | 3.22 % |   nan    |
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+


Quantile 5 represents the highest factor value


### Momentum Factor

In [23]:
# Momentum factor: sum of returns over a 20-row rolling window.
sum_ret_factor = returns.rolling(window=20).sum().dropna()
factor = sum_ret_factor

# Arguments shared by the backtest and the quantile analysis.
shared_kwargs = dict(
    factor=factor,
    expreturn=expreturn,
    strategy='LS',
    start_time=start_time,
    end_time=end_time,
    period_of_year=365,
    benchmark=Benchmark,
)

# Backtest with a 0.04% fee on each side.
sum_ret_returns_by_period, _ = PerformanceGenerator(
    buy_fee=0.04/100,
    sell_fee=0.04/100,
    **shared_kwargs,
).backtest()

# Quantile analysis (5 buckets) is run without fees.
FactorAnalysisTool(buy_fee=0, sell_fee=0, **shared_kwargs).quantile_analysis(5)

starttime: 2021-03-23 00:00:00 endtime:  2024-04-05 00:00:00
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+
|             | Cumprod Total Returns | Cumsum Total Returns | Sharpe Ratio | Annualized Ret | Max Drawdown | Volatility |  STD   | Turnover |
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+
| Performance |       233.07 %        |       134.95 %       |     1.18     |    31.41 %     |   26.39 %    |  25.91 %   | 1.63 % | 30.29 %  |
|  Benchmark  |         nan %         |        nan %         |     0.37     |     nan %      |   76.67 %    |  51.18 %   | 3.22 % |   nan    |
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+


Quantile 5 represents the highest factor value


### 價量背離因子

In [24]:
# Price/volume factor: ts_corr of returns and Volume with a window argument of
# 30 (presumably a rolling time-series correlation -- confirm in backtest.operators).
pv_factor = ts_corr(returns, Volume, 30)
factor = pv_factor

# Arguments shared by the backtest and the quantile analysis.
shared_kwargs = dict(
    factor=factor,
    expreturn=expreturn,
    strategy='LS',
    start_time=start_time,
    end_time=end_time,
    period_of_year=365,
    benchmark=Benchmark,
)

# Backtest with a 0.04% fee on each side.
pv_returns_by_period, _ = PerformanceGenerator(
    buy_fee=0.04/100,
    sell_fee=0.04/100,
    **shared_kwargs,
).backtest()

# Quantile analysis (5 buckets) is run without fees.
FactorAnalysisTool(buy_fee=0, sell_fee=0, **shared_kwargs).quantile_analysis(5)

starttime: 2021-03-03 00:00:00 endtime:  2024-04-05 00:00:00
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+
|             | Cumprod Total Returns | Cumsum Total Returns | Sharpe Ratio | Annualized Ret | Max Drawdown | Volatility |  STD   | Turnover |
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+
| Performance |       105.43 %        |       80.75 %        |     0.91     |    17.42 %     |   23.56 %    |  19.84 %   | 1.25 % | 22.04 %  |
|  Benchmark  |         nan %         |        nan %         |     0.40     |     nan %      |   76.67 %    |  51.23 %   | 3.23 % |   nan    |
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+


Quantile 5 represents the highest factor value


### Bias Factor

In [25]:
# Bias factor: ts_bias applied to the close price with d=27 and method='sma'.
bias_factor = ts_bias(df=Close, d=27, method='sma')
factor = bias_factor

# Arguments shared by the backtest and the quantile analysis.
shared_kwargs = dict(
    factor=factor,
    expreturn=expreturn,
    strategy='LS',
    start_time=start_time,
    end_time=end_time,
    period_of_year=365,
    benchmark=Benchmark,
)

# Backtest with a 0.04% fee on each side.
bias_returns_by_period, _ = PerformanceGenerator(
    buy_fee=0.04/100,
    sell_fee=0.04/100,
    **shared_kwargs,
).backtest()

# Quantile analysis (5 buckets) is run without fees.
FactorAnalysisTool(buy_fee=0, sell_fee=0, **shared_kwargs).quantile_analysis(5)

starttime: 2021-03-03 00:00:00 endtime:  2024-04-05 00:00:00
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+
|             | Cumprod Total Returns | Cumsum Total Returns | Sharpe Ratio | Annualized Ret | Max Drawdown | Volatility |  STD   | Turnover |
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+
| Performance |       179.03 %        |       121.08 %       |     0.93     |    25.71 %     |   29.17 %    |  28.94 %   | 1.82 % | 30.95 %  |
|  Benchmark  |         nan %         |        nan %         |     0.40     |     nan %      |   76.67 %    |  51.23 %   | 3.23 % |   nan    |
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+


Quantile 5 represents the highest factor value


### CMO Factor

In [26]:
# CMO factor: ts_cmo applied to the close price with a window argument of 20.
# cmo_factor keeps its NaN rows; only the copy used for this backtest drops them.
cmo_factor = ts_cmo(Close, 20)
factor = cmo_factor.dropna()

# Arguments shared by the backtest and the quantile analysis.
shared_kwargs = dict(
    factor=factor,
    expreturn=expreturn,
    strategy='LS',
    start_time=start_time,
    end_time=end_time,
    period_of_year=365,
    benchmark=Benchmark,
)

# Backtest with a 0.04% fee on each side.
cmo_returns_by_period, _ = PerformanceGenerator(
    buy_fee=0.04/100,
    sell_fee=0.04/100,
    **shared_kwargs,
).backtest()

# Quantile analysis (5 buckets) is run without fees.
FactorAnalysisTool(buy_fee=0, sell_fee=0, **shared_kwargs).quantile_analysis(5)

starttime: 2021-03-23 00:00:00 endtime:  2024-04-05 00:00:00
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+
|             | Cumprod Total Returns | Cumsum Total Returns | Sharpe Ratio | Annualized Ret | Max Drawdown | Volatility |  STD   | Turnover |
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+
| Performance |       193.10 %        |       118.14 %       |     1.22     |    27.65 %     |   22.36 %    |  22.04 %   | 1.39 % | 34.34 %  |
|  Benchmark  |         nan %         |        nan %         |     0.37     |     nan %      |   76.67 %    |  51.18 %   | 3.22 % |   nan    |
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+


Quantile 5 represents the highest factor value


### COPP Factor

In [27]:
# COPP factor: ts_copp applied to the close price, all three periods set to 15.
# copp_factor keeps its NaN rows; only the copy used for this backtest drops them.
copp_factor = ts_copp(Close, roc1_period=15, roc2_period=15, wma_period=15)
factor = copp_factor.dropna()

# Run the backtest with a 0.04% fee on each side.
backtest_kwargs = dict(
    factor=factor,
    expreturn=expreturn,
    strategy='LS',
    buy_fee=0.04/100,
    sell_fee=0.04/100,
    start_time=start_time,
    end_time=end_time,
    period_of_year=365,
    benchmark=Benchmark,
)
copp_returns_by_period, _ = PerformanceGenerator(**backtest_kwargs).backtest()

starttime: 2021-04-01 00:00:00 endtime:  2024-04-05 00:00:00
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+
|             | Cumprod Total Returns | Cumsum Total Returns | Sharpe Ratio | Annualized Ret | Max Drawdown | Volatility |  STD   | Turnover |
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+
| Performance |       129.81 %        |       95.11 %        |     0.93     |    20.98 %     |   23.47 %    |  23.46 %   | 1.48 % | 12.43 %  |
|  Benchmark  |         nan %         |        nan %         |     0.32     |     nan %      |   76.67 %    |  51.23 %   | 3.23 % |   nan    |
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+


In [28]:
# Put every strategy's per-period returns next to the benchmark, labelling each
# column through the mapping keys, and keep only rows where all are present.
returns_by_label = {
    'sharpe': sharpe_returns_by_period,
    'sum_ret': sum_ret_returns_by_period,
    'PV': pv_returns_by_period,
    'bias': bias_returns_by_period,
    'cmo': cmo_returns_by_period,
    'copp': copp_returns_by_period,
    'BTC': Benchmark,
}
df = pd.concat(returns_by_label, axis=1).dropna()

# Pairwise correlation coefficients between the return streams.
correlation_matrix = df.corr()

print(correlation_matrix)

           sharpe   sum_ret        PV      bias       cmo      copp       BTC
sharpe   1.000000  0.894983  0.582361  0.836528  0.945558  0.692656 -0.032982
sum_ret  0.894983  1.000000  0.655017  0.927146  0.913176  0.720828 -0.014273
PV       0.582361  0.655017  1.000000  0.660087  0.667168  0.433803 -0.002665
bias     0.836528  0.927146  0.660087  1.000000  0.893595  0.522566 -0.060586
cmo      0.945558  0.913176  0.667168  0.893595  1.000000  0.600407 -0.048318
copp     0.692656  0.720828  0.433803  0.522566  0.600407  1.000000  0.001208
BTC     -0.032982 -0.014273 -0.002665 -0.060586 -0.048318  0.001208  1.000000


In [29]:
# Combined factor: plain sum of the six individual factors.
# NOTE(review): the factors are added without any rescaling; if the operators
# return values on different scales, the larger-scaled ones will dominate the
# sum -- confirm this is intended.
Multi_alpha = (
    sharpe_factor
    + sum_ret_factor
    + bias_factor
    + cmo_factor
    + copp_factor
    + pv_factor
)

# Rows where any symbol lacks a combined value are discarded.
factor = Multi_alpha.dropna()

# Arguments shared by the backtest and the quantile analysis.
shared_kwargs = dict(
    factor=factor,
    expreturn=expreturn,
    strategy='LS',
    start_time=start_time,
    end_time=end_time,
    period_of_year=365,
    benchmark=Benchmark,
)

# Backtest with a 0.04% fee on each side.
Multi_returns_by_period, _ = PerformanceGenerator(
    buy_fee=0.04/100,
    sell_fee=0.04/100,
    **shared_kwargs,
).backtest()

# Quantile analysis (5 buckets) is run without fees.
FactorAnalysisTool(buy_fee=0, sell_fee=0, **shared_kwargs).quantile_analysis(5)

starttime: 2021-04-02 00:00:00 endtime:  2024-04-05 00:00:00
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+
|             | Cumprod Total Returns | Cumsum Total Returns | Sharpe Ratio | Annualized Ret | Max Drawdown | Volatility |  STD   | Turnover |
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+
| Performance |       185.01 %        |       117.63 %       |     1.10     |    27.12 %     |   23.84 %    |  24.43 %   | 1.54 % | 21.18 %  |
|  Benchmark  |         nan %         |        nan %         |     0.33     |     nan %      |   76.67 %    |  51.23 %   | 3.23 % |   nan    |
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+


Quantile 5 represents the highest factor value


In [30]:
# Symbols that remain in the combined factor.
Multi_alpha.columns.tolist()

['BNBUSDT',
 'AAVEUSDT',
 'AVAXUSDT',
 'ETCUSDT',
 'SOLUSDT',
 'ADAUSDT',
 'LINKUSDT',
 'BCHUSDT',
 'DOTUSDT',
 'LTCUSDT',
 'UNIUSDT',
 'XRPUSDT',
 'XLMUSDT',
 'MATICUSDT',
 'TRXUSDT',
 'DOGEUSDT',
 'DASHUSDT',
 'XEMUSDT',
 'ZECUSDT']

In [31]:
# Four-factor combination (copp_factor and pv_factor are left out here).
# This rebinds Multi_alpha, which previously held the six-factor sum, so every
# cell executed afterwards sees the four-factor version.
Multi_alpha = sharpe_factor + sum_ret_factor + bias_factor + cmo_factor

# Cross-sectional percentile rank of each symbol on every row.
ranked_df = Multi_alpha.dropna().rank(axis=1, pct=True)

# Keep only the tails (rank <= 0.2 or rank >= 0.8); everything strictly
# between 0.2 and 0.8 becomes NaN.
in_tails = (ranked_df <= 0.2) | (ranked_df >= 0.8)
filtered_df = ranked_df.where(in_tails)

# Restrict the factor to rows from 2023-01-01 onwards.
factor = filtered_df.loc['2023-01-01':]

# Backtest with a 0.04% fee on each side.
backtest_kwargs = dict(
    factor=factor,
    expreturn=expreturn,
    strategy='LS',
    buy_fee=0.04/100,
    sell_fee=0.04/100,
    start_time=start_time,
    end_time=end_time,
    period_of_year=365,
    benchmark=Benchmark,
)
Multi_quantile_returns_by_period, _ = PerformanceGenerator(**backtest_kwargs).backtest()

starttime: 2023-01-01 00:00:00 endtime:  2024-04-05 00:00:00
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+
|             | Cumprod Total Returns | Cumsum Total Returns | Sharpe Ratio | Annualized Ret | Max Drawdown | Volatility |  STD   | Turnover |
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+
| Performance |        20.79 %        |       23.39 %        |     0.58     |    10.88 %     |   21.05 %    |  22.04 %   | 1.39 % |  4.91 %  |
|  Benchmark  |         nan %         |        nan %         |     2.12     |     nan %      |   20.01 %    |  40.09 %   | 2.53 % |   nan    |
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+----------+


In [32]:
import pandas as pd
from scipy.stats import spearmanr
from scipy import stats

# The IC below is computed on whatever Multi_alpha holds at execution time;
# Multi_alpha is assigned in more than one earlier cell, so run order matters.
factor_df = Multi_alpha

# Compute the IC value for every date.
def calculate_ic(factor_df, returns_df):
    """Return the per-date Spearman rank correlation of factor values and returns.

    Parameters
    ----------
    factor_df : pandas.DataFrame
        Factor values, one row per date and one column per asset.
    returns_df : pandas.DataFrame
        Returns laid out the same way as ``factor_df``.

    Returns
    -------
    pandas.Series
        Float series indexed like ``factor_df``. An entry is NaN when the date
        is missing from ``returns_df`` or when fewer than two assets have both
        a factor value and a return on that date.
    """
    # Explicit float dtype: a Series built from an index alone defaults to
    # object dtype on pandas >= 2.0 and emits a warning on earlier versions.
    ic_series = pd.Series(index=factor_df.index, dtype=float)

    for date in factor_df.index:
        # Skip dates that have no matching row in returns_df.
        if date not in returns_df.index:
            continue

        factor_values = factor_df.loc[date]
        return_values = returns_df.loc[date]

        # Only assets with data in both frames take part in the correlation.
        valid_index = factor_values.dropna().index.intersection(
            return_values.dropna().index
        )
        # A rank correlation is undefined for fewer than two observations.
        if len(valid_index) < 2:
            continue

        ic, _ = spearmanr(factor_values[valid_index], return_values[valid_index])
        ic_series.loc[date] = ic

    return ic_series

# Per-date IC of the combined factor against expreturn.
ic_series = calculate_ic(factor_df=factor_df, returns_df=expreturn)

In [33]:
# Summary statistics of the IC series, each computed once and reused.
mean_ic = ic_series.mean()
std_ic = ic_series.std()
n = ic_series.count()  # number of non-NaN IC values

ic_mean = mean_ic
print("Mean IC:", ic_mean)

# Mean IC divided by the standard deviation of the IC.
ir = mean_ic / std_ic
print("IR:", ir)

# t statistic of the mean IC and its two-sided p-value.
stderr = std_ic / n ** 0.5
t_stat = mean_ic / stderr
p_value = stats.t.sf(abs(t_stat), df=n - 1) * 2
print('T stat:', t_stat )
print('p value:', p_value)

Mean IC: -0.02701286965576413
IR: -0.09805420418859148
T stat: -3.2550451469805926
p value: 0.00116826093754675


In [34]:
# Standard deviation of each return stream, printed one per line.
for label, period_returns in (
    ('BTC:', Benchmark),
    ('Multi:', Multi_returns_by_period),
    ('Multi_quantile:', Multi_quantile_returns_by_period),
):
    print(label, period_returns.std())


BTC: 0.035135173509368636
Multi: 0.01539066585224577
Multi_quantile: 0.013881728728236625


### 給ARAS

In [35]:
import pandas as pd
import matplotlib.pyplot as plt



# Blend: 70% quantile-filtered multi-factor returns plus 30% benchmark.
# pandas aligns the two series on their index before adding, so labels present
# in only one of them produce NaN.
profolio_manipulate = 0.7 * Multi_quantile_returns_by_period + 0.3 * Benchmark

# Performance report of the blend against the benchmark.
summary_df = get_performance_report(
    profolio_manipulate,
    benchmark=Benchmark,
    period_of_year=365,
)

starttime: 2023-01-01 00:00:00 endtime:  2024-04-03 00:00:00
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+
|             | Cumprod Total Returns | Cumsum Total Returns | Sharpe Ratio | Annualized Ret | Max Drawdown | Volatility |  STD   |
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+
| Performance |        80.65 %        |       62.87 %        |     2.06     |    60.04 %     |   18.86 %    |  24.27 %   | 1.27 % |
|  Benchmark  |       306.97 %        |       155.01 %       |     2.55     |    205.30 %    |   20.01 %    |  48.24 %   | 2.53 % |
+-------------+-----------------------+----------------------+--------------+----------------+--------------+------------+--------+
