In [1]:
import os
import datetime as dt
import time
from typing import Any, Dict, Optional, List

import requests
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import talib
import multiprocessing as mp
from requests.exceptions import ConnectionError, Timeout

%matplotlib inline
plt.style.use("fivethirtyeight")

In [2]:
import akshare as ak


stock_info_sh_name_code_df = ak.stock_info_sh_name_code(indicator="主板A股")
stock_info_sh_name_code_df = stock_info_sh_name_code_df[["证券代码", "证券简称", "上市日期"]]
stock_info_sh_name_code_df.columns = ["symbol", "name", "time to market"]

print(stock_info_sh_name_code_df)


      symbol  name time to market
0     600000  浦发银行     1999-11-10
1     600004  白云机场     2003-04-28
2     600006  东风汽车     1999-07-27
3     600007  中国国贸     1999-03-12
4     600008  首创环保     2000-04-27
...      ...   ...            ...
1666  605580  恒盛能源     2021-08-19
1667  605588  冠石科技     2021-08-12
1668  605589  圣泉集团     2021-08-10
1669  605598  上海港湾     2021-09-17
1670  605599  菜百股份     2021-09-09

[1671 rows x 3 columns]


In [None]:
symbols = stock_info_sh_name_code_df.symbol.to_list()
symbols

In [35]:
start_date = "20211203"
end_date = "20230202"
adjust = ""
min_klines = 260

In [50]:
ohlc_list = []
for symbol in symbols:
    try:
        stock_zh_a_hist_df = ak.stock_zh_a_hist(symbol=symbol, period="daily", start_date=start_date, end_date=end_date, adjust=adjust)
        assert len(stock_zh_a_hist_df) != 0
        
        if stock_zh_a_hist_df is not None and len(stock_zh_a_hist_df) >= min_klines:
            stock_zh_a_hist_df = stock_zh_a_hist_df[["日期", "开盘", "最高", "最低", "收盘", "成交额"]]
            stock_zh_a_hist_df.columns = ["Date", "Open", "High", "Low", "Close", "Volume"]
            stock_zh_a_hist_df['Symbol'] = symbol
            stock_zh_a_hist_df.index.name = "Date"
            stock_zh_a_hist_df.set_index('Date', inplace=True)
            ohlc_list.append(stock_zh_a_hist_df)
        else:
            print(symbol)
    except Exception as e:
        print(e)


600532
600734
600781
600938
600941
601022
601059
601089
601136
603051
603052
603057
603070
603097
603102
603122
603130
603132
603150
603151
603163
603170
603173
603182
603191
603201
603206
603209
603211
603215
603235
603237
603255
603261
603272
603280
603281


In [51]:
ohlc_joined = pd.concat(ohlc_list)
ohlc_joined.info()

<class 'pandas.core.frame.DataFrame'>
Index: 458473 entries, 2021-12-03 to 2023-02-02
Data columns (total 6 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   Open    458473 non-null  float64
 1   High    458473 non-null  float64
 2   Low     458473 non-null  float64
 3   Close   458473 non-null  float64
 4   Volume  458473 non-null  float64
 5   Symbol  458473 non-null  object 
dtypes: float64(5), object(1)
memory usage: 24.5+ MB


In [53]:
ohlc_joined.isnull().sum()

Open      0
High      0
Low       0
Close     0
Volume    0
Symbol    0
dtype: int64

In [54]:
ohlc_joined.to_csv("/tmp/cnstock_daily_ohlc.csv", index=True)

In [55]:
# 上证指数

benchmark = ak.stock_zh_a_daily(symbol="sh000001", start_date=start_date, end_date=end_date)
benchmark.head()

Unnamed: 0,date,open,high,low,close,volume,outstanding_share,turnover
0,2021-12-03,3576.45,3608.47,3573.21,3607.43,37755100000.0,19405750000.0,1.945562
1,2021-12-06,3615.24,3626.13,3586.8,3589.31,41867670000.0,19405750000.0,2.157488
2,2021-12-07,3611.22,3614.22,3572.57,3595.09,40384260000.0,19405750000.0,2.081046
3,2021-12-08,3602.82,3637.72,3591.99,3637.57,36103670000.0,19405750000.0,1.860462
4,2021-12-09,3641.16,3688.4,3638.7,3673.04,43191820000.0,19405750000.0,2.225722


In [56]:
benchmark_ann_ret = benchmark.close.pct_change(252).iloc[-1]
benchmark_ann_ret

-0.07582849074329312

In [57]:
def screen(close: pd.Series, benchmark_ann_ret: float) -> pd.Series:
    
    """ 实现MM选股模型的逻辑，评估单只股票是否满足筛选条件

    Args:
        close(pd.Series): 股票收盘价，默认时间序列索引
        benchmark_ann_ret(float): 基准指数1年收益率，用于计算相对强弱
    """
    
    # 计算50，150，200日均线
    ema_50 = talib.EMA(close, 50).iloc[-1]
    ema_150 = talib.EMA(close, 150).iloc[-1]
    ema_200 = talib.EMA(close, 200).iloc[-1]

    # 200日均线的20日移动平滑，用于判断200日均线是否上升
    ema_200_smooth = talib.EMA(talib.EMA(close, 200), 20).iloc[-1]

    # 收盘价的52周高点和52周低点
    high_52week = close.rolling(52 * 5).max().iloc[-1]
    low_52week = close.rolling(52 * 5).min().iloc[-1]

    # 最新收盘价
    cl = close.iloc[-1]

    # 筛选条件1：收盘价高于150日均线和200日均线
    if cl > ema_150 and cl > ema_200:
        condition_1 = True
    else:
        condition_1 = False

    # 筛选条件2：150日均线高于200日均线
    if ema_150 > ema_200:
        condition_2 = True
    else:
        condition_2 = False

    # 筛选条件3：200日均线上升1个月
    if ema_200 > ema_200_smooth:
        condition_3 = True
    else:
        condition_3 = False

    # 筛选条件4：50日均线高于150日均线和200日均线
    if ema_50 > ema_150 and ema_50 > ema_200:
        condition_4 = True
    else:
        condition_4 = False

    # 筛选条件5：收盘价高于50日均线
    if cl > ema_50:
        condition_5 = True
    else:
        condition_5 = False

    # 筛选条件6：收盘价比52周低点高30%
    if cl >= low_52week * 1.3:
        condition_6 = True
    else:
        condition_6 = False

    # 筛选条件7：收盘价在52周高点的25%以内
    if cl >= high_52week * 0.75 and cl <= high_52week * 1.25:
        condition_7 = True
    else:
        condition_7 = False

    # 筛选条件8：相对强弱指数大于等于70
    rs = close.pct_change(252).iloc[-1] / benchmark_ann_ret * 100
    if rs >= 70:
        condition_8 = True
    else:
        condition_8 = False

    # 判断股票是否符合标准
    if (condition_1 and condition_2 and condition_3 and
        condition_4 and condition_5 and condition_6 and
        condition_7 and condition_8):
        meet_criterion = True
    else:
        meet_criterion = False

    out = {
        "rs": round(rs, 2),
        "close": cl,
        "ema_50": ema_50,
        "ema_150": ema_150,
        "ema_200": ema_200,
        "high_52week": high_52week,
        "low_52week": low_52week,
        "meet_criterion": meet_criterion
    }

    return pd.Series(out)

In [60]:
symbols_to_screen = list(ohlc_joined.Symbol.unique())

# 将数据框的格式从long-format转化为wide-format
ohlc_joined_wide = ohlc_joined.pivot(columns="Symbol", values="Close").fillna(method="ffill")

ohlc_joined_wide.head()


Symbol,600000,600004,600006,600007,600008,600009,600010,600011,600012,600015,...,605507,605555,605566,605567,605577,605580,605588,605589,605598,605599
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-12-03,8.59,11.12,7.4,14.25,3.34,46.02,2.87,7.23,6.82,5.69,...,28.0,48.67,29.93,15.05,12.23,17.17,43.15,37.07,15.15,12.25
2021-12-06,8.6,11.11,7.24,14.32,3.32,45.71,2.87,7.34,6.77,5.68,...,27.52,48.05,28.93,15.56,12.05,17.45,40.79,36.46,14.56,12.02
2021-12-07,8.68,11.66,7.22,14.33,3.32,46.37,2.8,7.78,6.82,5.7,...,27.18,44.03,29.15,17.12,12.03,17.25,40.3,35.35,16.02,12.4
2021-12-08,8.67,11.76,7.25,14.71,3.34,46.88,2.88,8.0,6.79,5.67,...,27.3,44.18,29.27,16.88,12.14,17.2,40.53,36.5,15.73,12.46
2021-12-09,8.75,11.74,7.25,14.7,3.33,47.03,3.04,8.47,6.87,5.69,...,27.4,44.2,29.38,16.6,12.26,17.2,40.74,36.6,16.29,12.81


In [61]:
%%time

results = ohlc_joined_wide.apply(screen, benchmark_ann_ret=benchmark_ann_ret)
results = results.T

CPU times: user 2.79 s, sys: 13.6 ms, total: 2.8 s
Wall time: 2.94 s


In [63]:
selected_stock = results.query("meet_criterion == True").sort_values("rs", ascending=False)

In [64]:
selected_stock.head()

Unnamed: 0_level_0,rs,close,ema_50,ema_150,ema_200,high_52week,low_52week,meet_criterion
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
603196,308.28,7.67,7.541629,7.371657,7.361485,10.01,5.5,True
601700,245.35,5.6,5.422216,5.411286,5.391683,7.05,3.94,True
603099,202.45,9.76,9.725942,9.305911,9.294125,11.85,6.96,True
603600,198.03,10.64,10.342012,10.14431,10.098209,12.8,7.31,True
603238,190.55,12.73,12.359023,11.782157,11.760891,16.74,9.43,True


In [65]:
stock_info_sh_name_code_df.columns = stock_info_sh_name_code_df.columns.str.replace('symbol', 'Symbol')
selected_stock_df = pd.merge(selected_stock, stock_info_sh_name_code_df, how='left', on='Symbol')
print(selected_stock_df)

    Symbol      rs  close     ema_50    ema_150    ema_200 high_52week  \
0   603196  308.28   7.67   7.541629   7.371657   7.361485       10.01   
1   601700  245.35    5.6   5.422216   5.411286   5.391683        7.05   
2   603099  202.45   9.76   9.725942   9.305911   9.294125       11.85   
3   603600  198.03  10.64  10.342012   10.14431  10.098209        12.8   
4   603238  190.55  12.73  12.359023  11.782157  11.760891       16.74   
5   605009  153.89  49.96   48.84551  45.714001  45.631241       57.67   
6   603719  152.56  36.08  35.318649  32.833506  32.665791       43.25   
7   603530   142.4   17.1  16.187027  15.508717  15.364439       22.19   
8   603118  134.65   8.97   8.523412   8.464434   8.373683       10.05   
9   600149  128.62   6.57    6.34372   6.106273   5.996465        7.41   
10  603880  128.09    6.6   6.352265   6.234801   6.228259         8.0   
11  600312  100.25   8.63   8.337892   8.145069   8.119575        9.85   
12  603328   88.03   7.13   6.884046  

In [69]:
selected_stock_df.to_csv("results/cnstock_daily_ohlc.csv", index=True)