# 月线多头

寻找月线多头

In [1]:
from alpha.notebook import *

await init_notebook()

In [None]:
df = scan()
df.style.format({
    "预测涨幅": "{:.0%}"
})

In [None]:
df.sort_values("预测涨幅", ascending=False)

# 相似均线拟合法

In [None]:
# 给定时间点和代码，生成拟合均线。再scan所有股票和所有时间点，将欧氏距离和实际涨幅分别作为x,y

def build_sample_line(code, frame: str, ma_groups=(5, 10, 20), line_len=7):
    """Build the fitted reference moving-average lines for one security.

    For every window in ``ma_groups`` the last ``line_len`` points of the
    moving average of ``close`` are rescaled so that the first point equals 1,
    fitted with ``polyfit`` and the fitted polynomial is evaluated at
    ``0 .. line_len - 1``.

    Args:
        code: security code, e.g. ``"002791.XSHE"``.
        frame: end frame handed to ``get_bars`` (bars are requested with
            ``unit="1M"``).
        ma_groups: moving-average window sizes.
        line_len: number of moving-average points kept per window.

    Returns:
        A list with one fitted line (numpy array of ``line_len`` values) per
        window, in the order of ``ma_groups``.
    """
    # Enough bars for the widest window to yield ``line_len`` averaged points.
    nbars = max(ma_groups) + line_len - 1
    bars = get_bars(code, nbars, frame, unit="1M")

    x = np.arange(line_len)

    vec = []
    for win in ma_groups:
        ma = moving_average(bars["close"], win)[-line_len:]
        # Rescale so lines of differently priced securities are comparable.
        ma /= ma[0]
        coeff, _ = polyfit(ma)
        vec.append(np.polyval(coeff, x))

    return vec

jl = build_sample_line("002791.XSHE", "2019-12-31")
jl

## 坚朗五金月线拟合

In [None]:
def search_jl(sample_lines, line_len=7, ma_groups=(5, 10, 20)):
    """Measure how close every historical window is to the sample lines.

    For each security returned by ``load_monthly_bars`` and each window of
    ``max(ma_groups) + line_len - 1`` bars, the rescaled moving averages are
    compared with ``sample_lines`` via ``paired_distances`` and stored
    together with the close-to-close change of the bar following the window.

    Args:
        sample_lines: one reference line per entry of ``ma_groups``, each of
            length ``line_len`` (e.g. the output of ``build_sample_line``).
        line_len: number of moving-average points compared per window.
        ma_groups: moving-average window sizes.

    Returns:
        DataFrame with columns ``code``, ``name``, ``frame``, ``pcr`` and one
        ``d<win>`` distance column per entry of ``ma_groups``.
    """
    nbars = max(ma_groups) + line_len - 1
    results = []

    for code, bars in load_monthly_bars().items():
        name = get_name(code)
        for i in range(nbars, len(bars) - 1):
            bars_ = bars[i - nbars: i]

            frame = bars_["date"][-1]
            close = bars_["close"]

            # Change from the last bar of the window to the bar right after it.
            pcr = bars["close"][i] / close[-1] - 1

            mas = []
            for win in ma_groups:
                ma = moving_average(close, win)[-line_len:]
                ma /= ma[0]
                mas.append(ma)

            distances = paired_distances(sample_lines, mas)
            results.append([code, name, frame, pcr, *distances])

    # Derive the distance columns from ma_groups so non-default groups work.
    columns = ["code", "name", "frame", "pcr"] + [f"d{win}" for win in ma_groups]
    return pd.DataFrame(results, columns=columns)

jl = build_sample_line("002791.XSHE", "2019-12-31")
df_jl = search_jl(jl)
df_jl

In [None]:
df_jl[df_jl.d5<0.05]

## 鸿路钢构

In [None]:
# Fitted, rescaled moving-average lines for 002541.XSHE, keyed by window size.
mas_hl = {}

hl = get_bars('002541.XSHE', 29, '2020-01-23', unit='1M')
print(hl['date'][-3:])

# Abscissa for evaluating the fit: one point per retained moving-average sample.
x = np.arange(10)
for win in [5, 10, 20]:
    ma = moving_average(hl['close'], win)[-10:]
    # Rescale so the line starts at 1.
    ma /= ma[0]
    coeff, pmae = polyfit(ma)

    fitma = np.polyval(coeff, x)
    mas_hl[win] = fitma
mas_hl

In [None]:
from sklearn.metrics import euclidean_distances
from sklearn.metrics.pairwise import cosine_distances

# Compare the fitted lines of the two securities window by window: print the
# Euclidean distance and overlay both lines in the same colour.
x = np.arange(10)

# One plot colour per moving-average window.
colors = {
    5: 'r',
    10:'g',
    20:'blue'
}
for win in [5, 10, 20]:
    ma_hl = mas_hl[win]
    # NOTE(review): ``mas_jl`` is not defined in any visible cell; it presumably
    # should map window -> 10-point fitted line, like ``mas_hl`` -- confirm.
    ma_jl = mas_jl[win]
    
    d = euclidean_distances([ma_hl], [ma_jl])
    print(d)
    
    # Dots for the ``mas_hl`` line, solid line for the ``mas_jl`` line.
    plt.plot(x, ma_hl, ".", color=colors[win])
    plt.plot(x, ma_jl, "-", color=colors[win])

In [None]:
secs[secs.start_date < datetime.datetime(2018,1,1,0,0,0)]

# 机器学习方法

In [None]:
import pickle
def make_ds_month(path, end="2021-08-31", nbars=68):
    """Fetch ``unit='1M'`` bars for a set of securities and pickle them.

    Securities are taken from ``secs`` where ``start_date`` is earlier than
    2018-01-01.

    Args:
        path: destination file; ``~`` is expanded.
        end: last date to fetch, anything ``arrow.get`` accepts.
        nbars: number of bars requested per security.

    The file holds a dict mapping security code to the bars returned by
    ``get_bars``, written with pickle protocol 5.
    """
    end = arrow.get(end).date()
    cutoff = datetime.datetime(2018, 1, 1, 0, 0, 0)
    codes = secs[secs.start_date < cutoff].index

    data = {code: get_bars(code, nbars, end, unit='1M') for code in codes}

    path = os.path.expanduser(path)
    with open(path, "wb") as f:
        pickle.dump(data, f, protocol=5)

In [None]:
   
def search():
    """Collect fit coefficients and one-step predictions for every window.

    Each row describes a 29-bar window of one security: the change of the bar
    following the window (``actual``) and, for the 5/10/20 moving averages,
    the first two fit coefficients, the fit error reported by ``polyfit`` and
    the relative change predicted by ``predict_by_moving_average`` (``None``
    when no prediction is returned).
    """
    # Sample count handed to predict_by_moving_average for each window size.
    samples_by_win = {5: 7, 10: 10, 20: 10}
    columns = ["code", "name", "frame", "actual"]
    for win in (5, 10, 20):
        columns += [f"a{win}", f"b{win}", f"pmae{win}", f"pred{win}"]

    rows = []
    for code, bars in load_monthly_bars().items():
        for i in range(29, len(bars) - 1):
            window = bars[i - 29: i]

            frame = window['date'][-1]
            c0 = window['close'][-1]
            c1 = bars[i]['close']
            pcr = c1 / c0 - 1

            row = [code, get_name(code), frame, pcr]
            for win in [5, 10, 20]:
                ma = moving_average(window['close'], win)[-10:]
                ma /= ma[0]
                (a, b, _c), pmae = polyfit(ma)

                ypreds, _ = predict_by_moving_average(
                    window['close'], win, 1, 1, samples_by_win[win]
                )
                pred = None if ypreds is None else ypreds[0] / c0 - 1

                row.extend((a, b, pmae, pred))

            rows.append(row)

    return pd.DataFrame(rows, columns=columns)
            
df = search()

In [None]:
def preprocess(df):
    """Prepare the dataset for XGBoost classification.

    Class labels (as written by the original author):
        0  not predictable
        1  predictable with the 5-month line
        2  predictable with the 10-month line
        3  predictable with the 20-month line

    Returns:
        dict with ``"X"`` (the feature columns of ``df``) and ``"y"`` (the
        result of ``labelling(df)``).
    """
    feature_cols = [
        f"{stat}{win}"
        for win in (5, 10, 20)
        for stat in ("a", "b", "pmae", "pred")
    ]
    features = df[feature_cols]
    labels = labelling(df)
    return {"X": features, "y": labels}


In [None]:
def labelling(df, threshold=0.1):
    """Label each row with the moving average whose prediction was closest.

    Args:
        df: frame with columns ``pred5``, ``pred10``, ``pred20`` and
            ``actual``. Any index is accepted; rows are processed by position.
        threshold: largest absolute error still counted as a usable prediction.

    Returns:
        A list with one int per row: 0 when every prediction misses ``actual``
        by more than ``threshold`` (or all are missing), otherwise 1, 2 or 3
        for ``pred5``, ``pred10``, ``pred20`` respectively.
    """
    preds = df[["pred5", "pred10", "pred20"]].to_numpy(dtype=float)
    actual = df["actual"].to_numpy(dtype=float)

    err = np.abs(preds - actual[:, None])
    # A missing prediction must never win the argmin: treat it as infinitely wrong.
    err = np.where(np.isnan(err), np.inf, err)

    best = err.argmin(axis=1)
    labels = np.where(err.min(axis=1) > threshold, 0, best + 1)
    return labels.tolist()

In [None]:
import xgboost
from xgboost import XGBClassifier
from sklearn.metrics import classification_report
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from scipy.stats import randint, uniform


def train(X_train, y_train, X_test, y_test):
    """Random-search an XGBClassifier, print a report and return the best model.

    Args:
        X_train, y_train: data used for the cross-validated search.
        X_test, y_test: hold-out data used as the early-stopping evaluation
            set and for the printed classification report.

    Returns:
        The estimator refitted with the best parameters found.
    """
    model = XGBClassifier()

    # scipy's uniform(loc, scale) samples from [loc, loc + scale].
    params = {
        "colsample_bytree": uniform(0.7, 0.3),
        "gamma": uniform(0, 0.5),
        # Keep the learning rate inside (0, 1]; uniform(0.01, 1) reached 1.01.
        "learning_rate": uniform(0.01, 0.99),
        "max_depth": randint(2, 6),
        "n_estimators": randint(80, 150),
        "subsample": uniform(0.6, 0.4),
    }

    search = RandomizedSearchCV(
        model,
        param_distributions=params,
        random_state=78,
        n_iter=200,
        cv=10,
        verbose=1,
        n_jobs=1,
        return_train_score=True,
        refit=True,  # refit on the whole training set so best_estimator_ exists
    )

    # NOTE(review): the test split doubles as the early-stopping set, so the
    # report below is optimistic. Passing early_stopping_rounds to fit() is
    # also version dependent in xgboost -- confirm against the installed release.
    fit_params = {
        "eval_set": [(X_test, y_test)],
        "early_stopping_rounds": 5,
    }

    search.fit(X_train, y_train, **fit_params)

    best_model = search.best_estimator_
    preds = best_model.predict(X_test)
    report = classification_report(y_test, preds)
    print(report)

    return best_model
    
data = preprocess(df)
X, y = shuffle(data["X"], data["y"], random_state=78)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=78)
train(X_train, y_train, X_test, y_test)

# 拟合均线预测法

在a5, a10, a20, a30都大于0的情况下，预测下一个月的涨幅。当拟合线与实际ma误差小于0.01时，认为拟合有效。

In [None]:
class NotFitableError(Exception):
    """Signals that a line could not be fitted.

    ``scan`` catches this exception; no visible code raises it.
    """
    pass

class NotProfitableError(Exception):
    """Raised by ``scan`` to discard a window whose prediction is a large drop."""
    pass

def scan(fit_err=0.01, path="/Users/aaronyang/data/monthly_bars.pkl"):
    """Scan all pickled bars and predict gains from well-fitted moving averages.

    For every security and every 39-bar window the 5/10/20/30 moving averages
    are rescaled and fitted with ``polyfit``. A window's prediction is the
    largest positive relative change predicted two steps ahead by any moving
    average whose fitted line lies within ``fit_err`` (Euclidean distance) of
    the actual line. A window is discarded when such a moving average predicts
    a drop larger than both 20% and the best gain seen so far.

    Args:
        fit_err: largest accepted Euclidean distance between the fitted and
            the actual moving-average line.
        path: pickle file holding a dict of security code -> bars.

    Returns:
        DataFrame with columns ``name``, ``code``, ``frame``, ``pcr`` (best
        realised change over the two bars after the window), ``a<win>`` /
        ``b<win>`` fit coefficients and ``pred`` (0 when nothing was predicted).
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(path, "rb") as f:
        data = pickle.load(f)

    results = []
    for code, bars in data.items():
        name = get_name(code)

        for i in range(39, len(bars) - 2):
            frame = bars["date"][i - 1]
            close = bars["close"][i - 39: i]
            pcr = max(bars["close"][i:i + 2]) / close[-1] - 1
            max_pcr_pred = 0

            row = [name, code, frame, pcr]
            try:
                for win in [5, 10, 20, 30]:
                    fit_win = 7 if win == 5 else 10
                    ma = moving_average(close, win)[-fit_win:]
                    ma /= ma[0]

                    coeff, _ = polyfit(ma)
                    a, b, _ = coeff

                    # Coefficients are recorded even when the fit is rejected
                    # below, so every row has the same number of columns.
                    row.extend((a, b))
                    ma_fit = np.polyval(coeff, np.arange(fit_win))
                    d = euclidean_distances([ma_fit], [ma]).flatten()[0]

                    if d > fit_err:  # this moving average carries no signal
                        continue

                    ypreds, _ = predict_by_moving_average(
                        close, win, 2, err_threshold=1, n=fit_win
                    )
                    if ypreds is None:
                        continue

                    pcr_pred = ypreds[-1] / close[-1] - 1
                    # This moving average points to a sizeable drop: drop the window.
                    if pcr_pred < 0 and abs(pcr_pred) > max(max_pcr_pred, 0.2):
                        raise NotProfitableError

                    max_pcr_pred = max(max_pcr_pred, pcr_pred)

                row.append(max_pcr_pred)
                results.append(row)
            except (NotFitableError, NotProfitableError):
                # Deliberate: such windows are left out of the result.
                pass

    return pd.DataFrame(results, columns=["name", "code", "frame", "pcr", "a5", "b5",
                                          "a10", "b10", "a20", "b20", "a30", "b30", "pred"])
        

In [None]:
df = scan(0.005)
df

In [None]:
# Keep only rows that carry a prediction. Copy the selection so the new column
# is written to an independent frame rather than to a slice of ``df``.
pred = df[df.pred != 0].copy()
pred["gap"] = pred.pred - pred.pcr

In [None]:
pred.gap.describe()

In [None]:
pd.set_option('display.max_rows', 500)
pred[(pred.code=="000422.XSHE")]

# 湖北宜化相似曲线方法

In [None]:
import matplotlib.pyplot as plt

# Build the reference vectors for 000422.XSHE: the last 14 points of the
# 5/10/20/30 moving averages, each rescaled to start at 1.
lines = []

# till 2021-05-31
# NOTE(review): ``data`` is assigned at module level only in later cells
# (pickle load); this cell relies on notebook execution order.
hbyh = data["000422.XSHE"][:-3]
for win in [5, 10, 20, 30]:
    line = moving_average(hbyh["close"], win)[-14:]
    line /= line[0]
    lines.append(line.tolist())
    
# One row per moving-average window; read later by search_like_hbyh.
hbyh_vec = np.array(lines, dtype=np.float32)
for vec in hbyh_vec:
    plt.plot(vec)
    
# Overlay the polynomial fit of the first (5-bar) line.
coeff, pmae = polyfit(lines[0])
yfit = np.polyval(coeff, np.arange(0, 14))
plt.plot(yfit)

In [None]:
class NotEnoughDataError(Exception):
    """Raised when a moving average yields fewer points than required."""
    pass
def search_like_hbyh():
    """Find windows whose moving-average lines resemble the ``hbyh_vec`` reference.

    Reads the module-level ``data`` (code -> bars) and ``hbyh_vec`` (one
    14-point reference line per 5/10/20/30 moving average). For every 43-bar
    window the rescaled moving-average lines are compared with the reference
    via ``paired_distances``.

    Returns:
        DataFrame with columns ``name``, ``code``, ``frame``, ``pcr`` (best
        change over the two bars after the window) and ``d5``..``d30``.
    """
    results = []
    for code, bars in data.items():
        name = get_name(code)
        for i in range(43, len(bars) - 2):
            frame = bars["date"][i - 1]
            close = bars["close"][i - 43:i]
            pcr = max(bars["close"][i:i + 2]) / close[-1] - 1

            lines = []
            try:
                for win in [5, 10, 20, 30]:
                    line = moving_average(close, win)[-14:]
                    if len(line) != 14:
                        raise NotEnoughDataError
                    line /= line[0]

                    lines.append(line.tolist())
            except NotEnoughDataError:
                # An incomplete set of lines cannot be paired with the
                # reference; skip this window instead of comparing anyway.
                continue

            d = paired_distances(hbyh_vec, lines)
            results.append([name, code, frame, pcr, *d])

    return pd.DataFrame(results, columns=["name", "code", "frame", "pcr", "d5", "d10", "d20", "d30"])
df = search_like_hbyh()
df = search_like_hbyh()            

In [None]:
thres = 0.15
df[(df.d5<thres) & (df.d10 < thres) & (df.d20<thres) &(df.d30<thres)]

In [None]:
df["dsum"] = df.d5+df.d10+df.d20+df.d30
df.nsmallest(10, "dsum")

# 股价走势法
直接使用涨跌幅，再求相似曲线。结果不佳

In [None]:
lines = []

# till 2021-05-31
# 49 bars ending three bars before the end of the series.
hbyh = data["000422.XSHE"][-52:-3]

close = hbyh["close"]
# Bar-over-bar relative change: 48 values, the reference read by
# search_like_hbyh_pcr.
hbyh_pcr = close[1:]/close[:-1] - 1

In [None]:
def search_like_hbyh_pcr():
    """Rank every 49-bar window by similarity of its returns to ``hbyh_pcr``.

    Reads the module-level ``data`` and ``hbyh_pcr``. Each result row holds
    the security name and code, the date of the last bar in the window, the
    best change over the two bars that follow, and the Euclidean distance
    between the window's bar-over-bar changes and ``hbyh_pcr``.
    """
    rows = []
    for code, bars in data.items():
        name = get_name(code)
        for end in range(49, len(bars) - 2):
            last_date = bars["date"][end - 1]
            window = bars["close"][end - 49:end]

            changes = window[1:] / window[:-1] - 1
            forward = max(bars["close"][end:end + 2]) / window[-1] - 1

            dist = euclidean_distances([changes], [hbyh_pcr]).flatten()[0]
            rows.append([name, code, last_date, forward, dist])

    return pd.DataFrame(rows, columns=["name", "code", "frame", "pcr", "d"])

df = search_like_hbyh_pcr()

In [None]:
df.nsmallest(10, "d")

# 参考库

In [None]:
pool = []
cm = {
    5: "b",
    10: "g",
    20: "c",
    30: "m",
    60: "y",
    120: "tab:orange",
    "raw": "tab:gray",
}

In [None]:
# 找出两个月内涨幅超过50%的所有个股

with open("/Users/aaronyang/data/monthly_bars.pkl", "rb") as f:
    data = pickle.load(f)
    
def find_bulls(adv=0.5):
    """Find windows followed by a large gain and save a chart for each.

    Reads the module-level ``data`` (code -> bars) and ``cm`` (window ->
    colour). For every window of ``max(ma_groups) + samples - 1`` bars, the
    best close within the next ``ylen`` bars is compared with the window's
    last close. Windows whose gain reaches ``adv`` are recorded and their
    moving-average lines are written to ``/tmp/find_bulls``.

    Args:
        adv: minimum relative gain (0.5 means +50%).

    Returns:
        DataFrame with columns ``name``, ``code``, ``frame``, ``pcr``.
    """
    import os

    ma_groups = [5, 10, 20, 30]
    samples = 14
    n = max(ma_groups) + samples - 1
    ylen = 5

    out_dir = "/tmp/find_bulls"
    # savefig does not create missing directories.
    os.makedirs(out_dir, exist_ok=True)

    results = []
    for code, bars in data.items():
        name = get_name(code)

        # The range is empty whenever there are too few bars for one window.
        for i in range(n, len(bars) - ylen):
            frame = bars["date"][i - 1]
            # Copy: the window is rescaled in place below.
            xclose = bars["close"][i - n:i].copy()
            yclose = bars["close"][i:i + ylen]

            pcr = max(yclose) / xclose[-1] - 1
            if pcr < adv:
                continue

            results.append((name, code, frame, pcr))

            fig, ax = plt.subplots(nrows=1, ncols=1)

            xclose /= xclose[0]
            for win in ma_groups:
                ma = moving_average(xclose, win)[-samples:]
                ax.plot(ma, color=cm[win])

            fig.savefig(f'{out_dir}/{name}-{frame}-{pcr:.2f}.png')
            plt.close(fig)

    df = pd.DataFrame(results, columns=["name", "code", "frame", "pcr"])
    return df
find_bulls(adv=2)

In [None]:
with open("/Users/aaronyang/data/monthly_bars.pkl", "rb") as f:
    data = pickle.load(f)

In [None]:
def draw_maline(code, end, inline_show=True):
    """Plot the last 14 points of the 5/10/20/30 moving averages up to ``end``.

    Reads the module-level ``data`` (code -> bars). The 43 closes ending at
    the first bar dated on or after ``end`` are rescaled to start at 1 before
    averaging.

    Args:
        code: security code; a bare code gets ``.XSHG`` appended when it
            starts with "6", otherwise ``.XSHE``.
        end: end date, anything ``arrow.get`` accepts.
        inline_show: draw on the current axes when true; otherwise draw on a
            new figure, save it under ``/tmp/find_bulls`` and close it.

    Raises:
        ValueError: if no bar is dated on or after ``end`` or fewer than 43
            bars are available up to that bar.
    """
    import os

    if "." not in code:
        code += ".XSHG" if code.startswith("6") else ".XSHE"

    bars = data[code]
    on_or_after = bars["date"] >= arrow.get(end).date()
    # argmax alone returns 0 when nothing matches, which would silently
    # select the first bar.
    if not np.any(on_or_after):
        raise ValueError(f"{code}: no bar on or after {end}")
    end_pos = int(np.argmax(on_or_after))
    if end_pos < 42:
        raise ValueError(f"{code}: needs 43 bars up to {end}, only {end_pos + 1} available")

    # Copy: the window is rescaled in place.
    close = bars["close"][end_pos - 42: end_pos + 1].copy()
    close /= close[0]

    if inline_show:
        fig, ax = None, plt.gca()
    else:
        fig, ax = plt.subplots(nrows=1, ncols=1)

    for win in [5, 10, 20, 30]:
        ma = moving_average(close, win)
        ax.plot(ma[-14:])
        # Label each line with its window at the right-hand end.
        ax.text(13, ma[-1], f"{win}")

    if fig is not None:
        os.makedirs("/tmp/find_bulls", exist_ok=True)
        fig.savefig(f"/tmp/find_bulls/{code}_{end}.png")
        plt.close(fig)

In [None]:
samples = """st众泰 2021-01-29 1折超跌，月线大阳（30%）
st众泰 2021-02-26 1折超跌，短线金叉
st众泰 2021-04-30 1折超跌，短线金叉
st双环 2021-05-31 2折底部黄金坑
st商城 2021-02-26 底部短线金叉
st天成 2021-04-29 1折超跌
st新光 2021-02-26 1折超跌
st节能 2021-03-31 1折超跌，均线粘合，金叉
st赫美 2021-05-31 1折超跌，均线粘合，金叉
st天山 2020-07-31 均线粘合，月线收阳，均线拐头
三星医疗 2021-04-30 均线粘合后金叉发散，连涨4月
三诺生物 2020-04-30 短线金叉，长线走平或者拐头
世名科技 2020-04-30 均线粘合，短线金叉。新股接近首日开盘价整理多月。
东富龙 2020-04-30 整理后均线金叉。注意有显著放量
东富龙 2021-03-31 长线多头，短线黄金坑
中国中免 2020-05-29 长线多头，短线下黄金坑，月线平台突破
中国北车 2014-10-24 均线粘合，长线多头，短线黄金坑
中毅达 2021-03-31 短线多头，月线上穿均线，长线拐头
中能电气 2020-09-30 短线金叉，多头排列，突破平台
亚厦股份 2020-03-31 均线粘合。大阳带动均线形成多头。
光启技术 2020-06-30 均线粘合后再度洗盘，当月收十字星
全志科技 2021-04-30 均线多头，短线黄金坑
凯撒文化 2020-05-29 均线粘合，大阳穿4线
华银电力 2021-02-26 短线金叉。长下影
*华鹏飞 周线机会
博晖创新 2020-06-30 均线金叉。长线拐头
双林股份 2020-07-31 均线粘合，拐头。连续两月放量
*君正集团 2020-06-30 均线粘合，底部整理两月。短线拐头。板块驱动
*国民技术 2021-05-31 均线粘合，一阳穿4线。
国科微 2021-03-31 长线向上支撑，短线止跌
国科微 2021-04-30 多头初现。短线黄金坑
大豪科技 2020-11-23 短线金叉，停牌利好？
天际股份 2020-09-30 次新股价接近首日开盘价，均线拐头
天际股份 2021-04-30 长线多头，短线整理后重拾升势
天齐锂业 2020-11-30 短线金叉。缩量整理
奥园美谷 2020-11-30 均线粘合，短线金叉，一阳穿4线
奥园美谷 2021-03-31 均线多头强势首阴
姚记科技 2019-11-29 均线金叉，短线多头，长线拐头
宝鼎科技 2019-08-30 均线金叉，近期放量，底部黄金坑
富临精工 2021-05-31 次新，均线多头，短线黄金坑。月线突破平台。
富满电子 2021-03-31 次新，均线多头，短线黄金坑，两上升小阳线表明企稳。
小康股份 2021-02-26 次新，均线金叉，底部巨量。长线拐头
小康股份 2021-03-31 次新，均线金叉，底部巨量。多头排列。
岩石股份 2021-02-26 30线支撑，5线黄金坑。均线金叉。突破平台。
川能动力 2020-10-30 金叉，多头排列。
斯莱克 2021-04-30 短线黄金坑，长线多头。
昌红科技 2020-04-30 均线多头，短期洗盘完成
*星徽股份
星期六 2019-11-29 均线粘合后发散
晨曦航空 2020-08-31 均线粘合，金叉。30线向上加速。
晶方科技 2019-11-29 10线黄金坑。5线上升态。余线拐头
晶瑞电材 2021-05-31 均线多头，5线黄金坑
智飞生物 2020-05-29 均线多头
朗姿股份 2020-10-30 均线金叉。缩量整理。
朗姿股份 2020-11-30 均线金叉，多头。
朗姿股份 2021-01-29 均线多头。短线缩量调整。
未名医药 2020-03-31 均线金叉。底部放量
未来股份 2020-05-29 2折超跌，底部放量吸筹。5线上攻，4连阳
格力地产 2020-04-30 120线支撑，均线粘合，一阳多线。
* 森特股份 2021-02-26 
模塑科技 2019-12-31 均线粘合、金叉。底部4连阳。
永兴材料 2020-10-30 均线多头
永兴材料 2021-06-30 均线多头
汇金股份 2020-07-31 均线多头，金叉
江特电机 2020-11-30 2折股，短线金叉
江特电机 2021-06-30 金叉，多头。缩量调整。
*沈阳化工 2020-07-31 仙人指路后收首阳
泉阳泉 2020-07-31 5线拐头。
泉阳泉 2020-10-30 均线金叉收阳。
泰达股份 2020-01-23 均线金叉，收大阳
洪都航空 2020-06-30 5线黄金坑
海利生物 2020-06-30 均线多头
海南椰岛 2021-03-31 均线多头、金叉。短线整理后收阳
海汽集团 2020-06-30 均线多头
*润和软件 2021-04-30 
深物业A 2020-05-29 底部3连阳。均线走平。
漫步者 2019-09-30 金叉，10线加速上攻
*王府井 2020-05-30 
盛和资源 2020-12-31 金叉，10线加速上攻
省广集团 2020-03-31 3折超跌，均线底部粘合
石大胜华 2020-08-31 均线多头，5线黄金坑
石大胜华 2021-02-26 均线多头，缩量整理
秀强股份 2020-01-23 底部企稳上攻，金叉
*美邦服饰 2021-02-26 企稳后洗盘
联创股份 2021-06-30 均线粘合、多头
联络互动 2021-04-30 1折超跌，金叉上攻
舍得酒业 2021-03-31 均线多头，缩量整理后重拾升势
航锦科技 2019-11-29 均线粘合，4月筑底，一阳多线
苏宁环球 2021-03-31 均线多头，缩量整理
蓝英装备 2020-05-29 金叉上攻，短线多头
藏格控股 2021-03-31 金叉
融捷股份 2021-06-30 均线多头，缩量整理
西藏矿业 2021-04-30 均线多头
西藏药业 2020-05-29 均线粘合后发散
达安基因 2020-05-29 均线金叉，缩量整理
*郑州煤电 2020-10-30 均线金叉
酒鬼酒 2020-05-29 均线多头，10线强势支撑
金刚玻璃 2021-03-31 30线支撑，5线黄金坑,平台突破
金力泰 2020-06-30 贴地飞行
金发拉比 2021-03-31 均线粘合，5线上攻金叉
金种子酒 2020-10-30 均线多头，5线黄金坑
锦泓集团 2021-03-31 贴地飞行后向下洗盘，收两阳
长城汽车 2021-05-31 30线黄金坑。大涨突破平台后调整。
长城汽车 2020-08-31 均线多头，短线整理后重拾升势
长川科技 2021-04-30 均线多头，整理吸引上影线
*长源电力 2021-03-31
阳光电源 2020-08-31 均线多头，强势
阳光电源 2021-05-31 均线多头，整理后重拾升势
鸿路钢构 2020-04-30 均线多头"""

# (name, frame) pairs parsed from ``samples``; the free-text note is ignored.
patterns = []

for line in samples.splitlines():
    line = line.strip()
    # Skip blank lines and entries flagged with a leading "*".
    if not line or line.startswith("*"):
        continue

    # Split on any run of whitespace so repeated spaces yield no empty fields.
    items = line.split()
    if len(items) < 2:
        continue

    name, frame, *_ = items
    patterns.append((name, frame))
    

In [None]:
pattern_pool = []
with open("/Users/aaronyang/data/monthly_bars.pkl", "rb") as f:
    data = pickle.load(f)
    
def build_pattern_pool(samples):
    """Append one feature vector per usable sample to the global ``pattern_pool``.

    A feature vector is the concatenation of the last 14 points of the
    5/10/20/30 moving averages, computed on the 43 closes that end at the
    first bar dated on or after the sample date and are rescaled to start
    at 1. Reads the module-level ``data`` (code -> bars).

    Samples that cannot be processed are reported on stdout and skipped.

    Args:
        samples: iterable of ``(name, end)`` pairs.

    Returns:
        The list of ``(name, end)`` pairs that were actually added, in the
        same order as the rows appended to ``pattern_pool``. Index rows of
        ``pattern_pool`` with this list when any sample was skipped.
    """
    kept = []
    for name, end in samples:
        try:
            code = get_code(name)
            end_date = arrow.get(end).date()

            bars = data[code]
            on_or_after = bars["date"] >= end_date
            # argmax alone returns 0 when nothing matches.
            if not np.any(on_or_after):
                raise ValueError("no bar on or after this date")
            end_pos = int(np.argmax(on_or_after))
            # A negative slice start would wrap around and could silently
            # yield a short or unrelated window.
            if end_pos < 42:
                raise ValueError(f"needs 43 bars of history, only {end_pos + 1} available")

            close = bars["close"][end_pos - 42: end_pos + 1].copy()
            close /= close[0]

            vectors = []
            for win in [5, 10, 20, 30]:
                vectors.extend(moving_average(close, win)[-14:])
        except Exception as e:
            # Best effort: report the failing sample and carry on.
            print(f"Error: {name} {end} {str(e)}")
            continue

        pattern_pool.append(vectors)
        kept.append((name, end))

    return kept
        
build_pattern_pool(patterns)
print(len(pattern_pool), len(patterns))

In [None]:
# Save one moving-average chart per reference pattern; inline_show=False makes
# draw_maline write the figure to disk.
for (name, end) in patterns:
    code = get_code(name)
    draw_maline(code, end, False)

In [None]:
with open("/Users/aaronyang/data/monthly_bars.pkl", "rb") as f:
    data = pickle.load(f)
def search_sim_vec():
    """Match every 43-bar window against the reference pattern pool.

    Reads the module-level ``data``, ``pattern_pool`` and ``patterns``. The
    window for bar ``i`` is the 43 closes ending at bar ``i`` inclusive, the
    same convention used to build ``pattern_pool`` and to draw a frame in
    ``parallel_show``, so the reported ``frame`` is the last bar compared.

    Returns:
        DataFrame with one row per window: the security name, the frame, the
        nearest reference pattern (name and date) and its Euclidean distance.

    Raises:
        ValueError: if ``pattern_pool`` and ``patterns`` differ in length, in
            which case row positions no longer identify the right pattern.
    """
    if len(pattern_pool) != len(patterns):
        raise ValueError(
            f"pattern_pool has {len(pattern_pool)} rows but patterns has "
            f"{len(patterns)} entries; they must be aligned"
        )

    results = []
    for (code, bars) in data.items():
        name = get_name(code)
        for i in range(42, len(bars)):
            frame = bars["date"][i]
            # Copy: the window is rescaled in place.
            close = bars["close"][i - 42:i + 1].copy()
            close /= close[0]

            vec = []
            for win in [5, 10, 20, 30]:
                vec.extend(moving_average(close, win)[-14:])

            d = euclidean_distances(pattern_pool, [vec]).flatten()
            pos = np.argmin(d)
            results.append((name, frame, *patterns[pos], d[pos]))
    return pd.DataFrame(results, columns=["被检", "frame", "对照", "对照时间", "距离"])

sim_vecs = search_sim_vec()  
sim_vecs

In [None]:
sim_vecs.rename(columns={"被检": "sample", "对照": "ref", "距离":"dis"}, inplace=True)

In [None]:
sim_vecs[(sim_vecs.dis > 1e-3) & (sim_vecs.dis<0.15) &(sim_vecs.frame>=datetime.date(2021,8,1))]

In [None]:
def parallel_show(name1, frame1, name2, frame2):
    """Draw the moving-average lines of two (name, frame) pairs side by side.

    Each panel shows the last 14 points of the 5/10/20/30 moving averages of
    the 43 closes ending at the first bar dated on or after the frame,
    rescaled to start at 1. Reads the module-level ``data``.
    """
    code1 = get_code(name1)
    code2 = get_code(name2)

    fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 5))

    panels = ((code1, frame1), (code2, frame2))
    for idx, (code, frame) in enumerate(panels):
        ax = axes[idx]

        bars = data[code].copy()
        cutoff = arrow.get(frame).date()
        end_pos = np.argmax(bars["date"] >= cutoff)

        close = bars["close"][end_pos - 42: end_pos + 1]
        close /= close[0]

        # Panel caption in axes coordinates (top-left corner).
        ax.text(0, 0.95, f"{code} {frame}", transform=ax.transAxes)
        for win in [5, 10, 20, 30]:
            ma = moving_average(close, win)
            ax.plot(ma[-14:])
            ax.text(13, ma[-1], f"{win}")


In [None]:
# Plot every close (but not identical) match next to its reference pattern.
# Each record is (index, sample, frame, ref, ref date, distance).
for row in sim_vecs[(sim_vecs.dis > 1e-3) & (sim_vecs.dis<0.07)].to_records():
    _, name1, frame1, name2, frame2, _ = row
    parallel_show(name1, frame1, name2, frame2)

# 月线涨幅大于35%,首月大涨的

In [None]:
# Print securities whose latest bar gained between 25% and 40% over the
# previous one.
# NOTE(review): the section heading says "more than 35%"; the bounds below are
# 25%..40% -- confirm which is intended.
for code in choose_stocks():
    bars = get_bars(code, 2, end=None, unit="1M")
    # Only keep securities that have both bars and whose latest bar is 2021-09-30.
    if len(bars) != 2 or bars["date"][1] != datetime.date(2021, 9, 30):
        continue
        
    close = bars["close"]
    adv = close[1]/close[0] - 1
    if 0.25 < adv < 0.4:
        print(get_name(code), code)

In [None]:
240 * 250 * 5000/(1000*1000*1000)

# 月线三连阳

底部启动，月线三连阳且放量，均线金叉

In [29]:
def features(code, name, bars, results, frame_type):
    """Record securities with three rising bars and a recent 5-over-10 cross.

    A row ``[name, code, frame, wr, vr, since_cross]`` is appended to
    ``results`` when the last three bars all close above their open, more
    than 12 bars are available, and within the last four moving-average
    points the 5-bar average moves from below to above the 10-bar average.

    Args:
        code: security code.
        name: security name.
        bars: record array with ``frame``, ``open``, ``high``, ``low``,
            ``close`` and ``volume`` fields.
        results: list that receives the row (mutated in place).
        frame_type: unused here; kept for the callback signature.
    """
    bullish = np.all((bars["close"] > bars["open"])[-3:])
    if not bullish:
        return

    if len(bars) <= 12:
        return

    frame = bars["frame"][-1]
    high, low, close = bars["high"], bars["low"], bars["close"]
    # Position of the last close inside the window's low-high range.
    wr = np.round((close[-1] - min(low)) / (max(high) - min(low)), 2)

    volume = bars["volume"]
    # Largest of the last 3 volumes over the smallest of the last 6.
    vr = np.round(np.max(volume[-3:]) / np.min(volume[-6:]), 1)

    ma10 = moving_average(close, 10)[-4:]
    ma5 = moving_average(close, 5)[-4:]

    last = len(ma5) - 1
    for p in np.argwhere(ma5 < ma10).flatten():
        # Nothing follows the final point, so a cross cannot be confirmed
        # there (the original compared against 4 and indexed past the end).
        if p == last:
            break

        if ma5[p + 1] > ma10[p + 1]:
            results.append([name, code, frame, wr, vr, 4 - p])
            break

results = await scan(features, 13, '1M', nstocks=None, silent=True)
df = pd.DataFrame(results, columns=["name", "code", "frame", "wr", "vr", "since_cross"])

progress: 500/3992, elapsed: 2, ETA: 13
progress: 1000/3992, elapsed: 3, ETA: 8
progress: 1500/3992, elapsed: 5, ETA: 8
progress: 2000/3992, elapsed: 6, ETA: 5
progress: 2500/3992, elapsed: 14, ETA: 8
progress: 3000/3992, elapsed: 16, ETA: 5
progress: 3500/3992, elapsed: 19, ETA: 2


In [35]:
df[df.wr<0.6].nlargest(20, "vr")

Unnamed: 0,name,code,frame,wr,vr,since_cross
5,红宝丽,002165.XSHE,2021-10-29,0.58,17.0,2
33,德恩精工,300780.XSHE,2021-10-29,0.4,9.7,3
20,日丰股份,002953.XSHE,2021-10-29,0.5,6.6,3
8,长青股份,002391.XSHE,2021-10-29,0.35,5.5,2
57,中曼石油,603619.XSHG,2021-10-29,0.58,4.8,4
17,泰永长征,002927.XSHE,2021-10-29,0.57,4.4,2
29,丝路视觉,300556.XSHE,2021-10-29,0.53,4.3,3
18,新兴装备,002933.XSHE,2021-10-29,0.59,4.2,2
14,中坚科技,002779.XSHE,2021-10-29,0.52,4.2,3
32,海川智能,300720.XSHE,2021-10-29,0.29,3.6,3


In [None]:
import xgboost
from xgboost import XGBRegressor
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from scipy.stats import randint, uniform


def grid_search(X_train, y_train):
    """Randomised hyper-parameter search for an XGBRegressor.

    Despite the name this is a random search: 10 parameter sets are sampled
    and scored with 3-fold cross-validation. The three best-ranked candidates
    are printed.

    Args:
        X_train: training features.
        y_train: training targets.

    Returns:
        The estimator refitted with the best parameters found.
    """
    # scipy's uniform(loc, scale) samples from [loc, loc + scale].
    params = {
        "colsample_bytree": uniform(0.7, 0.3),
        "gamma": uniform(0, 0.5),
        # Keep the learning rate inside (0, 1]; uniform(0.03, 1) reached 1.03.
        "learning_rate": uniform(0.03, 0.97),
        "max_depth": randint(2, 6),
        "n_estimators": randint(100, 150),
        "subsample": uniform(0.6, 0.4),
    }
    model = XGBRegressor()
    search = RandomizedSearchCV(
        model,
        param_distributions=params,
        random_state=42,
        n_iter=10,
        cv=3,
        verbose=2,
        n_jobs=8,
        return_train_score=True,
    )

    search.fit(X_train, y_train)

    _report_best_scores(search.cv_results_)
    return search.best_estimator_

def _report_best_scores(results, n_top=3):
    """Print score and parameters of the candidates ranked 1..``n_top``.

    Args:
        results: mapping with ``rank_test_score``, ``mean_test_score``,
            ``std_test_score`` and ``params`` entries (for instance the
            ``cv_results_`` of a fitted search).
        n_top: number of ranks to report; tied candidates are all printed.
    """
    for rank in range(1, n_top + 1):
        for idx in np.flatnonzero(results["rank_test_score"] == rank):
            mean = results["mean_test_score"][idx]
            std = results["std_test_score"][idx]
            print(f"Model with rank: {rank}")
            print(f"Mean validation score: {mean:.3f} (std: {std:.3f})")
            print(f"Parameters: {results['params'][idx]}")
            print("")


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=78)
model = grid_search(np.array(X_train), np.array(y_train))

In [None]:
preds = model.predict(np.array(X_test))