# 使用信号处理函数-argrelextrema

In [1]:
import numpy as np
from alpha.notebook import *
from scipy.signal import argrelextrema
import numpy as np
pd.options.display.max_rows = 999
from alpha.core.rsi_stats import rsi30, rsiday
import talib

# Set up the notebook environment before any data is requested.
# NOTE(review): `init_notebook` and `get_bars` are presumably provided by the
# star import from `alpha.notebook` — confirm.
await init_notebook()

# Daily and 30-minute bars for 000001.XSHG, both ending at 2021-11-05 15:00.
# NOTE(review): the second argument is presumably the number of bars to fetch
# (100 daily, 220 thirty-minute) — confirm against `get_bars`.
shday = await get_bars("000001.XSHG", 100, '1d', '2021-11-5 15:00')
shmin = await get_bars("000001.XSHG", 220, '30m', '2021-11-05 15:00')

In [None]:
def peak_and_valley(bars, ma_win=10):
    """Plot the turning points of the moving average of `bars` and back-test them.

    Local maxima (peaks) and minima (valleys) of the `ma_win`-period moving
    average of ``bars["close"]`` are located with `argrelextrema`, drawn on
    top of the close series, and then used as a naive trading rule: buy at a
    valley while flat, sell at the next peak while holding.

    Args:
        bars: record array (or similar) with ``"close"`` and ``"frame"``
            fields that supports integer-array indexing.
        ma_win: moving-average window. The first ``ma_win - 1`` bars are
            dropped so that the remaining bars line up with the average
            (assumes `moving_average` returns ``len(ts) - ma_win + 1`` points).

    Returns:
        ``(total_gain, trades)`` where ``total_gain`` is the compounded gain
        of all closed trades minus 1 and ``trades`` is a list of dicts with
        the keys ``buy``, ``buy_at``, ``sell``, ``sell_at`` and ``gain``.
        A position still open at the end is not included.
    """
    ma = moving_average(bars["close"], ma_win)

    # Strict local maxima / minima of the moving average.
    peaks = argrelextrema(ma, np.greater)[0]
    valleys = argrelextrema(ma, np.less)[0]

    assert abs(len(peaks) - len(valleys)) <= 1

    # Drop the warm-up bars so that index i of `bars` corresponds to ma[i].
    bars = bars[ma_win - 1:]
    close = bars["close"]
    frames = bars["frame"]
    xs = np.arange(len(bars))

    # Main graph: close price and its moving average.
    (fig, ax) = plt.subplots()
    ax.plot(xs, close, color='c')
    ax.plot(xs, ma, color='b')

    # Turning points, drawn at the close price of the corresponding bar.
    ax.plot(peaks, close[peaks], 'gv', label="Peaks")
    ax.plot(valleys, close[valleys], 'r^', label="Valleys")

    trades = []
    gains = 1
    order = None

    peak_set = set(peaks.tolist())
    valley_set = set(valleys.tolist())

    for x in sorted(peak_set | valley_set):
        # A turning point is only known once the next bar has printed, so the
        # fill price is the mean of this bar's and the next bar's close.
        # With argrelextrema's default mode='clip' the last sample is never
        # reported as a strict extremum, so x + 1 stays in range.
        if x in valley_set and order is None:
            order = {
                "buy": close[x] * 0.5 + close[x + 1] * 0.5,
                "buy_at": frames[x + 1]
            }
        elif x in peak_set and order is not None:
            # The original had a trailing comma here, which turned the sell
            # price into a 1-tuple and the gain into a 1-element array.
            sell_price = close[x + 1] * 0.5 + close[x] * 0.5
            gain = sell_price / order["buy"]
            order.update({
                "sell": sell_price,
                "sell_at": frames[x + 1],
                "gain": gain
            })

            gains *= gain
            trades.append(order)
            order = None

    return gains - 1, trades

# 指数30分钟拐头报警

In [None]:
# Mark the local extrema of the 5-period moving average of the 30-minute bars.
minclose = shmin["close"]
minframe = shmin["frame"]
ma = moving_average(minclose, 5)

# Align frame with ma: the first 4 bars are dropped to match the averaged series.
# The original sliced `close`, `frame` and `sh`, none of which exist at this
# point on a fresh kernel; the sources are `minclose`, `minframe` and `shmin`.
close = minclose[4:]
frame = minframe[4:]
bars = shmin[4:]

# order=5: each side of a point is compared over 5 neighbours.
local_ma = argrelextrema(ma, np.greater, order=5)[0]
local_mi = argrelextrema(ma, np.less, order=5)[0]

plt.plot(ma)
plt.plot(local_ma, ma[local_ma], 'gv')
plt.plot(local_mi, ma[local_mi], 'r^')

# Label every extremum with a slice of its frame string and its index.
for i in (*local_ma, *local_mi):
    plt.text(i, ma[i], f"{str(frame[i])[5:13]}_{i}")

# 抛物线转向

股票的均线形态常常会出现类似于抛物线的向上、向下转向，尤其以指数为甚。但研究表明，抛物线拟合容易出现错误信号，准确率不高，需要研究如何提升。

如果连续几个周期拟合出的二次项系数 a 基本一致，而 dist（拟合窗口长度 rng 减去抛物线顶点横坐标 vx）每次加 1，则说明这个 pattern 比较稳定。

In [None]:
def parabolic_features(ts, rng=7, ma_win=5, calc_ma=True):
    """Fit a parabola to the tail of a series and derive turning-point features.

    Args:
        ts: input series. When `calc_ma` is true it is first replaced by
            ``moving_average(ts, ma_win)``; otherwise it is used as given.
        rng: number of trailing points passed to `polyfit`.
        ma_win: moving-average window; also forwarded to
            `reverse_moving_average`.
        calc_ma: whether to smooth `ts` before fitting.

    Returns:
        A 5-tuple ``(direction, pred_roc, dist, a, pmae)``:
        ``pred_roc`` is the value returned by `reverse_moving_average` for the
        extrapolated fit divided by the last point of the (possibly smoothed)
        series, minus 1; ``direction`` is its sign; ``dist`` is ``rng`` minus
        the parabola's vertex abscissa (rounded to 1 decimal); ``a`` is the
        quadratic coefficient rounded to 4 decimals; ``pmae`` is the second
        value returned by `polyfit`, rounded to 5 decimals.

    NOTE(review): `polyfit` and `reverse_moving_average` are project helpers;
    presumably the former returns quadratic coefficients plus a fit error and
    the latter recovers the next raw value from the predicted average —
    confirm. A perfectly linear tail (a == 0) is not guarded against.
    """
    series = moving_average(ts, ma_win) if calc_ma else ts

    tail = series[-rng:]
    coeffs, pmae = polyfit(tail)
    a, b, c = coeffs

    # Evaluate the fit on 0..rng, i.e. one step beyond the fitted window.
    fitted = np.polyval((a, b, c), np.arange(rng + 1))

    # Debugging aid: plot `series` together with `fitted[:-1]` placed over the
    # last `rng` positions to inspect the fit visually.

    # Abscissa of the parabola's vertex within the fitted window.
    vertex_x = round(-b / (2 * a), 1)

    projected = reverse_moving_average(fitted, rng, ma_win)
    pred_roc = projected / series[-1] - 1
    direction = np.sign(pred_roc)

    return direction, pred_roc, rng - vertex_x, round(a, 4), round(pmae, 5)

In [None]:
# Replay the detector over the most recent bars: each line shows the frame at
# offset i next to the features computed from the closes strictly before it.
for i in range(-10, -1):
    history = minclose[:i]
    print(minframe[i], parabolic_features(history))

# 机器学习模型

目标： 需要能判断

33 10-25 10:00 底

10-26 10:30 顶 RSI

10-29 10:00 底 RSI

11-02 10:00 顶 抛物线转向？要求11：00能发出信号

11-03 13:30 底

11-03 14:30 底 RSI底背离

11-04 15:00 顶 抛物线转向？

## 数据标注

In [2]:
shmin_12000 = await get_bars("000001.XSHG", 12000, '30m', end="2011-11-05 15:00")
peaks, valleys = peaks_and_valleys(shmin_12000["close"], min_altitude_ratio=1e-3)

print(len(peaks), len(valleys))

peak_frames = [shmin_12000[p]['frame'] for p in peaks]
valley_frames = [shmin_12000[p]['frame'] for p in valleys]

data = {
    "bars": shmin_12000,
    "peaks": peaks,
    "valleys": valleys
}

import pickle

with open("/apps/alpha/data/sh_30m_pv_labbelled.pkl", "wb") as f:
    pickle.dump(data, f)

734 738


## 训练

In [3]:
import random

from sklearn.model_selection import train_test_split

# Fixed seed so that the negative sample and the train/test split are the same
# on every run; without it successive revisions are not comparable.
SEED = 42
sampler = random.Random(SEED)

# Class labels stored in the second-to-last column of every row.
LABEL_PEAK, LABEL_VALLEY, LABEL_OTHER = 0, 1, 2

n_bars = len(shmin_12000)


def _feature_row(i, label):
    """Return `[*features, label, i]` for the 100 bars ending at index `i`."""
    feature, _desc = reversal_features("000001.XSHG", shmin_12000[i-99:i+1], FrameType.MIN30, peak_altitude=1e-3)
    return [*feature, label, i]


features = []

# Labelled turning points. Bars without 100 bars of history are skipped, and
# so is the very last bar.
for label, indices in ((LABEL_PEAK, peaks), (LABEL_VALLEY, valleys)):
    for i in indices:
        if i < 100 or i == n_bars - 1:
            continue
        features.append(_feature_row(i, label))

excluded = set(peaks)
excluded.update(valleys)

# Negative class: random bars that are neither a peak nor a valley.
# NOTE(review): the upper bound 9999 leaves the most recent bars unsampled —
# confirm this is intentional.
for i in sampler.sample(range(100, 9999), 1000):
    if i not in excluded:
        features.append(_feature_row(i, LABEL_OTHER))

features = np.array(features)
total = len(features)

# 80/20 split by row index.
train_indices = sampler.sample(range(total), int(0.8 * total))
test_indices = sorted(set(range(total)) - set(train_indices))

train_rows = features[train_indices]
test_rows = features[test_indices]

# Column layout: [..features.., label, bar index].
X_train = train_rows[:, 0:-2]
y_train = train_rows[:, -2].astype('i4')
meta_train = train_rows[:, -1].astype('i4')

X_test = test_rows[:, 0:-2]
y_test = test_rows[:, -2].astype('i4')
meta_test = test_rows[:, -1].astype('i4')

print(X_train[0])
print(y_train[0])

[ 7.18026936e-01  9.10000000e-01  9.30000000e-01  7.50000000e-01
  7.91300000e+01  7.98300000e+01  6.73700000e+01  1.00000000e+00
  8.55746492e-03  3.47000000e+01  1.56300000e-01  2.10000000e-04
  0.00000000e+00  0.00000000e+00  0.00000000e+00  3.24630737e-03
  2.23994255e-03  0.00000000e+00 -1.00000000e+00 -1.00000000e+00
 -1.00000000e+00]
2


In [4]:
from xgboost import XGBClassifier
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint, uniform


model = XGBClassifier(eval_metric='mlogloss',use_label_encoder=False)

# scipy's uniform(loc, scale) samples from [loc, loc + scale], and
# randint(low, high) from the integers in [low, high).
params = {
    "colsample_bytree": uniform(0.7, 0.3),   # [0.7, 1.0]
    "gamma": uniform(0, 0.5),                # [0.0, 0.5]
    # Was uniform(0.01, 1), i.e. [0.01, 1.01], which can exceed the documented
    # [0, 1] range of XGBoost's learning rate.
    "learning_rate": uniform(0.01, 0.99),    # [0.01, 1.0]
    "max_depth": randint(2, 6),              # 2..5
    "n_estimators": randint(80, 150),        # 80..149
    "subsample": uniform(0.6, 0.4),          # [0.6, 1.0]
}

# Random search over 100 candidates with 3-fold CV; the best candidate is
# refitted on the whole training set (refit=True).
search = RandomizedSearchCV(
    model,
    param_distributions=params,
    random_state=78,
    n_iter=100,
    cv=3,
    verbose=1,
    n_jobs=4,
    return_train_score=True,
    refit=True,
)

search.fit(X_train, y_train)

Fitting 3 folds for each of 100 candidates, totalling 300 fits


RandomizedSearchCV(cv=3,
                   estimator=XGBClassifier(base_score=None, booster=None,
                                           colsample_bylevel=None,
                                           colsample_bynode=None,
                                           colsample_bytree=None,
                                           enable_categorical=False,
                                           eval_metric='mlogloss', gamma=None,
                                           gpu_id=None, importance_type=None,
                                           interaction_constraints=None,
                                           learning_rate=None,
                                           max_delta_step=None, max_depth=None,
                                           min_child_weight=None, missing=n...
                                        'learning_rate': <scipy.stats._distn_infrastructure.rv_frozen object at 0xffff174d8e20>,
                                        'max_depth':

In [None]:
from sklearn.metrics import classification_report

model = search.best_estimator_
preds = model.predict(X_test)
report = classification_report(y_test, preds)
print(report)

# Plot the misclassified samples found among the first 20 test rows.
for i in range(min(20, len(y_test))):
    if y_test[i] == preds[i]:
        continue

    pos = int(meta_test[i])

    cs = Candlestick({'30m': [5, 10, 20, 30]}, n_plot_bars=60)

    # 50 bars before the sample and up to 9 after it; the sample itself sits
    # at index 50, which is where the signal marker is drawn.
    bars = shmin_12000[pos-50:pos+10]

    # Recompute the features on the same 100-bar window that the training
    # cell used, so the text matches what the model saw. The original used
    # `bars[:-9]`, a shorter window that also ended on the wrong bar whenever
    # the slice above was truncated at the end of the data.
    vec, desc = reversal_features("000001.XSHG", shmin_12000[pos-99:pos+1], FrameType.MIN30, peak_altitude=1e-3)

    # Separate name so the global `features` array is not overwritten.
    feature_lines = [f"{d}: {v:.2f}" for d, v in zip(desc, vec)]
    plt.text(0, 0, "\n".join(feature_lines))

    cs.plot_bars(bars, title = f"True: {y_test[i]}/ Pred: {preds[i]}", signals=[(50, 'x', 'r')])


# revisions
v1: 
              precision    recall  f1-score   support

           0       0.70      0.81      0.75       158
           1       0.68      0.76      0.72       134
           2       0.57      0.44      0.50       177

    accuracy                           0.66       469
   macro avg       0.65      0.67      0.66       469
weighted avg       0.65      0.66      0.65       469


[x] v2: parab_vx 取值范围过宽，可能导致收敛困难，应该限制在-win:win之间。

              precision    recall  f1-score   support

           0       0.64      0.79      0.71       144
           1       0.71      0.85      0.77       143
           2       0.62      0.42      0.50       185

    accuracy                           0.66       472
   macro avg       0.66      0.68      0.66       472
weighted avg       0.65      0.66      0.65       472


In [None]:
# Persist the selected classifier.
# NOTE(review): absolute, user-specific path; `pickle` is imported in the
# data-labelling cell, which must have run first.
with open("/home/aaron/zillionare/alpha/models/reversal-v1.pkl", "wb") as model_file:
    pickle.dump(model, model_file)

In [9]:
# NOTE(review): scratch arithmetic; none of these numbers appear elsewhere in
# the visible notebook — presumably unrelated to the analysis, consider removing.
8.0002*0.9+1.656+1.656+1.656+3.312+74.521

90.00118