In [1]:
"""
- Skearn库是Python机器中常用的第三方库，其中对常用的机器学习方法、模型评估、数据预处理和特征提取方法进行了封装
"""
# 数据准备
import tushare as ts
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time


def get_data(code, start, end, window_len=20):
    # 数据获取
    # 1.数据准备
    pro = ts.pro_api("20231208200557-1a9c3fbc-4615-474e-8445-26c2a0019fc0")
    pro._DataApi__http_url = "http://tsapi.majors.ltd:7000"
    df = pro.daily(
        ts_code=code,
        start_date=start,
        end_date=end,
        fields="ts_code,trade_date,open,close",
    )
    df = df.reindex(index=df.index[::-1])
    # 索引重新编号
    df = df.reset_index(drop=True)
    # 判断涨跌
    df["label"] = df["close"] - df["open"]
    # 转换为二维数组
    arr = df[["close", "label"]].values
    # 特征提取及标注标签
    X = []
    Label = []
    window_len = 20
    # 遍历数组
    for i in range(len(arr) - window_len - 1):
        X.append(arr[i : i + window_len, 0])
        if arr[i + window_len, 1] > 0:
            Label.append(1)
        else:
            Label.append(0)

    # 划分训练集和测试集
    train_len = int(len(X) / 3) * 2
    X_train = X[:train_len]
    Label_train = Label[:train_len]
    X_test = X[train_len:]
    Label_test = Label[train_len:]
    return X_train, Label_train, X_test, Label_test



In [2]:
X_train, Label_train, X_test, Label_test = get_data("000001.SZ", "20230101", "20231201")

In [33]:
# X_train

In [3]:
from sklearn.svm import SVC
# 实例化SVM模型
svm = SVC(kernel='linear')
# 模型训练
svm.fit(X_train, Label_train)
# 模型预测结果
prediction = svm.predict(X_train)
print(prediction)

[0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 1 0 0 0 0 0 1 1 1 0 0 0 0 0 1 1 0 0 0 0
 1 1 1 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 1 1 1 0 0 1 0 0 0 1 1 1 0 0 0 1 1 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]


In [4]:
len(Label_train)

134

In [5]:
from sklearn.metrics import accuracy_score
# 计算模型预测准确率
print(accuracy_score(Label_train, prediction))

0.6492537313432836


In [6]:
# 模型预测结果
prediction = svm.predict(X_test)
print(accuracy_score(Label_test, prediction))

0.4925373134328358


In [6]:
import talib
# 通过ATR,RSI,CCI,MA等指标及收盘价来预测第二天的涨跌
def get_data2(code, start, end, timeperiod=14):
    # 数据获取
    # 1.数据准备
    pro = ts.pro_api("20231208200557-1a9c3fbc-4615-474e-8445-26c2a0019fc0")
    pro._DataApi__http_url = "http://tsapi.majors.ltd:7000"
    df = pro.daily(
        ts_code=code,
        start_date=start,
        end_date=end,
        fields="ts_code,trade_date,open,close, high, low",
    )
    df = df.reindex(index=df.index[::-1])
    # 索引重新编号
    df = df.reset_index(drop=True)
    # 判断涨跌
    df["label"] = df["close"] - df["open"]
    # 计算其他指标
    df['MA'] = talib.MA(df['close'], timeperiod=timeperiod)
    df['ATR'] = talib.ATR(df['high'], df['low'], df['close'], timeperiod=timeperiod)
    df['RSI'] = talib.RSI(df['close'], timeperiod=timeperiod)
    df['CCI'] = talib.CCI(df['high'], df['low'], df['close'], timeperiod=timeperiod)
    # 删除包含空值的行数据
    df = df.dropna()
    # 转换为二维数组
    arr = df[["close", "label", 'MA', 'ATR', 'RSI', 'CCI']].values
    # 特征提取及标注标签
    X = []
    Label = []
    window_len = 20
    # 遍历数组
    for i in range(len(arr) - window_len - 1):
        X.append(arr[i : i + window_len, 0])
        if arr[i + window_len, 1] > 0:
            Label.append(1)
        else:
            Label.append(0)

    # 划分训练集和测试集
    train_len = int(len(X) / 3) * 2
    X_train = X[:train_len]
    Label_train = Label[:train_len]
    X_test = X[train_len:]
    Label_test = Label[train_len:]
    return X_train, Label_train, X_test, Label_test

In [15]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

X_train, Label_train, X_test, Label_test = get_data2("000001.SZ", "20180101", "20231201")
# 实例化SVM模型
svm = SVC(kernel='linear')
# 模型训练
svm.fit(X_train, Label_train)
# 模型预测结果
prediction = svm.predict(X_test)
print(accuracy_score(Label_test, prediction))

0.5576923076923077


In [16]:
# 实例化SVM模型
svm = SVC(kernel="linear")
# 模型训练
svm.fit(X_train, Label_train)
# 模型预测结果
prediction = svm.predict(X_train)
print(accuracy_score(Label_train, prediction))

0.5578158458244111


In [7]:
"""
- Skearn库是Python机器中常用的第三方库，其中对常用的机器学习方法、模型评估、数据预处理和特征提取方法进行了封装
"""

# 数据准备
import tushare as ts
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time


def get_data(code, start, end, window_len=20):
    # 数据获取
    # 1.数据准备
    pro = ts.pro_api("20231208200557-1a9c3fbc-4615-474e-8445-26c2a0019fc0")
    pro._DataApi__http_url = "http://tsapi.majors.ltd:7000"
    df = pro.daily(
        ts_code=code,
        start_date=start,
        end_date=end,
        fields="ts_code,trade_date,open,close",
    )
    df = df.reindex(index=df.index[::-1])
    # 索引重新编号
    df = df.reset_index(drop=True)
    # 判断涨跌
    df["label"] = df["close"] - df["open"]
    # 转换为二维数组
    arr = df[["close", "label"]].values
    # 特征提取及标注标签
    X = []
    Label = []
    window_len = 20
    # 遍历数组
    for i in range(len(arr) - window_len - 1):
        X.append(arr[i : i + window_len, 0])
        if arr[i + window_len, 1] > 0:
            Label.append(1)
        else:
            Label.append(0)
    return X, Label

In [26]:
df = pd.read_csv('./backtrade实战/data/000001.SZ.csv')

In [28]:
df.trade_date = pd.to_datetime(df.trade_date.apply(str))

In [30]:
df[(df["trade_date"] >= "2022-01-01") & (df["trade_date"] <= "2024-02-08")]

Unnamed: 0,ts_code,trade_date,open,high,low,close,vol
5171,000001.SZ,2022-01-04,16.48,16.66,16.18,16.66,1169259.33
5172,000001.SZ,2022-01-05,16.58,17.22,16.55,17.15,1961998.17
5173,000001.SZ,2022-01-06,17.11,17.27,17.00,17.12,1107885.19
5174,000001.SZ,2022-01-07,17.10,17.28,17.06,17.20,1126630.70
5175,000001.SZ,2022-01-10,17.29,17.42,17.03,17.19,909774.01
...,...,...,...,...,...,...,...
5677,000001.SZ,2024-02-01,9.41,9.48,9.34,9.41,1055259.87
5678,000001.SZ,2024-02-02,9.42,9.47,9.07,9.27,1634214.31
5679,000001.SZ,2024-02-05,9.19,9.43,9.11,9.38,1803591.90
5680,000001.SZ,2024-02-06,9.30,9.68,9.29,9.68,1995409.54


In [60]:
"""
测试svm在多种股票中的效果
"""

# 数据准备
import tushare as ts
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time


def get_data(stockPath, start, end, window_len=20):
    # 数据获取
    # 1.数据准备
    df = pd.read_csv(stockPath)
    # 索引重新编号
    df = df.reset_index(drop=True)
    df.trade_date = pd.to_datetime(df.trade_date.apply(str))
    df = df[
        (df["trade_date"] >= start) & (df["trade_date"] <= end)
    ]  # 日期格式参考："2022-01-01"
    # 判断涨跌
    df["label"] = df["close"] - df["open"]
    # 转换为二维数组
    arr = df[["close", "label"]].values
    # 特征提取及标注标签
    X = []
    Label = []
    window_len = 15
    # 遍历数组
    for i in range(len(arr) - window_len - 1):
        X.append(arr[i : i + window_len, 0])
        if arr[i + window_len, 1] > 0:
            Label.append(1)
        else:
            Label.append(0)
    # 划分训练集和测试集
    train_len = len(X) - 15
    X_train = X[:train_len]
    Label_train = Label[:train_len]
    X_test = X[train_len:]
    Label_test = Label[train_len:]
    return X_train, Label_train, X_test, Label_test

In [71]:
def random_stocks(stock_num):
    import os
    import random
    # random.seed(0.1)
    filenames = os.listdir("./backtrade实战/data")
    stock_choices = random.choices(filenames, k=stock_num)
    return ["./backtrade实战/data/{}".format(filename) for filename in stock_choices]

choice_stocks = random_stocks(50)
train_scores = []
test_scores = []
for stockPath in choice_stocks:
    X_train, Label_train, X_test, Label_test = get_data(stockPath, '2020-01-01', '2024-02-07')
    # 实例化SVM模型
    svm = SVC(kernel="linear")
    # 模型训练
    svm.fit(X_train, Label_train)
    # 模型预测结果
    prediction = svm.predict(X_train)
    train_score = accuracy_score(Label_train, prediction)
    print('训练集分数：',stockPath, train_score)
    train_scores.append(train_score)
    prediction = svm.predict(X_test)
    test_score = accuracy_score(Label_test, prediction)
    test_scores.append(test_score)
    print('测试集分数：', stockPath, test_score)
print('训练集分数平均：', np.average(train_scores))
print('训练集分数最高：', np.max(train_scores), '最低：', np.min(train_scores))
print('测试集分数平均:', np.average(test_scores))
print('测试集分数最高:', np.max(test_scores), '最低：', np.min(test_scores))

训练集分数： ./backtrade实战/data/002095.SZ.csv 0.5248447204968945
测试集分数： ./backtrade实战/data/002095.SZ.csv 0.4
训练集分数： ./backtrade实战/data/002224.SZ.csv 0.5403726708074534
测试集分数： ./backtrade实战/data/002224.SZ.csv 0.7333333333333333
训练集分数： ./backtrade实战/data/605056.SH.csv 0.5726351351351351
测试集分数： ./backtrade实战/data/605056.SH.csv 0.4
训练集分数： ./backtrade实战/data/600606.SH.csv 0.5598335067637877
测试集分数： ./backtrade实战/data/600606.SH.csv 0.6
训练集分数： ./backtrade实战/data/300810.SZ.csv 0.5227743271221532
测试集分数： ./backtrade实战/data/300810.SZ.csv 0.2
训练集分数： ./backtrade实战/data/688266.SH.csv 0.5509989484752892
测试集分数： ./backtrade实战/data/688266.SH.csv 0.6666666666666666
训练集分数： ./backtrade实战/data/002370.SZ.csv 0.5579710144927537
测试集分数： ./backtrade实战/data/002370.SZ.csv 0.7333333333333333
训练集分数： ./backtrade实战/data/605303.SH.csv 0.5363372093023255
测试集分数： ./backtrade实战/data/605303.SH.csv 0.7333333333333333
训练集分数： ./backtrade实战/data/836270.BJ.csv 0.5488958990536278
测试集分数： ./backtrade实战/data/836270.BJ.csv 0.333333333333333

- 2020-01-01 ~ 2024-02-07 50随机股 预测15天 测试集分数平均 窗口20: 0.5533333333333332  0.5559999999999999 0.5946666666666667
- 2020-01-01 ~ 2024-02-07 100随机股 预测15天 测试集分数平均 窗口20: 0.586
- 2020-01-01 ~ 2024-02-07 50随机股 预测15天 测试集分数平均 窗口30: 0.5586666666666668
- 2020-01-01 ~ 2024-02-07 50随机股 预测15天 测试集分数平均 窗口15: 0.5613333333333334 0.588

- 2023-01-01 ~ 2024-02-07 50随机股 预测15天 测试集分数平均: 0.5146666666666666
- 2022-01-01 ~ 2024-02-07 50随机股 预测15天 测试集分数平均: 0.5266666666666666
- 2021-01-01 ~ 2024-02-07 50随机股 预测15天 测试集分数平均: 0.5479999999999999
- 2018-01-01 ~ 2024-02-07 50随机股 预测15天 测试集分数平均: 0.5479999999999999
- 2010-01-01 ~ 2024-02-07 50随机股 预测15天 测试集分数平均: 0.5213333333333333

In [19]:
X_test, Label_test = get_data("000004.SZ", "20230101", "20231231")
# 模型预测结果
prediction = svm.predict(X_test)
print(accuracy_score(Label_test, prediction))  # 0.6380090497737556

0.5545454545454546


In [62]:
list(prediction)

[1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1]