In [1]:
"""
- Skearn库是Python机器中常用的第三方库，其中对常用的机器学习方法、模型评估、数据预处理和特征提取方法进行了封装
"""
# 数据准备
import tushare as ts
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time


def get_data(code, start, end, window_len=20):
    # 数据获取
    # 1.数据准备
    pro = ts.pro_api("20231208200557-1a9c3fbc-4615-474e-8445-26c2a0019fc0")
    pro._DataApi__http_url = "http://tsapi.majors.ltd:7000"
    df = pro.daily(
        ts_code=code,
        start_date=start,
        end_date=end,
        fields="ts_code,trade_date,open,close",
    )
    df = df.reindex(index=df.index[::-1])
    # 索引重新编号
    df = df.reset_index(drop=True)
    # 判断涨跌
    df["label"] = df["close"] - df["open"]
    # 转换为二维数组
    arr = df[["close", "label"]].values
    # 特征提取及标注标签
    X = []
    Label = []
    window_len = 20
    # 遍历数组
    for i in range(len(arr) - window_len - 1):
        X.append(arr[i : i + window_len, 0])
        if arr[i + window_len, 1] > 0:
            Label.append(1)
        else:
            Label.append(0)

    # 划分训练集和测试集
    train_len = int(len(X) / 3) * 2
    X_train = X[:train_len]
    Label_train = Label[:train_len]
    X_test = X[train_len:]
    Label_test = Label[train_len:]
    return X_train, Label_train, X_test, Label_test



In [26]:
X_train, Label_train, X_test, Label_test = get_data("000001.SZ", "20230101", "20231201")

In [33]:
# X_train

In [30]:
from sklearn.svm import SVC
# 实例化SVM模型
svm = SVC(kernel='linear')
# 模型训练
svm.fit(X_train, Label_train)
# 模型预测结果
prediction = svm.predict(X_train)
print(prediction)

[0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 1 1 0 0 0 0 0 1 1 1 0 0 0 0 0 1 1 0 0 0 0
 1 1 1 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 1 1 1 0 0 1 0 0 0 1 1 1 0 0 0 1 1 0 0 0 0 0 0 0 1 1 1 1 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]


In [32]:
len(Label_train)

134

In [24]:
from sklearn.metrics import accuracy_score
# 计算模型预测准确率
print(accuracy_score(Label_train, prediction))

0.5614406779661016


In [25]:
# 模型预测结果
prediction = svm.predict(X_test)
print(accuracy_score(Label_test, prediction))

0.5508474576271186


In [6]:
import talib
# 通过ATR,RSI,CCI,MA等指标及收盘价来预测第二天的涨跌
def get_data2(code, start, end, timeperiod=14):
    # 数据获取
    # 1.数据准备
    pro = ts.pro_api("20231208200557-1a9c3fbc-4615-474e-8445-26c2a0019fc0")
    pro._DataApi__http_url = "http://tsapi.majors.ltd:7000"
    df = pro.daily(
        ts_code=code,
        start_date=start,
        end_date=end,
        fields="ts_code,trade_date,open,close, high, low",
    )
    df = df.reindex(index=df.index[::-1])
    # 索引重新编号
    df = df.reset_index(drop=True)
    # 判断涨跌
    df["label"] = df["close"] - df["open"]
    # 计算其他指标
    df['MA'] = talib.MA(df['close'], timeperiod=timeperiod)
    df['ATR'] = talib.ATR(df['high'], df['low'], df['close'], timeperiod=timeperiod)
    df['RSI'] = talib.RSI(df['close'], timeperiod=timeperiod)
    df['CCI'] = talib.CCI(df['high'], df['low'], df['close'], timeperiod=timeperiod)
    # 删除包含空值的行数据
    df = df.dropna()
    # 转换为二维数组
    arr = df[["close", "label", 'MA', 'ATR', 'RSI', 'CCI']].values
    # 特征提取及标注标签
    X = []
    Label = []
    window_len = 20
    # 遍历数组
    for i in range(len(arr) - window_len - 1):
        X.append(arr[i : i + window_len, 0])
        if arr[i + window_len, 1] > 0:
            Label.append(1)
        else:
            Label.append(0)

    # 划分训练集和测试集
    train_len = int(len(X) / 3) * 2
    X_train = X[:train_len]
    Label_train = Label[:train_len]
    X_test = X[train_len:]
    Label_test = Label[train_len:]
    return X_train, Label_train, X_test, Label_test

In [15]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

X_train, Label_train, X_test, Label_test = get_data2("000001.SZ", "20180101", "20231201")
# 实例化SVM模型
svm = SVC(kernel='linear')
# 模型训练
svm.fit(X_train, Label_train)
# 模型预测结果
prediction = svm.predict(X_test)
print(accuracy_score(Label_test, prediction))

0.5576923076923077


In [16]:
# 实例化SVM模型
svm = SVC(kernel="linear")
# 模型训练
svm.fit(X_train, Label_train)
# 模型预测结果
prediction = svm.predict(X_train)
print(accuracy_score(Label_train, prediction))

0.5578158458244111


In [34]:
"""
- Skearn库是Python机器中常用的第三方库，其中对常用的机器学习方法、模型评估、数据预处理和特征提取方法进行了封装
"""

# 数据准备
import tushare as ts
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time


def get_data(code, start, end, window_len=20):
    # 数据获取
    # 1.数据准备
    pro = ts.pro_api("20231208200557-1a9c3fbc-4615-474e-8445-26c2a0019fc0")
    pro._DataApi__http_url = "http://tsapi.majors.ltd:7000"
    df = pro.daily(
        ts_code=code,
        start_date=start,
        end_date=end,
        fields="ts_code,trade_date,open,close",
    )
    df = df.reindex(index=df.index[::-1])
    # 索引重新编号
    df = df.reset_index(drop=True)
    # 判断涨跌
    df["label"] = df["close"] - df["open"]
    # 转换为二维数组
    arr = df[["close", "label"]].values
    # 特征提取及标注标签
    X = []
    Label = []
    window_len = 20
    # 遍历数组
    for i in range(len(arr) - window_len - 1):
        X.append(arr[i : i + window_len, 0])
        if arr[i + window_len, 1] > 0:
            Label.append(1)
        else:
            Label.append(0)
    return X, Label

In [101]:
X, Label = get_data("000042.SZ", "20000101", "20231231")
# 实例化SVM模型
svm = SVC(kernel="linear")
# 模型训练
svm.fit(X, Label)
# 模型预测结果
prediction = svm.predict(X)
print(accuracy_score(Label, prediction))
# 0.5209294075913444

0.5248046520079956


In [102]:
X_test, Label_test = get_data("000042.SZ", "20230101", "20231231")
# 模型预测结果
prediction = svm.predict(X_test)
print(accuracy_score(Label_test, prediction))  # 0.6380090497737556

0.47058823529411764


In [62]:
list(prediction)

[1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1]