In [1]:
# 最小可运行项目：基于LSTM的股票走势预测系统
# 步骤：选股 + 技术指标生成 + 滑窗样本构建 + LSTM模型训练 + 预测 + 可视化 + 买入策略判断 + 数据保存

import pandas as pd
import numpy as np
import akshare as ak
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.preprocessing import MinMaxScaler
import os




In [2]:
###############################
# 1. 选股逻辑：获取沪深300中前10只股票（按代码排序）
###############################

stocks = ak.index_stock_cons(symbol="000300")
stocks = stocks.sort_values("品种代码").reset_index(drop=True)

In [3]:
stocks

Unnamed: 0,品种代码,品种名称,纳入日期
0,000001,深发展A,2005-04-08
1,000001,深发展A,2005-04-08
2,000002,万科A,2005-04-08
3,000063,中兴通讯,2005-04-08
4,000063,中兴通讯,2005-04-08
...,...,...,...
295,688396,华润微,2021-06-15
296,688472,阿特斯,2024-12-16
297,688506,百利天恒,2024-12-16
298,688599,天合光能,2021-12-13


In [4]:
selected_codes = list(stocks.loc[:9, '品种代码'])

In [5]:
selected_codes

['000001',
 '000001',
 '000002',
 '000063',
 '000063',
 '000100',
 '000157',
 '000157',
 '000166',
 '000301']

In [6]:
df = ak.stock_zh_a_hist(symbol="000063", period="daily", start_date="20250801", adjust="qfq")

In [7]:
df

Unnamed: 0,日期,股票代码,开盘,收盘,最高,最低,成交量,成交额,振幅,涨跌幅,涨跌额,换手率
0,2025-08-01,63,34.23,33.9,34.58,33.56,785985,2674809000.0,2.97,-1.14,-0.39,1.95
1,2025-08-04,63,33.6,33.87,33.88,33.59,379288,1279439000.0,0.86,-0.09,-0.03,0.94
2,2025-08-05,63,33.93,34.42,34.55,33.93,614925,2104821000.0,1.83,1.62,0.55,1.53
3,2025-08-06,63,34.35,34.55,35.05,34.08,604127,2085995000.0,2.82,0.38,0.13,1.5
4,2025-08-07,63,34.54,34.19,34.69,34.02,598500,2049472000.0,1.94,-1.04,-0.36,1.49
5,2025-08-08,63,34.16,33.78,34.19,33.75,484312,1642794000.0,1.29,-1.2,-0.41,1.2
6,2025-08-11,63,33.79,34.0,34.16,33.78,500378,1702019000.0,1.12,0.65,0.22,1.24


In [8]:
try:
    import pandas_ta as ta
except ModuleNotFoundError:
    raise ModuleNotFoundError("pandas_ta 未安装，请运行 pip install pandas-ta")

In [10]:
df = df.rename(columns={"日期": "date", "开盘": "open", "收盘": "close", "最高": "high", "最低": "low", "成交量": "volume"})

In [11]:
df.ta.sma(length=5, append=True)

0       NaN
1       NaN
2       NaN
3       NaN
4    34.186
5    34.162
6    34.188
Name: SMA_5, dtype: float64

In [12]:
df

Unnamed: 0,date,股票代码,open,close,high,low,volume,成交额,振幅,涨跌幅,涨跌额,换手率,SMA_5
0,2025-08-01,63,34.23,33.9,34.58,33.56,785985,2674809000.0,2.97,-1.14,-0.39,1.95,
1,2025-08-04,63,33.6,33.87,33.88,33.59,379288,1279439000.0,0.86,-0.09,-0.03,0.94,
2,2025-08-05,63,33.93,34.42,34.55,33.93,614925,2104821000.0,1.83,1.62,0.55,1.53,
3,2025-08-06,63,34.35,34.55,35.05,34.08,604127,2085995000.0,2.82,0.38,0.13,1.5,
4,2025-08-07,63,34.54,34.19,34.69,34.02,598500,2049472000.0,1.94,-1.04,-0.36,1.49,34.186
5,2025-08-08,63,34.16,33.78,34.19,33.75,484312,1642794000.0,1.29,-1.2,-0.41,1.2,34.162
6,2025-08-11,63,33.79,34.0,34.16,33.78,500378,1702019000.0,1.12,0.65,0.22,1.24,34.188


In [None]:
os.makedirs("data", exist_ok=True)


###############################
# 2. 获取数据 + 生成技术指标 + 保存CSV
###############################

def fetch_and_process(code):
    df = ak.stock_zh_a_hist(symbol=code, period="daily", start_date="20220101", adjust="qfq")
    df = df.rename(columns={"日期": "date", "开盘": "open", "收盘": "close", "最高": "high", "最低": "low", "成交量": "volume"})
    df['code'] = code
    df['date'] = pd.to_datetime(df['date'])
    df = df.sort_values('date')

    df.ta.sma(length=5, append=True)
    df.ta.rsi(length=14, append=True)
    df.ta.macd(append=True)

    df = df.dropna().reset_index(drop=True)
    df.to_csv(f"data/{code}_features.csv", index=False, encoding="utf-8-sig")
    return df

stock_dfs = [fetch_and_process(code) for code in selected_codes]
data = pd.concat(stock_dfs)


###############################
# 3. 构建滑动窗口样本（10天预测1天）
###############################
class StockDataset(Dataset):
    def __init__(self, df, seq_len=10):
        features = ['close', 'SMA_5', 'RSI_14', 'MACD_12_26_9']
        df = df.copy()
        scaler = MinMaxScaler()
        df[features] = scaler.fit_transform(df[features])

        self.x, self.y_cls, self.y_reg, self.last_close = [], [], [], []

        for _, stock in df.groupby('code'):
            stock = stock.reset_index(drop=True)
            for i in range(len(stock) - seq_len - 1):
                window = stock.iloc[i:i+seq_len]
                target = stock.iloc[i+seq_len]
                delta = (target['close'] - stock.iloc[i+seq_len-1]['close']) / stock.iloc[i+seq_len-1]['close']

                self.x.append(window[features].values)
                self.y_reg.append(delta)
                self.y_cls.append(1 if delta > 0 else 0)
                self.last_close.append(stock.iloc[i+seq_len-1]['close'])

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        return (
            torch.tensor(self.x[idx], dtype=torch.float32),
            torch.tensor(self.y_cls[idx], dtype=torch.long),
            torch.tensor(self.y_reg[idx], dtype=torch.float32),
            self.last_close[idx]
        )

dataset = StockDataset(data)
train_size = int(0.8 * len(dataset))
train_set, test_set = torch.utils.data.random_split(dataset, [train_size, len(dataset) - train_size])
train_loader = DataLoader(train_set, batch_size=32, shuffle=True)
test_loader = DataLoader(test_set, batch_size=32)


###############################
# 4. 构建 LSTM 模型
###############################
class StockLSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True)
        self.fc_cls = nn.Linear(hidden_dim, 2)
        self.fc_reg = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = out[:, -1, :]
        return self.fc_cls(out), self.fc_reg(out).squeeze()

model = StockLSTM(input_dim=4, hidden_dim=64)
criterion_cls = nn.CrossEntropyLoss()
criterion_reg = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


###############################
# 5. 训练模型
###############################
for epoch in range(10):
    model.train()
    total_loss = 0
    for x, y_cls, y_reg, _ in train_loader:
        out_cls, out_reg = model(x)
        loss = criterion_cls(out_cls, y_cls) + criterion_reg(out_reg, y_reg)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}, Loss: {total_loss:.4f}")


###############################
# 6. 评估 + 可视化 + 买入建议
###############################
model.eval()
y_true_cls, y_pred_cls = [], []
y_true_reg, y_pred_reg = [], []
last_closes, predicted_deltas = [], []

with torch.no_grad():
    for x, y_cls, y_reg, last_close in test_loader:
        out_cls, out_reg = model(x)
        y_true_cls += y_cls.tolist()
        y_pred_cls += out_cls.argmax(dim=1).tolist()
        y_true_reg += y_reg.tolist()
        y_pred_reg += out_reg.tolist()
        last_closes += last_close
        predicted_deltas += out_reg.tolist()

acc = accuracy_score(y_true_cls, y_pred_cls)
print(f"分类准确率: {acc:.4f}")

rmse = mean_squared_error(y_true_reg, y_pred_reg, squared=False)
print(f"回归RMSE: {rmse:.4f}")

plt.plot(y_true_reg[:100], label="True")
plt.plot(y_pred_reg[:100], label="Pred")
plt.title("未来1日收益率预测")
plt.legend()
plt.show()

print("\n📈 买入建议判断：")
threshold = 0.01
for i in range(len(predicted_deltas)):
    flag = "✅ 买入" if predicted_deltas[i] > threshold else "❌ 观望"
    print(f"样本{i}: 预测涨幅 {predicted_deltas[i]*100:.2f}%，建议：{flag}")
