In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Location of the candle CSV (presumably hourly BTC/USDT data, judging by the file name).
# NOTE(review): this is an absolute, machine-specific path; it must be adjusted on other machines.
file_path = "/Users/rbw/Desktop/binanceus/BTC_USDT_1h.csv"

# Read the CSV, parse the 'datetime' column into pandas datetimes and use it as the index.
df = pd.read_csv(file_path)
df = df.assign(datetime=pd.to_datetime(df['datetime'])).set_index('datetime')

# Raw columns: timestamp, datetime, open, high, low, close, volume
# Columns for which lag features will be generated.
features = ['open', 'high', 'low', 'close', 'volume']

In [3]:
# Rescale every selected feature column into the [0, 1] range.
# NOTE(review): the scaler is fitted on the full dataset; if a train/test split
# follows later, fit it on the training portion only to avoid leakage - confirm.
scaler = MinMaxScaler(feature_range=(0, 1)).fit(df[features])
scaled = scaler.transform(df[features])

In [4]:
def series_to_supervised(data, features, n_in=3, n_out=2, dropnan=True):
    """
    Reframe a time series as a supervised-learning table.

    Every row of the result places side by side the observations from the
    ``n_in`` preceding steps, the current step ``t`` and the ``n_out - 1``
    steps after it.

    :param data: observations, one row per time step and one column per
        feature; anything ``pd.DataFrame`` accepts (e.g. a scaled NumPy array)
    :param features: feature names, in the same order as the columns of ``data``
    :param n_in: number of lag steps (t-n_in, ..., t-1) used as inputs
    :param n_out: number of forecast steps (t, t+1, ..., t+n_out-1)
    :param dropnan: if true, drop every row that contains a NaN (shifting
        leaves NaNs in the first ``n_in`` and last ``n_out - 1`` rows)
    :return: DataFrame whose columns are named ``<feature>(t-k)``,
        ``<feature>(t)`` and ``<feature>(t+k)``
    """
    frame = pd.DataFrame(data)

    # Step offsets relative to t: negative values are lags, 0 and up are targets.
    offsets = [*range(-n_in, 0), *range(n_out)]

    def _label(name, offset):
        # Column name for one feature at one offset, e.g. close(t-2), close(t), close(t+1).
        if offset < 0:
            return f'{name}(t-{-offset})'
        if offset == 0:
            return f'{name}(t)'
        return f'{name}(t+{offset})'

    # shift(-offset) moves the observation from step t+offset onto row t.
    shifted = [frame.shift(-offset) for offset in offsets]
    labels = [_label(name, offset) for offset in offsets for name in features]

    agg = pd.concat(shifted, axis=1)
    agg.columns = labels
    return agg.dropna() if dropnan else agg

# Build the supervised-learning table: the past 3 hours are the inputs,
# the next 2 hours are the prediction targets.
n_lag, n_pred = 3, 2
reframed = series_to_supervised(scaled, features, n_in=n_lag, n_out=n_pred)

In [5]:
# Input features: every lagged column (e.g. open(t-3), high(t-3), ..., volume(t-1)).
# A column qualifies when it starts with one of the feature names followed by
# '(' and carries the '(t-' lag marker produced by series_to_supervised.
input_features = [
    col for col in reframed.columns
    if '(t-' in col and any(col.startswith(f'{name}(') for name in features)
]
X = reframed[input_features]

# Output targets: the close price at every forecast step (close(t), close(t+1), ...).
# The list is derived from n_pred so it stays in sync with the forecast horizon
# used to build `reframed`, instead of being hard-coded for n_pred == 2.
output_features = [
    f'close(t+{step})' if step else 'close(t)'
    for step in range(n_pred)
]
Y = reframed[output_features]

In [6]:
# Show the first rows of the full reframed table, then of the inputs and the targets.
# Each call prints the banner and the frame preview on separate lines.
print("\n--------------- 特征工程后的数据样例 ---------------", reframed.head(), sep="\n")
print("\n--------------- 输入特征(X) ---------------", X.head(), sep="\n")
print("\n--------------- 输出目标(Y) ---------------", Y.head(), sep="\n")


--------------- 特征工程后的数据样例 ---------------
   open(t-3)  high(t-3)  low(t-3)  close(t-3)  volume(t-3)  open(t-2)  \
3   0.050235   0.052045  0.053541    0.053040     0.175634   0.053259   
4   0.053259   0.053874  0.055658    0.054207     0.110720   0.054864   
5   0.054864   0.052856  0.056838    0.054042     0.025297   0.054706   
6   0.054706   0.052562  0.053276    0.050625     0.077367   0.051099   
7   0.051099   0.049762  0.052784    0.051103     0.027226   0.051321   

   high(t-2)  low(t-2)  close(t-2)  volume(t-2)  ...   open(t)   high(t)  \
3   0.053874  0.055658    0.054207     0.110720  ...  0.054706  0.052562   
4   0.052856  0.056838    0.054042     0.025297  ...  0.051099  0.049762   
5   0.052562  0.053276    0.050625     0.077367  ...  0.051321  0.049770   
6   0.049762  0.052784    0.051103     0.027226  ...  0.049891  0.050685   
7   0.049770  0.052576    0.049523     0.025616  ...  0.051740  0.051295   

     low(t)  close(t)  volume(t)  open(t+1)  high(t+1)  low(