In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np


In [2]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np 

df = pd.read_csv('./data/df.csv',sep=',')
df  = df.sort_values(by='time',ascending=True).reset_index(drop=True)

BATCHSIZE = 256
LOOKBACK = 10


date = ["2024-02-19",
"2024-03-15",
"2024-04-19",
"2024-05-17",
"2024-06-21",
"2024-07-19",
"2024-08-16",
"2024-09-20",
"2024-10-18",
"2024-11-15",
"2024-12-20",
"2025-01-17"]
df.head()

df['time'] = pd.to_datetime(df['time'])
df['date'] = df['time'].dt.date
df['date'] = df['date'].astype(str)
df['hour'] = df['time'].dt.hour
df['minute'] = df['time'].dt.minute 
# for i in range(1,5):
#     df[f'spread_shift_{i}'] = df.groupby(['hour','minute'])['spread'].shift(i)

df['Expiration_Date'] = df['date'].apply(lambda x: 1 if x in date else 0)
# df.fillna(0,inplace=True)


df.dropna(inplace=True)

df.head()


Unnamed: 0,time,IF00_close,IF01_close,spread,date,hour,minute,Expiration_Date
0,2024-01-24 09:30:00,3240.0,3236.8,-3.2,2024-01-24,9,30,0
1,2024-01-24 09:31:00,3241.4,3243.0,1.6,2024-01-24,9,31,0
2,2024-01-24 09:32:00,3240.0,3243.0,3.0,2024-01-24,9,32,0
3,2024-01-24 09:33:00,3234.4,3235.6,1.2,2024-01-24,9,33,0
4,2024-01-24 09:34:00,3235.8,3236.0,0.2,2024-01-24,9,34,0


In [4]:
df[df['date']=="2025-01-15"] # 56634 57117

Unnamed: 0,time,IF00_close,IF01_close,spread,date,hour,minute,Expiration_Date
56635,2025-01-15 09:30:00,3816.4,3800.0,-16.4,2025-01-15,9,30,0
56636,2025-01-15 09:31:00,3811.2,3809.8,-1.4,2025-01-15,9,31,0
56637,2025-01-15 09:32:00,3809.6,3808.4,-1.2,2025-01-15,9,32,0
56638,2025-01-15 09:33:00,3810.8,3809.2,-1.6,2025-01-15,9,33,0
56639,2025-01-15 09:34:00,3809.4,3806.6,-2.8,2025-01-15,9,34,0
...,...,...,...,...,...,...,...,...
56871,2025-01-15 14:56:00,3798.0,3795.2,-2.8,2025-01-15,14,56,0
56872,2025-01-15 14:57:00,3797.6,3795.0,-2.6,2025-01-15,14,57,0
56873,2025-01-15 14:58:00,3797.4,3794.4,-3.0,2025-01-15,14,58,0
56874,2025-01-15 14:59:00,3795.4,3792.4,-3.0,2025-01-15,14,59,0


In [3]:
# train-test split for time series



import torch

def create_dataset(dataset, lookback,split_index):
    """Transform a time series into a prediction dataset

    Args:
        dataset: A numpy array of time series, first dimension is the time steps
        lookback: Size of window for prediction
    """


    X_train, y_train = [], []
    X_test, y_test = [], []
    for i in range(len(dataset)-lookback):
        if i < split_index:
            feature = dataset[i:i+lookback]
            target = dataset[i+lookback:i+lookback+1][:,0:1]
            X_train.append(feature)
            y_train.append(target)
        else:
            feature = dataset[i:i+lookback]
            target = dataset[i+lookback:i+lookback+1][:,0:1]
            X_test.append(feature)
            y_test.append(target)
    # X = np.array(X)
    # y = np.array(y)
    return torch.tensor(X_train), torch.tensor(y_train),torch.tensor(X_test), torch.tensor(y_test)





In [4]:

# 设置随机种子以确保结果可复现
torch.manual_seed(42)

# 检查是否有可用的GPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

# 定义全连接神经网络模型
class FCNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(FCNN, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)
        
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        return x

# 定义早停类（从context中借鉴）
class EarlyStopping:
    def __init__(self, patience=7, min_delta=0, verbose=False):
        self.patience = patience
        self.min_delta = min_delta
        self.verbose = verbose
        self.counter = 0
        self.best_loss = None
        self.early_stop = False
        self.val_loss_min = float('inf')
        
    def __call__(self, val_loss, model, optimizer, epoch):
        if self.best_loss is None:
            self.best_loss = val_loss
            self.save_checkpoint(val_loss, model, optimizer, epoch)
        elif val_loss > self.best_loss + self.min_delta:
            self.counter += 1
            if self.verbose:
                print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_loss = val_loss
            self.save_checkpoint(val_loss, model, optimizer, epoch)
            self.counter = 0
            
    def save_checkpoint(self, val_loss, model, optimizer, epoch):
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
        torch.save(model.state_dict(), 'fcnn_checkpoint.pt')
        self.val_loss_min = val_loss


Using device: cuda


In [5]:
import numpy as np
import torch.optim as optim
import torch.utils.data as data
import time 
begin_time = time.time()
df_result = pd.DataFrame()

timeseries = df['spread'].astype(np.float32).values.reshape(-1,1) 

for split_index in range(39237, len(timeseries),100):
    train_size = split_index # 
    # test_size = len(timeseries) - train_size
    # train, test = timeseries[:train_size], timeseries[train_size:]
    lookback = LOOKBACK 
    X_train_all, y_train_all,X_test, y_test = create_dataset(timeseries, lookback=lookback,split_index=split_index)
    # X_test, y_test = create_dataset(test, lookback=lookback)
    print(X_train_all.shape, y_train_all.shape)
    print(X_test.shape, y_test.shape)

    indices = np.arange(len(X_train_all))   
    np.random.shuffle(indices)
    train_val_size = int(len(X_train_all) * 0.8)
    train_indices = indices[:train_val_size]
    val_indices = indices[train_val_size:]
    X_train = X_train_all[train_indices]
    y_train = y_train_all[train_indices]
    X_val = X_train_all[val_indices]
    y_val = y_train_all[val_indices]

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    X_train = X_train.to(device).squeeze(-1)
    y_train = y_train.to(device).squeeze(-1)
    X_val = X_val.to(device).squeeze(-1)
    y_val = y_val.to(device).squeeze(-1)
    X_test = X_test.to(device).squeeze(-1)
    y_test = y_test.to(device).squeeze(-1)

    model = FCNN(10,100,1).to(device)
    model 


    optimizer = optim.Adam(model.parameters())
    loss_fn = nn.MSELoss()
    loader = data.DataLoader(data.TensorDataset(X_train, y_train), shuffle=True, batch_size=BATCHSIZE)
    early_stopping = EarlyStopping(patience=10, min_delta=0.001)
    n_epochs = 1000
    for epoch in range(n_epochs):
        model.train()
        for X_batch, y_batch in loader:
            # X_batch = X_batch.squeeze(-1)
            # y_batch = y_batch.squeeze(-1)
            # print(X_batch.shape,y_batch.shape)
            # X_batch = X_batch.to(device)
            # y_batch = y_batch.to(device)
            y_pred = model(X_batch)
            y_pred_val = model(X_val)
            # print(y_pred.shape,y_batch.shape)
            loss_train = loss_fn(y_pred, y_batch)
            
            optimizer.zero_grad()
            loss_train.backward()
            optimizer.step()
        # Validation
        loss_val = loss_fn(y_pred_val, y_val)
        early_stopping(loss_val, model, optimizer, epoch)

        if early_stopping.early_stop:
            print("Early stopping")
            break

    model.eval()
    with torch.no_grad():
        # X_train = X_train.to(device)
        # y_train = y_train.to(device)
        y_pred_train = model(X_train)
        # X_test = X_test.to(device)
        # y_test = y_test.to(device)
        train_rmse = torch.sqrt(loss_fn(y_pred_train, y_train))
        y_pred_test = model(X_test)
        test_rmse = torch.sqrt(loss_fn(y_pred_test, y_test))
    print("Epoch %d: train RMSE %.4f, test RMSE %.4f" % (epoch, train_rmse, test_rmse))
    end_time = time.time()
    print(f"训练时间: {end_time - begin_time} 秒")

    predict_value = model(X_test)
    predict_value = predict_value.detach().cpu().numpy().flatten()


    df_result_tmp = pd.DataFrame([[split_index, predict_value[0],y_test[0].flatten().cpu().numpy()[0]]],columns=['split_index','predict','true'])
    df_result_tmp
    # df_result = pd.concat([df_result,df_result_tmp],ignore_index=True)
    df_result_tmp.to_csv(f'./data/fcnn_result_{LOOKBACK}_{BATCHSIZE}.csv',index=False,mode='a')
    
    del model
    del optimizer
    del loader
    del early_stopping
    del X_train
    del y_train
    del X_train_all
    del y_train_all
    del X_val
    del y_val
    del X_test
    del y_test
    del y_pred_train
    del y_pred_test
    del y_pred_val
    del y_pred
    del y_batch
    del X_batch
    torch.cuda.empty_cache()
    

  return torch.tensor(X_train), torch.tensor(y_train),torch.tensor(X_test), torch.tensor(y_test)


torch.Size([39237, 10, 1]) torch.Size([39237, 1, 1])
torch.Size([19316, 10, 1]) torch.Size([19316, 1, 1])


  from .autonotebook import tqdm as notebook_tqdm


Early stopping
Epoch 54: train RMSE 0.7571, test RMSE 2.0222
训练时间: 27.878331422805786 秒
torch.Size([39337, 10, 1]) torch.Size([39337, 1, 1])
torch.Size([19216, 10, 1]) torch.Size([19216, 1, 1])
Early stopping
Epoch 35: train RMSE 0.7612, test RMSE 2.0970
训练时间: 44.12215828895569 秒
torch.Size([39437, 10, 1]) torch.Size([39437, 1, 1])
torch.Size([19116, 10, 1]) torch.Size([19116, 1, 1])
Early stopping
Epoch 20: train RMSE 0.8482, test RMSE 2.5935
训练时间: 54.96302390098572 秒
torch.Size([39537, 10, 1]) torch.Size([39537, 1, 1])
torch.Size([19016, 10, 1]) torch.Size([19016, 1, 1])
Early stopping
Epoch 11: train RMSE 0.8585, test RMSE 1.5250
训练时间: 61.00601840019226 秒
torch.Size([39637, 10, 1]) torch.Size([39637, 1, 1])
torch.Size([18916, 10, 1]) torch.Size([18916, 1, 1])
Early stopping
Epoch 29: train RMSE 0.8506, test RMSE 1.9174
训练时间: 74.72605633735657 秒
torch.Size([39737, 10, 1]) torch.Size([39737, 1, 1])
torch.Size([18816, 10, 1]) torch.Size([18816, 1, 1])
Early stopping
Epoch 31: train RMS

In [6]:

# 训练函数
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, early_stopping):
    model = model.to(device)
    
    for epoch in range(num_epochs):
        # 训练阶段
        model.train()
        train_loss = 0
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
            
        train_loss = train_loss / len(train_loader)
        
        # 验证阶段
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                val_loss += loss.item()
                
        val_loss = val_loss / len(val_loader)
        
        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')
        
        # 早停检查
        early_stopping(val_loss, model, optimizer, epoch)
        if early_stopping.early_stop:
            print("Early stopping triggered")
            break
            
    return model

# 预测函数
def predict(model, test_loader):
    model.eval()
    predictions = []
    
    with torch.no_grad():
        for inputs in test_loader:
            inputs = inputs.to(device)
            outputs = model(inputs)
            predictions.extend(outputs.cpu().numpy())
            
    return np.array(predictions)

# 使用示例：
"""
# 初始化模型和训练参数
input_size = YOUR_INPUT_SIZE
hidden_size = 64
output_size = YOUR_OUTPUT_SIZE
learning_rate = 0.001
num_epochs = 100

model = FCNN(input_size, hidden_size, output_size)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
early_stopping = EarlyStopping(patience=10, verbose=True)

# 训练模型
trained_model = train_model(model, train_loader, val_loader, criterion, 
                          optimizer, num_epochs, early_stopping)

# 进行预测
predictions = predict(trained_model, test_loader)
"""


'\n# 初始化模型和训练参数\ninput_size = YOUR_INPUT_SIZE\nhidden_size = 64\noutput_size = YOUR_OUTPUT_SIZE\nlearning_rate = 0.001\nnum_epochs = 100\n\nmodel = FCNN(input_size, hidden_size, output_size)\ncriterion = nn.MSELoss()\noptimizer = optim.Adam(model.parameters(), lr=learning_rate)\nearly_stopping = EarlyStopping(patience=10, verbose=True)\n\n# 训练模型\ntrained_model = train_model(model, train_loader, val_loader, criterion, \n                          optimizer, num_epochs, early_stopping)\n\n# 进行预测\npredictions = predict(trained_model, test_loader)\n'