# End-to-End Trading Project
## Часть 4: Deep Learning

### В этом ноутбуке:

1. **Подготовка последовательностей** для временных рядов
2. **LSTM** - Long Short-Term Memory
3. **1D CNN** - свёрточная сеть для паттернов
4. **CNN-LSTM** - комбинация подходов
5. **Сравнение с baseline**

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score, f1_score
import json
import warnings
# Silence every warning for the rest of the session (keeps notebook output short,
# but also hides deprecation and convergence warnings).
warnings.filterwarnings('ignore')

# NOTE(review): the 'seaborn-v0_8-*' style names appear to exist only in newer
# Matplotlib releases - confirm against the pinned Matplotlib version.
plt.style.use('seaborn-v0_8-whitegrid')
# Seed NumPy and PyTorch so that runs are repeatable.
np.random.seed(42)
torch.manual_seed(42)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Device: {device}')
print(f'PyTorch: {torch.__version__}')

In [None]:
# Load the processed dataset and the named feature sets from the data directory
data_dir = 'data'
df = pd.read_parquet(f'{data_dir}/processed_data.parquet')

# feature_sets is indexed by feature-set name further down in the notebook
with open(f'{data_dir}/feature_sets.json', 'r') as f:
    feature_sets = json.load(f)

print(f'Загружено: {len(df):,} записей')

## 1. Подготовка Последовательностей

Для RNN/CNN нужны последовательности фиксированной длины.

In [None]:
class TradingSequenceDataset(Dataset):
    """
    Sliding-window dataset of fixed-length feature sequences, built per ticker.

    For every ticker the rows are ordered by ``date`` and cut into overlapping
    windows of ``seq_length`` consecutive rows. The label of a window is the
    value of ``target_col`` on the window's last row. Windows that contain a
    NaN feature or a NaN label are skipped.

    Args:
        df: DataFrame with ``ticker`` and ``date`` columns plus the feature
            and target columns.
        feature_cols: Names of the feature columns, in model input order.
        target_col: Name of the label column.
        seq_length: Number of consecutive rows per window (must be >= 1).
        scaler: Already fitted scaler exposing ``transform``. When ``None`` a
            new ``StandardScaler`` is fitted on this dataset's windows; it is
            available afterwards as ``self.scaler`` so that validation and
            test datasets can reuse it.

    Raises:
        ValueError: If ``seq_length`` is smaller than 1, or if ``scaler`` is
            ``None`` and no valid window exists to fit a scaler on.
    """
    def __init__(self, df, feature_cols, target_col, seq_length=30, scaler=None):
        if seq_length < 1:
            raise ValueError('seq_length must be >= 1')

        self.seq_length = seq_length
        n_features = len(feature_cols)
        windows = []
        labels = []

        # Each ticker is processed on its own so that no window mixes tickers
        for ticker in df['ticker'].unique():
            ticker_df = df[df['ticker'] == ticker].sort_values('date')

            # Force float arrays so the NaN checks also work for object or
            # nullable columns
            features = ticker_df[feature_cols].to_numpy(dtype=np.float64, na_value=np.nan)
            targets = ticker_df[target_col].to_numpy(dtype=np.float64, na_value=np.nan)

            # "+ 1" keeps the final window, whose last row is the ticker's last row
            for start in range(len(ticker_df) - seq_length + 1):
                end = start + seq_length
                seq = features[start:end]
                target = targets[end - 1]

                if not np.isnan(seq).any() and not np.isnan(target):
                    windows.append(seq)
                    labels.append(target)

        if windows:
            self.sequences = np.stack(windows)
        else:
            # Keep a well-formed 3-D shape even when there is nothing to return
            self.sequences = np.empty((0, seq_length, n_features), dtype=np.float64)
        self.targets = np.asarray(labels, dtype=np.float64)

        # Normalization: fit a new scaler only when none was supplied
        fit_needed = scaler is None
        if fit_needed and len(self.sequences) == 0:
            raise ValueError('No valid sequences to fit a scaler on')
        self.scaler = StandardScaler() if fit_needed else scaler

        if len(self.sequences) > 0:
            # Scalers work on 2-D input: flatten the windows into rows and restore afterwards
            flat = self.sequences.reshape(-1, n_features)
            if fit_needed:
                flat = self.scaler.fit_transform(flat)
            else:
                flat = self.scaler.transform(flat)
            self.sequences = np.asarray(flat).reshape(-1, seq_length, n_features)

    def __len__(self):
        """Return the number of windows."""
        return len(self.sequences)

    def __getitem__(self, idx):
        """Return ``(sequence, target)`` as float32 tensors; the target has shape ``[1]``."""
        return (
            torch.tensor(self.sequences[idx], dtype=torch.float32),
            torch.tensor([self.targets[idx]], dtype=torch.float32)
        )

# Parameters
feature_cols = feature_sets['extended_features']
# Keep only the features that are actually present in the loaded DataFrame
feature_cols = [f for f in feature_cols if f in df.columns]
target_col = 'target_direction_1d'
seq_length = 30

print(f'Признаков: {len(feature_cols)}')
print(f'Длина последовательности: {seq_length}')

In [None]:
# Time-based split: rows are ordered by date and cut 60% / 20% / 20%
# NOTE(review): the cut is made on row counts, so if several tickers share a
# date the rows of one date may end up on both sides of a boundary - confirm
# whether that is acceptable.
df = df.sort_values('date')
n_samples = len(df)
train_end = int(n_samples * 0.6)
val_end = int(n_samples * 0.8)

train_df = df.iloc[:train_end]
val_df = df.iloc[train_end:val_end]
test_df = df.iloc[val_end:]

# Build the datasets; validation and test reuse the scaler fitted on train
train_dataset = TradingSequenceDataset(train_df, feature_cols, target_col, seq_length)
val_dataset = TradingSequenceDataset(val_df, feature_cols, target_col, seq_length, 
                                     scaler=train_dataset.scaler)
test_dataset = TradingSequenceDataset(test_df, feature_cols, target_col, seq_length,
                                      scaler=train_dataset.scaler)

print(f'Train sequences: {len(train_dataset):,}')
print(f'Val sequences: {len(val_dataset):,}')
print(f'Test sequences: {len(test_dataset):,}')

# DataLoaders: only the training loader is shuffled
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Sanity-check the batch shapes
sample_x, sample_y = next(iter(train_loader))
print(f'\nBatch X shape: {sample_x.shape}')  # [batch, seq, features]
print(f'Batch Y shape: {sample_y.shape}')

## 2. LSTM Model

In [None]:
class LSTMClassifier(nn.Module):
    """
    Stacked unidirectional LSTM followed by a small MLP head.

    The head ends in a sigmoid, so the model emits one probability per
    sequence (shape ``[batch, 1]``).

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout rate used between LSTM layers and inside the head.
    """
    def __init__(self, input_dim, hidden_dim=64, num_layers=2, dropout=0.2):
        super().__init__()

        # Dropout inside the LSTM is switched off for a single-layer network
        inter_layer_dropout = dropout if num_layers > 1 else 0
        self.lstm = nn.LSTM(
            input_dim,
            hidden_dim,
            num_layers,
            batch_first=True,
            dropout=inter_layer_dropout,
            bidirectional=False,
        )

        head_layers = [
            nn.Linear(hidden_dim, 32),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(32, 1),
            nn.Sigmoid(),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Map ``x`` of shape ``[batch, seq, features]`` to probabilities ``[batch, 1]``."""
        _, (hidden, _) = self.lstm(x)
        # hidden[-1] is the final hidden state of the top LSTM layer: [batch, hidden]
        return self.fc(hidden[-1])

# Instantiate the LSTM on the selected device; one input channel per feature column
input_dim = len(feature_cols)
lstm_model = LSTMClassifier(input_dim, hidden_dim=64, num_layers=2, dropout=0.2).to(device)

print(f'LSTM параметров: {sum(p.numel() for p in lstm_model.parameters()):,}')

In [None]:
def _snapshot_state(model):
    """Return a copy of the model's state dict that later optimizer steps cannot mutate."""
    return {key: value.detach().clone() for key, value in model.state_dict().items()}


def _run_epoch(model, loader, criterion, run_device, optimizer=None):
    """Run one pass over ``loader`` and return the mean of the per-batch losses.

    The pass is a training pass (gradients, clipping, optimizer step) when an
    optimizer is given, and an evaluation pass otherwise.
    """
    training = optimizer is not None
    model.train(training)
    total_loss = 0.0
    with torch.set_grad_enabled(training):
        for batch_x, batch_y in loader:
            batch_x = batch_x.to(run_device)
            batch_y = batch_y.to(run_device)

            if training:
                optimizer.zero_grad()
            loss = criterion(model(batch_x), batch_y)
            if training:
                loss.backward()
                # Clip the gradient norm to 1.0 before the update
                torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
                optimizer.step()
            total_loss += loss.item()
    return total_loss / len(loader)


def train_model(model, train_loader, val_loader, epochs=30, lr=0.001, patience=10):
    """
    Train a probability-output binary classifier with early stopping.

    Uses BCELoss, Adam and a ReduceLROnPlateau scheduler driven by the
    validation loss. After training, the weights of the epoch with the lowest
    validation loss are loaded back into ``model``.

    Args:
        model: Module whose output is a probability with the same shape as the
            batch targets.
        train_loader: Sized iterable of ``(inputs, targets)`` training batches.
        val_loader: Sized iterable of ``(inputs, targets)`` validation batches.
        epochs: Maximum number of epochs.
        lr: Adam learning rate.
        patience: Number of consecutive epochs without validation improvement
            after which training stops.

    Returns:
        Tuple ``(train_losses, val_losses)`` with one mean loss per completed epoch.

    Raises:
        ValueError: If either loader yields no batches.
    """
    if len(train_loader) == 0 or len(val_loader) == 0:
        raise ValueError('train_loader and val_loader must each contain at least one batch')

    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=5, factor=0.5)

    # Send batches to wherever the model lives instead of relying on a global
    run_device = next(model.parameters()).device

    train_losses = []
    val_losses = []
    best_val_loss = float('inf')
    epochs_without_improvement = 0
    # Start from the initial weights so there is always a state to restore,
    # even when epochs == 0 or the validation loss never improves (e.g. NaN)
    best_state = _snapshot_state(model)

    for epoch in range(epochs):
        train_loss = _run_epoch(model, train_loader, criterion, run_device, optimizer=optimizer)
        train_losses.append(train_loss)

        val_loss = _run_epoch(model, val_loader, criterion, run_device)
        val_losses.append(val_loss)
        scheduler.step(val_loss)

        # Early stopping bookkeeping
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_without_improvement = 0
            # The tensors in state_dict() alias the live parameters, so they
            # must be cloned; a shallow dict copy would follow later updates
            best_state = _snapshot_state(model)
        else:
            epochs_without_improvement += 1

        if epochs_without_improvement >= patience:
            print(f'Early stopping at epoch {epoch+1}')
            break

        if (epoch + 1) % 5 == 0:
            print(f'Epoch {epoch+1}: train_loss={train_loss:.4f}, val_loss={val_loss:.4f}')

    model.load_state_dict(best_state)
    return train_losses, val_losses

In [None]:
# Train the LSTM and keep its per-epoch loss curves for plotting later
print('Обучение LSTM...')
lstm_train_losses, lstm_val_losses = train_model(lstm_model, train_loader, val_loader, epochs=30)

## 3. 1D CNN Model

In [None]:
class CNNClassifier(nn.Module):
    """
    1D convolutional classifier for local patterns along the time axis.

    Three length-preserving convolutions (kernel 3, padding 1); the first two
    are each followed by max-pooling by 2 and dropout. The flattened feature
    map feeds an MLP head ending in a sigmoid, giving output ``[batch, 1]``.

    Args:
        input_dim: Number of features per time step (input channels).
        seq_length: Length of the input sequences; fixes the size of the
            first linear layer.
    """
    def __init__(self, input_dim, seq_length):
        super().__init__()

        # Convolutions
        conv_kwargs = dict(kernel_size=3, padding=1)
        self.conv1 = nn.Conv1d(input_dim, 64, **conv_kwargs)
        self.conv2 = nn.Conv1d(64, 128, **conv_kwargs)
        self.conv3 = nn.Conv1d(128, 64, **conv_kwargs)

        self.pool = nn.MaxPool1d(2)
        self.dropout = nn.Dropout(0.2)

        # Two poolings by 2 each halve the time axis (rounding down)
        pooled_steps = (seq_length // 2) // 2
        flat_features = 64 * pooled_steps

        self.fc = nn.Sequential(
            nn.Linear(flat_features, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Map ``x`` of shape ``[batch, seq, features]`` to probabilities ``[batch, 1]``."""
        # Conv1d expects channels first: [batch, features, seq]
        h = x.transpose(1, 2)

        for conv in (self.conv1, self.conv2):
            h = self.dropout(self.pool(F.relu(conv(h))))

        h = F.relu(self.conv3(h))
        return self.fc(h.flatten(1))

# Instantiate the CNN on the selected device
cnn_model = CNNClassifier(input_dim, seq_length).to(device)
print(f'CNN параметров: {sum(p.numel() for p in cnn_model.parameters()):,}')

# Train it with the same routine and loaders as the LSTM
print('\nОбучение CNN...')
cnn_train_losses, cnn_val_losses = train_model(cnn_model, train_loader, val_loader, epochs=30)

## 4. CNN-LSTM Hybrid

In [None]:
class CNNLSTMClassifier(nn.Module):
    """
    Hybrid model: convolutions extract local patterns, an LSTM models them over time.

    Two length-preserving 1D convolutions produce 32 channels per time step;
    a single-layer LSTM consumes that sequence and its final hidden state is
    passed to an MLP head ending in a sigmoid (output ``[batch, 1]``).

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the LSTM hidden state.
    """
    def __init__(self, input_dim, hidden_dim=64):
        super().__init__()

        # Convolutional feature extractor
        conv_kwargs = dict(kernel_size=3, padding=1)
        self.conv1 = nn.Conv1d(input_dim, 64, **conv_kwargs)
        self.conv2 = nn.Conv1d(64, 32, **conv_kwargs)

        # Recurrent part operating on the 32 convolutional channels
        self.lstm = nn.LSTM(32, hidden_dim, 1, batch_first=True)

        head_layers = [
            nn.Linear(hidden_dim, 32),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(32, 1),
            nn.Sigmoid(),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Map ``x`` of shape ``[batch, seq, features]`` to probabilities ``[batch, 1]``."""
        # Conv1d expects channels first: [batch, features, seq]
        feats = x.transpose(1, 2)
        feats = F.relu(self.conv2(F.relu(self.conv1(feats))))

        # Back to [batch, seq, channels] for the batch_first LSTM
        feats = feats.transpose(1, 2)
        _, (hidden, _) = self.lstm(feats)

        return self.fc(hidden[-1])

# Instantiate the hybrid model on the selected device and train it
cnn_lstm_model = CNNLSTMClassifier(input_dim).to(device)
print(f'CNN-LSTM параметров: {sum(p.numel() for p in cnn_lstm_model.parameters()):,}')

print('\nОбучение CNN-LSTM...')
cnn_lstm_train_losses, cnn_lstm_val_losses = train_model(cnn_lstm_model, train_loader, val_loader, epochs=30)

## 5. Оценка Моделей

In [None]:
def evaluate_dl_model(model, loader, name='Model'):
    """
    Evaluate a probability-output binary classifier on a data loader.

    Predictions are obtained by thresholding the model output at 0.5.

    Args:
        model: Module that returns one probability per sample.
        loader: Iterable of ``(inputs, targets)`` batches.
        name: Currently unused; kept so existing calls keep working.

    Returns:
        Tuple ``(metrics, all_preds, all_probs)`` where ``metrics`` is a dict
        with the keys ``'accuracy'``, ``'f1'`` and ``'roc_auc'``, and the other
        two are flat lists of per-sample predictions and probabilities.
        ``'roc_auc'`` is NaN when the targets contain fewer than two classes,
        because ROC-AUC is undefined in that case.
    """
    model.eval()

    # Run on the device the model lives on instead of relying on a global
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    all_preds = []
    all_targets = []
    all_probs = []

    with torch.no_grad():
        for batch_x, batch_y in loader:
            output = model(batch_x.to(run_device))

            probs = output.cpu().numpy().flatten()
            preds = (probs > 0.5).astype(int)
            targets = batch_y.cpu().numpy().flatten()

            all_probs.extend(probs)
            all_preds.extend(preds)
            all_targets.extend(targets)

    # ROC-AUC needs both classes in the targets; report NaN rather than
    # failing when a split happens to contain only one of them
    if np.unique(all_targets).size < 2:
        roc_auc = float('nan')
    else:
        roc_auc = roc_auc_score(all_targets, all_probs)

    metrics = {
        'accuracy': accuracy_score(all_targets, all_preds),
        'f1': f1_score(all_targets, all_preds),
        'roc_auc': roc_auc
    }

    return metrics, all_preds, all_probs

# Evaluate every model on the train, validation and test loaders
results = {}

for name, model in [('LSTM', lstm_model), ('CNN', cnn_model), ('CNN-LSTM', cnn_lstm_model)]:
    train_metrics, _, _ = evaluate_dl_model(model, train_loader)
    val_metrics, _, _ = evaluate_dl_model(model, val_loader)
    test_metrics, preds, probs = evaluate_dl_model(model, test_loader)
    
    # results[model name][split] -> metrics dict; written to JSON later in the notebook
    results[name] = {
        'train': train_metrics,
        'val': val_metrics,
        'test': test_metrics
    }
    
    print(f'\n{name}:')
    print(f'  Test Accuracy: {test_metrics["accuracy"]:.4f}')
    print(f'  Test ROC-AUC: {test_metrics["roc_auc"]:.4f}')

In [None]:
# Visualization: loss curves, test metrics and accuracy per data split
fig, axes = plt.subplots(1, 3, figsize=(15, 4))

# 1. Training curves (one line style per model)
axes[0].plot(lstm_train_losses, label='LSTM train')
axes[0].plot(lstm_val_losses, label='LSTM val')
axes[0].plot(cnn_train_losses, label='CNN train', linestyle='--')
axes[0].plot(cnn_val_losses, label='CNN val', linestyle='--')
axes[0].plot(cnn_lstm_train_losses, label='CNN-LSTM train', linestyle=':')
axes[0].plot(cnn_lstm_val_losses, label='CNN-LSTM val', linestyle=':')
axes[0].set_xlabel('Epoch')
axes[0].set_ylabel('Loss')
axes[0].set_title('Training Progress')
axes[0].legend()

# 2. Model comparison on the test set
models = list(results.keys())
test_acc = [results[m]['test']['accuracy'] for m in models]
test_auc = [results[m]['test']['roc_auc'] for m in models]

x = np.arange(len(models))
width = 0.35
axes[1].bar(x - width/2, test_acc, width, label='Accuracy')
axes[1].bar(x + width/2, test_auc, width, label='ROC-AUC')
axes[1].set_xticks(x)
axes[1].set_xticklabels(models)
axes[1].set_ylabel('Score')
axes[1].set_title('Test Metrics')
axes[1].legend()
# Keep the zoomed-in view, but widen it when a score would be cut off at 0.6
# (the 0.0 sentinel keeps nanmax defined even if every score is NaN)
axes[1].set_ylim(0.45, max(0.6, float(np.nanmax([0.0, *test_acc, *test_auc])) + 0.02))

# 3. Train vs Val vs Test accuracy, grouped by model
# A fixed colour per split makes the legend valid for every model, not just the first
split_styles = [('train', 'Train', 'C0'), ('val', 'Val', 'C1'), ('test', 'Test', 'C2')]
group_start = np.arange(len(models)) * 3
highest_acc = 0.0
for offset, (split_key, split_label, split_color) in enumerate(split_styles):
    split_acc = [results[m][split_key]['accuracy'] for m in models]
    highest_acc = float(np.nanmax([highest_acc, *split_acc]))
    axes[2].bar(group_start + offset, split_acc, color=split_color, label=split_label)

# Centre each tick under the middle bar of its group
axes[2].set_xticks(group_start + 1)
axes[2].set_xticklabels(models)
axes[2].set_ylabel('Accuracy')
axes[2].set_title('Accuracy by Dataset')
axes[2].legend()
axes[2].set_ylim(0.45, max(0.6, highest_acc + 0.02))

plt.tight_layout()
plt.show()

In [None]:
# Save the trained model weights (state dicts only, not the full modules)
import os
models_dir = 'models'
os.makedirs(models_dir, exist_ok=True)

torch.save(lstm_model.state_dict(), f'{models_dir}/lstm_model.pt')
torch.save(cnn_model.state_dict(), f'{models_dir}/cnn_model.pt')
torch.save(cnn_lstm_model.state_dict(), f'{models_dir}/cnn_lstm_model.pt')

# Save the evaluation metrics next to the weights
with open(f'{models_dir}/dl_results.json', 'w') as f:
    json.dump(results, f, indent=2)

print('Модели сохранены')

## Итоги

### Результаты:

- Deep Learning модели показывают схожие результаты с classical ML (~51-53%)
- Нет значительного переобучения
- CNN-LSTM комбинирует преимущества обоих подходов

### Выводы:

1. **LSTM** хорошо улавливает временные зависимости
2. **CNN** находит локальные паттерны
3. **CNN-LSTM** - лучший баланс
4. Сложность моделей не гарантирует лучший результат на эффективном рынке

### Следующий шаг:

В ноутбуке 05 попробуем TFT (Temporal Fusion Transformer) — state-of-the-art архитектуру для временных рядов, которая включает:
- Attention-механизмы
- Variable Selection
- Quantile-прогнозы