In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

data_file = '../data/data.csv'
df = pd.read_csv(data_file, parse_dates=True, index_col=0)

feature_columns = [col for col in df.columns if col != 'target']
X = df[feature_columns].values
y = df['target'].values

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

split_index = int(0.8 * len(X_scaled))
X_train = X_scaled[:split_index]
X_test = X_scaled[split_index:]
y_train = y[:split_index]
y_test = y[split_index:]

# noise = np.random.normal(0, 0.01, X_train.shape)
# X_train = X_train + noise


X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)

batch_size = 32
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

class TradingNet(nn.Module):
    def __init__(self, input_dim):
        super(TradingNet, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
            nn.Sigmoid()
        )
        
    def forward(self, x):
        return self.net(x)

input_dim = X_train.shape[1]
model = TradingNet(input_dim).to(device)

criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)


def evaluate(model, X_tensor, y_tensor):
    model.eval()
    with torch.no_grad():
        outputs = model(X_tensor.to(device))
        loss = criterion(outputs, y_tensor.to(device))
        preds = (outputs > 0.5).float()
        accuracy = (preds.eq(y_tensor.to(device)).sum().item()) / len(y_tensor)
    return loss.item(), accuracy

epochs = 100
for epoch in range(1, epochs + 1):
    model.train()
    running_loss = 0.0
    for batch_X, batch_y in train_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * batch_X.size(0)
    
    epoch_loss = running_loss / len(train_loader.dataset)
    
    if epoch % 10 == 0 or epoch == 1:
        train_loss_epoch, train_acc_epoch = evaluate(model, X_train_tensor, y_train_tensor)
        print(f"Epoch {epoch}/{epochs} - Loss: {epoch_loss:.4f} - Training Accuracy: {train_acc_epoch:.2f}")

train_loss, train_acc = evaluate(model, X_train_tensor, y_train_tensor)
test_loss, test_acc = evaluate(model, X_test_tensor, y_test_tensor)
print(f"\nFinal Training Loss: {train_loss:.4f}, Training Accuracy: {train_acc:.2f}")
print(f"Final Test Loss: {test_loss:.4f}, Test Accuracy: {test_acc:.2f}")

model.eval()
with torch.no_grad():
    y_pred_prob = model(X_test_tensor.to(device))
    y_pred = (y_pred_prob > 0.5).float().cpu().numpy().flatten()

if 'close' in df.columns:
    analysis_df = df.iloc[split_index:].copy()
    analysis_df['predicted_direction'] = y_pred
    analysis_df['next_day_return'] = analysis_df['close'].pct_change().shift(-1)
    analysis_df['strategy_return'] = analysis_df['next_day_return'] * analysis_df['predicted_direction']
    analysis_df['cumulative_strategy_return'] = (1 + analysis_df['strategy_return']).cumprod()
    analysis_df['cumulative_market_return'] = (1 + analysis_df['next_day_return']).cumprod()
    strategy_return = analysis_df['cumulative_strategy_return'].iloc[-2] - 1
    market_return = analysis_df['cumulative_market_return'].iloc[-2] - 1
    print(f"Strategy Return: {strategy_return:.2%}")
    print(f"Market Return: {market_return:.2%}")


Using device: cpu
Epoch 1/100 - Loss: 0.6932 - Training Accuracy: 0.54
Epoch 10/100 - Loss: 0.6634 - Training Accuracy: 0.61
Epoch 20/100 - Loss: 0.6307 - Training Accuracy: 0.66
Epoch 30/100 - Loss: 0.5934 - Training Accuracy: 0.73
Epoch 40/100 - Loss: 0.5636 - Training Accuracy: 0.76
Epoch 50/100 - Loss: 0.5422 - Training Accuracy: 0.78
Epoch 60/100 - Loss: 0.5108 - Training Accuracy: 0.80
Epoch 70/100 - Loss: 0.5000 - Training Accuracy: 0.82
Epoch 80/100 - Loss: 0.4806 - Training Accuracy: 0.83
Epoch 90/100 - Loss: 0.4814 - Training Accuracy: 0.84
Epoch 100/100 - Loss: 0.4614 - Training Accuracy: 0.84

Final Training Loss: 0.3541, Training Accuracy: 0.84
Final Test Loss: 0.8396, Test Accuracy: 0.52
Strategy Return: 193.91%
Market Return: -28.49%


In [5]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import torch.nn.functional as F

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load data
data_file = '/Users/devshah/Documents/WorkSpace/University/year 3/CSC392/Trading_Simulator/data/data.csv'
df = pd.read_csv(data_file, parse_dates=True, index_col=0)

feature_columns = [col for col in df.columns if col != 'target']
X = df[feature_columns].values
y = df['target'].values

# Standard scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split
split_index = int(0.8 * len(X_scaled))
X_train = X_scaled[:split_index]
X_test = X_scaled[split_index:]
y_train = y[:split_index]
y_test = y[split_index:]

# Convert to tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)

batch_size = 32
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# ------------------------------
# Updated Model with BatchNorm and Increased Dropout
# ------------------------------
class TradingNet(nn.Module):
    def __init__(self, input_dim):
        super(TradingNet, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.5),  # Increased dropout
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.ReLU(),
            nn.Dropout(0.5),  # Increased dropout
            nn.Linear(64, 32),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(32, 1),
            nn.Sigmoid()
        )
        
    def forward(self, x):
        return self.net(x)

input_dim = X_train.shape[1]
model = TradingNet(input_dim).to(device)

criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)

# ------------------------------
# Evaluation Function
# ------------------------------
def evaluate(model, X_tensor, y_tensor):
    model.eval()
    with torch.no_grad():
        outputs = model(X_tensor.to(device))
        loss = criterion(outputs, y_tensor.to(device))
        preds = (outputs > 0.5).float()
        accuracy = (preds.eq(y_tensor.to(device)).sum().item()) / len(y_tensor)
    return loss.item(), accuracy

# ------------------------------
# Training Loop with Early Stopping
# ------------------------------
epochs = 100
best_val_loss = float('inf')
patience = 10
trigger_times = 0

for epoch in range(1, epochs + 1):
    model.train()
    running_loss = 0.0
    for batch_X, batch_y in train_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * batch_X.size(0)
    
    epoch_loss = running_loss / len(train_loader.dataset)
    val_loss, val_acc = evaluate(model, X_test_tensor, y_test_tensor)
    
    # Early stopping check
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        trigger_times = 0
        # Optionally, save the model checkpoint here
    else:
        trigger_times += 1
        if trigger_times >= patience:
            print(f"Early stopping triggered at epoch {epoch}")
            break
    
    if epoch % 10 == 0 or epoch == 1:
        train_loss_epoch, train_acc_epoch = evaluate(model, X_train_tensor, y_train_tensor)
        print(f"Epoch {epoch}/{epochs} - Train Loss: {epoch_loss:.4f} - Train Acc: {train_acc_epoch:.2f} - Val Loss: {val_loss:.4f} - Val Acc: {val_acc:.2f}")

# ------------------------------
# Final Evaluation and Analysis
# ------------------------------
train_loss, train_acc = evaluate(model, X_train_tensor, y_train_tensor)
test_loss, test_acc = evaluate(model, X_test_tensor, y_test_tensor)
print(f"\nFinal Training Loss: {train_loss:.4f}, Training Accuracy: {train_acc:.2f}")
print(f"Final Test Loss: {test_loss:.4f}, Test Accuracy: {test_acc:.2f}")

model.eval()
with torch.no_grad():
    y_pred_prob = model(X_test_tensor.to(device))
    y_pred = (y_pred_prob > 0.5).float().cpu().numpy().flatten()

if 'close' in df.columns:
    analysis_df = df.iloc[split_index:].copy()
    analysis_df['predicted_direction'] = y_pred
    analysis_df['next_day_return'] = analysis_df['close'].pct_change().shift(-1)
    analysis_df['strategy_return'] = analysis_df['next_day_return'] * analysis_df['predicted_direction']
    analysis_df['cumulative_strategy_return'] = (1 + analysis_df['strategy_return']).cumprod()
    analysis_df['cumulative_market_return'] = (1 + analysis_df['next_day_return']).cumprod()
    strategy_return = analysis_df['cumulative_strategy_return'].iloc[-2] - 1
    market_return = analysis_df['cumulative_market_return'].iloc[-2] - 1
    print(f"Strategy Return: {strategy_return:.2%}")
    print(f"Market Return: {market_return:.2%}")


Using device: cpu
Epoch 1/100 - Train Loss: 0.7045 - Train Acc: 0.54 - Val Loss: 0.6922 - Val Acc: 0.53
Epoch 10/100 - Train Loss: 0.6891 - Train Acc: 0.57 - Val Loss: 0.6907 - Val Acc: 0.54
Early stopping triggered at epoch 20

Final Training Loss: 0.6703, Training Accuracy: 0.59
Final Test Loss: 0.6918, Test Accuracy: 0.53
Strategy Return: 437.57%
Market Return: -28.49%


In [4]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.model_selection import TimeSeriesSplit
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import torch.nn.functional as F

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Data loading and preprocessing
data_file = '/Users/devshah/Documents/WorkSpace/University/year 3/CSC392/Trading_Simulator/data/data.csv'
df = pd.read_csv(data_file, parse_dates=True, index_col=0)

# Feature engineering
def add_technical_indicators(df):
    # Moving averages
    df['SMA_5'] = df['close'].rolling(window=5).mean()
    df['SMA_20'] = df['close'].rolling(window=20).mean()
    df['SMA_50'] = df['close'].rolling(window=50).mean()
    
    # Relative Strength Index (RSI)
    delta = df['close'].diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    rs = gain / loss
    df['RSI'] = 100 - (100 / (1 + rs))
    
    # MACD
    exp1 = df['close'].ewm(span=12, adjust=False).mean()
    exp2 = df['close'].ewm(span=26, adjust=False).mean()
    df['MACD'] = exp1 - exp2
    df['Signal_Line'] = df['MACD'].ewm(span=9, adjust=False).mean()
    
    # Volatility
    df['Volatility'] = df['close'].rolling(window=20).std()
    
    # Remove NaN values
    return df.dropna()

df = add_technical_indicators(df)

feature_columns = [col for col in df.columns if col != 'target']
X = df[feature_columns].values
y = df['target'].values

# Use RobustScaler instead of StandardScaler to handle outliers better
scaler = RobustScaler()
X_scaled = scaler.fit_transform(X)

# Time series split
tscv = TimeSeriesSplit(n_splits=5, test_size=int(0.2 * len(X_scaled)))
split = list(tscv.split(X_scaled))
train_index, test_index = split[-1]  # Get the last split
X_train, X_test = X_scaled[train_index], X_scaled[test_index]
y_train, y_test = y[train_index], y[test_index]



# for train_index, test_index in tscv:
#     X_train, X_test = X_scaled[train_index], X_scaled[test_index]
#     y_train, y_test = y[train_index], y[test_index]
#     break  # We'll use the last split

# Data augmentation with subtle variations
def augment_data(X, y, noise_factor=0.02):
    X_aug = np.concatenate([
        X,
        X + np.random.normal(0, noise_factor, X.shape),
        X * (1 + np.random.normal(0, noise_factor, X.shape))
    ])
    y_aug = np.concatenate([y, y, y])
    return X_aug, y_aug

X_train, y_train = augment_data(X_train, y_train)

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)

# Improved DataLoader with weighted sampling for imbalanced classes
class_weights = torch.tensor([1.0 / (y_train == i).mean() for i in [0, 1]])
sample_weights = torch.tensor([class_weights[int(t)] for t in y_train])
sampler = torch.utils.data.WeightedRandomSampler(sample_weights, len(sample_weights))

batch_size = 64  # Increased batch size
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler)

class ImprovedTradingNet(nn.Module):
    def __init__(self, input_dim):
        super(ImprovedTradingNet, self).__init__()
        
        self.batch_norm1 = nn.BatchNorm1d(input_dim)
        
        self.layer1 = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.BatchNorm1d(256),
            nn.LeakyReLU(),
            nn.Dropout(0.4)
        )
        
        self.layer2 = nn.Sequential(
            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.LeakyReLU(),
            nn.Dropout(0.4)
        )
        
        self.layer3 = nn.Sequential(
            nn.Linear(128, 64),
            nn.BatchNorm1d(64),
            nn.LeakyReLU(),
            nn.Dropout(0.3)
        )
        
        self.output_layer = nn.Sequential(
            nn.Linear(64, 1),
            nn.Sigmoid()
        )
        
    def forward(self, x):
        x = self.batch_norm1(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        return self.output_layer(x)

input_dim = X_train.shape[1]
model = ImprovedTradingNet(input_dim).to(device)

# Custom loss function combining BCE and focal loss
class CombinedLoss(nn.Module):
    def __init__(self, alpha=0.25, gamma=2.0):
        super().__init__()
        self.alpha = alpha
        self.gamma = gamma
        
    def forward(self, inputs, targets):
        bce_loss = F.binary_cross_entropy(inputs, targets, reduction='none')
        pt = torch.exp(-bce_loss)
        focal_loss = self.alpha * (1-pt)**self.gamma * bce_loss
        return (bce_loss + focal_loss).mean()

criterion = CombinedLoss()
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=1e-4)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5, verbose=True)

def evaluate(model, X_tensor, y_tensor, threshold=0.5):
    model.eval()
    with torch.no_grad():
        outputs = model(X_tensor.to(device))
        loss = criterion(outputs, y_tensor.to(device))
        preds = (outputs > threshold).float()
        accuracy = (preds.eq(y_tensor.to(device)).sum().item()) / len(y_tensor)
        
        # Calculate precision, recall, and F1 score
        tp = ((preds == 1) & (y_tensor.to(device) == 1)).sum().item()
        fp = ((preds == 1) & (y_tensor.to(device) == 0)).sum().item()
        fn = ((preds == 0) & (y_tensor.to(device) == 1)).sum().item()
        
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0
        f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
        
    return loss.item(), accuracy, precision, recall, f1

# Training with early stopping
epochs = 200
best_val_loss = float('inf')
patience = 15
patience_counter = 0

for epoch in range(1, epochs + 1):
    model.train()
    running_loss = 0.0
    
    for batch_X, batch_y in train_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device)
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # Gradient clipping
        optimizer.step()
        
        running_loss += loss.item() * batch_X.size(0)
    
    epoch_loss = running_loss / len(train_loader.dataset)
    val_loss, val_acc, val_prec, val_rec, val_f1 = evaluate(model, X_test_tensor, y_test_tensor)
    
    scheduler.step(val_loss)
    
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience_counter = 0
        torch.save(model.state_dict(), 'best_model.pth')
    else:
        patience_counter += 1
    
    if patience_counter >= patience:
        print(f"Early stopping triggered at epoch {epoch}")
        break
    
    if epoch % 10 == 0:
        print(f"Epoch {epoch}/{epochs}")
        print(f"Train Loss: {epoch_loss:.4f}")
        print(f"Val Loss: {val_loss:.4f}, Acc: {val_acc:.3f}, F1: {val_f1:.3f}")

# Load best model and evaluate
model.load_state_dict(torch.load('best_model.pth'))
test_loss, test_acc, test_prec, test_rec, test_f1 = evaluate(model, X_test_tensor, y_test_tensor)
print(f"\nFinal Test Results:")
print(f"Accuracy: {test_acc:.3f}")
print(f"Precision: {test_prec:.3f}")
print(f"Recall: {test_rec:.3f}")
print(f"F1 Score: {test_f1:.3f}")

# Trading strategy evaluation
model.eval()
with torch.no_grad():
    y_pred_prob = model(X_test_tensor.to(device))
    y_pred = (y_pred_prob > 0.5).float().cpu().numpy().flatten()

if 'close' in df.columns:
    analysis_df = df.iloc[test_index].copy()
    analysis_df['predicted_direction'] = y_pred
    analysis_df['next_day_return'] = analysis_df['close'].pct_change().shift(-1)
    
    # Risk management: Only take trades with high confidence
    analysis_df['strategy_return'] = np.where(
        y_pred_prob.cpu().numpy() > 0.7,  # High confidence threshold
        analysis_df['next_day_return'] * analysis_df['predicted_direction'],
        0
    )
    
    # Add position sizing based on prediction confidence
    confidence_scores = y_pred_prob.cpu().numpy()
    analysis_df['position_size'] = np.where(
        confidence_scores > 0.7,
        (confidence_scores - 0.7) / 0.3,  # Scale position size based on confidence
        0
    )
    analysis_df['strategy_return'] *= analysis_df['position_size']
    
    # Calculate performance metrics
    analysis_df['cumulative_strategy_return'] = (1 + analysis_df['strategy_return']).cumprod()
    analysis_df['cumulative_market_return'] = (1 + analysis_df['next_day_return']).cumprod()
    
    # Calculate Sharpe Ratio (assuming risk-free rate of 2%)
    rf_rate = 0.02
    daily_rf = (1 + rf_rate) ** (1/252) - 1
    excess_returns = analysis_df['strategy_return'] - daily_rf
    sharpe_ratio = np.sqrt(252) * (excess_returns.mean() / excess_returns.std())
    
    # Maximum drawdown
    cumulative_returns = analysis_df['cumulative_strategy_return']
    rolling_max = cumulative_returns.expanding().max()
    drawdowns = (cumulative_returns - rolling_max) / rolling_max
    max_drawdown = drawdowns.min()
    
    strategy_return = analysis_df['cumulative_strategy_return'].iloc[-1] - 1
    market_return = analysis_df['cumulative_market_return'].iloc[-1] - 1
    
    print("\nTrading Performance Metrics:")
    print(f"Strategy Return: {strategy_return:.2%}")
    print(f"Market Return: {market_return:.2%}")
    print(f"Sharpe Ratio: {sharpe_ratio:.2f}")
    print(f"Maximum Drawdown: {max_drawdown:.2%}")

Using device: cpu
Epoch 00007: reducing learning rate of group 0 to 5.0000e-04.
Epoch 10/200
Train Loss: 0.6742
Val Loss: 0.7588, Acc: 0.533, F1: 0.621
Epoch 00013: reducing learning rate of group 0 to 2.5000e-04.
Early stopping triggered at epoch 16

Final Test Results:
Accuracy: 0.523
Precision: 0.529
Recall: 0.884
F1 Score: 0.662


ValueError: Expected a 1D array, got an array with shape (1257, 1257)