<a href="https://colab.research.google.com/github/ucsd-cse-spis-2025/SPIS25-Lauren-Diana-Final_Project/blob/main/SPIS_Stock_MLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [190]:
import kagglehub

path = kagglehub.dataset_download("iamtanmayshukla/tesla-stocks-dataset")

print("Path to dataset files:", path)

Path to dataset files: /kaggle/input/tesla-stocks-dataset


In [191]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report

In [192]:
df = pd.read_csv("/root/.cache/kagglehub/datasets/iamtanmayshukla/tesla-stocks-dataset/versions/5/HistoricalData_1726367135218.csv")
#df = df[df["Name"] == "AAPL"]

In [193]:
names = ['Close/Last', 'Open', 'High', 'Low']

for col in names:
  df[col] = df[col].replace('[/$]', '', regex=True).astype(float)

In [194]:
splitted = df['Date'].str.split('/', expand=True)

df['day'] = splitted[1].astype('int')
df['month'] = splitted[0].astype('int')
df['year'] = splitted[2].astype('int')

In [195]:
# Feature engineering
df['open-close'] = df['Open'] - df['Close/Last']
df['low-high'] = df['Low'] - df['High']
df['daily_return'] = df['Close/Last'].pct_change()
df['is_quarter_end'] = np.where(df['month'] % 3 == 0, 1, 0)

In [196]:
#!pip install ta
import ta  # technical analysis library

df['rsi'] = ta.momentum.RSIIndicator(df['Close/Last'], window=14).rsi()
df['macd'] = ta.trend.MACD(df['Close/Last']).macd_diff()
df['bollinger_h'] = ta.volatility.BollingerBands(df['Close/Last']).bollinger_hband()
df['bollinger_l'] = ta.volatility.BollingerBands(df['Close/Last']).bollinger_lband()
df['ema_12'] = ta.trend.EMAIndicator(df['Close/Last'], window=12).ema_indicator()
df['ema_26'] = ta.trend.EMAIndicator(df['Close/Last'], window=26).ema_indicator()

In [197]:
# Drop NA from pct_change
df = df.dropna()

In [198]:
df.head()

Unnamed: 0,Date,Close/Last,Volume,Open,High,Low,day,month,year,open-close,low-high,daily_return,is_quarter_end,rsi,macd,bollinger_h,bollinger_l,ema_12,ema_26
33,07/29/2024,232.1,129201800,224.9,234.27,224.7,29,7,2024,-7.2,-9.57,0.042584,0,59.968837,3.994924,235.264484,188.382516,215.220166,214.379828
34,07/26/2024,219.8,94604150,221.19,222.2799,215.33,26,7,2024,1.39,-6.9499,-0.052994,0,52.637426,3.438416,235.201855,188.393145,215.924756,214.781322
35,07/25/2024,220.25,100636500,216.8,226.0,216.231,25,7,2024,-3.45,-9.769,0.002047,0,52.864466,2.959001,235.959785,188.594215,216.590178,215.18641
36,07/24/2024,215.99,167942900,225.42,225.99,214.71,24,7,2024,9.43,-11.28,-0.019342,0,50.401316,2.245712,235.127918,188.698082,216.497843,215.245935
37,07/23/2024,246.38,111928200,253.6,255.7594,245.63,23,7,2024,7.22,-10.1294,0.140701,0,63.475554,3.629392,240.622514,185.731486,221.095098,217.552162


In [199]:
# Target: 1 if next day close is higher
df['target'] = np.where(df['Close/Last'].shift(-1) > df['Close/Last'], 1, 0)
df = df.dropna()

# Define feature columns
feature_cols = ['open-close', 'low-high', 'daily_return', 'Volume', 'is_quarter_end']
X = df[feature_cols]
y = df['target']

In [200]:
# Scale
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [201]:
# Train/Test Split
train_size = int(len(X_scaled) * 0.8)
X_train, X_valid = X_scaled[:train_size], X_scaled[train_size:]
y_train, y_valid = y[:train_size], y[train_size:]

In [202]:
# Convert to tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1)
X_valid_tensor = torch.tensor(X_valid, dtype=torch.float32)
y_valid_tensor = torch.tensor(y_valid.values, dtype=torch.float32).unsqueeze(1)

In [203]:
class StrongerMLP(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, x):
        return self.net(x)

model = StrongerMLP(input_dim=X_train.shape[1])


In [204]:
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)


In [205]:
epochs = 10
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()

    with torch.no_grad():
        train_preds = torch.sigmoid(outputs)
        train_preds_cls = (train_preds > 0.5).float()
        train_acc = accuracy_score(y_train_tensor.numpy(), train_preds_cls.numpy())

    # Validation
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_valid_tensor)
        val_preds = torch.sigmoid(val_outputs)
        val_preds_cls = (val_preds > 0.5).float()
        val_acc = accuracy_score(y_valid_tensor, val_preds_cls)

    print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}, Train Acc: {train_acc:.4f}, Val Accuracy: {val_acc:.4f}")


Epoch 1/10, Loss: 0.6945, Train Acc: 0.4917, Val Accuracy: 0.5010
Epoch 2/10, Loss: 0.6894, Train Acc: 0.5340, Val Accuracy: 0.5131
Epoch 3/10, Loss: 0.6858, Train Acc: 0.5888, Val Accuracy: 0.5372
Epoch 4/10, Loss: 0.6814, Train Acc: 0.5969, Val Accuracy: 0.5614
Epoch 5/10, Loss: 0.6777, Train Acc: 0.6311, Val Accuracy: 0.5835
Epoch 6/10, Loss: 0.6730, Train Acc: 0.6467, Val Accuracy: 0.5734
Epoch 7/10, Loss: 0.6689, Train Acc: 0.6688, Val Accuracy: 0.5694
Epoch 8/10, Loss: 0.6662, Train Acc: 0.6568, Val Accuracy: 0.5614
Epoch 9/10, Loss: 0.6602, Train Acc: 0.6558, Val Accuracy: 0.5674
Epoch 10/10, Loss: 0.6564, Train Acc: 0.6593, Val Accuracy: 0.5654


In [206]:
model.eval()
with torch.no_grad():
    preds = model(X_valid_tensor)
    preds_prob = torch.sigmoid(preds)
    preds_cls = (preds_prob > 0.5).float()

print("Classification Report:")
print(classification_report(y_valid_tensor, preds_cls))


Classification Report:
              precision    recall  f1-score   support

         0.0       0.56      0.62      0.59       247
         1.0       0.58      0.51      0.54       250

    accuracy                           0.57       497
   macro avg       0.57      0.57      0.56       497
weighted avg       0.57      0.57      0.56       497

