In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
!pip install torch
from sklearn.preprocessing import MinMaxScaler



In [2]:
import torch

In [3]:
raw_data = pd.read_csv(r'C:\Users\vijay\Downloads\kaggle_projects\stock_market_data_tickers\stock_data.csv')

In [4]:
raw_data.head(5)

Unnamed: 0,Date,Ticker,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,1962-01-02,ED,0.0,0.265828,0.261788,0.261788,25600.0,0.0,0.0
1,1962-01-02,CVX,0.0,0.046809,0.046069,0.046809,105840.0,0.0,0.0
2,1962-01-02,GD,0.0,0.210033,0.203061,0.20829,2648000.0,0.0,0.0
3,1962-01-02,BP,0.0,0.141439,0.139528,0.139528,77440.0,0.0,0.0
4,1962-01-02,MSI,0.0,0.764923,0.745254,0.75181,65671.0,0.0,0.0


In [5]:
def load_data(raw_data, ticker):
    ticker_data = raw_data[raw_data['Ticker'] == ticker]
    ticker_data.loc[:,'Date'] = pd.to_datetime(ticker_data['Date'])
    ticker_data = ticker_data.sort_values('Date')
    return ticker_data[['Date', 'Close']]

In [6]:
def preprocess_data(data, time_steps=60):
    scaler = MinMaxScaler();
    scaled_data = scaler.fit_transform(data[['Close']])
    
    X,y = [], []
    
    for i in range(time_steps, len(scaled_data)):
        X.append(scaled_data[i-time_steps:i, 0])
        y.append(scaled_data[i,0])
        
    return np.array(X), np.array(y), scaler

In [7]:
from torch.utils.data import DataLoader, Dataset

In [8]:
class TimeSeriesDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

In [9]:
import torch.nn as nn

In [10]:
class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        _, (hidden, _) = self.lstm(x)
        return self.fc(hidden[-1])

In [11]:
def forward(self, x):
    lstm_out, _ = self.lstm(x)
    
    last_output = lstm_out[:, -1, :]  
    out = self.fc(last_output)       
    return out


In [12]:
def train_model(model, dataloader, epochs, criterion, optimizer, device):
    model.to(device)
    for epoch in range(epochs):
        model.train()
        epoch_loss = 0
        for X_batch, y_batch in dataloader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            optimizer.zero_grad()
            predictions = model(X_batch).squeeze()
            y_batch = y_batch.view(-1)
            loss = criterion(predictions, y_batch)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}")

In [13]:
def predict_and_visualize(model, X_test, y_test, scaler, device):
    model.eval()
    with torch.no_grad():
        X_test = torch.tensor(X_test, dtype=torch.float32).to(device)
        predictions = model(X_test).cpu().numpy()
        predictions = scaler.inverse_transform(predictions)
        y_test = scaler.inverse_transform(y_test.reshape(-1, 1))
    
    # Visualization
    plt.plot(y_test, color='blue', label='Actual Prices')
    plt.plot(predictions, color='red', label='Predicted Prices')
    plt.legend()
    plt.show()

In [14]:
file_path = pd.read_csv(r'C:\Users\vijay\Downloads\kaggle_projects\stock_market_data_tickers\stock_data.csv')
ticker = 'AAPL'
data = load_data(file_path, ticker)

X, y, scaler = preprocess_data(data)
dataset = TimeSeriesDataset(X, y)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

input_size = 60
hidden_size = 64
num_layers = 2
epochs = 20
learning_rate = 0.001

device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = LSTMModel(input_size, hidden_size, num_layers)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

train_model(model, dataloader, epochs, criterion, optimizer, device)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ticker_data.loc[:,'Date'] = pd.to_datetime(ticker_data['Date'])
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1/20, Loss: 14.5081
Epoch 2/20, Loss: 14.1732
Epoch 3/20, Loss: 14.1416
Epoch 4/20, Loss: 14.1459
Epoch 5/20, Loss: 14.1585
Epoch 6/20, Loss: 14.1818
Epoch 7/20, Loss: 14.1068
Epoch 8/20, Loss: 14.1316
Epoch 9/20, Loss: 14.1057
Epoch 10/20, Loss: 14.1895
Epoch 11/20, Loss: 14.2263
Epoch 12/20, Loss: 14.1854
Epoch 13/20, Loss: 14.1316
Epoch 14/20, Loss: 14.2191
Epoch 15/20, Loss: 14.1475
Epoch 16/20, Loss: 14.1324
Epoch 17/20, Loss: 14.1499
Epoch 18/20, Loss: 14.1223
Epoch 19/20, Loss: 14.1453
Epoch 20/20, Loss: 14.1460


In [15]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

# Load and preprocess data
def load_data(filepath, ticker):
    # Load data
    data = pd.read_csv(filepath)
    
    # Filter by ticker
    ticker_data = data[data['Ticker'] == ticker].copy()
    ticker_data['Date'] = pd.to_datetime(ticker_data['Date'])
    ticker_data.sort_values('Date', inplace=True)

    # Normalize numerical columns
    scaler = MinMaxScaler()
    ticker_data[['Open', 'High', 'Low', 'Close', 'Volume']] = scaler.fit_transform(
        ticker_data[['Open', 'High', 'Low', 'Close', 'Volume']]
    )
    
    return ticker_data, scaler

# Create dataset for LSTM
class StockDataset(Dataset):
    def __init__(self, data, target_col, window_size=60):
        self.data = data
        self.target_col = target_col
        self.window_size = window_size

    def __len__(self):
        return len(self.data) - self.window_size

    def __getitem__(self, idx):
        x = self.data.iloc[idx:idx + self.window_size, :].values
        y = self.data.iloc[idx + self.window_size][self.target_col]
        return torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)

# Define the LSTM model
class StockLSTM(nn.Module):
    def __init__(self, input_size=5, hidden_size=128, num_layers=2):
        super(StockLSTM, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        _, (hidden, _) = self.lstm(x)
        return self.fc(hidden[-1])

# Train the model
def train_model(model, dataloader, epochs, criterion, optimizer, device):
    model.train()
    for epoch in range(epochs):
        epoch_loss = 0
        for X_batch, y_batch in dataloader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            
            # Reset gradients
            optimizer.zero_grad()

            # Forward pass
            predictions = model(X_batch).squeeze()

            # Compute loss
            loss = criterion(predictions, y_batch)

            # Backward pass
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        print(f"Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss / len(dataloader):.4f}")

# Predict future stock prices
def predict(model, data, window_size, device):
    model.eval()
    with torch.no_grad():
        last_sequence = data.iloc[-window_size:, :].values
        last_sequence = torch.tensor(last_sequence, dtype=torch.float32).unsqueeze(0).to(device)
        prediction = model(last_sequence).item()
    return prediction

# Main execution
if __name__ == "__main__":
    # Filepath and ticker
    filepath = r'C:\Users\vijay\Downloads\kaggle_projects\stock_market_data_tickers\stock_data.csv'
    ticker = 'AAPL'

    # Load data
    ticker_data, scaler = load_data(filepath, ticker)

    # Prepare dataset and dataloader
    window_size = 60
    dataset = StockDataset(ticker_data[['Open', 'High', 'Low', 'Close', 'Volume']], target_col='Close', window_size=window_size)
    dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

    # Define model, loss, optimizer
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = StockLSTM(input_size=5).to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

    # Train model
    epochs = 3
    train_model(model, dataloader, epochs, criterion, optimizer, device)

    # Predict future stock price
    future_price = predict(model, ticker_data[['Open', 'High', 'Low', 'Close', 'Volume']], window_size, device)
    future_price = scaler.inverse_transform([[0, 0, 0, future_price, 0]])[0][3]  # Inverse transform to get actual price
    print(f"Predicted Future Close Price for {ticker}: {future_price:.2f}")


Epoch 1/3, Loss: 0.0094
Epoch 2/3, Loss: 0.0001
Epoch 3/3, Loss: 0.0001
Predicted Future Close Price for AAPL: 222.75


In [16]:
torch.save(model.state_dict(), "stock_model.pth")
