In [98]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader, TensorDataset

In [171]:
df = pd.read_csv("./data/processed/Mastercard_stock_history_processed.csv")
df.head()
# df = df[["Open", "Volume",  "lag_1","lag_2","MA","M_STD", "month", "day","quarter","Close"]]
df = df[["Volume", "Open", "day", "MA", "M_STD", "day", "quarter", "Close"]]

In [172]:
train_df = df[:-100]
test_df = df[-100:]

In [178]:
def create_dataset(dataset, n):
    """Transform a time series into a prediction dataset
    
    Args:
        dataset: A numpy array of time series, first dimension is the time steps
        n: Size of window for prediction
    """
    X, y = [], [] 
    for i in range(len(dataset)-n):
        feature = dataset[i:i+n, :] # (n, feature_size)
        target = dataset[i+n, -1] # scalar (of next value)
        X.append(feature)
        y.append(target)
    return torch.tensor(X, dtype=torch.float32), torch.tensor(y, dtype=torch.float32).reshape(-1, 1)

# train_df_torch = torch.from_numpy(train_df.to_numpy()).type(torch.float32)
# test_df_torch = torch.from_numpy(test_df.to_numpy()).type(torch.float32)
X_train, y_train = create_dataset(train_df.to_numpy(), 4)
X_test, y_test = create_dataset(test_df.to_numpy(), 4)
X_train.shape, y_train.shape, X_test.shape, y_test.shape
# X_train, y_train = train_df_torch[:, :-1], train_df_torch[:, -1].reshape(-1, 1)
# X_test, y_test= test_df_torch[:, :-1], test_df_torch[:, -1].reshape(-1, 1)


(torch.Size([5482, 4, 8]),
 torch.Size([5482, 1]),
 torch.Size([96, 4, 8]),
 torch.Size([96, 1]))

In [179]:
class TimeSeriesLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, dropout_rate):
        super().__init__()
        
        self.hidden_size = hidden_size
        self.input_size = input_size
        self.num_layers = num_layers

        # Define the LSTM layer
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=dropout_rate)
        self.fc = nn.Linear(hidden_size, 1)
        
    
    def forward(self, x):
        # assume x is (N,L,input_size)
        out, _ = self.lstm(x)
        # print("out shape:", out.shape)
        return self.fc(out[:, -1, :])

In [180]:
class TimeSeriesANN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout_rate):
        super().__init__()
        
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # Define the LSTM layer
        # self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        
        # Define the fully connected layer
        self.input = nn.Sequential(
            nn.Linear(input_size, 256),
            nn.Sigmoid(),
        )
        self.mlp_block1 = nn.Sequential(
            nn.Linear(256, 256),
            nn.Tanh(),
            nn.Dropout(dropout_rate),
            nn.Linear(256, 512),
            nn.ELU(alpha=0.5),
            nn.Linear(512, 512),
            nn.LeakyReLU(negative_slope = 0.15),
        )

      
        self.output_lyr = nn.Linear(512, output_size)
        
    
    def forward(self, x):
        a0 = self.input(x)
        a1 = self.mlp_block1(a0)
        a2 = self.output_lyr(a1)
        return a2


In [181]:
def initialize_weights(m):
    if isinstance(m, nn.Linear):
        nn.init.xavier_normal_(m.weight)
        nn.init.zeros_(m.bias)
    elif isinstance(m, nn.LSTM):
        for name, param in m.named_parameters():
            if "bias" in name:
                nn.init.zeros_(param)
            elif "weight" in name:
                nn.init.orthogonal_(param)

In [184]:
model = TimeSeriesLSTM(
    input_size=8,
    hidden_size=40,
    num_layers=5,
    dropout_rate=0.1
)

# model.apply(initialize_weights)
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)
# scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[16, 50, 80], gamma=0.0001)
# loader = load_data("data/processed/mastercard_stock_history_processed.csv", batch_size=8)
loader = DataLoader(TensorDataset(X_train, y_train), shuffle = False, batch_size = 8)
train_rmse_lst = []
test_rmse_lst = []
 
n_epochs = 100
for epoch in range(n_epochs):
    model.train()
    for X_batch, y_batch in loader:
        y_pred = model(X_batch)
        loss = loss_fn(y_pred, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # scheduler.step()
    # Validation
    if epoch % 1 != 0:
        continue
    model.eval()
    with torch.no_grad():
        y_pred = model(X_train)
        train_rmse = np.sqrt(loss_fn(y_pred, y_train))
        # train_rmse = loss_fn(y_pred, y_train)
        y_pred = model(X_test)
        test_rmse = np.sqrt(loss_fn(y_pred, y_test))

        train_rmse_lst.append(train_rmse)
        test_rmse_lst.append(test_rmse)
        # test_rmse = loss_fn(y_pred, y_test)
    print("Epoch %d: train RMSE %.4f, test RMSE %.4f" % (epoch, train_rmse, test_rmse))

Epoch 0: train RMSE 124.6372, test RMSE 333.2244
Epoch 1: train RMSE 119.7145, test RMSE 324.4249
Epoch 2: train RMSE 115.8077, test RMSE 316.7710
Epoch 3: train RMSE 112.6246, test RMSE 309.8889
Epoch 4: train RMSE 110.0356, test RMSE 303.6576
Epoch 5: train RMSE 107.7444, test RMSE 297.4245
Epoch 6: train RMSE 106.1161, test RMSE 292.3638
Epoch 7: train RMSE 104.5217, test RMSE 286.5466
Epoch 8: train RMSE 103.6135, test RMSE 282.6058
Epoch 9: train RMSE 102.9297, test RMSE 279.1304
Epoch 10: train RMSE 102.4224, test RMSE 276.0694
Epoch 11: train RMSE 102.0486, test RMSE 273.3635
Epoch 12: train RMSE 101.6046, test RMSE 269.1047
Epoch 13: train RMSE 101.3110, test RMSE 264.2186
Epoch 14: train RMSE 94.8217, test RMSE 258.6304
Epoch 15: train RMSE 87.6555, test RMSE 252.5172
Epoch 16: train RMSE 88.7415, test RMSE 246.3384
Epoch 17: train RMSE 97.0931, test RMSE 240.2852
Epoch 18: train RMSE 94.7422, test RMSE 234.2229
Epoch 19: train RMSE 85.7767, test RMSE 228.2627
Epoch 20: train 

In [121]:
plt.plot(train_rmse_lst, color = "blue")
plt.plot(test_rmse_lst, color = "orange")
plt.show()

# To experiment with some deep learning models on the time series data given