In [1]:
import pandas as pd
import torch.utils.data as data
import torch
from torch.utils.data import DataLoader
import torch.nn as nn
import numpy as np

In [2]:
df_sales_train_v = pd.read_csv("sales_train_validation.csv")
df_sales_train_v

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,d_1,d_2,d_3,d_4,...,d_1904,d_1905,d_1906,d_1907,d_1908,d_1909,d_1910,d_1911,d_1912,d_1913
0,HOBBIES_1_001_CA_1_validation,HOBBIES_1_001,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,1,3,0,1,1,1,3,0,1,1
1,HOBBIES_1_002_CA_1_validation,HOBBIES_1_002,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
2,HOBBIES_1_003_CA_1_validation,HOBBIES_1_003,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,2,1,2,1,1,1,0,1,1,1
3,HOBBIES_1_004_CA_1_validation,HOBBIES_1_004,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,1,0,5,4,1,0,1,3,7,2
4,HOBBIES_1_005_CA_1_validation,HOBBIES_1_005,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,2,1,1,0,1,1,2,2,2,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30485,FOODS_3_823_WI_3_validation,FOODS_3_823,FOODS_3,FOODS,WI_3,WI,0,0,2,2,...,2,0,0,0,0,0,1,0,0,1
30486,FOODS_3_824_WI_3_validation,FOODS_3_824,FOODS_3,FOODS,WI_3,WI,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
30487,FOODS_3_825_WI_3_validation,FOODS_3_825,FOODS_3,FOODS,WI_3,WI,0,6,0,2,...,2,1,0,2,0,1,0,0,1,0
30488,FOODS_3_826_WI_3_validation,FOODS_3_826,FOODS_3,FOODS,WI_3,WI,0,0,0,0,...,0,0,1,0,0,1,0,3,1,3


In [3]:
df_sales_ca1 = df_sales_train_v.loc[df_sales_train_v['store_id'] == 'CA_1']
df_sales_ca1 = df_sales_ca1.loc[df_sales_ca1['item_id'] == 'HOBBIES_1_001']
df_sales_ca1

Unnamed: 0,id,item_id,dept_id,cat_id,store_id,state_id,d_1,d_2,d_3,d_4,...,d_1904,d_1905,d_1906,d_1907,d_1908,d_1909,d_1910,d_1911,d_1912,d_1913
0,HOBBIES_1_001_CA_1_validation,HOBBIES_1_001,HOBBIES_1,HOBBIES,CA_1,CA,0,0,0,0,...,1,3,0,1,1,1,3,0,1,1


In [4]:
df_sales_ca1_info = df_sales_ca1.iloc[:, 1:6]
df_sales_ca1_d = df_sales_ca1.iloc[:, 6:]

df_sales_ca1_d

Unnamed: 0,d_1,d_2,d_3,d_4,d_5,d_6,d_7,d_8,d_9,d_10,...,d_1904,d_1905,d_1906,d_1907,d_1908,d_1909,d_1910,d_1911,d_1912,d_1913
0,0,0,0,0,0,0,0,0,0,0,...,1,3,0,1,1,1,3,0,1,1


In [5]:
data_seq = []
data_targets = []
sequence_len = 28
for i in range(df_sales_ca1_d.shape[1] - sequence_len - 1):
    sequence = df_sales_ca1_d.iloc[:, i:i + sequence_len].to_numpy()
    data_seq.append((torch.from_numpy(sequence)).float())
    targets = df_sales_ca1_d.iloc[:, i + sequence_len + 1].to_numpy()
    data_targets.append((torch.from_numpy(targets)).float())

In [6]:
data = torch.stack(data_seq).float()
data_targets = torch.stack(data_targets).float()

train_split = int(len(data) * 0.7)
train_set = torch.utils.data.TensorDataset(data[:train_split].squeeze(), data_targets[:train_split])
train_loader = DataLoader(train_set, batch_size=32, drop_last=True)
test_data, test_targets = data[train_split:], data_targets[train_split:]
test_data = test_data.squeeze()

In [7]:
class LSTMRegressor(nn.Module):

    def __init__(self, input_size, hidden_size, num_layers, out_size):
        super().__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers)
        self.fc = nn.Linear(hidden_size, out_size)
        
    def init_hidden(self, batch_size):
        hidden = torch.zeros(self.num_layers, batch_size, self.hidden_size)
        state = torch.zeros(self.num_layers, batch_size, self.hidden_size)
        return hidden, state
    
    def forward(self, x, hidden):
        x = torch.transpose(x, 0, 1)
        all_outputs, hidden = self.lstm(x, hidden)
        out = all_outputs[-1] # We are interested only in the last output
        x = self.fc(out)
        return x, hidden
    
model = LSTMRegressor(1, 28, 2, 1)
model

LSTMRegressor(
  (lstm): LSTM(1, 28, num_layers=2)
  (fc): Linear(in_features=28, out_features=1, bias=True)
)

In [8]:
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)
loss_fun = nn.MSELoss()

# Training loop
for epoch in range(101):
    for x, targets in train_loader:
        x = x.unsqueeze(2)
        hidden, state = model.init_hidden(x.size(0))
        preds, last_hidden = model(x, (hidden,state))
        preds = preds.squeeze(1)
        optimizer.zero_grad() 
        loss = loss_fun(preds, targets)
        loss.backward()
        optimizer.step()
    if epoch % 10 == 0:
        print(f"Epoch: {epoch}, loss: {loss.item():.3}")

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch: 0, loss: 1.03
Epoch: 10, loss: 0.803
Epoch: 20, loss: 0.801
Epoch: 30, loss: 0.799
Epoch: 40, loss: 0.799
Epoch: 50, loss: 0.798
Epoch: 60, loss: 0.798
Epoch: 70, loss: 0.797
Epoch: 80, loss: 0.797
Epoch: 90, loss: 0.796
Epoch: 100, loss: 0.796


In [9]:
with torch.no_grad():
    hidden, state = model.init_hidden(test_data.size(0))
    preds, _ = model(test_data.unsqueeze(2), (hidden, state))
print(f"Accuracy: {(torch.round(preds)==test_targets).sum().item()/len(test_targets):.3}")

Accuracy: 0.359


In [19]:
class LSTMRegressor(nn.Module):

    def __init__(self, input_size, hidden_size, num_layers, out_size, bidirectional = False):
        super().__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        if bidirectional:
            self.bidirectional = 2
        else:
            self.bidirectional = 1
        self.lstm = nn.LSTM(input_size = input_size, hidden_size = hidden_size, num_layers = num_layers, bidirectional=bidirectional, dropout=0.4)
        self.fc = nn.Linear(hidden_size*28*self.bidirectional, out_size)
        
    def init_hidden(self, batch_size):
        hidden = torch.zeros(self.num_layers*self.bidirectional , batch_size, self.hidden_size)
        state = torch.zeros(self.num_layers*self.bidirectional , batch_size, self.hidden_size)
        return hidden, state
    
    def forward(self, x, hidden):
        x = torch.transpose(x,0,1)
        all_outputs, hidden = self.lstm(x, hidden)
        all_outputs = torch.transpose(all_outputs,0,1)
        out = torch.flatten(all_outputs,1)
        x = self.fc(out)
        return x, hidden
    
model = LSTMRegressor(1,28,2,1)
model

LSTMRegressor(
  (lstm): LSTM(1, 28, num_layers=2, dropout=0.4)
  (fc): Linear(in_features=784, out_features=1, bias=True)
)

In [20]:
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)
loss_fun = nn.MSELoss()

# Training loop
for epoch in range(51):
    for x, targets in train_loader:
        x = x.unsqueeze(2)
        hidden, state = model.init_hidden(x.size(0))
        preds, last_hidden = model(x, (hidden,state))
        preds = preds.squeeze(1)
        optimizer.zero_grad() 
        loss = loss_fun(preds, targets)
        loss.backward()
        optimizer.step()
    if epoch % 10 == 0:
        print(f"Epoch: {epoch}, loss: {loss.item():.3}")

Epoch: 0, loss: 0.802
Epoch: 10, loss: 0.798
Epoch: 20, loss: 0.801
Epoch: 30, loss: 0.806
Epoch: 40, loss: 0.808
Epoch: 50, loss: 0.804


In [21]:
with torch.no_grad():
    hidden, state = model.init_hidden(test_data.size(0))
    preds, _ = model(test_data.unsqueeze(2), (hidden, state))
print(f"Accuracy: {(torch.round(preds)==test_targets).sum().item()/len(test_targets):.3}")

Accuracy: 0.359


In [23]:
sorted(torch.round(preds), reverse=True)

[tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1.]),
 tensor([1