In [10]:
import os
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [4]:
batch_size = 100

# transforms.ToTensor() turns each PIL image into a float tensor.
# download=True fetches the files only when they are missing under `root`,
# so the cell also works on a fresh checkout.
training_data = datasets.FashionMNIST(
    root="../fashion_mnist", train=True, download=True, transform=transforms.ToTensor()
)
test_data = datasets.FashionMNIST(
    root="../fashion_mnist", train=False, download=True, transform=transforms.ToTensor()
)

# Reshuffle the training set every epoch so mini-batches differ between
# epochs; the test loader keeps a fixed order.
train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

In [6]:
# Hyperparameters

# Input geometry: every image batch is reshaped to
# (batch, sequence_len, input_len) before it is passed to the model.
input_len = 28       # features per time step
sequence_len = 28    # time steps per sample

# Network size
num_layers = 2       # stacked LSTM layers
hidden_size = 128    # width of each LSTM hidden state
num_classes = 10     # size of the output layer (one score per class)

# Optimisation
learning_rate = 0.01
num_epochs = 5

In [15]:
class LSTM(nn.Module):
    """Stacked LSTM followed by a linear layer applied to the last time step.

    Args:
        input_len: Number of features per time step.
        hidden_size: Width of each LSTM layer's hidden state.
        num_classes: Number of output units of the final linear layer.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_len, hidden_size, num_classes, num_layers):
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_len, hidden_size, num_layers, batch_first=True)
        self.output_layer = nn.Linear(hidden_size, num_classes)

    def forward(self, X):
        """Return class scores for a batch of sequences.

        Args:
            X: Tensor of shape ``(batch, seq_len, input_len)`` (the LSTM is
                built with ``batch_first=True``).

        Returns:
            Tensor of shape ``(batch, num_classes)`` computed from the LSTM
            output at the final time step.
        """
        # Allocate the zero initial states from X so they share its device and
        # dtype; plain torch.zeros would always be CPU/float32 and fail once
        # the model or input lives elsewhere.
        state_shape = (self.num_layers, X.size(0), self.hidden_size)
        hidden_states = X.new_zeros(state_shape)
        cell_states = X.new_zeros(state_shape)

        out, _ = self.lstm(X, (hidden_states, cell_states))
        # Only the last time step feeds the classifier head.
        return self.output_layer(out[:, -1, :])


In [16]:
# Build the classifier from the notebook-level hyperparameters.
model = LSTM(
    input_len=input_len,
    hidden_size=hidden_size,
    num_classes=num_classes,
    num_layers=num_layers,
)
model  # bare last expression so the notebook displays the module summary

LSTM(
  (lstm): LSTM(28, 128, num_layers=2, batch_first=True)
  (output_layer): Linear(in_features=128, out_features=10, bias=True)
)

In [21]:
# Cross-entropy loss computed on the model's output scores.
loss_function = nn.CrossEntropyLoss()

# Adam is the active optimizer; the SGD variant is kept for comparison runs.
# optimizer = optim.SGD(model.parameters(), lr=learning_rate)
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [22]:
def train(num_epochs, model, train_dataloader, loss_function, optimizer=None):
    """Train ``model`` for ``num_epochs`` passes over ``train_dataloader``.

    Every batch of images is reshaped to ``(-1, sequence_len, input_len)``
    (both read from notebook-level variables) before the forward pass. The
    loss of every 100th batch is printed.

    Args:
        num_epochs: Number of full passes over ``train_dataloader``.
        model: Module to optimise; called on the reshaped image batch.
        train_dataloader: Sized iterable yielding ``(images, labels)`` batches.
        loss_function: Callable ``loss_function(output, labels)`` returning a
            scalar tensor.
        optimizer: Optimizer that updates ``model``'s parameters. When omitted,
            the notebook-level ``optimizer`` variable is used, so existing
            four-argument calls keep working.

    Raises:
        NameError: If ``optimizer`` is omitted and no notebook-level
            ``optimizer`` exists.
    """
    if optimizer is None:
        # Backward-compatible fallback to the notebook-level optimizer.
        try:
            optimizer = globals()["optimizer"]
        except KeyError:
            raise NameError(
                "name 'optimizer' is not defined; pass optimizer=... to train()"
            ) from None

    # Make sure training-mode behaviour is active even if the model was
    # previously switched to eval mode.
    model.train()

    total_steps = len(train_dataloader)
    for epoch in range(num_epochs):
        for batch, (images, labels) in enumerate(train_dataloader):
            images = images.reshape(-1, sequence_len, input_len)

            output = model(images)
            loss = loss_function(output, labels)

            # Clear gradients left over from the previous step before backprop.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if (batch + 1) % 100 == 0:
                print(f"Epoch {epoch+1}; Batch {batch+1} / {total_steps}; Loss: {loss.item():>4f}")

In [23]:
# Run the training loop with the notebook-level model, data and loss.
train(
    num_epochs=num_epochs,
    model=model,
    train_dataloader=train_dataloader,
    loss_function=loss_function,
)

Epoch 1; Batch 100 / 600; Loss: 1.069049
Epoch 1; Batch 200 / 600; Loss: 0.698185
Epoch 1; Batch 300 / 600; Loss: 0.486172
Epoch 1; Batch 400 / 600; Loss: 0.500845
Epoch 1; Batch 500 / 600; Loss: 0.619615
Epoch 1; Batch 600 / 600; Loss: 0.328353
Epoch 2; Batch 100 / 600; Loss: 0.410029
Epoch 2; Batch 200 / 600; Loss: 0.330370
Epoch 2; Batch 300 / 600; Loss: 0.317504
Epoch 2; Batch 400 / 600; Loss: 0.340645
Epoch 2; Batch 500 / 600; Loss: 0.478204
Epoch 2; Batch 600 / 600; Loss: 0.281845
Epoch 3; Batch 100 / 600; Loss: 0.316833
Epoch 3; Batch 200 / 600; Loss: 0.287839
Epoch 3; Batch 300 / 600; Loss: 0.289350
Epoch 3; Batch 400 / 600; Loss: 0.338230
Epoch 3; Batch 500 / 600; Loss: 0.489929
Epoch 3; Batch 600 / 600; Loss: 0.251440
Epoch 4; Batch 100 / 600; Loss: 0.315510
Epoch 4; Batch 200 / 600; Loss: 0.333396
Epoch 4; Batch 300 / 600; Loss: 0.231799
Epoch 4; Batch 400 / 600; Loss: 0.275805
Epoch 4; Batch 500 / 600; Loss: 0.399932
Epoch 4; Batch 600 / 600; Loss: 0.190961
Epoch 5; Batch 1

With the SGD optimizer, the training loss was about 1.7.

With the Adam optimizer, the training loss is about 0.18.