## Modified version of Training with Pytorch Example from pytorch.org
https://pytorch.org/tutorials/beginner/introyt/trainingyt.html

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms

# # PyTorch TensorBoard support
from torch.utils.tensorboard import SummaryWriter
from datetime import datetime
import torch.nn as nn
import torch.nn.functional as F
import time
import tqdm
import pandas as pd

In [61]:
# example of tqdm loader:

from tqdm import tqdm
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

transform = transforms.ToTensor()
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
train_dataloader = DataLoader(train_dataset, batch_size=8096)

for batch_idx, (data, target) in enumerate(tqdm(train_dataloader)):
    # Perform training operations
    time.sleep(0.01)

100%|██████████| 1875/1875 [00:27<00:00, 67.69it/s]


In [63]:
## example of loss function 
loss_fn = torch.nn.CrossEntropyLoss()

# NB: Loss functions expect data in batches, so we're creating batches of 4
# Represents the model's confidence in each of the 10 classes for a given input
dummy_outputs = torch.rand(4, 10)
# Represents the correct class among the 10 being tested
dummy_labels = torch.tensor([1, 5, 3, 7])

print(dummy_outputs)
print(dummy_labels)

loss = loss_fn(dummy_outputs, dummy_labels)
print('Total loss for this batch: {}'.format(loss.item()))

tensor([[0.0471, 0.3683, 0.0417, 0.6410, 0.1431, 0.7615, 0.3012, 0.5396, 0.5125,
         0.3239],
        [0.3157, 0.7032, 0.3115, 0.5444, 0.5529, 0.8769, 0.5162, 0.8574, 0.4591,
         0.7291],
        [0.3332, 0.8718, 0.3636, 0.9934, 0.8329, 0.9697, 0.8218, 0.1405, 0.2814,
         0.4694],
        [0.6791, 0.0784, 0.3677, 0.4327, 0.1969, 0.6881, 0.9821, 0.1743, 0.1682,
         0.6890]])
tensor([1, 5, 3, 7])
Total loss for this batch: 2.2345452308654785


In [None]:
def get_gpu():
    if torch.cuda.is_available():
        device = torch.device("cuda")
        print(f"CUDA is available. Using GPU: {torch.cuda.get_device_name(0)}")
    else:
        device = torch.device("cpu")
        print("CUDA not available . Using CPU.")

    return device

def train_with_data(model, 
                train_loader,
                loss_function,
                optimizer, 
                device,
                num_epochs = 5,
                **kwargs):
    start_time = time.time()
    model.to(device)
    model.train()
    epoch_loss = []
    epoch_time = []
    for epoch in range(num_epochs):
        epoch_start_time = time.time()
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(tqdm(train_loader)):

            inputs = inputs.to(device)
            labels = labels.to(device)

            ##forward pass
            outputs = model(inputs)
            loss = loss_function(outputs, labels)

            ##backward pass
            optimizer.zero_grad() # clear previous gradients
            loss.backward()  # compute gradients
            optimizer.step() # Update weights

            running_loss += loss.item()
            # if (i+1)% 100 == 0:
                # print(f'Epoch [{epoch+1}/{num_epochs}], Batch [{i+1}/{len(train_loader)}], Loss: {loss.item():.4f}')
        avg_loss = running_loss/len(train_loader)
        epoch_loss += [avg_loss]
        epoch_end_time = time.time()
        epoch_time += [epoch_end_time - epoch_start_time]
        print(f'---------- Epoch {epoch+1} Finished ---------- Average Loss: {avg_loss:.4f}')

    end_time = time.time()
    print(f"----Training Finished-----")
    print(f"Total Training Time: {end_time - start_time:.2f} seconds")
    train_history = pd.DataFrame(
        {'epoch':range(num_epochs),
        'loss': epoch_loss,
        'training_time': epoch_time}
    )
    return train_history

train_config = dict()

In [71]:
BATCH_SIZE = 4096
N_EPOCHS = 20
LR = 1e-4
WD = 1e-5


In [72]:
# load data 
transform = transforms.Compose(
    [transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))])

# Create datasets for training & validation, download if necessary
training_set = torchvision.datasets.FashionMNIST('./data', train=True, transform=transform, download=True)
validation_set = torchvision.datasets.FashionMNIST('./data', train=False, transform=transform, download=True)

# Create data loaders for our datasets; shuffle for training, not for validation
training_loader = torch.utils.data.DataLoader(training_set, batch_size=BATCH_SIZE, shuffle=True)
validation_loader = torch.utils.data.DataLoader(validation_set, batch_size=BATCH_SIZE, shuffle=False)

# Class labels
classes = ('T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
        'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle Boot')

# Report split sizes
print('Training set has {} instances'.format(len(training_set)))
print('Validation set has {} instances'.format(len(validation_set)))

Training set has 60000 instances
Validation set has 10000 instances


In [73]:

# PyTorch models inherit from torch.nn.Module
class GarmentClassifier(nn.Module):
    def __init__(self):
        super(GarmentClassifier, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


In [74]:
# Optimizers specified in the torch.optim package
model = GarmentClassifier()
training_set = torchvision.datasets.FashionMNIST('./data', train=True, transform=transform, download=True)
validation_set = torchvision.datasets.FashionMNIST('./data', train=False, transform=transform, download=True)
training_loader = torch.utils.data.DataLoader(training_set, batch_size=BATCH_SIZE, shuffle=True)
validation_loader = torch.utils.data.DataLoader(validation_set, batch_size=BATCH_SIZE, shuffle=False)
train_loader = training_loader
loss_function = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay = WD)
device = get_gpu()

CUDA is available. Using GPU: NVIDIA RTX 2000 Ada Generation Laptop GPU


In [75]:
train_with_data(model, 
            train_loader = train_loader,
            loss_function =  loss_function, 
            optimizer = optimizer, 
            device = device,
)

100%|██████████| 15/15 [00:08<00:00,  1.68it/s]


---------- Epoch 1 Finished ---------- Average Loss: 2.2984


100%|██████████| 15/15 [00:08<00:00,  1.69it/s]


---------- Epoch 2 Finished ---------- Average Loss: 2.2838


100%|██████████| 15/15 [00:08<00:00,  1.67it/s]


---------- Epoch 3 Finished ---------- Average Loss: 2.2602


100%|██████████| 15/15 [00:09<00:00,  1.61it/s]


---------- Epoch 4 Finished ---------- Average Loss: 2.2191


100%|██████████| 15/15 [00:08<00:00,  1.68it/s]

---------- Epoch 5 Finished ---------- Average Loss: 2.1469
----Training Finished-----
Total Training Time: 45.01 seconds





Unnamed: 0,epoch,loss,training_time
0,0,2.29839,8.915429
1,1,2.283805,8.903847
2,2,2.260237,8.972782
3,3,2.219064,9.302376
4,4,2.146907,8.915325
