In [6]:
import torch_babysteps as bstep
import torchvision.transforms as transforms
from torchvision import datasets
from torch import nn
import torch
import torch.nn.functional as F
import time

In [16]:
BATCH_SIZE = 8092
N_EPOCHS = 20
LR = 1e-4
WD = 1e-5

In [28]:
# load data 
transform = transforms.Compose(
    [transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))])

data_transforms = {

    'train': transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    # add random rotation
    transforms.ToTensor(),
    transforms.Normalize( mean = [0.485, 0.456, 0.406], 
                         std = [0.229, 0.224, 0.225])

]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize( mean = [0.485, 0.456, 0.406], 
                         std = [0.229, 0.224, 0.225])
    ]),

}

# Create datasets for training & validation, download if necessary
training_set = datasets.FashionMNIST('./data', transform=transform, train=True, download=True) #FashionMNIST('./data', train=True, transform=transform, download=True)
validation_set = datasets.FashionMNIST('./data', train=False, transform=transform, download=True) #FashionMNIST(

# Create data loaders for our datasets; shuffle for training, not for validation
training_loader = torch.utils.data.DataLoader(training_set, batch_size=BATCH_SIZE, shuffle=True)
validation_loader = torch.utils.data.DataLoader(validation_set, batch_size=BATCH_SIZE, shuffle=False)

# Class labels
classes = ('T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
        'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle Boot')

# Report split sizes
print('Training set has {} instances'.format(len(training_set)))
print('Validation set has {} instances'.format(len(validation_set)))

Training set has 60000 instances
Validation set has 10000 instances


In [29]:
# Optimizers specified in the torch.optim packagemodel = tbstep.FashionMNISTClassifier()
model = bstep.FashionMNISTClassifier()
training_set = datasets.FashionMNIST('./data', train=True, transform=transform, download=True)
validation_set = datasets.FashionMNIST('./data', train=False, transform=transform, download=True)
training_loader = torch.utils.data.DataLoader(training_set, batch_size=BATCH_SIZE, shuffle=True)
validation_loader = torch.utils.data.DataLoader(validation_set, batch_size=BATCH_SIZE, shuffle=False)
loss_function = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay = WD)
device = bstep.get_gpu()

CUDA is available. Using GPU: NVIDIA RTX 2000 Ada Generation Laptop GPU


In [30]:
bstep.train_classifier(model = model, 
            train_ds = training_loader,
            loss_function =  loss_function, 
            optimizer = optimizer, 
            device = device,
)

100%|██████████| 8/8 [00:09<00:00,  1.14s/it]


---------- Epoch 1 Finished ---------- Average Loss: 2.3034


100%|██████████| 8/8 [00:08<00:00,  1.12s/it]


---------- Epoch 2 Finished ---------- Average Loss: 2.2971


100%|██████████| 8/8 [00:08<00:00,  1.02s/it]


---------- Epoch 3 Finished ---------- Average Loss: 2.2897


100%|██████████| 8/8 [00:08<00:00,  1.06s/it]


---------- Epoch 4 Finished ---------- Average Loss: 2.2806


100%|██████████| 8/8 [00:08<00:00,  1.12s/it]

---------- Epoch 5 Finished ---------- Average Loss: 2.2683
----Training Finished-----
Total Training Time: 43.65 seconds





Unnamed: 0,epoch,loss,training_time
0,0,2.303388,9.09804
1,1,2.29711,9.000985
2,2,2.289659,8.135369
3,3,2.280595,8.464772
4,4,2.268314,8.945993


In [33]:
class WarmupCosineAnnealingLR():
    def __init__(self, optimizer, warmup_steps, max_lr, decay_steps, final_lr = 0, last_epoch = -1):
        self.warmup_steps = warmup_steps
        self.max_lr = max_lr
        self.decay_steps = decay_steps
        self.final_lr = final_lr
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        if self.last_epoch < self.warmup_steps: 
            return [self.initial_lr + (self.max_lr - self.initial_lr)*(self.last_epoch +1)/ self.warmup_steps for base_lr in self.base_lrs]
        
        else:
            progress = (self.last_epoch - self.warmup_steps)
            cosine_val = 0.5* (1 + torch.cos( torch.pi*  progress))
            return [self.final_lr + (self.max_lr - self.final_lr)* cosine_val for base_lr in self.base_lrs]
        
total_steps = 100
warmup_epochs = 5
plateau_epochs = 10
total_epochs = 30
decay_epochs = total_steps - warmup_epochs - plateau_epochs
warmup_steps = total_steps * warmup_epochs/total_epochs #epochs = 
decay_steps = total_steps - warmup_steps


lr_scheduler = WarmupCosineAnnealingLR(
    torch.optim.AdamW(model.parameters(), lr= 1e-4, weight_decay = 1e-5),
    warmup_steps = warmup_steps, 
    max_lr = 1e-1,
    decay_steps = decay_steps,
    final_lr = 5e-4
)



TypeError: object.__init__() takes exactly one argument (the instance to initialize)