In [23]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

In [24]:
device = torch.device("cuda" if torch.cuda.is_available() else 'cpu')

mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])

data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ]),
    'val': transforms.Compose([
        transforms.RandomResizedCrop(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ])
}

In [25]:
# import data
data_dir = 'data/hymenoptera_data'
sets = ['train', 'val']
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                         data_transforms[x])
                 for x in sets}

dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=4,
                                             shuffle=True, num_workers=0)
              for x in sets}

dataset_sizes = {x: len(image_datasets[x]) for x in sets}
class_names = image_datasets['train'].classes
print(class_names)

['ants', 'bees']


In [33]:
def train_model(model, criterion, optimizer, scheduler, n_epochs=3):
    """
    1. train # optimizer.step()
    2. evaluate
    3. scheduler # scheduler.step()
    
    """
    since = time.time()
    
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    
    for epoch in range(n_epochs):
        print(f"Epoch {epoch+1}/{n_epochs}")
        print('-'*10)
        
        # Each epoch has a training and a validation phase
        for phase in sets:
            if phase == 'train':
                model.train() # Set model to training mode
            else:
                model.eval() # Set model to evaluating mode
            
            running_loss = 0.0
            running_corrects = 0
            
            # Interate over data
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)
                
                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, dim=1)
                    loss = criterion(outputs, labels)
                    
                    # backward
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                        optimizer.zero_grad()
                
                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
            
            if phase == 'train':
                scheduler.step()
            
            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]
            
            print(f"{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}")
            
            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
        
        print()
    
    time_elapsed = time.time() - since
    print(f"Training complete in {time_elapsed//60:.0f}m {time_elapsed%60:.0f}s")
    print(f"Best val Acc: {best_acc:.4f}")
    
    # load best model weights
    model.load_state_dict(best_model_wts)
    return(model)

In [34]:
# Import pre-trained model to utilize transfer learning

# Option 1
# Fine-tuning all the weights based on the data

model = models.resnet18(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 2) # Change the last layer's output ftrs
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr = 0.001)

# lr schedular
# Every {step_size} epoch, the learning rate is multiplied by {gamma} (decay factor)
step_lr_scheduler = lr_scheduler.StepLR(optimizer,
                                        step_size=7,
                                        gamma=0.1)

model = train_model(model, criterion, optimizer, step_lr_scheduler, n_epochs = 3)

Epoch 1/3
----------
train Loss: 0.6584 Acc: 0.5984
val Loss: 0.4846 Acc: 0.8105

Epoch 2/3
----------
train Loss: 0.5219 Acc: 0.7500
val Loss: 0.4062 Acc: 0.8366

Epoch 3/3
----------
train Loss: 0.4545 Acc: 0.7910
val Loss: 0.3044 Acc: 0.9281

Training complete in 2m 43s
Best val Acc: 0.9281


In [35]:
# Option 2
# Freeze all the layers in the beginning and only train the very last layer
# Faster than the first option

model = models.resnet18(pretrained=True)
for param in model.parameters():
    param.requires_grad = False # Freeze all the layers in the beginning
    
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 2) # Set a new last layer, defaulting requires_grad = True
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr = 0.001)

# lr schedular
# Every {step_size} epoch, the learning rate is multiplied by {gamma} (decay factor)
step_lr_scheduler = lr_scheduler.StepLR(optimizer,
                                        step_size=7,
                                        gamma=0.1)

model = train_model(model, criterion, optimizer, step_lr_scheduler, n_epochs = 3)

Epoch 1/3
----------
train Loss: 0.6543 Acc: 0.6270
val Loss: 0.5885 Acc: 0.7320

Epoch 2/3
----------
train Loss: 0.5730 Acc: 0.7172
val Loss: 0.5284 Acc: 0.7451

Epoch 3/3
----------
train Loss: 0.5629 Acc: 0.7295
val Loss: 0.4282 Acc: 0.7778

Training complete in 1m 14s
Best val Acc: 0.7778


Compare the two results above we can see that freezing all the parameters except those of the last layer can make the training much faster, but the accuracy increases much slower. The **torchvision.models** package provides lots of pre-trained models that are confirmed powerful in lots of papers in numerous tasks. Utilizing those models with transfer learning is a great idea to quickly put your idea into practice.