In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision

from torchvision import datasets, models, transforms

import matplotlib.pyplot as plt
import time
import os
import copy



In [1]:
"""
METHODS

    - transforms.Compose([transformations...])  from torchvision
        chains several transforms into a single callable
    - transforms.RandomResizedCrop()
                .RandomHorizontalFlip()
                .ToTensor()
                .Normalize(mean, std)

    - datasets.ImageFolder(root: str, transform: transforms.Compose)  from torchvision
    - ImageFolder.classes : the list of class names (one per class sub-folder)

    - DataLoader(dataset: ImageFolder, batch_size: int, shuffle: bool, num_workers: int)  from torch.utils.data

    - model.train()
        sets the model to training mode: dropout layers are active and batch
        normalization layers use the statistics of the current batch.
        It does NOT turn gradient tracking on or off by itself.
        We call it before we start the training loop.

    - model.eval()
        sets the model to evaluation mode: dropout is disabled and batch
        normalization layers use their running statistics.
        It does NOT disable gradient computation; use torch.no_grad() or
        torch.set_grad_enabled(False) around the forward pass for that.
        Typically we call it when evaluating the model on a validation set
        or when making predictions on unseen data.

    - with torch.set_grad_enabled(expression: bool):
        * If expression is True, gradient tracking is enabled within the block,
        and PyTorch will track gradients for all tensor operations.
        * If expression is False, gradient tracking is disabled within the block,
        and PyTorch will not track gradients for tensor operations, even if the
        tensors involved have requires_grad=True.

    - these two are the same:
        with torch.no_grad():
            # block of code

        with torch.set_grad_enabled(False):
            # block of code

    - torch.max(tensor) returns a single scalar (0-dim tensor) whatever the shape.
      torch.max(tensor, dim) reduces along that dimension only: the indices of
      `dim` vary while all the other indices are held fixed, and the max is taken.
      It returns a (values, indices) pair.

NOTES:
    - The transforms are attached to the dataset (ImageFolder) that the DataLoader
        reads from: every time an image is loaded, the list of transformations is
        applied to it on the fly. It does not add extra images to the dataset.

    - In training we multiply the loss by the number of samples in the batch:
        loss.item() * inputs.size(0), because the criterion returns the MEAN loss
        over the batch. Summing these products and dividing by the total number of
        samples at the end gives the mean loss per sample over the whole dataset.

    - Setting requires_grad = False on the params of model.parameters()
        freezes them (no gradients are computed for them in backward()).
"""
print()




In [None]:
# Channel-wise normalization statistics (RGB order).
# The torchvision ResNet-18 weights loaded later in this notebook were trained on
# ImageNet images normalized with exactly these values, so inputs must be
# normalized the same way for the pretrained features to be meaningful
# (this matters most when the backbone is frozen and cannot adapt).
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])

# One transform pipeline per split:
#   - 'train' uses random crop + random flip, so each epoch sees a different
#     variant of every image;
#   - 'val' uses a deterministic resize + center crop so evaluation is repeatable.
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)
    ]),
}

data_dir = 'data/hymenoptera_data'

# ImageFolder is a generic dataset that expects the images to be arranged
# with one sub-folder per class:
# root/
# root/train/
#           /class1
#           /class2
#           /...etc
# root/val/
#           /class1
#           /class2
#           /...etc
#
# It takes the folder path and the transform to apply to every loaded image.
image_datasets = {
    x: datasets.ImageFolder(root=os.path.join(data_dir, x), transform=data_transforms[x])
    for x in ['train', 'val']
}


# DataLoader: takes a dataset, batch size, shuffle flag and number of workers.
# Only the training split is shuffled; the order of the validation samples does
# not affect the metrics, so it is kept deterministic.
dataloaders = {
    x: torch.utils.data.DataLoader(
        image_datasets[x], batch_size=4, shuffle=(x == 'train'), num_workers=0
    )
    for x in ['train', 'val']
}


# Number of samples per split; used to turn summed losses / correct counts
# into per-sample averages.
dataset_sizes = {
    x: len(image_datasets[x])
    for x in ['train', 'val']
}


# ImageFolder exposes the class names it found
class_names = image_datasets['train'].classes


# Use the first GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device(
    "cuda:0" if torch.cuda.is_available() else "cpu"
)
print(class_names)

# for plotting the images 
def imshow(inp, title=None):
    """Display a normalized image tensor with matplotlib.

    Args:
        inp: 3-D image tensor whose first axis is moved to the last position
            before plotting (i.e. channel-first input, as produced by
            ``ToTensor`` / ``make_grid``).
        title: optional title for the figure; when ``None`` no title is set.

    The module-level ``mean`` and ``std`` are used to undo the normalization
    applied by the data transforms.
    """
    # detach().cpu() makes the conversion work for tensors that live on the GPU
    # or that are part of an autograd graph; it is a no-op for plain CPU tensors.
    img = inp.detach().cpu().numpy().transpose((1, 2, 0))
    # Undo Normalize(mean, std): x_norm = (x - mean) / std  =>  x = std * x_norm + mean
    img = std * img + mean
    # Keep the values inside the [0, 1] range expected for float images.
    img = np.clip(img, 0, 1)
    plt.imshow(img)
    if title is not None:
        plt.title(title)
    plt.show()


# Get a batch of training data
# Pull a single mini-batch (images and their label indices) from the training loader.
train_batches = iter(dataloaders['train'])
inputs, classes = next(train_batches)

# Tile all images of the batch into one image grid.
out = torchvision.utils.make_grid(inputs)

# Show the grid, titled with the class name of every image in the batch.
batch_titles = [class_names[label] for label in classes]
imshow(out, title=batch_titles)

# Generic training loop: the optimizer passed in must already be initialized with the model parameters to update
def train_model(model, criterion, optimizer, scheduler, num_epochs=25,
                loaders=None, sizes=None, run_device=None):
    """Train ``model`` and return it loaded with its best validation weights.

    Each epoch runs a 'train' phase followed by a 'val' phase. After every
    validation phase the weights are snapshotted if the validation accuracy
    improved; the best snapshot is loaded back into the model at the end.

    Args:
        model: the network to train; it is called as ``model(inputs)`` and
            must already live on the device the batches are moved to.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
            The per-epoch loss assumes it returns the mean loss of the batch.
        optimizer: optimizer already constructed over the parameters to update.
        scheduler: learning-rate scheduler, stepped once per epoch right after
            the training phase.
        num_epochs: number of epochs to run.
        loaders: optional dict with 'train' and 'val' entries, each an iterable
            of ``(inputs, labels)`` batches. Defaults to the module-level
            ``dataloaders``.
        sizes: optional dict mapping 'train' / 'val' to the number of samples
            in that split. Defaults to the module-level ``dataset_sizes``.
        run_device: optional device the batches are moved to. Defaults to the
            module-level ``device``.

    Returns:
        The same ``model`` object, holding the weights of the epoch with the
        highest validation accuracy.
    """
    # Fall back to the notebook-level objects so existing calls keep working.
    if loaders is None:
        loaders = dataloaders
    if sizes is None:
        sizes = dataset_sizes
    if run_device is None:
        run_device = device

    since = time.time()

    # Snapshot of the best weights seen so far (deep copy: state_dict() returns
    # references to the live parameters, which keep changing during training).
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            is_train = phase == 'train'

            if is_train:
                model.train()  # training mode (affects dropout / batch-norm layers)
            else:
                model.eval()   # evaluation mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in loaders[phase]:
                inputs = inputs.to(run_device)
                labels = labels.to(run_device)

                # Forward pass; gradients are tracked only in the training phase.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    # Index of the largest score along dim 1 is the predicted class.
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if is_train:
                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()

                # Statistics: the batch loss is weighted by the batch size so that
                # dividing by the split size below yields a per-sample average.
                running_loss += loss.item() * inputs.size(0)
                # .item() keeps the counter a plain Python int instead of a
                # tensor living on the training device.
                running_corrects += torch.sum(preds == labels).item()

            if is_train:
                scheduler.step()

            epoch_loss = running_loss / sizes[phase]
            epoch_acc = running_corrects / sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # Keep a deep copy of the weights whenever validation accuracy improves.
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    # '.4f' (precision), not '4f' (minimum width), to match the per-epoch lines.
    print('Best val Acc: {:.4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)

    return model

In [None]:

#### Finetuning the convnet ####
# Load a pretrained model and reset the final fully connected layer.

# NOTE(review): `pretrained=True` is reported as deprecated by recent torchvision
# releases in favour of the `weights=` argument; kept as-is because the installed
# torchvision version is not visible from this notebook — confirm before changing.
model = models.resnet18(pretrained=True)
num_ftrs = model.fc.in_features


# Replace the classification head with one sized from the dataset itself, so the
# notebook keeps working if the data folder contains a different number of classes.
model.fc = nn.Linear(num_ftrs, len(class_names))
model = model.to(device)
criterion = nn.CrossEntropyLoss()
# Observe that all parameters are being optimized
optimizer = optim.SGD(model.parameters(), lr=0.001)



# StepLR decays the learning rate of each parameter group by gamma every step_size epochs.
# Here: decay the LR by a factor of 0.1 every 7 epochs.
# Learning rate scheduling should be applied after the optimizer's update,
# e.g., you should write your code this way:
# for epoch in range(100):
#     train(...)
#     validate(...)
#     scheduler.step()

step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
model = train_model(model, criterion, optimizer, step_lr_scheduler, num_epochs=25)


#### ConvNet as fixed feature extractor ####
# Here, we need to freeze all the network except the final layer.
# We set requires_grad = False to freeze the parameters so that no gradients
# are computed for them in backward().
# NOTE(review): `pretrained=True` is reported as deprecated by recent torchvision
# releases in favour of the `weights=` argument — confirm against the installed version.
model_conv = torchvision.models.resnet18(pretrained=True)
for param in model_conv.parameters():
    param.requires_grad = False

# Parameters of newly constructed modules have requires_grad=True by default,
# so the replacement head below is the only trainable part of the network.
num_ftrs = model_conv.fc.in_features
# Swap the last layer for a fully connected layer with one output per dataset class.
model_conv.fc = nn.Linear(num_ftrs, len(class_names))

model_conv = model_conv.to(device)

criterion = nn.CrossEntropyLoss()

# Observe that only parameters of final layer are being optimized as
# opposed to before.
optimizer_conv = optim.SGD(model_conv.fc.parameters(), lr=0.001, momentum=0.9)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=7, gamma=0.1)

model_conv = train_model(model_conv, criterion, optimizer_conv,
                         exp_lr_scheduler, num_epochs=25)

In [None]:
# In tensorflow I remember you can use 
