In [2]:
import argparse
import os, sys
import time
import datetime
from tqdm import tqdm_notebook as tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F

In [3]:
class Softloss(nn.Module):
    '''
    Temperature-softened loss between two sets of logits.

    Both inputs are divided by the temperature T and normalized over the last
    dimension; the loss is a weighted sum of three terms, scaled by T*T:

      * KL divergence (reduction="batchmean") between log_softmax(x/T) and
        softmax(y/T),
      * cosine embedding loss (target = +1) between the two probability
        vectors,
      * mean squared error between the two probability vectors.

    NOTE(review): presumably x holds student logits and y teacher logits
    (knowledge distillation) -- confirm against the caller.
    '''
    def __init__(self,T=4,loss_portion=(1,0,0)) -> None:
        '''
        T: temperature
        loss_portion: weights for (KLD, cosine, mse), in that order
        '''
        super(Softloss,self).__init__()
        self.T=T
        # Copy into a fresh list so instances never share (and cannot mutate)
        # the default argument, while `self.portion` stays index-assignable.
        self.portion=list(loss_portion)
    def forward(self,x,y):
        '''
        x, y: logits with the class dimension last.
              The cosine term builds its target from x.shape[0], so it
              assumes (batch, classes) inputs -- TODO confirm.
        Returns a scalar tensor: the weighted sum of the three terms times T*T.
        '''
        log_prob_x=F.log_softmax(x/self.T,dim=-1)
        prob_y=F.softmax(y/self.T,dim=-1)
        # kl_div expects log-probabilities as input, but cosine and MSE must
        # compare like with like: probabilities against probabilities.
        prob_x=log_prob_x.exp()
        # A target of +1 asks cosine_embedding_loss to pull each pair together.
        same=prob_x.new_ones(prob_x.shape[0])
        loss=self.portion[0]*F.kl_div(log_prob_x,prob_y,reduction="batchmean")\
            +self.portion[1]*F.cosine_embedding_loss(prob_x,prob_y,same)\
            +self.portion[2]*F.mse_loss(prob_x,prob_y)
        return loss*self.T*self.T


In [4]:
# useful libraries
import torchvision
import torchvision.transforms as transforms

#############################################
# your code here
# specify preprocessing function
# Per-channel mean / std handed to Normalize in both pipelines below.
_NORM_MEAN = (0.4914, 0.4822, 0.4465)
_NORM_STD = (0.2023, 0.1994, 0.2010)

# Evaluation pipeline: tensor conversion followed by normalization only.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
])

# Training pipeline: random 32x32 crop (after padding by 4) and random
# horizontal flip, then the same conversion + normalization as above.
# Vertical flip and colour jitter are left disabled.
transform_train = transforms.Compose([
    transforms.RandomCrop((32, 32), padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
])

# Validation reuses the plain (augmentation-free) pipeline.
transform_val = transform
#############################################
# do NOT change these
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader

# a few arguments, do NOT change these
DATA_ROOT = "./data"
TRAIN_BATCH_SIZE = 128
VAL_BATCH_SIZE = 100

#############################################
# Datasets: both CIFAR10 splits live under DATA_ROOT and are downloaded on
# demand; only the split flag and the transform differ.
_cifar_kwargs = dict(root=DATA_ROOT, download=True)
train_set = CIFAR10(train=True, transform=transform_train, **_cifar_kwargs)
val_set = CIFAR10(train=False, transform=transform_val, **_cifar_kwargs)

# Loaders: the training loader shuffles, the validation loader does not;
# both use two worker processes.
train_loader = DataLoader(train_set, batch_size=TRAIN_BATCH_SIZE, shuffle=True, num_workers=2)
val_loader = DataLoader(val_set, batch_size=VAL_BATCH_SIZE, shuffle=False, num_workers=2)
#############################################

Files already downloaded and verified
Files already downloaded and verified


In [5]:
import torch.nn as nn
import torch.optim as optim



In [6]:
import torchvision.models

def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over `loader`; train when `optimizer` is given, else evaluate.

    Returns (mean loss per sample, accuracy), both computed over the samples
    actually iterated, so the figures stay correct for loaders that drop or
    subsample data. An empty loader yields (0.0, 0.0).
    """
    training = optimizer is not None
    model.train(training)

    loss_sum = 0.0
    correct = 0
    seen = 0
    # Gradients are only needed while training; disabling them otherwise
    # saves memory during validation.
    with torch.set_grad_enabled(training):
        for inputs, targets in loader:
            inputs = inputs.to(device)
            targets = targets.to(device).long()

            out = model(inputs)
            loss = criterion(out, targets)

            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            batch_size = targets.size(0)
            # The criterion returns a batch mean; weight it by the batch size
            # so a short final batch does not skew the epoch average.
            loss_sum += loss.item() * batch_size
            correct += torch.sum(out.argmax(-1) == targets).item()
            seen += batch_size

    if seen == 0:
        return 0.0, 0.0
    return loss_sum / seen, correct / seen


def train_res50(train_loader,val_loader,INITIAL_LR = 0.1,REG = 0.0006,DECAY_EPOCHS=(70,140), DECAY=0.1,EPOCHS=200,MOMENTUM=0.9,CHECKPOINT_FOLDER="./tmp_model"):
    """Train a torchvision ResNet-50 (num_classes=10) with SGD and step LR decay.

    Args:
        train_loader: iterable of (inputs, targets) batches used for training.
        val_loader: iterable of (inputs, targets) batches used for validation.
        INITIAL_LR: learning rate at epoch 0.
        REG: weight decay passed to the optimizer.
        DECAY_EPOCHS: epochs at whose start the learning rate is multiplied
            by DECAY.
        DECAY: multiplicative learning-rate decay factor.
        EPOCHS: total number of epochs.
        MOMENTUM: SGD momentum (Nesterov is enabled).
        CHECKPOINT_FOLDER: directory receiving a checkpoint each time the
            validation accuracy improves.

    Returns:
        The best validation accuracy observed (0 if no epoch was run).

    Side effects: prints per-epoch metrics and writes
    "res50<accuracy>.pth" files into CHECKPOINT_FOLDER.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = torchvision.models.resnet50(num_classes=10).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(),weight_decay=REG,lr=INITIAL_LR,momentum=MOMENTUM,nesterov=True)

    best_val_acc = 0
    current_learning_rate = INITIAL_LR

    print("==> Training starts!")
    print("="*50)
    for i in range(0, EPOCHS):
        # Step schedule: decay the learning rate at the configured epochs.
        if i in DECAY_EPOCHS:
            current_learning_rate = current_learning_rate * DECAY
            for param_group in optimizer.param_groups:
                param_group['lr'] = current_learning_rate

        print("Epoch %d:" %i)
        avg_loss, avg_acc = _run_epoch(model, train_loader, criterion, device, optimizer)
        print("Training loss: %.4f, Training accuracy: %.4f" %(avg_loss, avg_acc))

        avg_loss, avg_acc = _run_epoch(model, val_loader, criterion, device)
        print("Validation loss: %.4f, Validation accuracy: %.4f" % (avg_loss, avg_acc))

        # Checkpoint only on a strict improvement of the validation accuracy.
        if avg_acc > best_val_acc:
            best_val_acc = avg_acc
            # exist_ok avoids the check-then-create race of os.path.exists.
            os.makedirs(CHECKPOINT_FOLDER, exist_ok=True)
            print("Saving ...")
            state = {'state_dict': model.state_dict(),
                    'epoch': i,
                    }
            torch.save(state, os.path.join(CHECKPOINT_FOLDER, "res50"+str(avg_acc)+'.pth'))

    print("="*50)
    print(f"==> Optimization finished! Best validation accuracy: {best_val_acc:.4f}")
    return best_val_acc


In [10]:
# Run with step decay at epochs 60 / 120 / 180.
# NOTE(review): REG=0.006 is 10x the function default (0.0006) -- confirm this is intended.
train_res50(
    train_loader,
    val_loader,
    INITIAL_LR=0.1,
    REG=0.006,
    DECAY_EPOCHS=[60, 120, 180],
    DECAY=0.1,
    EPOCHS=200,
)

==> Training starts!
Epoch 0:
Training loss: 3.3901, Training accuracy: 0.1704
Validation loss: 2.0135, Validation accuracy: 0.2316
Saving ...
Epoch 1:
Training loss: 1.7112, Training accuracy: 0.3553
Validation loss: 1.8202, Validation accuracy: 0.3739
Saving ...
Epoch 2:
Training loss: 1.6265, Training accuracy: 0.3995
Validation loss: 1.7643, Validation accuracy: 0.3522
Epoch 3:
Training loss: 1.5870, Training accuracy: 0.4148
Validation loss: 1.6090, Validation accuracy: 0.4210
Saving ...
Epoch 4:
Training loss: 1.5616, Training accuracy: 0.4289
Validation loss: 1.8407, Validation accuracy: 0.3502
Epoch 5:
Training loss: 1.5455, Training accuracy: 0.4374
Validation loss: 1.6466, Validation accuracy: 0.4002
Epoch 6:
Training loss: 1.5251, Training accuracy: 0.4489
Validation loss: 2.1092, Validation accuracy: 0.3114
Epoch 7:
Training loss: 1.5193, Training accuracy: 0.4482
Validation loss: 1.9360, Validation accuracy: 0.3347
Epoch 8:
Training loss: 1.5059, Training accuracy: 0.4565


0.8264

In [None]:
# Run with weight decay 4e-4 and step decay at epochs 40 / 80 / 160.
train_res50(
    train_loader,
    val_loader,
    INITIAL_LR=0.1,
    REG=4e-4,
    DECAY_EPOCHS=[40, 80, 160],
    DECAY=0.1,
    EPOCHS=200,
)

==> Training starts!
Epoch 0:
Training loss: 5.3251, Training accuracy: 0.1230
Validation loss: 2.3692, Validation accuracy: 0.1009
Saving ...
Epoch 1:
Training loss: 2.0562, Training accuracy: 0.2116
Validation loss: 2.0630, Validation accuracy: 0.2581
Saving ...
Epoch 2:
Training loss: 1.8195, Training accuracy: 0.3111
Validation loss: 1.7196, Validation accuracy: 0.3611
Saving ...
Epoch 3:
Training loss: 1.6759, Training accuracy: 0.3715
Validation loss: 1.5940, Validation accuracy: 0.4056
Saving ...
Epoch 4:
Training loss: 1.5640, Training accuracy: 0.4199
Validation loss: 1.4656, Validation accuracy: 0.4695
Saving ...
Epoch 5:
Training loss: 1.4678, Training accuracy: 0.4612
Validation loss: 1.4877, Validation accuracy: 0.4754
Saving ...
Epoch 6:
Training loss: 1.3831, Training accuracy: 0.4932
Validation loss: 1.4731, Validation accuracy: 0.4892
Saving ...
Epoch 7:
Training loss: 1.2886, Training accuracy: 0.5359
Validation loss: 1.2471, Validation accuracy: 0.5529
Saving ...
Epo