In [1]:
import argparse
import os, sys
import time
import datetime
from tqdm import tqdm_notebook as tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
class Softloss(nn.Module):
    '''
    Temperature-scaled KL-divergence loss between two sets of logits.

    `forward(x, y)` softens both inputs by dividing by the temperature `T`,
    computes KL(softmax(y/T) || softmax(x/T)) averaged over the batch, and
    rescales the result by T*T.
    '''
    def __init__(self,T=4,loss_portion=[1,0,0]) -> None:
        '''
        T: temperature
        loss_portion: weights for (KLD, cosine, mse); only the KLD weight
                      (index 0) is used by forward().
        '''
        super(Softloss,self).__init__()
        self.T=T
        # Copy the weights: storing the argument itself would make every
        # default-constructed instance share (and be able to corrupt) the one
        # list object that serves as the default value.
        self.portion=list(loss_portion)
    def forward(self,x,y):
        '''
        x: logits whose softened distribution is pulled towards that of `y`.
        y: logits providing the target distribution.
        Both are normalised over the last dimension.
        Returns a scalar tensor.
        '''
        # F.kl_div expects log-probabilities as input and probabilities as target.
        soft_x=F.log_softmax(x/self.T,dim=-1)
        soft_y=F.softmax(y/self.T,dim=-1)
        loss=self.portion[0]*F.kl_div(soft_x,soft_y,reduction="batchmean")
        # Multiply by T^2 after dividing the logits by T.
        return loss*self.T*self.T


In [3]:

import torchvision
class ResNet_self_distil(nn.Module):
    """
    ResNet-50 (10 output classes) with three auxiliary classifier branches.

    A branch is attached to the output of each of `layer1`, `layer2` and
    `layer3` of the backbone. Every branch is a Bottleneck block with a 1x1
    conv + batch-norm shortcut producing 2048 channels, followed by global
    average pooling and a 2048 -> 10 linear layer.
    """

    def __init__(self):
        super(ResNet_self_distil, self).__init__()
        self.resnet50 = torchvision.models.resnet50(num_classes=10)
        # Input widths 256 / 512 / 1024 are the channel counts fed to the
        # branches hanging off layer1 / layer2 / layer3 respectively.
        self.neck1, self.pool1, self.fc1 = self._make_branch(256)
        self.neck2, self.pool2, self.fc2 = self._make_branch(512)
        self.neck3, self.pool3, self.fc3 = self._make_branch(1024)

    @staticmethod
    def _make_branch(in_channels):
        """Build the (bottleneck, pooling, classifier) triple for one auxiliary branch."""
        shortcut = nn.Sequential(
            nn.Conv2d(in_channels, 2048, kernel_size=(1, 1), stride=1, bias=False),
            nn.BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
        )
        neck = torchvision.models.resnet.Bottleneck(in_channels, 512, downsample=shortcut)
        pool = nn.AdaptiveAvgPool2d((1, 1))
        head = nn.Linear(in_features=2048, out_features=10, bias=True)
        return neck, pool, head

    def forward(self, x, is_eval=True):
        """
        With `is_eval` true (the default) only the backbone runs and its
        logits are returned.

        Otherwise returns `(feature, out)`: two lists of four tensors each,
        ordered from the layer1 branch to the backbone's own head. `feature`
        holds the flattened pooled embeddings and `out` the matching logits.
        """
        backbone = self.resnet50
        if is_eval:
            return backbone(x)

        # Stem of the backbone.
        x = backbone.maxpool(backbone.relu(backbone.bn1(backbone.conv1(x))))

        feature, out = [], []
        branches = (
            (backbone.layer1, self.neck1, self.pool1, self.fc1),
            (backbone.layer2, self.neck2, self.pool2, self.fc2),
            (backbone.layer3, self.neck3, self.pool3, self.fc3),
        )
        for stage, neck, pool, head in branches:
            x = stage(x)
            embedding = torch.flatten(pool(neck(x)), 1)
            feature.append(embedding)
            out.append(head(embedding))

        # Deepest path: the backbone's own final stage, pooling and classifier.
        x = backbone.avgpool(backbone.layer4(x))
        final_embedding = torch.flatten(x, 1)
        feature.append(final_embedding)
        out.append(backbone.fc(final_embedding))
        return feature, out


In [4]:
# useful libraries
import torchvision
import torchvision.transforms as transforms

#############################################
# your code here
# specify preprocessing function
# Per-channel mean / std handed to Normalize in both pipelines.
_NORM_MEAN = (0.4914, 0.4822, 0.4465)
_NORM_STD = (0.2023, 0.1994, 0.2010)

# Evaluation pipeline: tensor conversion followed by normalisation only.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
])

# Training pipeline: random 32x32 crop after 4-pixel padding and a random
# horizontal flip, then the same conversion and normalisation as above.
transform_train = transforms.Compose([
    transforms.RandomCrop((32, 32), padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
])

# Validation reuses the plain (augmentation-free) pipeline object.
transform_val = transform
#############################################
# do NOT change these
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader

# a few arguments, do NOT change these
DATA_ROOT = "./data"
TRAIN_BATCH_SIZE = 64
VAL_BATCH_SIZE = 100

# CIFAR10 splits, downloaded into DATA_ROOT when not already present.
# The training split gets the augmenting pipeline, the test split the plain one.
train_set = CIFAR10(root=DATA_ROOT, train=True, download=True, transform=transform_train)
val_set = CIFAR10(root=DATA_ROOT, train=False, download=True, transform=transform_val)

# Only the training loader shuffles; both use two worker processes.
train_loader = DataLoader(train_set, batch_size=TRAIN_BATCH_SIZE, shuffle=True, num_workers=2)
val_loader = DataLoader(val_set, batch_size=VAL_BATCH_SIZE, shuffle=False, num_workers=2)
#############################################

Files already downloaded and verified
Files already downloaded and verified


In [5]:
import torch.nn as nn
import torch.optim as optim
def train(T,lamb,alpha,train_loader,val_loader,INITIAL_LR=0.1,DECAY_EPOCHS=[40,100,180],DECAY=0.1,EPOCHS=200,REG = 5e-4):
    """
    Train a freshly initialised ResNet_self_distil model and return its best
    validation accuracy.

    For each batch the model is run with all branches enabled and the loss is

        ( lamb * sum_k MSE(feature[k], feature[3])
          + sum_k [ (1 - alpha) * CE(out[k], y) + alpha * Softloss(out[k], out[3]) ]
          + CE(out[3], y) ) / 5          for k in {0, 1, 2},

    where index 3 is the backbone's own head. Validation uses the backbone
    head only (the model's default forward path).

    Parameters
    ----------
    T : temperature passed to Softloss.
    lamb : weight of the feature-matching MSE terms.
    alpha : weight of the distillation term for the three auxiliary heads.
        It is held at 0 for epochs 0-9 and switched to this value at epoch 10.
    train_loader, val_loader : iterables yielding (inputs, targets) batches
        and exposing `.dataset` and `len()`; accuracies are divided by
        `len(loader.dataset)`.
    INITIAL_LR : starting learning rate for SGD.
    DECAY_EPOCHS : when to multiply the learning rate by DECAY. Either an
        iterable of epoch indices, or an int N meaning "every N epochs"
        (N <= 0 disables decay). Epoch 0 never triggers a decay.
    DECAY : multiplicative learning-rate factor.
    EPOCHS : number of epochs to run.
    REG : weight decay (L2 strength) given to SGD.

    Returns
    -------
    The highest validation accuracy observed (0 if EPOCHS is 0).

    Side effects
    ------------
    Prints progress, and whenever validation accuracy improves writes
    `{'state_dict': <backbone weights>, 'epoch': <epoch>}` to
    ./saved_model/self_distilled.pth. Only `model.resnet50` is saved; the
    auxiliary branches are not.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = ResNet_self_distil().to(device)

    MOMENTUM = 0.9
    # Epoch at which the distillation weight is switched from 0 to `alpha`.
    ALPHA_WARMUP_EPOCHS = 10
    # the folder where the trained model is saved
    CHECKPOINT_FOLDER = "./saved_model"

    target_alpha = alpha
    alpha = 0

    # Normalise the schedule once. A bare int is not iterable, so the former
    # `epoch in DECAY_EPOCHS` test raised TypeError for calls such as
    # DECAY_EPOCHS=1; the notebook's recorded runs with that value show a
    # decay at every epoch, hence the "every N epochs" interpretation.
    if isinstance(DECAY_EPOCHS, int):
        decay_period = DECAY_EPOCHS

        def is_decay_epoch(epoch):
            return decay_period > 0 and epoch % decay_period == 0
    else:
        decay_milestones = frozenset(DECAY_EPOCHS)

        def is_decay_epoch(epoch):
            return epoch in decay_milestones

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(),lr=INITIAL_LR,momentum=MOMENTUM,weight_decay=REG,nesterov=True)
    soft_criterion = Softloss(T)

    best_val_acc = 0
    current_learning_rate = INITIAL_LR

    print("==> Training starts!")
    print("="*50)
    for epoch in range(0, EPOCHS):
        if epoch == ALPHA_WARMUP_EPOCHS:
            alpha = target_alpha

        # handle the learning rate scheduler.
        if epoch != 0 and is_decay_epoch(epoch):
            current_learning_rate = current_learning_rate * DECAY
            for param_group in optimizer.param_groups:
                param_group['lr'] = current_learning_rate
            print("Current learning rate has decayed to %f" %current_learning_rate)

        model.train()
        print("Epoch %d:" %epoch)

        correct_examples = 0
        train_loss = 0

        # Train the model for 1 epoch.
        for inputs, targets in train_loader:
            inputs = inputs.to(device)
            targets = targets.to(device).long()

            # Full forward pass: per-branch embeddings and logits.
            feature, out = model(inputs, False)

            # NOTE(review): feature[3] / out[3] are not detached, so the
            # feature-matching and distillation terms also send gradients
            # into the deepest path - confirm this is intended.
            loss = lamb*(F.mse_loss(feature[0],feature[3])+F.mse_loss(feature[1],feature[3])+F.mse_loss(feature[2],feature[3]))
            # The index must not reuse the epoch variable: doing so made the
            # checkpoint below always record epoch 2.
            for branch in range(3):
                loss += (1-alpha)*criterion(out[branch],targets)+alpha*soft_criterion(out[branch],out[3])
            loss += criterion(out[3],targets)
            loss /= 5

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

            # Training accuracy is measured on the deepest head only.
            correct_examples += torch.sum(out[3].argmax(-1)==targets).item()

        total_examples = len(train_loader.dataset)
        avg_loss = train_loss / len(train_loader)
        avg_acc = correct_examples / total_examples
        print("Training loss: %.4f, Training accuracy: %.4f" %(avg_loss, avg_acc))

        # Validate on the validation dataset
        model.eval()
        correct_examples = 0
        val_loss = 0

        # disable gradient during validation, which can save GPU memory
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs = inputs.to(device)
                targets = targets.to(device).long()
                # Default forward path: backbone logits only.
                out = model(inputs)
                loss = criterion(out,targets)
                val_loss += loss.item()
                correct_examples += torch.sum(out.argmax(-1)==targets).item()

        total_examples = len(val_loader.dataset)
        avg_loss = val_loss / len(val_loader)
        avg_acc = correct_examples / total_examples
        print("Validation loss: %.4f, Validation accuracy: %.4f" % (avg_loss, avg_acc))

        # save the model checkpoint
        if avg_acc > best_val_acc:
            best_val_acc = avg_acc
            os.makedirs(CHECKPOINT_FOLDER, exist_ok=True)
            print("Saving ...")
            state = {'state_dict': model.resnet50.state_dict(),
                    'epoch': epoch,
                    }
            torch.save(state, os.path.join(CHECKPOINT_FOLDER, 'self_distilled.pth'))

        print('')

    print("="*50)
    print(f"==> Optimization finished! Best validation accuracy: {best_val_acc:.4f}")
    return best_val_acc

In [6]:
# T=4, lamb=0.1, alpha=0.6, LR 0.01 multiplied by 0.96 at every epoch after the first.
# `train` tests `epoch in DECAY_EPOCHS`, which raises TypeError for a bare int, so the
# per-epoch schedule seen in the log below is spelled out as explicit epoch indices.
train(T=4,lamb=0.1,alpha=0.6,train_loader=train_loader,val_loader=val_loader,INITIAL_LR=0.01,DECAY_EPOCHS=range(1,200),DECAY=0.96,EPOCHS=200)

==> Training starts!
Epoch 0:
Training loss: 10.0389, Training accuracy: 0.1275
Validation loss: 4.6680, Validation accuracy: 0.1670
Saving ...

Current learning rate has decayed to 0.009600
Epoch 1:
Training loss: 6.2845, Training accuracy: 0.1972
Validation loss: 10.8242, Validation accuracy: 0.2645
Saving ...

Current learning rate has decayed to 0.009216
Epoch 2:
Training loss: 5.2151, Training accuracy: 0.2898
Validation loss: 3.7247, Validation accuracy: 0.3487
Saving ...

Current learning rate has decayed to 0.008847
Epoch 3:
Training loss: 4.4960, Training accuracy: 0.3429
Validation loss: 1.9922, Validation accuracy: 0.3886
Saving ...

Current learning rate has decayed to 0.008493
Epoch 4:
Training loss: 3.8123, Training accuracy: 0.3762
Validation loss: 1.6493, Validation accuracy: 0.4204
Saving ...

Current learning rate has decayed to 0.008154
Epoch 5:
Training loss: 3.5777, Training accuracy: 0.4144
Validation loss: 1.5861, Validation accuracy: 0.4486
Saving ...

Current l

0.8097

In [7]:
# T=2, lamb=0.1, alpha=0.6, LR 0.01, weight decay 5e-4.
# `train` tests `epoch in DECAY_EPOCHS`, which raises TypeError for a bare int, so the
# schedule is given as explicit epoch indices.
# NOTE(review): assumes the original 60 meant "every 60 epochs"; use [60] for a single drop.
train(T=2,lamb=0.1,alpha=0.6,train_loader=train_loader,val_loader=val_loader,INITIAL_LR=0.01,DECAY_EPOCHS=range(60,200,60),DECAY=0.1,EPOCHS=200,REG = 5e-4)

==> Training starts!
Epoch 0:
Training loss: 6.6577, Training accuracy: 0.2007
Validation loss: 1.9295, Validation accuracy: 0.3176
Saving ...

Epoch 1:
Training loss: 4.3437, Training accuracy: 0.3484
Validation loss: 5.0593, Validation accuracy: 0.3881
Saving ...

Epoch 2:
Training loss: 3.8511, Training accuracy: 0.4049
Validation loss: 1.8465, Validation accuracy: 0.4371
Saving ...

Epoch 3:
Training loss: 3.5779, Training accuracy: 0.4458
Validation loss: 1.7672, Validation accuracy: 0.4559
Saving ...

Epoch 4:
Training loss: 3.3376, Training accuracy: 0.4814
Validation loss: 1.8684, Validation accuracy: 0.5180
Saving ...

Epoch 5:
Training loss: 3.0764, Training accuracy: 0.5225
Validation loss: 4.6228, Validation accuracy: 0.5256
Saving ...

Epoch 6:
Training loss: 2.8768, Training accuracy: 0.5546
Validation loss: 2.4551, Validation accuracy: 0.5908
Saving ...

Epoch 7:
Training loss: 2.6774, Training accuracy: 0.5887
Validation loss: 1.6507, Validation accuracy: 0.6262
Saving 

0.8772

In [8]:
# T=2, lamb=0.1, alpha=0.6, LR 0.01 multiplied by 0.96 at every epoch after the first.
# `train` tests `epoch in DECAY_EPOCHS`, which raises TypeError for a bare int, so the
# per-epoch schedule seen in the log below is spelled out as explicit epoch indices.
train(T=2,lamb=0.1,alpha=0.6,train_loader=train_loader,val_loader=val_loader,INITIAL_LR=0.01,DECAY_EPOCHS=range(1,200),DECAY=0.96,EPOCHS=200)

==> Training starts!
Epoch 0:
Training loss: 8.5146, Training accuracy: 0.1418
Validation loss: 4.4801, Validation accuracy: 0.2027
Saving ...

Current learning rate has decayed to 0.009600
Epoch 1:
Training loss: 4.9650, Training accuracy: 0.2760
Validation loss: 2.1240, Validation accuracy: 0.3355
Saving ...

Current learning rate has decayed to 0.009216
Epoch 2:
Training loss: 4.9722, Training accuracy: 0.2874
Validation loss: 3.5183, Validation accuracy: 0.3463
Saving ...

Current learning rate has decayed to 0.008847
Epoch 3:
Training loss: 4.4124, Training accuracy: 0.3490
Validation loss: 2.1100, Validation accuracy: 0.3947
Saving ...

Current learning rate has decayed to 0.008493
Epoch 4:
Training loss: 4.0438, Training accuracy: 0.3839
Validation loss: 1.5908, Validation accuracy: 0.4318
Saving ...

Current learning rate has decayed to 0.008154
Epoch 5:
Training loss: 3.7963, Training accuracy: 0.4159
Validation loss: 5.4809, Validation accuracy: 0.4244

Current learning rate 

KeyboardInterrupt: 

In [6]:
# T=2, lamb=0.1, alpha=0.6, LR 0.01, weight decay 5e-5.
# `train` tests `epoch in DECAY_EPOCHS`, which raises TypeError for a bare int, so the
# schedule is given as explicit epoch indices.
# NOTE(review): assumes the original 60 meant "every 60 epochs"; use [60] for a single drop.
train(T=2,lamb=0.1,alpha=0.6,train_loader=train_loader,val_loader=val_loader,INITIAL_LR=0.01,DECAY_EPOCHS=range(60,200,60),DECAY=0.1,EPOCHS=200,REG = 5e-5)

==> Training starts!
Epoch 0:
Training loss: 7.4699, Training accuracy: 0.3456
Validation loss: 1.7335, Validation accuracy: 0.4443
Saving ...

Epoch 1:
Training loss: 5.9567, Training accuracy: 0.4718
Validation loss: 1.3902, Validation accuracy: 0.5137
Saving ...

Epoch 2:
Training loss: 5.2342, Training accuracy: 0.5369
Validation loss: 1.6304, Validation accuracy: 0.5766
Saving ...

Epoch 3:
Training loss: 4.7386, Training accuracy: 0.5849
Validation loss: 1.2054, Validation accuracy: 0.6172
Saving ...

Epoch 4:
Training loss: 4.3893, Training accuracy: 0.6161
Validation loss: 1.5881, Validation accuracy: 0.6299
Saving ...

Epoch 5:
Training loss: 2.8000, Training accuracy: 0.6154
Validation loss: 1.1984, Validation accuracy: 0.5752

Epoch 6:
Training loss: 3.0001, Training accuracy: 0.5600
Validation loss: 1.2919, Validation accuracy: 0.5789

Epoch 7:
Training loss: 2.6126, Training accuracy: 0.6102
Validation loss: 1.1215, Validation accuracy: 0.6632
Saving ...

Epoch 8:
Training

0.8523

In [7]:
# T=3, lamb=0.2, alpha=0.4, LR 0.1, weight decay 5e-4.
# `train` tests `epoch in DECAY_EPOCHS`, which raises TypeError for a bare int, so the
# schedule is given as explicit epoch indices.
# NOTE(review): assumes the original 60 meant "every 60 epochs"; use [60] for a single drop.
train(T=3,lamb=0.2,alpha=0.4,train_loader=train_loader,val_loader=val_loader,INITIAL_LR=0.1,DECAY_EPOCHS=range(60,200,60),DECAY=0.1,EPOCHS=200,REG = 5e-4)

==> Training starts!
Epoch 0:
Training loss: 1.6775, Training accuracy: 0.2894
Validation loss: 1.4901, Validation accuracy: 0.4511
Saving ...

Epoch 1:
Training loss: 1.1993, Training accuracy: 0.4661
Validation loss: 1.3051, Validation accuracy: 0.5334
Saving ...

Epoch 2:
Training loss: 1.0120, Training accuracy: 0.5504
Validation loss: 1.2628, Validation accuracy: 0.5511
Saving ...

Epoch 3:
Training loss: 0.8768, Training accuracy: 0.6080
Validation loss: 1.0583, Validation accuracy: 0.6335
Saving ...

Epoch 4:
Training loss: 0.7839, Training accuracy: 0.6512
Validation loss: 1.0361, Validation accuracy: 0.6401
Saving ...

Epoch 5:
Training loss: 0.7124, Training accuracy: 0.6858
Validation loss: 0.8621, Validation accuracy: 0.6972
Saving ...

Epoch 6:
Training loss: 0.6660, Training accuracy: 0.7079
Validation loss: 0.8567, Validation accuracy: 0.7091
Saving ...

Epoch 7:
Training loss: 0.6324, Training accuracy: 0.7228
Validation loss: 0.8144, Validation accuracy: 0.7197
Saving 

0.9055

In [None]:
# T=3, lamb=0.2, alpha=0.4; LR 0.1 multiplied by 0.1 at epochs 40, 100 and 180; SGD weight decay 5e-3.
train(T=3,lamb=0.2,alpha=0.4,train_loader=train_loader,val_loader=val_loader,INITIAL_LR=0.1,DECAY_EPOCHS=[40,100,180],DECAY=0.1,EPOCHS=200,REG = 5e-3)

==> Training starts!
Epoch 0:
Training loss: 1.5443, Training accuracy: 0.3033
Validation loss: 1.7406, Validation accuracy: 0.3394
Saving ...

Epoch 1:
Training loss: 1.3322, Training accuracy: 0.3849
Validation loss: 1.9870, Validation accuracy: 0.2698

Epoch 2:
Training loss: 1.3192, Training accuracy: 0.3916
Validation loss: 2.2902, Validation accuracy: 0.2238

Epoch 3:
Training loss: 1.3180, Training accuracy: 0.3918
Validation loss: 2.9051, Validation accuracy: 0.2243

Epoch 4:
Training loss: 1.3207, Training accuracy: 0.3894
Validation loss: 3.3420, Validation accuracy: 0.1832

Epoch 5:
Training loss: 1.3151, Training accuracy: 0.3957
Validation loss: 1.7995, Validation accuracy: 0.3477
Saving ...

Epoch 6:
Training loss: 1.3144, Training accuracy: 0.3962
Validation loss: 1.6417, Validation accuracy: 0.3984
Saving ...

Epoch 7:
Training loss: 1.3222, Training accuracy: 0.3901
Validation loss: 1.8657, Validation accuracy: 0.3375

Epoch 8:
Training loss: 1.3161, Training accuracy:

In [6]:
# T=3, lamb=0.2, alpha=0.4; LR 0.1 multiplied by 0.1 at epochs 40, 100 and 180; SGD weight decay 8e-4.
train(T=3,lamb=0.2,alpha=0.4,train_loader=train_loader,val_loader=val_loader,INITIAL_LR=0.1,DECAY_EPOCHS=[40,100,180],DECAY=0.1,EPOCHS=200,REG = 8e-4)

==> Training starts!
Epoch 0:
Training loss: 1.6790, Training accuracy: 0.2840
Validation loss: 1.5954, Validation accuracy: 0.3951
Saving ...

Epoch 1:
Training loss: 1.1758, Training accuracy: 0.4591
Validation loss: 1.3836, Validation accuracy: 0.4990
Saving ...

Epoch 2:
Training loss: 1.0274, Training accuracy: 0.5286
Validation loss: 1.4362, Validation accuracy: 0.4768

Epoch 3:
Training loss: 0.9258, Training accuracy: 0.5797
Validation loss: 1.3440, Validation accuracy: 0.5389
Saving ...

Epoch 4:
Training loss: 0.8661, Training accuracy: 0.6098
Validation loss: 1.1956, Validation accuracy: 0.5925
Saving ...

Epoch 5:
Training loss: 0.8287, Training accuracy: 0.6266
Validation loss: 1.3217, Validation accuracy: 0.5501

Epoch 6:
Training loss: 0.8040, Training accuracy: 0.6423
Validation loss: 1.0693, Validation accuracy: 0.6262
Saving ...

Epoch 7:
Training loss: 0.7859, Training accuracy: 0.6481
Validation loss: 1.1506, Validation accuracy: 0.6186

Epoch 8:
Training loss: 0.77

KeyboardInterrupt: 

In [7]:
# T=3, lamb=0.2, alpha=0.4; LR 0.1 multiplied by 0.1 at epochs 40, 100 and 180; SGD weight decay 2e-4.
train(T=3,lamb=0.2,alpha=0.4,train_loader=train_loader,val_loader=val_loader,INITIAL_LR=0.1,DECAY_EPOCHS=[40,100,180],DECAY=0.1,EPOCHS=200,REG = 2e-4)

==> Training starts!
Epoch 0:
Training loss: 1.6696, Training accuracy: 0.3068
Validation loss: 6.7633, Validation accuracy: 0.4206
Saving ...

Epoch 1:
Training loss: 1.2285, Training accuracy: 0.4491
Validation loss: 3.8490, Validation accuracy: 0.5173
Saving ...

Epoch 2:
Training loss: 1.0419, Training accuracy: 0.5279
Validation loss: 1.4442, Validation accuracy: 0.5666
Saving ...

Epoch 3:
Training loss: 0.9242, Training accuracy: 0.5801
Validation loss: 1.1496, Validation accuracy: 0.5949
Saving ...

Epoch 4:
Training loss: 0.8377, Training accuracy: 0.6216
Validation loss: 1.0826, Validation accuracy: 0.6309
Saving ...

Epoch 5:
Training loss: 0.7692, Training accuracy: 0.6535
Validation loss: 1.0714, Validation accuracy: 0.6267

Epoch 6:
Training loss: 0.7134, Training accuracy: 0.6761
Validation loss: 0.9235, Validation accuracy: 0.6878
Saving ...

Epoch 7:
Training loss: 0.6729, Training accuracy: 0.6991
Validation loss: 0.8397, Validation accuracy: 0.7084
Saving ...

Epoch 

0.9052

In [None]:
# Feature-MSE weight lamb=0.4 (T=3, alpha=0.4); LR 0.1 multiplied by 0.1 at epochs 40, 80 and 180; weight decay 2e-4.
train(T=3,lamb=0.4,alpha=0.4,train_loader=train_loader,val_loader=val_loader,INITIAL_LR=0.1,DECAY_EPOCHS=[40,80,180],DECAY=0.1,EPOCHS=200,REG = 2e-4)

==> Training starts!
Epoch 0:
Training loss: 1.8158, Training accuracy: 0.2633
Validation loss: 14.3474, Validation accuracy: 0.3707
Saving ...

Epoch 1:
Training loss: 1.3046, Training accuracy: 0.4183
Validation loss: 1.9721, Validation accuracy: 0.4785
Saving ...

Epoch 2:
Training loss: 1.1244, Training accuracy: 0.4933
Validation loss: 1.5144, Validation accuracy: 0.5664
Saving ...

Epoch 3:
Training loss: 1.0003, Training accuracy: 0.5471
Validation loss: 1.1961, Validation accuracy: 0.5859
Saving ...

Epoch 4:
Training loss: 0.9160, Training accuracy: 0.5849
Validation loss: 1.0493, Validation accuracy: 0.6272
Saving ...

Epoch 5:
Training loss: 0.8456, Training accuracy: 0.6163
Validation loss: 1.0138, Validation accuracy: 0.6454
Saving ...

Epoch 6:
Training loss: 0.7892, Training accuracy: 0.6397
Validation loss: 0.9805, Validation accuracy: 0.6659
Saving ...

Epoch 7:
Training loss: 0.7296, Training accuracy: 0.6705
Validation loss: 0.9647, Validation accuracy: 0.6715
Saving

In [7]:
# Feature-MSE weight lamb=0.6 (T=3, alpha=0.4); LR 0.1 multiplied by 0.1 at epochs 40, 80 and 180; weight decay 2e-4.
train(T=3,lamb=0.6,alpha=0.4,train_loader=train_loader,val_loader=val_loader,INITIAL_LR=0.1,DECAY_EPOCHS=[40,80,180],DECAY=0.1,EPOCHS=200,REG = 2e-4)

==> Training starts!
Epoch 0:
Training loss: 1.7158, Training accuracy: 0.2982
Validation loss: 1.6268, Validation accuracy: 0.4093
Saving ...

Epoch 1:
Training loss: 1.2613, Training accuracy: 0.4312
Validation loss: 1.5897, Validation accuracy: 0.4996
Saving ...

Epoch 2:
Training loss: 1.0727, Training accuracy: 0.5139
Validation loss: 1.5931, Validation accuracy: 0.5360
Saving ...

Epoch 3:
Training loss: 0.9597, Training accuracy: 0.5671
Validation loss: 1.1323, Validation accuracy: 0.6010
Saving ...

Epoch 4:
Training loss: 0.8729, Training accuracy: 0.6057
Validation loss: 1.2780, Validation accuracy: 0.6470
Saving ...

Epoch 5:
Training loss: 0.8068, Training accuracy: 0.6373
Validation loss: 1.0624, Validation accuracy: 0.6702
Saving ...

Epoch 6:
Training loss: 0.7460, Training accuracy: 0.6690
Validation loss: 1.1666, Validation accuracy: 0.6683

Epoch 7:
Training loss: 0.7031, Training accuracy: 0.6828
Validation loss: 1.2306, Validation accuracy: 0.6770
Saving ...

Epoch 

0.9033

In [6]:
# Feature-MSE weight lamb=0.8 (T=3, alpha=0.4); LR 0.1 multiplied by 0.1 at epochs 40, 80 and 180; weight decay 2e-4.
train(T=3,lamb=0.8,alpha=0.4,train_loader=train_loader,val_loader=val_loader,INITIAL_LR=0.1,DECAY_EPOCHS=[40,80,180],DECAY=0.1,EPOCHS=200,REG = 2e-4)

==> Training starts!
Epoch 0:
Training loss: 1.8225, Training accuracy: 0.2777
Validation loss: 5.8094, Validation accuracy: 0.4109
Saving ...

Epoch 1:
Training loss: 1.2512, Training accuracy: 0.4368
Validation loss: 6.3970, Validation accuracy: 0.4567
Saving ...

Epoch 2:
Training loss: 1.1004, Training accuracy: 0.5017
Validation loss: 1.3469, Validation accuracy: 0.5337
Saving ...

Epoch 3:
Training loss: 0.9639, Training accuracy: 0.5640
Validation loss: 1.6171, Validation accuracy: 0.5424
Saving ...

Epoch 4:
Training loss: 0.8887, Training accuracy: 0.6002
Validation loss: 1.1405, Validation accuracy: 0.6203
Saving ...

Epoch 5:
Training loss: 0.8315, Training accuracy: 0.6264
Validation loss: 1.3211, Validation accuracy: 0.6123

Epoch 6:
Training loss: 0.7846, Training accuracy: 0.6492
Validation loss: 1.0721, Validation accuracy: 0.6378
Saving ...

Epoch 7:
Training loss: 0.7387, Training accuracy: 0.6692
Validation loss: 1.0491, Validation accuracy: 0.6565
Saving ...

Epoch 

0.9

In [8]:
# T=3, lamb=0.6, alpha=0.4; LR 0.1 multiplied by 0.1 at epochs 40, 80 and 160; SGD weight decay 4e-4.
train(T=3,lamb=0.6,alpha=0.4,train_loader=train_loader,val_loader=val_loader,INITIAL_LR=0.1,DECAY_EPOCHS=[40,80,160],DECAY=0.1,EPOCHS=200,REG = 4e-4)

==> Training starts!
Epoch 0:
Training loss: 1.7610, Training accuracy: 0.3007
Validation loss: 2.4393, Validation accuracy: 0.4025
Saving ...

Epoch 1:
Training loss: 1.2247, Training accuracy: 0.4485
Validation loss: 2.4373, Validation accuracy: 0.5173
Saving ...

Epoch 2:
Training loss: 1.0490, Training accuracy: 0.5260
Validation loss: 1.5810, Validation accuracy: 0.5090

Epoch 3:
Training loss: 0.9566, Training accuracy: 0.5682
Validation loss: 1.3258, Validation accuracy: 0.6096
Saving ...

Epoch 4:
Training loss: 0.8622, Training accuracy: 0.6088
Validation loss: 1.0559, Validation accuracy: 0.6330
Saving ...

Epoch 5:
Training loss: 0.7854, Training accuracy: 0.6442
Validation loss: 1.0209, Validation accuracy: 0.6530
Saving ...

Epoch 6:
Training loss: 0.7350, Training accuracy: 0.6713
Validation loss: 1.0118, Validation accuracy: 0.6522

Epoch 7:
Training loss: 0.7030, Training accuracy: 0.6859
Validation loss: 0.9704, Validation accuracy: 0.6704
Saving ...

Epoch 8:
Training

0.9067

In [6]:
# T=3, lamb=0.6, alpha=0.4; LR 0.1 multiplied by 0.1 at epochs 40, 80 and 160; SGD weight decay 6e-4.
train(T=3,lamb=0.6,alpha=0.4,train_loader=train_loader,val_loader=val_loader,INITIAL_LR=0.1,DECAY_EPOCHS=[40,80,160],DECAY=0.1,EPOCHS=200,REG = 6e-4)

==> Training starts!
Epoch 0:
Training loss: 1.6560, Training accuracy: 0.3239
Validation loss: 3.1435, Validation accuracy: 0.4412
Saving ...

Epoch 1:
Training loss: 1.1971, Training accuracy: 0.4550
Validation loss: 15.7055, Validation accuracy: 0.4792
Saving ...

Epoch 2:
Training loss: 1.0471, Training accuracy: 0.5249
Validation loss: 1.3090, Validation accuracy: 0.5506
Saving ...

Epoch 3:
Training loss: 0.9522, Training accuracy: 0.5690
Validation loss: 1.2610, Validation accuracy: 0.5732
Saving ...

Epoch 4:
Training loss: 0.8725, Training accuracy: 0.6038
Validation loss: 1.0510, Validation accuracy: 0.6388
Saving ...

Epoch 5:
Training loss: 0.8173, Training accuracy: 0.6320
Validation loss: 1.2445, Validation accuracy: 0.5831

Epoch 6:
Training loss: 0.7792, Training accuracy: 0.6509
Validation loss: 1.0436, Validation accuracy: 0.6381

Epoch 7:
Training loss: 0.7497, Training accuracy: 0.6648
Validation loss: 1.1303, Validation accuracy: 0.6205

Epoch 8:
Training loss: 0.7

KeyboardInterrupt: 

In [7]:
# Distillation temperature T=1 (lamb=0.6, alpha=0.4); LR 0.1 multiplied by 0.1 at epochs 40, 80 and 160; weight decay 4e-4.
train(T=1,lamb=0.6,alpha=0.4,train_loader=train_loader,val_loader=val_loader,INITIAL_LR=0.1,DECAY_EPOCHS=[40,80,160],DECAY=0.1,EPOCHS=200,REG = 4e-4)

==> Training starts!
Epoch 0:
Training loss: 1.7486, Training accuracy: 0.3017
Validation loss: 4.0329, Validation accuracy: 0.3838
Saving ...

Epoch 1:
Training loss: 1.2260, Training accuracy: 0.4524
Validation loss: 1.4453, Validation accuracy: 0.5103
Saving ...

Epoch 2:
Training loss: 1.0623, Training accuracy: 0.5177
Validation loss: 1.2197, Validation accuracy: 0.5626
Saving ...

Epoch 3:
Training loss: 0.9551, Training accuracy: 0.5694
Validation loss: 1.5025, Validation accuracy: 0.5047

Epoch 4:
Training loss: 0.8880, Training accuracy: 0.5966
Validation loss: 1.0447, Validation accuracy: 0.6363
Saving ...

Epoch 5:
Training loss: 0.7960, Training accuracy: 0.6396
Validation loss: 1.1691, Validation accuracy: 0.5998

Epoch 6:
Training loss: 0.7477, Training accuracy: 0.6674
Validation loss: 1.1461, Validation accuracy: 0.6127

Epoch 7:
Training loss: 0.7150, Training accuracy: 0.6811
Validation loss: 1.0447, Validation accuracy: 0.6468
Saving ...

Epoch 8:
Training loss: 0.69

0.905

In [6]:
# Distillation temperature T=2 (lamb=0.6, alpha=0.4); LR 0.1 multiplied by 0.1 at epochs 40, 80 and 160; weight decay 4e-4.
train(T=2,lamb=0.6,alpha=0.4,train_loader=train_loader,val_loader=val_loader,INITIAL_LR=0.1,DECAY_EPOCHS=[40,80,160],DECAY=0.1,EPOCHS=200,REG = 4e-4)

==> Training starts!
Epoch 0:
Training loss: 1.7234, Training accuracy: 0.3044
Validation loss: 1.5948, Validation accuracy: 0.4232
Saving ...

Epoch 1:
Training loss: 1.2070, Training accuracy: 0.4560
Validation loss: 2.2621, Validation accuracy: 0.4935
Saving ...

Epoch 2:
Training loss: 1.0354, Training accuracy: 0.5335
Validation loss: 1.3222, Validation accuracy: 0.5404
Saving ...

Epoch 3:
Training loss: 0.9089, Training accuracy: 0.5901
Validation loss: 1.5381, Validation accuracy: 0.5813
Saving ...

Epoch 4:
Training loss: 0.8210, Training accuracy: 0.6293
Validation loss: 1.1514, Validation accuracy: 0.6069
Saving ...

Epoch 5:
Training loss: 0.7639, Training accuracy: 0.6538
Validation loss: 1.0311, Validation accuracy: 0.6438
Saving ...

Epoch 6:
Training loss: 0.7237, Training accuracy: 0.6762
Validation loss: 1.1330, Validation accuracy: 0.6236

Epoch 7:
Training loss: 0.6948, Training accuracy: 0.6895
Validation loss: 0.9606, Validation accuracy: 0.6723
Saving ...

Epoch 

0.9038

In [7]:
# Distillation temperature T=4 (lamb=0.6, alpha=0.4); LR 0.1 multiplied by 0.1 at epochs 40, 80 and 160; weight decay 4e-4.
train(T=4,lamb=0.6,alpha=0.4,train_loader=train_loader,val_loader=val_loader,INITIAL_LR=0.1,DECAY_EPOCHS=[40,80,160],DECAY=0.1,EPOCHS=200,REG = 4e-4)

==> Training starts!
Epoch 0:
Training loss: 1.6047, Training accuracy: 0.3300
Validation loss: 1.7521, Validation accuracy: 0.4322
Saving ...

Epoch 1:
Training loss: 1.1887, Training accuracy: 0.4625
Validation loss: 1.3359, Validation accuracy: 0.5349
Saving ...

Epoch 2:
Training loss: 1.0125, Training accuracy: 0.5414
Validation loss: 1.4947, Validation accuracy: 0.5702
Saving ...

Epoch 3:
Training loss: 0.9082, Training accuracy: 0.5865
Validation loss: 1.3701, Validation accuracy: 0.5515

Epoch 4:
Training loss: 0.8204, Training accuracy: 0.6298
Validation loss: 1.1028, Validation accuracy: 0.6271
Saving ...

Epoch 5:
Training loss: 0.7654, Training accuracy: 0.6551
Validation loss: 1.0836, Validation accuracy: 0.6329
Saving ...

Epoch 6:
Training loss: 0.7258, Training accuracy: 0.6757
Validation loss: 0.9933, Validation accuracy: 0.6625
Saving ...

Epoch 7:
Training loss: 0.6990, Training accuracy: 0.6873
Validation loss: 1.0389, Validation accuracy: 0.6352

Epoch 8:
Training

0.9043

In [None]:
# Distillation temperature T=8 (lamb=0.6, alpha=0.4); LR 0.1 multiplied by 0.1 at epochs 40, 80 and 160; weight decay 4e-4.
train(T=8,lamb=0.6,alpha=0.4,train_loader=train_loader,val_loader=val_loader,INITIAL_LR=0.1,DECAY_EPOCHS=[40,80,160],DECAY=0.1,EPOCHS=200,REG = 4e-4)