In [76]:
from __future__ import print_function
from __future__ import division


import argparse
import time
import sys
import copy
# import warnings
# import random
# import shutil
# import numpy as np
# import os


import torch
import torch.nn as nn
import torch.nn.parallel
# import torch.nn.functional as F

# import torch.backends.cudnn as cudnn
# import torch.distributed as dist
# import torch.optim as optim
# import torch.multiprocessing as mp

import torch.utils.data
import torch.utils.data.distributed

import torchvision
from torchvision import datasets, transforms, models


from torch.autograd import Variable


In [77]:
sys.stdout.write("PyTorch Version: {}\n".format(torch.__version__))
sys.stdout.write("Torchvision Version: {}\n".format(torchvision.__version__))

if torch.cuda.is_available():
    sys.stdout.write('GPU mode \n')
else:
    sys.stdout.write('Warning, CPU mode, pls check\n')

PyTorch Version: 0.4.1
Torchvision Version: 0.2.2
GPU mode 


In [78]:
########################################################################################
########################################################################################
########################################################################################

def str2bool(v):
    if v.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
    elif v.lower() in ('no', 'false', 'f', 'n', '0'):
        return False
    else:
        raise argparse.ArgumentTypeError('Boolean value expected.')

parser = argparse.ArgumentParser(description='DL19_FinalProject_PyTorch')

parser.add_argument('--model', type=str, default='vgg',
                    help='type of cnn ("resnet", "alexnet","vgg","squeezenet","densenet","inception")')

parser.add_argument('--AE-folder', type=str, default='/scratch/by783/DL_Final_models/',
                    help='path to store model files')

parser.add_argument('--AE-file', type=str, default = '190425_raw_vggae_fromscratch_s.pt',
                    help='path to autoencoder')

parser.add_argument('-b', '--batch-size', default=256, type=int,
                    metavar='N',
                    help='mini-batch size (default: 256), this is the total '
                         'batch size of all GPUs on the current node when '
                         'using Data Parallel or Distributed Data Parallel')
parser.add_argument('--save-folder', type=str, default='/beegfs/by783/DL_Final_models/',
                    help='path to save the final model')

parser.add_argument('--save', type=str, default='model.pt',
                    help='path to save the final model')
parser.add_argument('--num-classes', type=int, default=1000,
                    help='number of classes')
parser.add_argument("--feature-pinning", type=str, default='True',
                    help="pin all the conv layers.")
parser.add_argument('--epochs', type=int, default=50,
                    help='upper epoch limit')
parser.add_argument('--lr', type=float, default=0.001,
                    help='learning rate')
# parser.add_argument('--noise-level', type=float, default=0.3,
#                     help='add noise to input')
# no noise added now
parser.add_argument('--dataset-path', type=str, default='/beegfs/by783/DL_Final/ssl_data_96',
                    help='path to dataset')

#args = parser.parse_args()
args=parser.parse_args("--model vgg --model-file 190504_SDvggAE_D01.pt --batch-size 512 --feature-pinning True --save 190505_try2 --epochs 50 --lr 0.001 ".split())
########################################################################################

model_name = args.model

model_load_path = args.AE_folder + args.AE_file

save_path = args.save_folder + args.save

feature_pinning=str2bool(args.feature_pinning)
num_classes = args.num_classes

num_epochs = args.epochs
loader_batch_size = args.batch_size
loader_image_path = args.dataset_path
# noise_level = args.noise_level


########################################################################################
########################################################################################
########################################################################################

usage: ipykernel_launcher.py [-h] [--model MODEL] [--AE-folder AE_FOLDER]
                             [--AE-file AE_FILE] [-b N]
                             [--save-folder SAVE_FOLDER] [--save SAVE]
                             [--num-classes NUM_CLASSES]
                             [--feature-pinning FEATURE_PINNING]
                             [--epochs EPOCHS] [--lr LR]
                             [--dataset-path DATASET_PATH]
ipykernel_launcher.py: error: unrecognized arguments: --model-file 190504_SDvggAE_D01.pt


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [79]:
def image_loader(path, batch_size):
    transform = transforms.Compose(
        [
            #transforms.Resize(input_size),
            #transforms.CenterCrop(input_size),
            # use model fitted with the image size, so no need to resize
            transforms.ToTensor(),
            transforms.Normalize([0.502, 0.474, 0.426], [0.227, 0.222, 0.226])
            # https://pytorch.org/docs/stable/torchvision/transforms.html
            # [mean],[std] for different channels
        ]
    )
    sup_train_data = datasets.ImageFolder('{}/{}/train'.format(path, 'supervised'), transform=transform)
    sup_val_data = datasets.ImageFolder('{}/{}/val'.format(path, 'supervised'), transform=transform)
    unsup_data = datasets.ImageFolder('{}/{}/'.format(path, 'unsupervised'), transform=transform)
    # source code: https://github.com/pytorch/vision/blob/master/torchvision/datasets/folder.py
    # Main idea:
    data_loader_sup_train = torch.utils.data.DataLoader(
        sup_train_data,
        batch_size=batch_size,
        shuffle=True,
        num_workers=0
    )
    data_loader_sup_val = torch.utils.data.DataLoader(
        sup_val_data,
        batch_size=batch_size,
        shuffle=True,
        num_workers=0
    )
    data_loader_unsup = torch.utils.data.DataLoader(
        unsup_data,
        batch_size=batch_size,
        shuffle=True,
        num_workers=0
    )

    print('sup_train_data.class_to_idx==sup_val_data.class_to_idx: ',
          sup_train_data.class_to_idx == sup_val_data.class_to_idx)

    return data_loader_sup_train, data_loader_sup_val, data_loader_unsup, sup_train_data.class_to_idx

In [80]:
########################################################################################
########################################################################################
########################################################################################

class CDAutoEncoder(nn.Module):
    r"""
    Convolutional denoising autoencoder layer for stacked autoencoders.
    This module is automatically trained when in model.training is True.
    Args:
        input_size: The number of features in the input
        output_size: The number of features to output
        stride: Stride of the convolutional layers.
    """

    def __init__(self, input_size, output_size, conv_num=1, criterion=nn.MSELoss(), learning_rate=0.01):
        super(CDAutoEncoder, self).__init__()
        if conv_num == 2:
            self.forward_pass = nn.Sequential(
                nn.Conv2d(input_size, output_size, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
                nn.BatchNorm2d(output_size, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
                nn.ReLU(inplace=True),
                nn.Conv2d(output_size, output_size, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
                nn.BatchNorm2d(output_size, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
            )
            self.backward_pass = nn.Sequential(
                nn.ConvTranspose2d(output_size, output_size, kernel_size=(2, 2), stride=(2, 2)),
                nn.BatchNorm2d(output_size, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
                nn.ReLU(inplace=True),
                nn.ConvTranspose2d(output_size, input_size, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
                nn.BatchNorm2d(input_size, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
                nn.ReLU(inplace=True)
            )
        if conv_num == 1:
            self.forward_pass = nn.Sequential(
                nn.Conv2d(input_size, output_size, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
                nn.BatchNorm2d(output_size, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
            )
            self.backward_pass = nn.Sequential(
                nn.ConvTranspose2d(output_size, input_size, kernel_size=(2, 2), stride=(2, 2)),
                nn.BatchNorm2d(input_size, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
                nn.ReLU(inplace=True)
            )

        self.criterion = criterion
        self.optimizer = torch.optim.SGD(self.parameters(), lr=learning_rate, momentum=0.9)

    def forward(self, x):
        # Train each autoencoder individually
        x = x.detach()
        # Add noise, but use the original lossless input as the target.SGD(self.parameters(), lr=learning_rate, momentum=0.9)
        x_noisy = x * (Variable(x.data.new(x.size()).normal_(0, 0.1)) > -.1).type_as(x)
        #         print('forward: x: ',x.shape)
        y = self.forward_pass(x_noisy)

        if self.training:
            x_reconstruct = self.backward_pass(y)
            #             print('forward: x_re: ',x_reconstruct.shape)
            loss = self.criterion(x_reconstruct, Variable(x.data, requires_grad=False))
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

        return y.detach()

    def reconstruct(self, x):
        return self.backward_pass(x)


class StackedAutoEncoder(nn.Module):
    r"""
    A stacked autoencoder made from the convolutional denoising autoencoders above.
    Each autoencoder is trained independently and at the same time.
    """

    def __init__(self, criterion=nn.MSELoss(), learning_rate=0.01):
        super(StackedAutoEncoder, self).__init__()

        self.ae1 = CDAutoEncoder(3, 64, conv_num=1, criterion=criterion, learning_rate=learning_rate)
        self.ae2 = CDAutoEncoder(64, 128, conv_num=1, criterion=criterion, learning_rate=learning_rate)
        self.ae3 = CDAutoEncoder(128, 256, conv_num=2, criterion=criterion, learning_rate=learning_rate)
        self.ae4 = CDAutoEncoder(256, 512, conv_num=2, criterion=criterion, learning_rate=learning_rate)
        self.ae5 = CDAutoEncoder(512, 512, conv_num=2, criterion=criterion, learning_rate=learning_rate)

    def forward(self, x):
        a1 = self.ae1(x)
        a2 = self.ae2(a1)
        a3 = self.ae3(a2)
        a4 = self.ae4(a3)
        a5 = self.ae5(a4)

        if self.training:
            return a5, self.reconstruct(a5)

        else:
            return a5, self.reconstruct(a5)

    def reconstruct(self, x):
        a4_reconstruct = self.ae5.reconstruct(x)
        a3_reconstruct = self.ae4.reconstruct(a4_reconstruct)
        a2_reconstruct = self.ae3.reconstruct(a3_reconstruct)
        a1_reconstruct = self.ae2.reconstruct(a2_reconstruct)
        x_reconstruct = self.ae1.reconstruct(a1_reconstruct)
        return x_reconstruct

########################################################################################

In [81]:
try:
    model_ae=torch.load(model_load_path).module
except:
    model_ae=torch.load(model_load_path)

In [82]:
model_ae

StackedAutoEncoder(
  (ae1): CDAutoEncoder(
    (forward_pass): Sequential(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace)
      (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (backward_pass): Sequential(
      (0): ConvTranspose2d(64, 3, kernel_size=(2, 2), stride=(2, 2))
      (1): BatchNorm2d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace)
    )
    (criterion): MSELoss()
  )
  (ae2): CDAutoEncoder(
    (forward_pass): Sequential(
      (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace)
      (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    )
    (backward_pass): Sequential(
      (0):

In [83]:
model_ae.state_dict()

OrderedDict([('ae1.forward_pass.0.weight',
              tensor([[[[ 0.0794,  0.0704,  0.1415],
                        [ 0.1218,  0.1748,  0.0047],
                        [-0.0374, -0.1731,  0.0802]],
              
                       [[-0.0776, -0.0328, -0.0592],
                        [ 0.0587, -0.1551, -0.1743],
                        [-0.0518, -0.1666, -0.1928]],
              
                       [[-0.0743, -0.0553,  0.0089],
                        [-0.0371,  0.1244, -0.1749],
                        [ 0.1361,  0.0742, -0.1439]]],
              
              
                      [[[ 0.0905, -0.1278, -0.0683],
                        [-0.1034,  0.0664,  0.0247],
                        [-0.0356, -0.1215, -0.1541]],
              
                       [[ 0.1491, -0.0924,  0.0204],
                        [-0.0161,  0.1472, -0.0729],
                        [ 0.0050, -0.1577,  0.1867]],
              
                       [[-0.0883, -0.0080, -0.1563],
             

In [84]:
list(model_ae.ae1.forward_pass.children())[0].weight

Parameter containing:
tensor([[[[ 0.0794,  0.0704,  0.1415],
          [ 0.1218,  0.1748,  0.0047],
          [-0.0374, -0.1731,  0.0802]],

         [[-0.0776, -0.0328, -0.0592],
          [ 0.0587, -0.1551, -0.1743],
          [-0.0518, -0.1666, -0.1928]],

         [[-0.0743, -0.0553,  0.0089],
          [-0.0371,  0.1244, -0.1749],
          [ 0.1361,  0.0742, -0.1439]]],


        [[[ 0.0905, -0.1278, -0.0683],
          [-0.1034,  0.0664,  0.0247],
          [-0.0356, -0.1215, -0.1541]],

         [[ 0.1491, -0.0924,  0.0204],
          [-0.0161,  0.1472, -0.0729],
          [ 0.0050, -0.1577,  0.1867]],

         [[-0.0883, -0.0080, -0.1563],
          [-0.0511, -0.0895, -0.1018],
          [-0.0630, -0.0272, -0.0767]]],


        [[[-0.0253, -0.0177, -0.0687],
          [-0.1442,  0.1722,  0.0793],
          [-0.1045, -0.0532,  0.1365]],

         [[-0.1850,  0.0987,  0.1782],
          [ 0.0759,  0.1897, -0.1253],
          [-0.0530, -0.1833,  0.1572]],

         [[-0.0760, -0

In [85]:
nn.Sequential(*copy.deepcopy(list(model_ae.ae1.forward_pass.children())+list(model_ae.ae2.forward_pass.children())+list(model_ae.ae3.forward_pass.children())+list(model_ae.ae4.forward_pass.children())+list(model_ae.ae5.forward_pass.children())))

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (2): ReLU(inplace)
  (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (6): ReLU(inplace)
  (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (8): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (9): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (10): ReLU(inplace)
  (11): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (12): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (13): ReLU(inplace)
  (14): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (15)

In [86]:
class AE_Transfered_Network(torch.nn.Module):
    def __init__(self, classifier_type, autoencoder_model, num_classes=1000):
        super(AE_Transfered_Network, self).__init__()
        if classifier_type != 'vgg':
            sys.stdout.write('Dear, we only support vgg now...\n')

        self.features = nn.Sequential(*copy.deepcopy(
            list(autoencoder_model.ae1.forward_pass.children())+
            list(autoencoder_model.ae2.forward_pass.children())+
            list(autoencoder_model.ae3.forward_pass.children())+
            list(autoencoder_model.ae4.forward_pass.children())+
            list(autoencoder_model.ae5.forward_pass.children())))
        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(3, 3))
        self.classifier = nn.Sequential(
            nn.Linear(in_features=4608, out_features=4096, bias=True),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=4096, out_features=4096, bias=True),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=4096, out_features=num_classes, bias=True)
        )
    
    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

In [93]:
try:
    model_ae=torch.load(model_load_path).module
except:
    model_ae=torch.load(model_load_path)

model_ft = AE_Transfered_Network('vgg',model_ae)

del model_ae #save memory

In [94]:
model_ft

AE_Transfered_Network(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace)
    (3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (6): ReLU(inplace)
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (8): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (10): ReLU(inplace)
    (11): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (13): ReLU(inplace)
    (14): MaxPool2d(kern

In [95]:
model_ft.state_dict() # no problem

OrderedDict([('features.0.weight', tensor([[[[ 0.0794,  0.0704,  0.1415],
                        [ 0.1218,  0.1748,  0.0047],
                        [-0.0374, -0.1731,  0.0802]],
              
                       [[-0.0776, -0.0328, -0.0592],
                        [ 0.0587, -0.1551, -0.1743],
                        [-0.0518, -0.1666, -0.1928]],
              
                       [[-0.0743, -0.0553,  0.0089],
                        [-0.0371,  0.1244, -0.1749],
                        [ 0.1361,  0.0742, -0.1439]]],
              
              
                      [[[ 0.0905, -0.1278, -0.0683],
                        [-0.1034,  0.0664,  0.0247],
                        [-0.0356, -0.1215, -0.1541]],
              
                       [[ 0.1491, -0.0924,  0.0204],
                        [-0.0161,  0.1472, -0.0729],
                        [ 0.0050, -0.1577,  0.1867]],
              
                       [[-0.0883, -0.0080, -0.1563],
                        [-0.0511, -

In [96]:
def set_parameter_pin_grad(model, pinning):
    if pinning:
        for param in model.parameters():
            param.requires_grad = False
    else:
        for param in model.parameters():
            param.requires_grad = True

In [97]:
set_parameter_pin_grad(model_ft.features,feature_pinning)

In [98]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

if torch.cuda.device_count() > 1:
    model_ft = nn.DataParallel(model_ft)

model_ft = model_ft.to(device)

In [99]:

sys.stdout.write("Params to learn:\n")
params_to_update = []
for name,param in model_ft.named_parameters():
    if param.requires_grad == True:
        params_to_update.append(param)
        sys.stdout.write("\t{}\n".format(name))

Params to learn:
	classifier.0.weight
	classifier.0.bias
	classifier.3.weight
	classifier.3.bias
	classifier.6.weight
	classifier.6.bias


In [68]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, is_inception=False):
    since = time.time()

    val_acc_history = []

    #best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode
            # 切换phase重置loss
            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    # Get model outputs and calculate loss
                    # Special case for inception because in training it has an auxiliary output. In train
                    #   mode we calculate the loss by summing the final output and the auxiliary output
                    #   but in testing we only consider the final output.
                    if is_inception and phase == 'train':
                        # From https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4*loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            sys.stdout.write('{} Loss: {:.4f} Acc: {:.4f}\n'.format(phase, epoch_loss, epoch_acc))
            sys.stdout.write('training time: {:.0f}s\n'.format( time.time() - since ))

            # deep copy the model
            if phase == 'val':
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    #best_model_wts = copy.deepcopy(model.state_dict())
                    with open(save_path, 'wb') as f:
                        torch.save(model, f)
                #else:
                    #lr/=4
                # 只有phase为val的acc loss才被加入 val_acc
                val_acc_history.append(epoch_acc)
                with open(save_path+'_val_acc', 'w') as f:
                    for item in val_acc_history:
                        f.write("%s\n" % item)

        print()

    time_elapsed = time.time() - since
    sys.stdout.write('Training complete in {:.0f}m {:.0f}s\n'.format(time_elapsed // 60, time_elapsed % 60))
    sys.stdout.write('Best val Acc: {:4f}\n'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, val_acc_history

In [74]:
sys.stdout.write('Begin to load data...\n')

dataloaders={}

dataloaders['train'], dataloaders['val'], dataloaders['unlabeled'], class_to_idx_dict = image_loader(loader_image_path,loader_batch_size)

Begin to load data...
sup_train_data.class_to_idx==sup_val_data.class_to_idx:  True


In [75]:
##########################################  set training parameters ##########################################

# Observe that all parameters are being optimized
optimizer_ft = torch.optim.SGD(params_to_update, lr=0.001, momentum=0.9)

# Setup the loss fxn
criterion = nn.CrossEntropyLoss()
############################################ training ###########################
sys.stdout.write('Begin to train...\n')
# Train and evaluate
model_ft, hist = train_model(model_ft, dataloaders, criterion, optimizer_ft, num_epochs=num_epochs, is_inception=(model_name=="inception"))

sys.stdout.write('Finished')


Begin to train...
Epoch 0/49
----------
train Loss: 6.9103 Acc: 0.0010
training time: 547s


KeyboardInterrupt: 