In [1]:
# Install PyTorch and torchvision into the notebook environment (versions are not pinned).
!pip3 install torch torchvision



In [2]:
import torch
import math
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torchvision import datasets, transforms
import torch.nn.functional as F
import time
import os
import torch.backends.cudnn as cudnn
import torchvision.models as models


os.environ["CUDA_VISIBLE_DEVICES"] = '0'                # GPU Number 
start_time = time.time()                                # wall-clock start; elapsed time is printed after training
batch_size = 40
learning_rate = 0.001
root_dir = 'drive/app/cifar10/'                         # where CIFAR-10 is downloaded / cached
default_directory = 'drive/app/torch/save_models'       # where checkpoints are written

# PyTorch warns when a DataLoader is asked for more worker processes than the
# machine has CPUs, so cap the requested 4 workers at the available core count.
num_workers = min(4, os.cpu_count() or 1)

# Per-channel (RGB) mean / standard deviation applied after ToTensor();
# shared by the train and test pipelines so they cannot drift apart.
normalize = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))

# Data Augmentation (training only)
transform_train = transforms.Compose([
    transforms.Resize(size=(224, 224)),                 # resize every image to 224x224
    transforms.RandomHorizontalFlip(),                  # right and left flip
    transforms.ToTensor(),                              # change [0,255] Int value to [0,1] Float value
    normalize,
])

# Evaluation pipeline: same resize / normalisation, no random augmentation
transform_test = transforms.Compose([
    transforms.Resize(size=(224, 224)),                 # resize every image to 224x224
    transforms.ToTensor(),                              # change [0,255] Int value to [0,1] Float value
    normalize,
])

# automatically download
train_dataset = datasets.CIFAR10(root=root_dir,
                                 train=True,
                                 transform=transform_train,
                                 download=True)

# Reuses the archive fetched for the training split above.
test_dataset = datasets.CIFAR10(root=root_dir,
                                train=False,
                                transform=transform_test)

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True,            # at Training Procedure, Data Shuffle = True
                                           num_workers=num_workers) # CPU loader number

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=False,            # at Test Procedure, Data Shuffle = False
                                          num_workers=num_workers)  # CPU loader number


# Device that the model and every batch are moved to.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def initialize_model(num_classes, use_pretrained=True):
    """Build a VGG-19 (batch-norm) network whose final layer has `num_classes` outputs.

    Args:
        num_classes: number of output units for the replaced final classifier layer.
        use_pretrained: forwarded to `models.vgg19_bn(pretrained=...)`.

    Returns:
        A `(model, input_size)` tuple; `input_size` is always 224.
    """
    input_size = 224
    net = models.vgg19_bn(pretrained=use_pretrained)

    # Replace the last classifier layer with one sized for our label set,
    # keeping the same number of input features.
    old_head = net.classifier[6]
    net.classifier[6] = nn.Linear(old_head.in_features, num_classes)

    return net, input_size

# Initialize the model for this run (10 output classes)
model, input_size = initialize_model(10, use_pretrained=True)
model = model.to(device)

# Adagrad optimizer. The learning rate comes from `learning_rate`: the training
# loop assigns that value to every param group before each epoch, so any other
# value passed here would never take effect.
optimizer = optim.Adagrad(model.parameters(), lr=learning_rate, lr_decay=0, weight_decay=0, initial_accumulator_value=0, eps=1e-10)


criterion = nn.CrossEntropyLoss()

if torch.cuda.device_count() > 0:
    print("USE", torch.cuda.device_count(), "GPUs!")
    # Wrap for multi-GPU execution; the optimizer above still references the
    # same parameter objects, which DataParallel does not copy.
    model = nn.DataParallel(model).cuda()
    cudnn.benchmark = True
else:
    print("USE ONLY CPU!")


def train(epoch):
    """Run one training pass over `train_loader`, printing running loss and accuracy.

    Uses the module-level `model`, `optimizer`, `criterion`, `train_loader`
    and `device`.

    Args:
        epoch: epoch index; only used to label the log lines.
    """
    model.train()
    train_loss = 0.0
    total = 0
    correct = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        # Move the batch to the same device the model was placed on.
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        # Predicted class = index of the largest output along dim 1.
        _, predicted = torch.max(output.detach(), 1)

        total += target.size(0)
        # .item() keeps the counter a plain Python int instead of a tensor.
        correct += predicted.eq(target).sum().item()
        if batch_idx % 10 == 0:
            print('Epoch: {} | Batch_idx: {} |  Loss: ({:.4f}) | Acc: ({:.2f}%) ({}/{})'
                  .format(epoch, batch_idx, train_loss / (batch_idx + 1), 100. * correct / total, correct, total))


def test():
    """Evaluate the model on `test_loader` and print mean batch loss and accuracy.

    Uses the module-level `model`, `criterion`, `test_loader` and `device`.
    Gradient tracking is disabled for the whole pass, since no backward step
    is performed here.
    """
    model.eval()
    test_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0
    with torch.no_grad():
        for data, target in test_loader:
            # Move the batch to the same device the model was placed on.
            data, target = data.to(device), target.to(device)

            outputs = model(data)
            loss = criterion(outputs, target)

            test_loss += loss.item()
            # Predicted class = index of the largest output along dim 1.
            _, predicted = torch.max(outputs, 1)
            total += target.size(0)
            # .item() keeps the counter a plain Python int instead of a tensor.
            correct += predicted.eq(target).sum().item()
            num_batches += 1

    # max(..., 1) keeps the report well-defined if the loader yielded nothing.
    print('# TEST : Loss: ({:.4f}) | Acc: ({:.2f}%) ({}/{})'
          .format(test_loss / max(num_batches, 1), 100. * correct / max(total, 1), correct, total))


def save_checkpoint(directory, state, filename='latest.tar.gz'):
    """Serialize `state` with `torch.save` to `directory/filename`.

    Args:
        directory: target directory; created (including parents) if missing.
            An empty string means "no directory component".
        state: any object `torch.save` can serialize.
        filename: name of the checkpoint file inside `directory`.
    """
    # exist_ok avoids the check-then-create race; an empty directory is skipped
    # because os.makedirs('') raises.
    if directory:
        os.makedirs(directory, exist_ok=True)

    model_filename = os.path.join(directory, filename)

    # Write to a temporary sibling file and swap it in afterwards, so an
    # interrupted save cannot leave a truncated file at the checkpoint path.
    tmp_filename = model_filename + '.tmp'
    torch.save(state, tmp_filename)
    os.replace(tmp_filename, model_filename)
    print("=> saving checkpoint")

def load_checkpoint(directory, filename='latest.tar.gz'):
    """Load a checkpoint written with `torch.save`, if one exists.

    Args:
        directory: directory containing the checkpoint.
        filename: checkpoint file name inside `directory`.

    Returns:
        The deserialized object, or None when the file does not exist.
    """
    model_filename = os.path.join(directory, filename)
    if not os.path.exists(model_filename):
        return None

    print("=> loading checkpoint")
    # Without a map_location, tensors saved from a GPU can only be restored on
    # a machine with CUDA; remap to CPU when no GPU is available. When CUDA is
    # available the default placement is kept.
    map_location = None if torch.cuda.is_available() else 'cpu'
    # NOTE: torch.load unpickles the file, so only load checkpoints you trust.
    return torch.load(model_filename, map_location=map_location)

start_epoch = 0
num_epochs = 10          # training stops after this many epochs in total

# Resume from the latest checkpoint when one exists.
checkpoint = load_checkpoint(default_directory)
if checkpoint:
    start_epoch = checkpoint['epoch'] + 1
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])

for epoch in range(start_epoch, num_epochs):

    # Step schedule: base rate, then x0.1 from epoch 20 and x0.01 from epoch 40.
    # With num_epochs = 10 only the first branch is ever taken.
    if epoch < 20:
        lr = learning_rate
    elif epoch < 40:
        lr = learning_rate * 0.1
    else:
        lr = learning_rate * 0.01
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

    train(epoch)
    # Checkpoint after every epoch. 'model' stores the whole module object in
    # addition to its state_dict; only 'epoch', 'state_dict' and 'optimizer'
    # are read back by the resume code above.
    save_checkpoint(default_directory, {
        'epoch': epoch,
        'model': model,
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),
    })
    test()

# Split the elapsed seconds by hand: time.gmtime(...).tm_hour wraps at 24, so
# runs longer than a day would under-report their hours.
elapsed = int(time.time() - start_time)
hours, remainder = divmod(elapsed, 3600)
mins, secs = divmod(remainder, 60)
print('{} hours {} mins {} secs for training'.format(hours, mins, secs))

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to drive/app/cifar10/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting drive/app/cifar10/cifar-10-python.tar.gz to drive/app/cifar10/


  cpuset_checked))
Downloading: "https://download.pytorch.org/models/vgg19_bn-c79401a0.pth" to /root/.cache/torch/hub/checkpoints/vgg19_bn-c79401a0.pth


  0%|          | 0.00/548M [00:00<?, ?B/s]

USE 1 GPUs!
Epoch: 0 | Batch_idx: 0 |  Loss: (2.3590) | Acc: (15.00%) (6/40)
Epoch: 0 | Batch_idx: 10 |  Loss: (2.1180) | Acc: (25.23%) (111/440)
Epoch: 0 | Batch_idx: 20 |  Loss: (1.8540) | Acc: (35.00%) (294/840)
Epoch: 0 | Batch_idx: 30 |  Loss: (1.5927) | Acc: (43.47%) (539/1240)
Epoch: 0 | Batch_idx: 40 |  Loss: (1.4355) | Acc: (49.63%) (814/1640)
Epoch: 0 | Batch_idx: 50 |  Loss: (1.3253) | Acc: (53.92%) (1100/2040)
Epoch: 0 | Batch_idx: 60 |  Loss: (1.2317) | Acc: (57.01%) (1391/2440)
Epoch: 0 | Batch_idx: 70 |  Loss: (1.1598) | Acc: (59.65%) (1694/2840)
Epoch: 0 | Batch_idx: 80 |  Loss: (1.1015) | Acc: (61.94%) (2007/3240)
Epoch: 0 | Batch_idx: 90 |  Loss: (1.0449) | Acc: (63.96%) (2328/3640)
Epoch: 0 | Batch_idx: 100 |  Loss: (1.0028) | Acc: (65.50%) (2646/4040)
Epoch: 0 | Batch_idx: 110 |  Loss: (0.9684) | Acc: (66.64%) (2959/4440)
Epoch: 0 | Batch_idx: 120 |  Loss: (0.9420) | Acc: (67.77%) (3280/4840)
Epoch: 0 | Batch_idx: 130 |  Loss: (0.9085) | Acc: (68.95%) (3613/5240)
Ep