model.cifar.vgg-cifar.py



In [None]:
# Mount Google Drive at /content/drive (Colab only); the checkpoint and
# results paths used later in this notebook point below that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import torch
import torch.nn as nn

class SeparableConv2d(nn.Module):
    """Depthwise-separable 2-D convolution.

    A per-channel spatial convolution (``depthwise``) is followed by a 1x1
    convolution (``pointwise``) that mixes channels.

    Args:
        in_channels: number of channels of the input feature map.
        out_channels: number of channels produced by the pointwise conv.
        kernel_size: spatial kernel size of the depthwise conv (int or tuple).

    Note:
        ``depthwise.weight`` has shape ``(in_channels, 1, k, k)`` because of
        ``groups=in_channels``; state dicts saved from a dense (ungrouped)
        variant of this layer have a different shape and will not load.
    """

    def __init__(self, in_channels, out_channels, kernel_size):
        super(SeparableConv2d, self).__init__()

        # "Same" padding for odd kernels, so the layer keeps the spatial size
        # for any kernel_size (for kernel_size=3 this is the usual padding=1).
        if isinstance(kernel_size, int):
            padding = kernel_size // 2
        else:
            padding = tuple(k // 2 for k in kernel_size)

        # groups=in_channels gives every input channel its own spatial filter;
        # without it the layer is an ordinary dense convolution.
        self.depthwise = nn.Conv2d(
            in_channels,
            in_channels,
            kernel_size=kernel_size,
            padding=padding,
            groups=in_channels,
        )
        self.pointwise = nn.Conv2d(in_channels, out_channels, kernel_size=1)

    def forward(self, x):
        """Apply the depthwise conv, then the pointwise conv, to ``x``."""
        return self.pointwise(self.depthwise(x))

class Xception(nn.Module):
    """Xception-style classifier built from ``SeparableConv2d`` blocks.

    The network is a stem (``feature``), five residual stages
    (``block1``..``block5``, each summed with a 1x1 convolution shortcut
    ``conv3``..``conv7``), a final non-residual stage (``block6``) and a
    linear classifier (``fc``).

    Args:
        num_classes: size of the classifier output (defaults to 100).

    Input is expected to be a batch of 3-channel images; the global pooling at
    the end of ``block6`` makes the classifier independent of the input's
    spatial size.
    """

    def __init__(self, num_classes=100):
        super(Xception, self).__init__()
        # Stem: halves the spatial size once (stride-2 conv) and lifts to 64 channels.
        self.feature = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True)
        )

        # Each shortcut below must downsample exactly as much as its block:
        # stride 2 where the block ends with a stride-2 max-pool, stride 1 otherwise.
        self.conv3 = nn.Conv2d(64, 128, kernel_size=1, stride=2)

        self.block1 = nn.Sequential(
            SeparableConv2d(64, 128, kernel_size=3),
            nn.ReLU(),
            SeparableConv2d(128, 128, 3),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )

        self.conv4 = nn.Conv2d(128, 256, kernel_size=1, stride=2)

        # The leading ReLUs of block2..block6 are deliberately NOT in-place:
        # forward() feeds the same tensor to the block and to its shortcut.
        self.block2 = nn.Sequential(
            nn.ReLU(),
            SeparableConv2d(128, 256, kernel_size=3),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            SeparableConv2d(256, 256, 3),
            nn.BatchNorm2d(256),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )

        self.conv5 = nn.Conv2d(256, 728, kernel_size=1, stride=2)

        self.block3 = nn.Sequential(
            nn.ReLU(),
            SeparableConv2d(256, 728, kernel_size=3),
            nn.BatchNorm2d(728),
            nn.ReLU(),
            SeparableConv2d(728, 728, 3),
            nn.BatchNorm2d(728),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )

        # block4 has no pooling, so its shortcut must keep the resolution
        # (stride 1). A stride-2 shortcut here only "works" through accidental
        # broadcasting when its output happens to be 1x1.
        self.conv6 = nn.Conv2d(728, 728, kernel_size=1, stride=1)

        self.block4 = nn.Sequential(
            nn.ReLU(),
            SeparableConv2d(728, 728, kernel_size=3),
            nn.BatchNorm2d(728),
            nn.ReLU(),
            SeparableConv2d(728, 728, 3),
            nn.BatchNorm2d(728),
            nn.ReLU(),
            SeparableConv2d(728, 728, 3),
            nn.BatchNorm2d(728),
        )

        self.conv7 = nn.Conv2d(728, 1024, kernel_size=1, stride=2)

        self.block5 = nn.Sequential(
            nn.ReLU(),
            SeparableConv2d(728, 728, kernel_size=3),
            nn.BatchNorm2d(728),
            nn.ReLU(),
            SeparableConv2d(728, 1024, 3),
            nn.BatchNorm2d(1024),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )

        self.block6 = nn.Sequential(
            nn.ReLU(),
            SeparableConv2d(1024, 1536, kernel_size=3),
            nn.BatchNorm2d(1536),
            nn.ReLU(),
            SeparableConv2d(1536, 2048, 3),
            nn.BatchNorm2d(2048),
            # Global average pooling: always yields 1x1 so the flattened
            # feature has exactly 2048 values for the linear layer.
            nn.AdaptiveAvgPool2d((1, 1))
        )

        self.fc = nn.Linear(2048, num_classes)

        # Weight initialisation, applied to every sub-module (including the
        # convolutions inside SeparableConv2d).
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Return class scores of shape ``(batch, num_classes)`` for images ``x``."""
        x = self.feature(x)

        # Residual stages: block output + 1x1-conv projection of the block input.
        x = self.block1(x) + self.conv3(x)
        x = self.block2(x) + self.conv4(x)
        x = self.block3(x) + self.conv5(x)
        x = self.block4(x) + self.conv6(x)
        x = self.block5(x) + self.conv7(x)
        x = self.block6(x)

        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

def xception():
    """Factory: build an ``Xception`` network with its default arguments."""
    model = Xception()
    return model


config.py


In [None]:
import easydict


def config():
    """Build the run configuration and the model to train.

    Returns:
        (cfg, model): ``cfg`` is an ``EasyDict`` with the hyper-parameters and
        paths below (attribute access, e.g. ``cfg.batch_size``); ``model`` is
        the network returned by ``xception()``.
    """
    cfg = easydict.EasyDict({
        "arch": "xception",
        "dataset": "cifar100",
        "batch_size": 128,
        "epochs": 200,
        "learning_rate": 0.1,
        "weight_decay": 0.00001,
        "momentum": 0.9,
        "nesterov": True,
        "print_freq": 50,
        "ckpt": "/content/drive/My Drive/MLVC/Baseline/checkpoint/",
        "results_dir": "./results/",
        "resume": False,
        "evaluate": False,
        "cuda": True,
        "gpuids": [0],
        "colab": True,
    })

    cfg.gpuids = [int(gpu_id) for gpu_id in cfg.gpuids]

    # Only "xception" is wired up, and every other cfg.arch value has always
    # fallen back to it as well, so the network is built exactly once here
    # (constructing it twice just repeated the costly weight initialisation).
    # Disabled alternatives kept for reference: "resnet-cifar" ->
    # resnet.resnet20(), "vgg-cifar-binary" -> vgg_bnn.vgg11(),
    # "resnet-cifar-dorefa" -> resnet_dorefanet.resnet20().
    model = xception()

    return cfg, model


utility.py


In [None]:
import torch
import time
import shutil
import pathlib
from collections import OrderedDict


def load_model(model, ckpt_file, args):
    """Load the weights stored under ``checkpoint["model"]`` into ``model``.

    The checkpoint is mapped to ``cuda:args.gpuids[0]`` when ``args.cuda`` is
    set and to the CPU otherwise. If the keys do not match ``model`` directly
    (the checkpoint was saved from a ``DataParallel``-wrapped model and
    ``model`` is plain, or the other way round), a ``module.`` prefix is
    stripped from the keys and the weights are loaded into the underlying
    module.

    Args:
        model: network (optionally wrapped in ``DataParallel``) to fill.
        ckpt_file: path of a file written with ``torch.save``.
        args: object with ``cuda`` (bool) and ``gpuids`` (list of ints).

    Returns:
        The full checkpoint dictionary, so callers can read its other entries.

    Raises:
        RuntimeError: if the weights fit neither the model nor its wrapped module.
    """
    if args.cuda:
        device = torch.device("cuda", args.gpuids[0])
    else:
        device = torch.device("cpu")
    checkpoint = torch.load(ckpt_file, map_location=device)
    state_dict = checkpoint["model"]

    try:
        model.load_state_dict(state_dict)
    except RuntimeError:
        # load_state_dict reports key/shape mismatches as RuntimeError; retry
        # with prefix-free keys against the unwrapped network.
        stripped = OrderedDict(
            (key[7:] if key[:7] == "module." else key, value)
            for key, value in state_dict.items()
        )
        wrappers = (torch.nn.DataParallel, torch.nn.parallel.DistributedDataParallel)
        target = model.module if isinstance(model, wrappers) else model
        target.load_state_dict(stripped)

    return checkpoint


def save_model(state, epoch, is_best, args):
    """Write ``state`` to ``checkpoint/<args.dataset>/ckpt_epoch_<epoch>.pth``.

    The directory is created on demand (relative to the current working
    directory). When ``is_best`` is truthy the file just written is also
    copied to ``ckpt_best.pth`` in the same directory.
    """
    target_dir = pathlib.Path("checkpoint", args.dataset)
    target_dir.mkdir(parents=True, exist_ok=True)

    epoch_ckpt = target_dir.joinpath("ckpt_epoch_{}.pth".format(epoch))
    torch.save(state, epoch_ckpt)

    if not is_best:
        return
    shutil.copyfile(epoch_ckpt, target_dir.joinpath("ckpt_best.pth"))


class AverageMeter(object):
    """Running statistic: keeps the latest value and the weighted mean so far."""

    def __init__(self, name, fmt=":f"):
        # `fmt` is a format-spec suffix (e.g. ":6.3f") applied to val and avg
        # when the meter is rendered with str().
        self.name, self.fmt = name, fmt
        self.reset()

    def reset(self):
        """Forget everything recorded so far."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record `val` as the newest value, weighted as `n` observations."""
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        # Template looks like "{name} {val:6.3f} ({avg:6.3f})" and is filled
        # from the instance attributes.
        template = "".join(["{name} {val", self.fmt, "} ({avg", self.fmt, "})"])
        return template.format(**vars(self))


class ProgressMeter(object):
    """Prints one tab-separated progress line: prefix, batch counter, meters."""

    def __init__(self, num_batches, *meters, prefix=""):
        self.prefix = prefix
        self.meters = meters
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)

    def print(self, batch):
        """Print the status line for batch index `batch`."""
        fields = [self.prefix + self.batch_fmtstr.format(batch)]
        fields.extend(str(meter) for meter in self.meters)
        print("\t".join(fields))

    def _get_batch_fmtstr(self, num_batches):
        """Return a template such as "[{:3d}/391]" sized to `num_batches`."""
        width = len(str(num_batches // 1))
        cell = "{:" + str(width) + "d}"
        total = cell.format(num_batches)
        return "[" + cell + "/" + total + "]"


def adjust_learning_rate(optimizer, epoch, lr):
    """Write `lr` into every parameter group of `optimizer` and return it.

    `epoch` is currently unused: the step schedule that used to live here
    (0.01 from epoch 60, 0.001 from epoch 120, 0.0001 from epoch 160) is
    disabled, so the learning rate stays at whatever the caller passes in.
    """
    for group in optimizer.param_groups:
        group.update(lr=lr)
    return lr


def accuracy(output, target, topk=(1,)):
    """Compute top-k accuracy (in percent) for each k in `topk`.

    Args:
        output: score tensor; the top-k search runs along dimension 1, so it
            is treated as (batch, classes).
        target: tensor with one class index per sample.
        topk: iterable of k values to evaluate.

    Returns:
        A list with one single-element tensor per k, in the order of `topk`.
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        # pred: (maxk, batch) after the transpose; row i holds each sample's
        # (i+1)-th highest scoring class.
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            # reshape, not view: `correct` inherits the transposed layout of
            # `pred`, so a multi-row slice is not contiguous and .view(-1)
            # raises on current PyTorch versions.
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

def _split_minutes(seconds):
    """Split a duration in seconds into (whole minutes, leftover seconds)."""
    return int(seconds // 60), seconds % 60


def _split_hours(seconds):
    """Split a duration in seconds into (whole hours, whole minutes, leftover seconds)."""
    return int(seconds // 3600), int((seconds % 3600) // 60), seconds % 60


def print_reults(start_time, train_time, validate_time, start_epoch, epochs):
    """Print a timing summary of a training run.

    Args:
        start_time: ``time.time()`` value taken when the run started; used for
            the final "total time" line.
        train_time: accumulated training time in seconds.
        validate_time: accumulated validation time in seconds.
        start_epoch: first epoch that was run.
        epochs: epoch index at which the run stops (exclusive).
    """
    # When no epoch was run (start_epoch == epochs, e.g. after resuming a
    # finished run) divide by 1 instead of raising ZeroDivisionError.
    num_epochs = max(epochs - start_epoch, 1)
    avg_train_time = train_time / num_epochs
    avg_valid_time = validate_time / num_epochs
    total_train_time = train_time + validate_time

    print(
        "====> average training time per epoch: {:,}m {:.2f}s".format(
            *_split_minutes(avg_train_time)
        )
    )
    print(
        "====> average validation time per epoch: {:,}m {:.2f}s".format(
            *_split_minutes(avg_valid_time)
        )
    )
    print("====> training time: {}h {}m {:.2f}s".format(*_split_hours(train_time)))
    print("====> validation time: {}h {}m {:.2f}s".format(*_split_hours(validate_time)))
    print(
        "====> total training time: {}h {}m {:.2f}s".format(
            *_split_hours(total_train_time)
        )
    )

    elapsed_time = time.time() - start_time
    print("====> total time: {}h {}m {:.2f}s".format(*_split_hours(elapsed_time)))

data_loader.py


In [None]:
import torch
import torchvision.transforms as transforms
from torchvision import datasets


def dataloader(dataset, batch_size):
    """Create the training and validation ``DataLoader`` for `dataset`.

    Args:
        dataset: ``"cifar100"`` selects CIFAR-100; any other value selects
            CIFAR-10.
        batch_size: batch size used by both loaders.

    Returns:
        (train_loader, val_loader); only the training loader shuffles.
    """
    # Load exactly one dataset: building CIFAR-10 first and then discarding it
    # would trigger its download/verification for nothing.
    if dataset == "cifar100":
        train_dataset, val_dataset = load_cifar100()
    else:
        train_dataset, val_dataset = load_cifar10()

    # Data loader
    train_loader = torch.utils.data.DataLoader(
        dataset=train_dataset, batch_size=batch_size, shuffle=True
    )

    val_loader = torch.utils.data.DataLoader(
        dataset=val_dataset, batch_size=batch_size, shuffle=False
    )

    return train_loader, val_loader

def load_cifar10():
    """Return the CIFAR-10 ``(train_dataset, val_dataset)`` pair.

    Training images are padded by 4, randomly flipped horizontally and
    randomly cropped to 32 before tensor conversion and normalisation;
    validation images are only converted and normalised. Only the training
    split is created with ``download=True``.
    """
    data_root = "../../data/"

    # NOTE(review): these look like the commonly used ImageNet statistics
    # rather than CIFAR-specific ones — confirm this is intended.
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
    )

    augment_steps = [
        transforms.Pad(4),
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32),
        transforms.ToTensor(),
        normalize,
    ]
    train_dataset = datasets.CIFAR10(
        root=data_root,
        train=True,
        transform=transforms.Compose(augment_steps),
        download=True,
    )

    plain_steps = [transforms.ToTensor(), normalize]
    val_dataset = datasets.CIFAR10(
        root=data_root,
        train=False,
        transform=transforms.Compose(plain_steps),
    )

    return train_dataset, val_dataset

def load_cifar100():
    """Return the CIFAR-100 ``(train_dataset, val_dataset)`` pair.

    Training images are padded by 4, randomly flipped horizontally and
    randomly cropped to 32 before tensor conversion and normalisation;
    validation images are only converted and normalised. Only the training
    split is created with ``download=True``.
    """
    data_root = "../../data/"

    # NOTE(review): these look like the commonly used ImageNet statistics
    # rather than CIFAR-specific ones — confirm this is intended.
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
    )

    augment_steps = [
        transforms.Pad(4),
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32),
        transforms.ToTensor(),
        normalize,
    ]
    train_dataset = datasets.CIFAR100(
        root=data_root,
        train=True,
        transform=transforms.Compose(augment_steps),
        download=True,
    )

    plain_steps = [transforms.ToTensor(), normalize]
    val_dataset = datasets.CIFAR100(
        root=data_root,
        train=False,
        transform=transforms.Compose(plain_steps),
    )

    return train_dataset, val_dataset

main.py


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn

import time
import pathlib
from os.path import isfile
import pandas as pd


def main():
    """Train and validate the configured model, logging per-epoch results.

    Sets the module-level ``args``, ``start_epoch`` and ``best_acc1`` (other
    functions in this file read them), builds the loss and SGD optimizer,
    optionally resumes from a checkpoint, then for every epoch trains,
    validates, hands the model to ``save_model_data`` and rewrites the results
    CSV so partial results survive an interrupted run.

    Raises:
        Exception: if ``args.cuda`` is set but no GPU is available.
    """
    global args, start_epoch, best_acc1
    args, model = config()

    print("Model: {}".format(args.arch))

    if args.cuda and not torch.cuda.is_available():
        raise Exception("No GPU found, please run without --cuda")

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(
        model.parameters(),
        lr=args.learning_rate,
        weight_decay=args.weight_decay,
        momentum=args.momentum,
        nesterov=args.nesterov,
    )

    best_acc1 = 0
    start_epoch = 0

    if args.cuda:
        torch.cuda.set_device(args.gpuids[0])
        with torch.cuda.device(args.gpuids[0]):
            model = model.cuda()
            criterion = criterion.cuda()
        model = nn.DataParallel(
            model, device_ids=args.gpuids, output_device=args.gpuids[0]
        )
        cudnn.benchmark = True

    # checkpoint file
    # NOTE(review): args.ckpt is used both as the base directory and as the
    # last path component; when it is an absolute path the join collapses to
    # args.ckpt itself. Confirm which checkpoint file is meant to be resumed.
    ckpt_dir = pathlib.Path(args.ckpt)
    ckpt_file = ckpt_dir / args.dataset / args.ckpt

    # for resuming training
    if args.resume:
        retrain(ckpt_file, model, optimizer)

    # Data loading
    print("\n==> Load data..")
    train_loader, val_loader = dataloader(args.dataset, args.batch_size)

    # initialize accumulators
    train_time, validate_time = 0.0, 0.0
    avgloss_train = 0.0
    acc1_train, acc5_train, acc1_valid, acc5_valid = 0.0, 0.0, 0.0, 0.0
    is_best = False

    # per-epoch result columns
    result_epoch, result_lr, result_train_avgloss = [], [], []
    result_train_avgtop1acc, result_train_avgtop5acc = [], []
    result_val_avgtop1acc, result_val_avgtop5acc = [], []

    # Where the results table goes; the directory is created up front so the
    # first to_csv() call cannot fail on a missing folder.
    if args.colab:
        csv_path = pathlib.Path(
            '/content/drive/My Drive/MLVC/Baseline/results/{}_result.csv'.format(args.arch)
        )
    else:
        csv_path = pathlib.Path('./results/{}_result.csv'.format(args.arch))
    csv_path.parent.mkdir(parents=True, exist_ok=True)

    # train...
    lr = args.learning_rate
    curr_lr = lr
    for epoch in range(start_epoch, args.epochs):
        curr_lr = adjust_learning_rate(optimizer, epoch, lr)
        print("\n==> Epoch: {}, lr = {}".format(epoch, optimizer.param_groups[0]["lr"]))

        # train for one epoch
        train_time, acc1_train, acc5_train, avgloss_train = train_epoch(
            train_time,
            acc1_train,
            acc5_train,
            avgloss_train,
            train_loader,
            epoch,
            model,
            criterion,
            optimizer,
        )

        # evaluate on validation set
        validate_time, acc1_valid, acc5_valid = validation_epoch(
            validate_time, acc1_valid, acc5_valid, val_loader, model, criterion
        )

        # remember best Acc@1 and save checkpoint
        is_best = save_model_data(
            is_best, best_acc1, acc1_valid, epoch, model, optimizer, args
        )
        # save_model_data only returns the flag, so the running best has to be
        # updated here; otherwise it stays 0 and every epoch looks "best".
        best_acc1 = max(float(acc1_valid), float(best_acc1))

        result_epoch.append(epoch)
        result_lr.append(curr_lr)
        result_train_avgloss.append(avgloss_train)
        result_train_avgtop1acc.append(acc1_train.item())
        result_train_avgtop5acc.append(acc5_train.item())
        result_val_avgtop1acc.append(acc1_valid.item())
        result_val_avgtop5acc.append(acc5_valid.item())

        # Rewritten after every epoch on purpose: keeps the file current.
        df = pd.DataFrame({
            'Epoch': result_epoch,
            'Learning rate': result_lr,
            'Training avg loss': result_train_avgloss,
            'Training avg top1 acc': result_train_avgtop1acc,
            'Training avg top5 acc': result_train_avgtop5acc,
            'Test avg top1 acc': result_val_avgtop1acc,
            'Test avg top5 acc': result_val_avgtop5acc,
        })
        df.to_csv(csv_path)

    print_results(train_time, validate_time)


def retrain(ckpt_file, model, optimizer):
    """Restore model, optimizer and starting epoch from `ckpt_file`.

    Reads the module-level ``args`` and, on success, overwrites the
    module-level ``start_epoch`` with the checkpoint's ``"epoch"`` entry. If
    `ckpt_file` is not an existing file a message is printed and nothing is
    changed.
    """
    # Without this declaration the assignment below would only create a local
    # variable and training would always restart from epoch 0.
    global start_epoch

    if not isfile(ckpt_file):
        print("==> no checkpoint found '{}'".format(args.ckpt))
        return

    print("\n==> Loading Checkpoint '{}'".format(args.ckpt))
    checkpoint = load_model(model, ckpt_file, args)

    start_epoch = checkpoint["epoch"]
    optimizer.load_state_dict(checkpoint["optimizer"])

    print("==> Loaded Checkpoint '{}' (epoch {})".format(args.ckpt, start_epoch))


def train_epoch(
    train_time, acc1_train, acc5_train, avgloss_train, train_loader, epoch, model, criterion, optimizer
):
    """Run one training epoch and add its wall-clock time to `train_time`.

    The incoming `acc1_train`, `acc5_train` and `avgloss_train` values are not
    read; they are replaced by the results of ``train``.

    Returns:
        (updated train_time, top-1 accuracy, top-5 accuracy, average loss).
    """
    print("===> [ Training ]")
    began = time.time()
    epoch_stats = train(
        train_loader, epoch=epoch, model=model, criterion=criterion, optimizer=optimizer
    )
    acc1_train, acc5_train, avgloss_train = epoch_stats
    elapsed_time = time.time() - began
    print("====> {:.2f} seconds to train this epoch\n".format(elapsed_time))

    return train_time + elapsed_time, acc1_train, acc5_train, avgloss_train


def validation_epoch(
    validate_time, acc1_valid, acc5_valid, val_loader, model, criterion
):
    """Run one validation pass and add its wall-clock time to `validate_time`.

    The incoming accuracy values are not read; they are replaced by the
    results of ``validate``, whose loss value is discarded.

    Returns:
        (updated validate_time, top-1 accuracy, top-5 accuracy).
    """
    print("===> [ Validation ]")
    began = time.time()
    acc1_valid, acc5_valid, _ = validate(val_loader, model, criterion)
    elapsed_time = time.time() - began
    print("====> {:.2f} seconds to validate this epoch\n".format(elapsed_time))

    return validate_time + elapsed_time, acc1_valid, acc5_valid


def save_model_data(is_best, best_acc1, acc1_valid, epoch, model, optimizer, args):
    """Decide whether this epoch is the best so far and checkpoint periodically.

    Args:
        is_best: ignored on input; kept for interface compatibility.
        best_acc1: best validation top-1 accuracy seen before this epoch.
        acc1_valid: this epoch's validation top-1 accuracy.
        epoch: zero-based epoch index.
        model, optimizer: objects whose ``state_dict()`` is stored.
        args: forwarded to ``save_model``.

    Returns:
        The result of ``acc1_valid > best_acc1``. The running best itself is
        NOT updated here (only the flag is returned), so the caller has to
        keep track of it.
    """
    is_best = acc1_valid > best_acc1

    # A checkpoint is written only every 20th epoch, so the state is gathered
    # only when it is actually going to be saved.
    if (epoch + 1) % 20 == 0:
        state = {
            "epoch": epoch + 1,  # epoch to resume from
            "model": model.state_dict(),
            "optimizer": optimizer.state_dict(),
        }
        save_model(state, epoch, is_best, args)

    return is_best


def train(train_loader, **kwargs):
    """Train for one pass over `train_loader`.

    Keyword Args:
        epoch: epoch index, used only in the progress prefix.
        model: network to optimise.
        criterion: loss function called as ``criterion(output, target)``.
        optimizer: optimizer stepped once per batch.

    Reads the module-level ``args`` (``cuda`` and ``print_freq``).

    Returns:
        (top-1 average, top-5 average, mean of the per-batch losses).
    """
    epoch = kwargs.get("epoch")
    model = kwargs.get("model")
    criterion = kwargs.get("criterion")
    optimizer = kwargs.get("optimizer")

    batch_time = AverageMeter("Time", ":6.3f")
    data_time = AverageMeter("Data", ":6.3f")
    losses = AverageMeter("Loss", ":.4e")
    top1 = AverageMeter("Acc@1", ":6.2f")
    top5 = AverageMeter("Acc@5", ":6.2f")

    num_batches = len(train_loader)
    progress = ProgressMeter(
        num_batches,
        batch_time,
        data_time,
        losses,
        top1,
        top5,
        prefix="Epoch: [{}]".format(epoch),
    )

    # switch to train mode
    model.train()

    loss_total = 0.0
    tick = time.time()
    for step, (images, labels) in enumerate(train_loader):
        # time spent waiting for the loader to deliver this batch
        data_time.update(time.time() - tick)

        if args.cuda:
            images = images.cuda(non_blocking=True)
            labels = labels.cuda(non_blocking=True)

        # forward pass
        logits = model(images)
        loss = criterion(logits, labels)

        # bookkeeping, weighted by the number of samples in the batch
        n_samples = images.size(0)
        acc1, acc5 = accuracy(logits, labels, topk=(1, 5))
        losses.update(loss.item(), n_samples)
        top1.update(acc1[0], n_samples)
        top5.update(acc5[0], n_samples)

        # backward pass and SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # full time for this batch (data loading + compute)
        batch_time.update(time.time() - tick)

        loss_total += loss.item()

        if step % args.print_freq == 0:
            progress.print(step)

        tick = time.time()

    print(
        "====> Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}".format(top1=top1, top5=top5)
    )
    # unweighted mean over batches (every batch counts the same)
    epoch_loss = loss_total / num_batches
    print("====> Epoch loss {:.3f}".format(epoch_loss))

    return top1.avg, top5.avg, epoch_loss


def validate(val_loader, model, criterion):
    """Evaluate `model` on `val_loader` without tracking gradients.

    Reads the module-level ``args`` (``cuda`` and ``print_freq``).

    Returns:
        (top-1 average, top-5 average, mean of the per-batch losses). For an
        empty loader the loss is 0.0.
    """
    batch_time = AverageMeter("Time", ":6.3f")
    losses = AverageMeter("Loss", ":.4e")
    top1 = AverageMeter("Acc@1", ":6.2f")
    top5 = AverageMeter("Acc@5", ":6.2f")
    progress = ProgressMeter(
        len(val_loader), batch_time, losses, top1, top5, prefix="Test: "
    )

    # switch to evaluate mode
    model.eval()
    total_loss = 0.0

    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):

            if args.cuda:
                images = images.cuda(non_blocking=True)
                target = target.cuda(non_blocking=True)

            # compute output
            output = model(images)
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0], images.size(0))
            top5.update(acc5[0], images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            total_loss += loss.item()

            if i % args.print_freq == 0:
                progress.print(i)

            end = time.time()

        print(
            "====> Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}".format(
                top1=top1, top5=top5
            )
        )

    # Report the average over all batches (same convention as the training
    # loss), not just the loss of whichever batch happened to come last.
    avg_loss = total_loss / max(len(val_loader), 1)

    return top1.avg, top5.avg, avg_loss


def print_results(train_time, validate_time):
    """Print average and total training/validation times.

    Reads the module-level ``args.epochs`` and ``start_epoch`` to determine
    how many epochs the totals are averaged over.

    Args:
        train_time: accumulated training time in seconds.
        validate_time: accumulated validation time in seconds.
    """
    # If no epoch was run (start_epoch == args.epochs, e.g. resuming a run
    # that already finished) divide by 1 instead of raising ZeroDivisionError.
    num_epochs = max(args.epochs - start_epoch, 1)
    avg_train_time = train_time / num_epochs
    avg_valid_time = validate_time / num_epochs
    total_train_time = train_time + validate_time
    print(
        "====> average training time per epoch: {:,}m {:.2f}s".format(
            int(avg_train_time // 60), avg_train_time % 60
        )
    )
    print(
        "====> average validation time per epoch: {:,}m {:.2f}s".format(
            int(avg_valid_time // 60), avg_valid_time % 60
        )
    )
    print(
        "====> training time: {}h {}m {:.2f}s".format(
            int(train_time // 3600), int((train_time % 3600) // 60), train_time % 60
        )
    )
    print(
        "====> validation time: {}h {}m {:.2f}s".format(
            int(validate_time // 3600),
            int((validate_time % 3600) // 60),
            validate_time % 60,
        )
    )
    print(
        "====> total training time: {}h {}m {:.2f}s".format(
            int(total_train_time // 3600),
            int((total_train_time % 3600) // 60),
            total_train_time % 60,
        )
    )


# Entry point: run the whole training job and report its wall-clock duration.
if __name__ == "__main__":
    start_time = time.time()
    main()
    elapsed_time = time.time() - start_time
    # Printed as whole hours, whole minutes and the remaining seconds.
    print(
        "====> total time: {}h {}m {:.2f}s".format(
            int(elapsed_time // 3600),
            int((elapsed_time % 3600) // 60),
            elapsed_time % 60,
        )
    )


Model: xception

==> Load data..
Files already downloaded and verified
Files already downloaded and verified

==> Epoch: 0, lr = 0.1
===> [ Training ]
Epoch: [0][  0/391]	Time  0.085 ( 0.085)	Data  0.030 ( 0.030)	Loss 4.6868e+00 (4.6868e+00)	Acc@1   0.78 (  0.78)	Acc@5   5.47 (  5.47)
Epoch: [0][ 50/391]	Time  0.087 ( 0.074)	Data  0.038 ( 0.028)	Loss 1.4095e+01 (2.2584e+01)	Acc@1   0.78 (  1.15)	Acc@5   2.34 (  5.30)
Epoch: [0][100/391]	Time  0.090 ( 0.073)	Data  0.040 ( 0.028)	Loss 5.1486e+00 (1.5213e+01)	Acc@1   0.00 (  1.11)	Acc@5   6.25 (  5.10)
Epoch: [0][150/391]	Time  0.071 ( 0.073)	Data  0.023 ( 0.027)	Loss 4.6776e+00 (1.1855e+01)	Acc@1   0.00 (  1.12)	Acc@5   2.34 (  5.27)
Epoch: [0][200/391]	Time  0.066 ( 0.073)	Data  0.023 ( 0.028)	Loss 4.8289e+00 (1.0085e+01)	Acc@1   0.78 (  1.04)	Acc@5   4.69 (  5.27)
Epoch: [0][250/391]	Time  0.074 ( 0.073)	Data  0.027 ( 0.028)	Loss 4.6382e+00 (9.0025e+00)	Acc@1   3.91 (  1.03)	Acc@5   4.69 (  5.33)
Epoch: [0][300/391]	Time  0.065 ( 0.073