In [None]:
import os
import torch
import torch.nn as nn
import numpy as np
import argparse
import copy
# import data_loader
import pdb
import torch.nn.functional as F
import re, random, collections
import pickle

# Choose the GPU before any CUDA-related call is made, so the device mask is
# already in place when CUDA is first initialised.
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]='1'

# Seed every RNG this notebook imports (Python, NumPy, torch CPU and CUDA).
# Seeding only torch leaves any `random` / `numpy` based shuffling or
# augmentation different from run to run.
rd=3473
random.seed(rd)
np.random.seed(rd)
torch.manual_seed(rd)
torch.cuda.manual_seed_all(rd)

In [None]:
# Plain namespace of experiment settings. The class is instantiated right
# below and the instance is handed to IncrementalDataset as `args=args`, so
# fields that are not referenced elsewhere in this notebook (gamma, memory, mu,
# beta, r, overflow, ...) are presumably read by that module -- TODO confirm.
class args():
    
    # Root directory of the Tiny ImageNet dataset; download it and adjust this path.
    data_path = "/data/dataset/tiny_imagenet/tiny-imagenet-200/"
    num_class = 200
    # Classes per task; also the width of the classifier head built by Net.
    class_per_task = 20
    # Alias of class_per_task; train()/test() use it to shift labels per task.
    n_classes=class_per_task
    # Number of tasks the training loop iterates over.
    num_task = 10
    dataset = "tinyimagenet"
    train_batch = 128
    test_batch = 256
    workers = 16
    gamma = 0.2
    random_classes = False
    validation = 0
    memory = 2000
    mu = 1
    beta = 1.0
    r = 2
    overflow=False
    print_freq=100
    
    # Initial SGD learning rate used for every task.
    lr=0.01
    resume=False
    # Epochs trained per task.
    total_epoch=1
    # Directory for per-task checkpoints; the accuracy list is saved to model_path + '.npy'.
    model_path='ckpt816/vgg16-3conv816'

# Rebinds the name `args` from the class to an instance of it. Re-running the
# cell still works because the class statement above re-creates the class first.
args=args()

In [None]:
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torch.utils.data import DataLoader
import torchvision
import torchvision.transforms as transforms
import argparse
# from utils import progress_bar
from torch.optim.lr_scheduler import MultiStepLR
# Print tensors with 5 decimal places and without scientific notation.
torch.set_printoptions(precision=5,sci_mode=False)

In [None]:
class ConvAdapt(nn.Module):
    """Adapter layer: the sum of a grouped k x k convolution and a grouped 1 x 1 convolution.

    Args:
        in_channels: number of input channels of both branches.
        out_channels: number of output channels of both branches.
        p: channel count the group numbers are derived from (``p / divisor``).
        kernel: kernel size of the grouped (``gwc``) branch.
        stride: stride applied to BOTH branches so their outputs stay the
            same spatial size and can be added.
        padding: padding of the grouped (``gwc``) branch.
        gwc_divisor: ``p`` is divided by this to get the group count of the
            grouped branch. Defaults to the notebook-level global ``gp``.
        pwc_divisor: ``p`` is divided by this to get the group count of the
            pointwise branch. Defaults to the notebook-level global ``pt``.

    The sub-module names ``gwc`` and ``pwc`` are part of the checkpoint format
    (state-dict keys) and must not be renamed.
    """

    def __init__(self, in_channels, out_channels, p,kernel,stride,padding,
                 gwc_divisor=None, pwc_divisor=None):
        super(ConvAdapt, self).__init__()
        # Fall back to the module-level settings only when the caller did not
        # pass explicit divisors; existing 6-argument call sites are unchanged.
        if gwc_divisor is None:
            gwc_divisor = gp
        if pwc_divisor is None:
            pwc_divisor = pt
        # Groupwise Convolution
        self.gwc = nn.Conv2d(in_channels, out_channels, kernel_size=kernel,
                             stride=stride, padding=padding,
                             groups=int(p/gwc_divisor), bias=True)
        # Pointwise Convolution. It must use the same stride as the grouped
        # branch; otherwise the two outputs differ in spatial size for any
        # stride other than 1 and the addition in forward() fails.
        self.pwc = nn.Conv2d(in_channels, out_channels, kernel_size=1,
                             stride=stride, groups=int(p/pwc_divisor), bias=True)

    def forward(self, x):
        """Return ``gwc(x) + pwc(x)``."""
        return self.gwc(x) + self.pwc(x)
    
'''VGG11/13/16/19 in Pytorch.'''
import torch
import torch.nn as nn


# Layer specifications: an integer is the output width of a conv stage and
# 'M' is a 2x2 max-pool. Only 'VGG16' is used by Net in this notebook; note
# that this 'VGG16' entry has four pooling stages and ten conv stages.
# NOTE(review): the 'VGG11' entry also contains 'G' markers, which
# Net._make_layers does not handle (anything other than 'M' is passed to
# nn.Conv2d as a channel count) -- do not select it without adding support.
cfg = {
    'VGG11': [64,'G', 'M', 128,'G', 'M', 256,'G', 256,'G', 'M', 512,'G', 512,'G', 'M', 512, 'G',512,'G', 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}

class Net(nn.Module):
    """VGG-style feature extractor with a ConvAdapt adapter after every conv.

    The feature stack is built from ``cfg['VGG16']`` and is followed by a
    single linear head with ``args.class_per_task`` outputs. The head expects
    2048 flattened features (512 channels x 2 x 2 after the pooling stages,
    which presumably corresponds to 64x64 inputs -- confirm against the data
    loader).
    """

    def __init__(self):
        super().__init__()
        self.features = self._make_layers(cfg['VGG16'])
        self.classifier = nn.Linear(2048, args.class_per_task)

    def forward(self, x):
        """Run the feature stack, flatten per sample and classify."""
        feats = self.features(x)
        flat = feats.view(feats.size(0), -1)
        return self.classifier(flat)

    def _make_layers(self, cfg):
        """Translate a layer specification list into an ``nn.Sequential``.

        ``'M'`` becomes a 2x2 max-pool; every other entry is taken as the
        output width of a stage made of Conv2d -> ConvAdapt -> BatchNorm2d ->
        ReLU. A final 2x2 average pool closes the stack. The module order is
        significant: checkpoint keys and ``grad_false`` rely on it.
        """
        stages = []
        prev_channels = 3
        for spec in cfg:
            if spec == 'M':
                stages.append(nn.MaxPool2d(kernel_size=2, stride=2))
                continue
            stages.append(nn.Conv2d(prev_channels, spec, kernel_size=3, padding=1))
            prev_channels = spec
            stages.extend([
                ConvAdapt(prev_channels, prev_channels, prev_channels, 3, 1, 1),
                nn.BatchNorm2d(spec),
                nn.ReLU(inplace=True),
            ])
        stages.append(nn.AvgPool2d(kernel_size=2, stride=2))
        return nn.Sequential(*stages)

In [None]:
def save_model(task,acc,model):
    """Write the model weights and accuracy for ``task`` to a checkpoint file.

    The file is ``<args.model_path>/ckpt_task<task>.pth`` and holds a dict
    with the keys ``'net'`` (state dict) and ``'acc'``. The directory is
    created when it does not exist yet.
    """
    print('Saving..')
    payload = {'net': model.state_dict(), 'acc': acc}
    ckpt_dir = args.model_path
    os.makedirs(ckpt_dir, exist_ok=True)
    torch.save(payload, ckpt_dir+'/ckpt_task'+str(task)+'.pth')

        
def load_model(task,model):
    """Load the checkpoint of ``task`` into ``model`` in place.

    Reads ``<args.model_path>/ckpt_task<task>.pth`` and returns the accuracy
    value stored alongside the weights.
    """
    ckpt_path = args.model_path+'/ckpt_task'+str(task)+'.pth'
    print('==> Resuming from checkpoint..')
    print(ckpt_path)
    state = torch.load(ckpt_path)
    model.load_state_dict(state['net'])
    return state['acc']

In [None]:
def train(train_loader,epoch,task,model):
    """Train ``model`` for one epoch on ``train_loader`` and print a summary.

    Uses the notebook-level ``optimizer`` and ``criterion``. Labels are shifted
    by ``task * args.n_classes`` before the loss is computed, so they index the
    ``args.n_classes``-wide classifier head (this assumes the loader yields
    labels offset by task -- confirm against the data loader).

    Args:
        train_loader: iterable of ``(inputs, targets)`` batches.
        epoch: epoch index, used only for logging.
        task: task index, used for the label shift.
        model: network to optimise; it is switched to training mode.
    """
    print('\nEpoch: %d' % epoch)
    model.train()
    train_loss = 0
    correct = 0
    total = 0
    num_batches = 0
    for inputs, targets in train_loader:
        inputs, targets = inputs.cuda(), targets.cuda()
        targets=targets-task*args.n_classes
        optimizer.zero_grad()
        outputs = model(inputs)

        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
        num_batches += 1

    # Average over the number of batches actually seen. Dividing by the last
    # batch index overstated the loss and failed on a single-batch loader.
    acc = 100.*correct/total if total else 0.0
    mean_loss = train_loss/num_batches if num_batches else 0.0

    print("[Train: ], [%d/%d: ], [Accuracy: %f], [Loss: %f] [Lr: %f]"
          %(epoch,args.total_epoch,acc, mean_loss,optimizer.param_groups[0]['lr']))

In [None]:
def test(test_loader,task,model):
    """Evaluate ``model`` on ``test_loader`` and checkpoint it on improvement.

    Uses the notebook-level ``criterion``. Labels are shifted by
    ``task * args.n_classes`` exactly as in ``train``. When the accuracy is
    strictly greater than the global ``best_acc``, the model is saved through
    ``save_model`` and ``best_acc`` is updated; an evaluation that never beats
    the current ``best_acc`` therefore writes no checkpoint.

    Args:
        test_loader: iterable of ``(inputs, targets)`` batches.
        task: task index, used for the label shift and the checkpoint name.
        model: network to evaluate; it is switched to eval mode.

    Returns:
        Accuracy in percent over all samples of ``test_loader``.
    """
    global best_acc
    model.eval()
    test_loss = 0
    correct = 0
    total = 0
    num_batches = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.cuda(), targets.cuda()
            targets=targets-task*args.n_classes
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
            num_batches += 1

    # Average over the number of batches actually seen. Dividing by the last
    # batch index overstated the loss and failed on a single-batch loader.
    acc = 100.*correct/total if total else 0.0
    mean_loss = test_loss/num_batches if num_batches else 0.0
    print("[Test Accuracy: %f], [Loss: %f]" %(acc,mean_loss))

    if acc>best_acc:
        save_model(task,acc,model)
        best_acc=acc
    return acc

In [None]:
def grad_false(modelm):
    """Freeze a fixed set of parameters of ``modelm`` by position.

    Parameters are numbered in the order ``modelm.parameters()`` yields them.
    Positions 0, 1, 8, 9, ..., 72, 73 (the first two of every group of eight,
    for ten groups) get ``requires_grad = False``. For the ``Net`` defined in
    this notebook each conv stage registers eight parameters, the first two
    being the weight and bias of the plain ``nn.Conv2d``.
    """
    frozen_positions = {start + offset
                        for start in range(0, 80, 8)
                        for offset in (0, 1)}

    for position, param in enumerate(modelm.parameters()):
        if position in frozen_positions:
            param.requires_grad = False


In [None]:
# Build the incremental dataset once and materialise the loaders of every task
# up front. IncrementalDataset comes from the project module
# `incremental_dataloader`, which is not part of this notebook; the meaning of
# its keyword arguments should be checked there.
import incremental_dataloader as data
inc_dataset = data.IncrementalDataset(
                                dataset_name=args.dataset,
                                args = args,
                                random_order=args.random_classes,
                                shuffle=True,
                                seed=1,
                                batch_size=args.train_batch,
                                workers=args.workers,
                                validation_split=args.validation,
                                increment=args.class_per_task,
                            )
# task_data[t] == [train_loader, test_loader] for task t. new_task() is called
# once per task, in order; the task info and validation loader it returns are
# not stored.
task_data=[]
for i in range(args.num_task):
    task_info, train_loader, val_loader, test_loader = inc_dataset.new_task()
    task_data.append([train_loader,test_loader])

In [None]:
# Group divisors read by ConvAdapt when Net() is constructed below.
gp=8
pt=16


import incremental_dataloader as data

# Best test accuracy of every task, in task order.
task_acc=[]
for task in range(args.num_task):
    print('Training Task :---'+str(task)+'\n')
    # test() compares against this global and checkpoints on improvement.
    best_acc=0

    train_loader, test_loader = task_data[task]
    modelm = Net().cuda()

    if task>=1:
        # Later tasks start from the previous task's best checkpoint and
        # freeze the parameters selected by grad_false.
        acc1=load_model(task-1,modelm)
        grad_false(modelm)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(modelm.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
    schedulerG = MultiStepLR(optimizer, milestones=[70, 100,120], gamma=0.1)

    for epoch in range(args.total_epoch):
        train(train_loader,epoch,task,modelm)
        test(test_loader,task,modelm)
        schedulerG.step()

    # Reload the best checkpoint written for this task and record its accuracy.
    acc1=load_model(task,modelm)

    task_acc.append(acc1)
    np.save(args.model_path+'.npy',task_acc)
    print('Task: '+str(task)+'  Test_accuracy1: '+ str(acc1))

print(task_acc)

In [None]:
# Mean of the per-task accuracies collected in task_acc by the training loop.
np.mean(task_acc)