In [1]:
from random import randint, random

import numpy as np
import math
import time
from collections import deque
import torch as T
import torch.nn.functional as F
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from tqdm import tqdm
import sklearn.neighbors
import sklearn.metrics
import matplotlib.pyplot as plt
import PIL
from nvidia.dali.plugin.pytorch import DALIGenericIterator

from imageComponent import *
from remote_read_write import scp_read_wrapper, scp_write_wrapper

# GPU index shared by the DALI pipelines (`device_id`) and by PyTorch.
# `device` is derived from `dali_device` so the two can never point at
# different GPUs after one of them is edited.
dali_device = 3
device = 'cuda:%d' % dali_device
# Let cuDNN benchmark kernels; `use_amp` toggles both autocast and the GradScaler.
T.backends.cudnn.benchmark = True
use_amp = True

# Connection arguments shared by the read and write wrappers, passed
# positionally in exactly the order the wrappers were originally called with.
# NOTE(review): these look like (user@host, port, private-key path, temp dir);
# confirm against `remote_read_write`. Consider moving them out of the notebook
# (environment variables / config file) before sharing it.
_SCP_ARGS = (
    'ly_1112103017@172.16.35.121',
    30907,
    '/public/ly/zyz/cluster_id_rsa',
    '/public/ly/zyz/imageCL/tmp',
)
scp_Tload = scp_read_wrapper(T.load, *_SCP_ARGS)
scp_Tsave = scp_write_wrapper(T.save, *_SCP_ARGS)



In [2]:
# Path prefix handed to `scp_Tsave` for the per-epoch encoder checkpoints
# ('<root>/NNNN.pth') and the loss history ('<root>/loss.pth').
root = '/public/home/ly_1112103017/zyz/imageCL/ckpt/SCifarViT'

# Backbone that is later wrapped by `SupervisedModel`.
# `ViTCifar` is not defined in the visible cells; presumably it is provided by
# the `from imageComponent import *` star import.
model = ViTCifar()

In [3]:
class SupervisedModel(nn.Module):
    """Encoder followed by a linear classification head, trained with cross-entropy.

    Args:
        encoder: Backbone module. Must be a ``VisionTransformer`` (feature width
            is read from ``encoder.heads.out_features``) or a ``ResNet``
            (feature width is read from ``encoder.fc.out_features``).
        num_class: Number of output classes of the linear head.
        label_smoothing: Value forwarded to ``F.cross_entropy`` as
            ``label_smoothing``.

    Raises:
        TypeError: If ``encoder`` is neither a ``ResNet`` nor a
            ``VisionTransformer``, because the feature width cannot be inferred.
    """
    def __init__(self, encoder, num_class, label_smoothing):
        super(SupervisedModel, self).__init__()
        self.encoder = encoder
        self.num_class = num_class
        self.label_smoothing = label_smoothing
        # VisionTransformer is tested first so that, should an encoder ever
        # match both types, it resolves the same way the original pair of
        # independent `if` statements did (the later assignment won).
        if isinstance(encoder, VisionTransformer):
            target_dim = encoder.heads.out_features
        elif isinstance(encoder, ResNet):
            target_dim = encoder.fc.out_features
        else:
            # Previously this fell through and crashed with an opaque
            # UnboundLocalError on `target_dim`.
            raise TypeError(
                'SupervisedModel expects a ResNet or VisionTransformer encoder, got %s'
                % type(encoder).__name__
            )
        self.predictor = nn.Linear(target_dim, num_class)
        self.target_dim = target_dim

    def forward(self, X, y, need_label=False):
        """Compute the classification loss for one batch.

        Args:
            X: Input batch passed straight to the encoder.
            y: Integer class targets for ``F.cross_entropy``.
            need_label: When true, also return the predicted class indices.

        Returns:
            The scalar loss, or ``(loss, predictions)`` when ``need_label`` is
            true, where ``predictions`` is a NumPy array holding the argmax of
            the logits along dim 1.
        """
        feat = self.encoder(X)
        pred = self.predictor(feat)
        loss = F.cross_entropy(pred, y, label_smoothing=self.label_smoothing)
        if need_label:
            # Detach and move to host so the caller can accumulate predictions
            # without keeping the autograd graph alive.
            return loss, T.argmax(pred, dim=1).detach().cpu().numpy()
        return loss

In [4]:
# Image-folder roots: `dataset` feeds the training DALI pipeline and `valset`
# feeds the validation DALI pipeline.
dataset, valset = (
    '/public/ly/zyz/imageCL/dataset/cifar10-image/train',
    '/public/ly/zyz/imageCL/dataset/cifar10-image/test',
)

In [5]:
# --- Input -------------------------------------------------------------------
size = (32, 32)                # handed to both the training and validation pipelines
random_crop_area = [0.2, 1.0]  # handed to the training pipeline only

# --- Optimisation ------------------------------------------------------------
batch_size = 1000
epoch = 600                    # number of training epochs; also used as T_0 of the cosine scheduler
lr = 5e-6                      # multiplied by batch_size / 256 when the optimizer is built

# --- Learning-rate schedule --------------------------------------------------
warmep = 100                   # epochs of linear warm-up before the cosine scheduler takes over
warmlr = 1e-3                  # warm-up starts at this *fraction* of the base LR (LambdaLR factor)
eta_min = 1e-4                 # eta_min of CosineAnnealingWarmRestarts

In [6]:
# NOTE(review): `dali` is not imported explicitly in the visible cells;
# presumably it is leaked by `from imageComponent import *` — confirm.

# Training input: augmenting pipeline wrapped in a PyTorch iterator whose
# batches are dicts keyed 'image' / 'label'. The last-batch policy is DROP.
pipe = DALISupervisedImageFolders(
    dataset,
    size,
    random_crop_area,
    batch_size=batch_size,
    num_threads=8,
    device_id=dali_device,
)
loader = dali.plugin.pytorch.DALIGenericIterator(
    [pipe],
    reader_name='reader',
    output_map=['image', 'label'],
    last_batch_policy=dali.plugin.base_iterator.LastBatchPolicy.DROP,
)

# Validation input: same output layout, but the last-batch policy is PARTIAL.
valpipe = DALIValdImageFolders(
    valset,
    size,
    batch_size=batch_size,
    num_threads=4,
    device_id=dali_device,
)
valloader = dali.plugin.pytorch.DALIGenericIterator(
    [valpipe],
    reader_name='reader',
    output_map=['image', 'label'],
    last_batch_policy=dali.plugin.base_iterator.LastBatchPolicy.PARTIAL,
)

In [7]:
# Wrap the backbone with a 10-way linear head (label smoothing 0.1) and move it
# to the training GPU in channels-last memory format.
Smodel = SupervisedModel(model, 10, label_smoothing=0.1)
Smodel = Smodel.to(device, memory_format=T.channels_last)

# AdamW with the base LR scaled linearly by batch_size / 256.
optim = T.optim.AdamW(Smodel.parameters(), lr=lr * batch_size / 256)

# Phase 1: linear warm-up of the LR factor from `warmlr` to 1 over `warmep` steps.
scheduler1 = T.optim.lr_scheduler.LambdaLR(
    optim,
    lambda x: min(1, warmlr + x * (1 - warmlr) / warmep),
)
# Phase 2: cosine annealing with warm restarts, first period of `epoch` steps.
# NOTE(review): with lr=5e-6 and batch_size=1000 the base LR is about 1.95e-5,
# which is *below* eta_min=1e-4, so this phase would move the LR up toward
# eta_min rather than down — confirm this is intended.
# NOTE(review): only `epoch - warmep` steps remain after warm-up, so the first
# cosine period of `epoch` steps never completes — confirm.
scheduler2 = T.optim.lr_scheduler.CosineAnnealingWarmRestarts(
    optim,
    epoch,
    T_mult=2,
    eta_min=eta_min,
)
# Switch from phase 1 to phase 2 after `warmep` scheduler steps.
scheduler = T.optim.lr_scheduler.SequentialLR(
    optim,
    schedulers=[scheduler1, scheduler2],
    milestones=[warmep],
)

In [None]:
# Supervised training loop with (optional) mixed precision.
# Per epoch: iterate the DALI loader, accumulate the summed batch loss and the
# ground-truth / predicted labels, report training accuracy, upload the encoder
# weights and the loss history, then advance the LR scheduler by one step.
scaler = T.cuda.amp.GradScaler(enabled = use_amp)
# Checkpoint 0000 holds the encoder weights before any optimisation step.
scp_Tsave(Smodel.encoder.state_dict(), '%s/%04d.pth' %(root, 0))
loss_r = []
for e in range(epoch):
    t = time.time()
    # Force training mode every epoch so the loop does not depend on whatever
    # mode an earlier cell (or a re-enabled validation pass) left the model in.
    Smodel.train()
    label_record = {'gt':[], 'pred':[]}
    l = 0  # sum (not mean) of the per-batch losses for this epoch
    for data in loader:
        with T.no_grad():
            # The iterator wraps a single pipeline, so each item is a
            # one-element list of dicts keyed by the iterator's output_map.
            x = data[0]['image']
            y = data[0]['label'].long()
            label_record['gt'].append(y.detach().cpu().numpy())
            x, y = x.to(device, memory_format = T.channels_last), y.to(device)
        with T.autocast(device_type = 'cuda', dtype = T.float16, enabled = use_amp):
            loss, pred = Smodel(x, y, need_label=True)
            label_record['pred'].append(pred)
        # AMP sequence: scale the loss for backward, step through the scaler,
        # update the scale factor, then clear gradients for the next batch.
        scaler.scale(loss).backward()
        scaler.step(optim)
        scaler.update()
        optim.zero_grad(set_to_none = True)
        l += loss.item()
    label_record['gt'] = np.concatenate(label_record['gt'], axis=0)
    label_record['pred'] = np.concatenate(label_record['pred'], axis=0)
    loss_r.append(l)
    train_acc = sklearn.metrics.accuracy_score(label_record['gt'], label_record['pred'])
    # Validation is currently disabled; the printed "val acc" is a placeholder 0.
    val_acc = 0#val_eval(Smodel, valloader, device, use_amp=use_amp)
    # Only the encoder weights are checkpointed (not the linear head, optimizer,
    # scaler or scheduler state).
    scp_Tsave(Smodel.encoder.state_dict(), '%s/%04d.pth' %(root, e + 1))
    print('epoch:%d\tloss:%f\ttrain acc:%f\tval acc:%f' %(e + 1, l, train_acc, val_acc))
    print(time.time() - t, end = '\n\n')
    scp_Tsave(loss_r, '%s/loss.pth' %root)
    # The schedule is stepped once per epoch, so `warmep` counts epochs.
    scheduler.step()

epoch:1	loss:117.872937	train acc:0.095460	val acc:0.000000
43.274288177490234

epoch:2	loss:116.930980	train acc:0.103500	val acc:0.000000
21.149876594543457

epoch:3	loss:115.045252	train acc:0.126240	val acc:0.000000
23.408918619155884

epoch:4	loss:112.688933	train acc:0.161660	val acc:0.000000
23.55535316467285

epoch:5	loss:110.602211	train acc:0.184980	val acc:0.000000
21.569566011428833

epoch:6	loss:108.894205	train acc:0.197320	val acc:0.000000
21.384183645248413

epoch:7	loss:107.486531	train acc:0.206880	val acc:0.000000
23.84987783432007

epoch:8	loss:106.488157	train acc:0.215680	val acc:0.000000
23.633785724639893

epoch:9	loss:105.707887	train acc:0.220860	val acc:0.000000
21.364647388458252

epoch:10	loss:105.059051	train acc:0.229880	val acc:0.000000
21.486798763275146

epoch:11	loss:104.451369	train acc:0.233880	val acc:0.000000
21.514723777770996

epoch:12	loss:103.830260	train acc:0.240000	val acc:0.000000
23.601081371307373

epoch:13	loss:103.217319	train acc:0.24