In [1]:
import sys 
sys.path.append('/home/aistudio/external-libraries')

import numpy as np
import scipy as sp

import warnings
warnings.filterwarnings("ignore", category=Warning)

import paddle
import paddle.fluid as fluid
from paddle.static import InputSpec
from paddle.fluid.framework import core

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd


import functools

import gc
from itertools import repeat
from collections import Counter
import tqdm
import json
import time

  from collections import MutableMapping
  from collections import Iterable, Mapping
  from collections import Sized


In [2]:
learning_rate = 5e-5
batch_size = 64
samples = 50000
test_samples = 10000
examples = samples
n_class = 100

training_epochs = 100

noise_type = 'uniform_flip' # can be set as 'pair_flip' or 'uniform_flip'
noise_rate = 0.8 # the noise rate
image_size = 112
n_C = 0.1 # nearly clean samples selection parameter
n_V = 0.2 # correction proportion
n_R = 0.05 # relabeling proportion
t_w = 3 # warm up epoch

In [3]:

import paddle.vision.transforms as T
from paddle.vision.transforms import BaseTransform

class Dataset(paddle.io.Dataset):
    def __init__(self, num_samples, paddle_data, ):
        super(Dataset, self).__init__()
        self.num_samples = num_samples
        self.paddle_data = paddle_data
        

    def __getitem__(self, index):
        image = self.paddle_data[index][0]
        data = np.array(image)

        return index, data
    def __len__(self):
        return self.num_samples

mean,std = ([0.4914, 0.4822, 0.4465],[0.2471, 0.2435,0.2616])
mean = np.array(mean).reshape(1,1,3)*255
std = np.array(std).reshape(1,1,3)*255

transform_robust = T.Compose([
                    T.ColorJitter(),
                    T.RandomHorizontalFlip(0.5),
                    T.Resize((image_size, image_size)),
                    T.Transpose(order=(2,1,0,)),
                    T.Normalize(mean=mean,std=std)
                    ])

transform_clean = T.Compose([
                    # T.ColorJitter(),
                    T.Resize((image_size, image_size)),
                    T.Transpose(order=(2,1,0,)),
                    T.Normalize(mean=mean,std=std)
                    ])


print('Start download training data and load training data.')
train_dataset_robust = paddle.vision.datasets.Cifar100(mode='train', transform=transform_robust, download = True)
test_dataset = paddle.vision.datasets.Cifar100(mode='test', transform=transform_clean, download = True)

train_data = Dataset(samples, train_dataset_robust)
test_data = Dataset(test_samples, test_dataset)
train_loaderrobust = paddle.io.DataLoader(train_data, batch_size=batch_size, shuffle=True, num_workers=6,)
valid_loader = paddle.io.DataLoader(test_data, batch_size=batch_size, shuffle=False, num_workers=6,)
print('Finished.')

Start download training data and load training data.
Finished.


In [4]:
try:
    train_Y = np.load(r'./work/cifar-100-train_Y.npy').reshape(-1, )
    test_Y = np.load(r'./work/cifar-100-test_Y.npy').reshape(-1, )
except :
    train_Y = []
    for d in tqdm.tqdm(train_dataset):
        train_Y.append(d[1])
    train_Y = np.array(train_Y).reshape(-1,)
    np.save(r'./work/cifar-100-train_Y.npy', train_Y)
    test_Y = []
    for d in tqdm.tqdm(test_dataset):
        test_Y.append(d[1])
    test_Y = np.array(test_Y).reshape(-1,)
    np.save(r'./work/cifar-100-test_Y.npy', test_Y)

In [5]:
np.random.seed(1)

if noise_type == 'pair_flip':
    noise_ = np.where(np.random.binomial(1, noise_rate, train_Y.shape), 1,0)
    train_Y_noised = train_Y.copy()
    for i in range(len(noise_)):
        if noise_[i]>0:
            if np.mod(train_Y_noised[i]+1,5) == 0:
                train_Y_noised[i] -=4
            else:
                train_Y_noised[i] +=1
elif noise_type == 'uniform_flip':
    noise_=np.where(np.random.binomial(n=1,p=noise_rate,size=train_Y.shape),np.random.randint(low=1,high=n_class-1,size=train_Y.shape),0)
    train_Y_noised = np.mod(noise_ + train_Y,n_class)
    train_Y_noised = np.array(train_Y_noised)
else:
    raise ValueError("The noise_type must be 'uniform_flip' or 'pair_flip'.")

Yt_list = [train_Y_noised]

In [6]:
def validation():
    loss_eval = 0
    acc_eval = 0
    network_3.eval()
    Py_temp = np.zeros((test_samples,),dtype = np.float32)
    for batch_id, (ind,X_data) in enumerate(valid_loader()):
        ind = np.array(ind)
        with paddle.no_grad():
            Y_data = np.array(test_Y[ind]).astype(np.int64)
            X_GPU = paddle.to_tensor(X_data)
            Y_GPU = paddle.to_tensor(Y_data)
            y_onehot = paddle.nn.functional.one_hot(paddle.reshape(Y_GPU,(-1,)),num_classes=n_class)

            logits = network_3(X_GPU)
            probs = paddle.nn.functional.softmax(logits)
            Py = paddle.sum(y_onehot * probs, axis = -1)
            Py_temp[ind] = Py.numpy()
 
            loss = loss_fn(logits, paddle.reshape(Y_GPU,(-1,1)))
            acc = paddle.metric.accuracy(logits, paddle.reshape(Y_GPU,(-1,1)))

        loss_eval += loss.numpy()
        acc_eval += acc.numpy()

    loss_eval/=(batch_id+1)
    acc_eval/=(batch_id+1)
    return loss_eval, acc_eval

In [7]:
Py_list = []
acc_list = []
loss_list = []
warnings.filterwarnings("ignore", category=Warning) 

In [None]:
network_3 = paddle.vision.models.resnet34(num_classes=n_class,pretrained=True)
loss_fn = paddle.nn.CrossEntropyLoss()
opt = paddle.optimizer.Adam(learning_rate=learning_rate, parameters=network_3.parameters(),)
save_time = time.asctime()

for epoch_id in range(training_epochs):
    network_3.train()
    loader = train_loaderrobust()

    loss_train = 0
    acc_train = 0
    acc_train_ori = 0
    loss_train_ori = 0
    Py_temp = np.zeros((samples,),dtype=np.float32)
    Pred_temp = np.zeros((samples,),dtype=np.float32)
    Probs_temp = np.zeros((samples,n_class),dtype=np.float32)
    feature_temp = np.zeros((samples, 512), dtype = np.float32)
    
    if len(Py_list) > 1:
        score = Py_list[-1]
    else:
        score = np.random.rand(samples,)
    OOD_mask = score < np.sort(score)[int(samples * n_R)]
    
    for batch_id, (ind,X_data) in tqdm.tqdm(enumerate(loader)):
        ind = np.array(ind)
        Y_data = np.array(Yt_list[-1][ind]).astype(np.int64)
        Y_data_ori = np.array(Yt_list[0][ind]).astype(np.int64)
        temp_X = paddle.to_tensor(X_data)
        Y_GPU = paddle.to_tensor(Y_data)
        Y_GPU_ori = paddle.to_tensor(Y_data_ori)
        y_onehot = paddle.nn.functional.one_hot(paddle.reshape(Y_GPU,(-1,)),num_classes=n_class)

        first = network_3.maxpool(network_3.relu(network_3.bn1(network_3.conv1(temp_X))))
        l1 = network_3.layer1(first)
        l2 = network_3.layer2(l1)
        l3 = network_3.layer3(l2)
        l4 = network_3.layer4(l3)
        feature = paddle.reshape(network_3.avgpool(l4), (l4.shape[0],-1))
        
        logits = network_3.fc(feature)

        probs = paddle.nn.functional.softmax(logits)
        Py = paddle.sum(y_onehot * probs, axis = -1)
        Pred = paddle.argmax(probs,axis=-1)
        logits_other = logits - logits * y_onehot
        Pred_other = paddle.argmax(logits_other,axis=-1)
        if epoch_id < t_w:
            loss = loss_fn(logits,paddle.reshape(Y_GPU,(-1,1)))
        else:
            Y_GPU = paddle.where(paddle.to_tensor(OOD_mask[ind]), Pred_other, Y_GPU)
            loss = loss_fn(logits,paddle.reshape(Y_GPU,(-1,1)))
            
        Py_temp[ind] = Py.numpy()
        Pred_temp[ind] = Pred.numpy()

        Probs_temp[ind] = probs.numpy()
        feature_temp[ind] = feature.numpy()
        
        acc = paddle.metric.accuracy(logits, paddle.reshape(Y_GPU,(-1,1)))
        acc_ori = paddle.metric.accuracy(logits, paddle.reshape(Y_GPU_ori,(-1,1)))
        loss_ori = loss_fn(logits,paddle.reshape(Y_GPU_ori,(-1,1)))

        loss_train += loss.numpy()
        acc_train += acc.numpy()
        acc_train_ori += acc_ori.numpy()
        loss_train_ori += loss_ori.numpy()
        
        loss.backward()
        
        opt.step()
        opt.clear_grad()
        
    loss_train/=(batch_id+1)
    acc_train/=(batch_id+1)
    acc_train_ori/=(batch_id+1)
    loss_train_ori/=(batch_id+1)

    print('epoch %d train complete'%epoch_id)
    loss_eval, acc_eval = validation()


    loss_list.append(loss_eval)
    acc_list.append(acc_eval)
    Py_list.append(Py_temp)
    

    print('train loss:%.4f, train acc:%.4f, train acc ori:%.4f,  eval loss:%.4f, eval acc:%.4f'
    %(loss_train, acc_train, acc_train_ori, loss_eval, acc_eval))  



    if epoch_id >= t_w:
        nC_points = []
        anchor_mask = np.zeros((samples,),dtype=bool)
        Py_mean = np.zeros((samples,))
        for j in range(len(Py_list)):
            Py_mean+=Py_list[j]
        Py_mean/=len(Py_list)   
        for j in range(n_class):
            class_mask = Yt_list[0] == j
            c_n = class_mask.sum()
            c_th = np.sort(Py_mean[class_mask])[-int(c_n * n_C)]
            nC_points.append(np.where(np.logical_and(Py_mean>=c_th,class_mask))[0])
        nC_points = np.concatenate(nC_points)
        L_batch = 1000
        Y_onehot = np.eye(n_class)[Yt_list[-1]]
        zC = paddle.to_tensor(feature_temp[nC_points])
        fC = paddle.to_tensor(Probs_temp[nC_points])
        yC = paddle.to_tensor(Y_onehot[nC_points])
        fCcyC = fC - yC
        lr = 1e-6
        learning_risk = np.zeros((samples,)) 
        for j in range(int(np.ceil(samples/L_batch))):
            i_ind = np.arange(j*L_batch, min(samples,(j+1)*L_batch))
            zi = paddle.to_tensor(feature_temp[i_ind])
            fi = paddle.to_tensor(Probs_temp[i_ind])
            yi = paddle.to_tensor(Y_onehot[i_ind])
            zixzC = zi @ zC.transpose([1,0])
            part_1_1 = (zixzC + 1) @ fCcyC
            part1 = (part_1_1 * (yi-fi)).sum(axis=-1,keepdim=True)*4*lr/len(nC_points)
            part1_all = part_1_1 * (fi-paddle.ones_like(yi))*4*lr/len(nC_points)

            learning_risk[i_ind] = part1.numpy().ravel()

        select = np.zeros((samples,),dtype=np.bool8)
        for j in range(n_class):
            class_mask = Yt_list[-1] == j
            c_n = class_mask.sum()
            c_th = np.sort(learning_risk[class_mask])[-min(int(c_n * n_V),c_n)]
            select[np.logical_and(learning_risk>=c_th, class_mask)] = True
        
        temp_Yt_noised=np.where(select.ravel(), Pred_temp.ravel(), Yt_list[-1].ravel()).astype(int)
        Yt_list.append(temp_Yt_noised)


    with open(r'./result/cifar100_acc_eval%s.txt'%save_time,'a') as f:
        f.write('%.8f\n'%acc_eval)
    with open(r'./result/cifar100_loss_eval%s.txt'%save_time,'a') as f:
        f.write('%.8f\n'%loss_eval)
    
    gc.collect()

W0425 17:12:59.835304 28524 gpu_resources.cc:61] Please NOTE: device: 0, GPU Compute Capability: 7.0, Driver API Version: 11.2, Runtime API Version: 11.2
W0425 17:12:59.839766 28524 gpu_resources.cc:91] device: 0, cuDNN Version: 8.2.
782it [00:53, 14.48it/s]

epoch 0 train complete





train loss:4.7390, train acc:0.0215, train acc ori:0.0215,  eval loss:4.1781, eval acc:0.1605


782it [00:49, 15.73it/s]

epoch 1 train complete





train loss:4.5031, train acc:0.0608, train acc ori:0.0608,  eval loss:3.6519, eval acc:0.3195


782it [00:49, 15.71it/s]

epoch 2 train complete





train loss:4.3541, train acc:0.0949, train acc ori:0.0949,  eval loss:3.3522, eval acc:0.3766


782it [00:51, 15.06it/s]

epoch 3 train complete





train loss:4.0135, train acc:0.1731, train acc ori:0.1231,  eval loss:3.0239, eval acc:0.4001


782it [00:51, 15.18it/s]

epoch 4 train complete





train loss:3.3586, train acc:0.2746, train acc ori:0.1593,  eval loss:2.7054, eval acc:0.4082


782it [00:52, 15.00it/s]

epoch 5 train complete





train loss:2.5883, train acc:0.4099, train acc ori:0.1957,  eval loss:2.5404, eval acc:0.4138


782it [00:51, 15.15it/s]

epoch 6 train complete





train loss:1.9600, train acc:0.5354, train acc ori:0.2285,  eval loss:2.4546, eval acc:0.4343


782it [00:52, 14.85it/s]

epoch 7 train complete





train loss:1.5312, train acc:0.6196, train acc ori:0.2456,  eval loss:2.4407, eval acc:0.4441


457it [00:31, 11.72it/s]

In [None]:
max(acc_list)