In [1]:
from __future__ import print_function
import sys
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn

import torchvision.models as models

import random
import os
import argparse
import numpy as np

import data_loader_Animal10N as dataloader

import time
import datetime
import gc



In [2]:
# Hyper-parameters and paths for the run, declared through argparse so the
# same definitions work both from a script and from this notebook.
parser = argparse.ArgumentParser(description='PyTorch CIFAR Training')
parser.add_argument('--batch_size', default=64, type=int, help='train batchsize') 
parser.add_argument('--lr', '--learning_rate', default=0.02, type=float, help='initial learning rate')
parser.add_argument('--num_epochs', default=300, type=int)
# Number of warm-up epochs before label revision starts.
parser.add_argument('--t_w', default=10, type=int)
# Per-sample probability used when drawing the random-label mask in training.
parser.add_argument('--xi', default=0.04, type=float)
# Weight of the orthogonality loss once warm-up is over.
parser.add_argument('--eta', default=10., type=float)
# Per-class fraction used to place the loss threshold for the "clean" subset.
parser.add_argument('--nc', default=0.2, type=float)
# Upper bound on the fraction of samples whose labels may be revised.
parser.add_argument('--nv', default=0.8, type=float)
parser.add_argument('--id', default='')
# type=int so a seed given as an argument is not passed on as a string to
# the seeding functions.
parser.add_argument('--seed', default=123, type=int)
parser.add_argument('--gpuid', default=0, type=int)
parser.add_argument('--data_path', default='./data/Animal10N', type=str, help='path to dataset')
parser.add_argument('--dataset', default='Animal10N', type=str)
# An explicit argument list is parsed instead of sys.argv, so these values
# (not the defaults above) are what the notebook actually runs with.
args = parser.parse_args(args = ['--data_path', './data/Animal10N',
                                 '--dataset', 'Animal10N',
                                 '--t_w', '50',
                                 '--batch_size','32',
                                 '--lr','0.1',
                                 '--num_epochs','360',
                                 '--xi', '0.04',
                                 '--eta','1',
                                 '--nc','0.8',
                                 '--nv','0.08'])

In [3]:
# Select the GPU and seed every random number generator the notebook draws from.
torch.cuda.set_device(args.gpuid)
random.seed(args.seed)
# NumPy's global generator is used by the np.random.rand calls later in the
# notebook (initial scores, per-epoch random masks, feature jitter); without
# this seed two runs with the same --seed would still diverge.
np.random.seed(args.seed)
torch.manual_seed(args.seed)
torch.cuda.manual_seed_all(args.seed)

In [4]:
# Sizes used to pre-allocate the per-sample NumPy buffers below.
# NOTE(review): presumably the train / test split sizes of the dataset served
# by the data loader — confirm against data_loader_Animal10N.
samples = 50000
test_samples = 5000
# Number of logits kept by forward_wf and width of the one-hot label matrices.
n_class = 10
# Width of the feature buffers; must equal the second dimension of the
# `feature` tensor returned by forward_wf.
feature_num = 4096
# Number of warm-up epochs, copied from the parsed arguments.
t_w = args.t_w

In [5]:
def test(epoch,net,):
    """Evaluate `net` on the test loader and record the accuracy.

    Puts the network in eval mode, runs every batch of the module-level
    `test_loader` through `forward_wf` without gradients, and stores each
    sample's feature vector at the row given by the dataset index the loader
    yields alongside it.

    Args:
        epoch: epoch number, only written to the log line.
        net: network accepted by `forward_wf`.

    Returns:
        (acc, lossb, features): accuracy in percent, the value of
        `relevant_hard_np` on the collected features, and the
        (test_samples, feature_num) feature matrix itself.

    Side effects:
        Appends one line to the module-level `test_log` and flushes it.
        The network is left in eval mode.
    """
    net.eval()
    n_correct, n_seen = 0, 0
    test_features = np.zeros((test_samples, feature_num))
    with torch.no_grad():
        for images, labels, sample_idx in test_loader:
            sample_idx = sample_idx.numpy()
            images = images.cuda()
            labels = labels.cuda()
            feats, scores = forward_wf(net, images)
            # Index of the largest logit per row.
            predicted = scores.max(dim=1)[1]

            test_features[sample_idx] = feats.detach().cpu().numpy()

            n_seen += labels.size(0)
            n_correct += (predicted == labels).sum().item()
    acc = 100. * n_correct / n_seen

    test_log.write('Epoch:%d   Accuracy:%.2f\n'%(epoch,acc))
    test_log.flush()

    return acc, relevant_hard_np(test_features), test_features


def linear_rampup(current, warm_up, rampup_length=16):
    """Return `args.lambda_u` scaled by a linear ramp clipped to [0, 1].

    The ramp is 0 while `current <= warm_up`, grows linearly over the next
    `rampup_length` steps, and stays at 1 afterwards.

    NOTE(review): this reads `args.lambda_u`, but the argument parser visible
    in this notebook defines no `--lambda_u` option, so calling the function
    as-is would raise AttributeError — confirm whether it is still needed.
    """
    progress = (current - warm_up) / rampup_length
    ramp = np.clip(progress, 0.0, 1.0)
    return args.lambda_u * float(ramp)


class NegEntropy(object):
    """Callable returning the batch-mean negative entropy of softmax(outputs)."""

    def __call__(self,outputs):
        """Compute mean over rows of sum_c p_c * log(p_c).

        Args:
            outputs: 2-D tensor of logits; softmax is taken over dim 1.

        Returns:
            Scalar tensor (always <= 0).
        """
        # log_softmax keeps the log-probabilities finite. Taking log() of the
        # softmax output instead gives -inf wherever a probability underflows
        # to 0, and 0 * -inf turns the whole result into NaN.
        log_probs = torch.log_softmax(outputs, dim=1)
        probs = log_probs.exp()
        return torch.mean(torch.sum(log_probs * probs, dim=1))
    

class Orthogonal_loss(nn.Module):
    """Penalty on the strongest cross-feature covariance within a batch.

    For a batch `x` of shape (n, m) the features are mean-centred and the
    unnormalised covariance `cov = e.T @ e` is formed. For every feature the
    largest squared off-diagonal entry of its row is selected; the loss is

        (sum of those squared entries - trace(cov)) / (m_nonz * n)

    where `m_nonz` counts the columns whose centred sum is not exactly zero.
    """

    def __init__(self,):
        super(Orthogonal_loss, self).__init__()

    def forward(self, x, ):
        """Return the scalar loss for a 2-D feature batch `x` of shape (n, m)."""
        n = x.size(0)
        m = x.size(1)

        # Build the identity on the same device/dtype as the input instead of
        # forcing it onto the current CUDA device, so the loss also works on
        # CPU tensors and on tensors living on a non-default GPU.
        I = torch.eye(m, device=x.device, dtype=x.dtype)
        e = x - x.mean(dim=0, keepdim=True)
        # NOTE(review): a centred column sums to zero up to rounding, so this
        # test is sensitive to floating-point noise — confirm it counts what
        # is intended. It can be 0, hence the clamp that keeps the division
        # below finite.
        m_nonz = (e.sum(dim=0) != 0).sum().clamp(min=1)

        cov = e.T @ e

        cov2 = cov ** 2

        # Per row, index of the largest squared covariance with the diagonal
        # zeroed out.
        select_i = torch.argmax(cov2 - cov2 * I, dim=1)
        cov_m = (F.one_hot(select_i, num_classes=m) * cov2).sum()
        cov_i = (I * cov).sum()

        result = (cov_m - cov_i) / (m_nonz * n)
        return result
    
def relevant_hard_np(x,):
    """Mean over features of the largest squared correlation with another feature.

    Args:
        x: 2-D NumPy array of shape (n_samples, n_features).

    Returns:
        Scalar: for every column, the maximum squared Pearson correlation with
        any *other* column is taken, and these maxima are averaged. Columns
        with zero variance contribute correlations of 0.
    """
    e = x - x.mean(axis = 0,keepdims = True)

    cov = e.T @ e

    sigma = (e ** 2).sum(axis = 0, keepdims = True)
    # Zero-variance columns give 0/0 here; the resulting NaNs are expected and
    # replaced below, so the warnings are silenced.
    with np.errstate(divide='ignore', invalid='ignore'):
        r = cov / (sigma.T @ sigma) ** 0.5

    r = r ** 2
    r[np.isnan(r)] = 0.0

    # Drop self-correlations in place rather than allocating a dense identity
    # matrix and two temporaries of the same size.
    np.fill_diagonal(r, 0.0)

    # Accumulate in float64 so the result does not depend on the input dtype.
    return np.mean(np.max(r, axis = -1), dtype=np.float64)
    
def create_model():
    """Build a torchvision VGG-19 (batch-norm variant) with default arguments and move it to the GPU."""
    return models.vgg19_bn().cuda()

def forward_wf(net, x):
    """Forward pass that also exposes the input of the last classifier layer.

    Args:
        net: model exposing `features`, `avgpool` and an indexable
            `classifier` with at least seven layers.
        x: input batch.

    Returns:
        (feature, logits): the output of classifier layers 0-5, and the output
        of classifier layer 6 restricted to its first `n_class` columns.
    """
    head = net.classifier
    pooled = net.avgpool(net.features(x))
    hidden = torch.reshape(pooled, (pooled.shape[0], -1))
    # Run every classifier layer except the final one.
    for layer_idx in range(6):
        hidden = head[layer_idx](hidden)
    feature = hidden
    logits = head[6](feature)[:, :n_class]
    return feature, logits

In [6]:
# Open the two text logs for this run: one handed to the data loader, one
# for per-epoch test accuracy.
# Create the target directory first so a fresh checkout does not fail with
# FileNotFoundError.
os.makedirs('./checkpoint', exist_ok=True)
# Build the "<dataset>_<date>_<hour>" prefix once so both files always share
# it, even if the cell happens to run across an hour boundary.
_log_prefix = './checkpoint/IFLC_%s_%s'%(
    args.dataset,str(datetime.date.today())+'_'+str(time.localtime().tm_hour))
stats_log=open(_log_prefix+'_stats.txt','w') 
test_log=open(_log_prefix+'_acc.txt','w')     

In [7]:

# Data-loader factory for the dataset; the stats log file is passed to it.
loader = dataloader.animal_dataloader(args.dataset,batch_size=args.batch_size,num_workers=5,root_dir=args.data_path,log=stats_log,)

print('| Building net')
net = create_model()
cudnn.benchmark = True

# Plain SGD with momentum; weight decay is currently disabled.
opt = optim.SGD(net.parameters(),
                lr=args.lr,
                momentum=0.9,
                # weight_decay=1e-5
               )

# Learning rate is multiplied by 0.1 at epochs 150 and 250.
sch = optim.lr_scheduler.MultiStepLR(opt, [150, 250,], gamma = 0.1)

# CE yields one loss value per sample; CEloss yields the batch mean.
CE = nn.CrossEntropyLoss(reduction='none')
CEloss = nn.CrossEntropyLoss()

loss_ortho = Orthogonal_loss()

# NOTE(review): not read anywhere in the visible cells, which train a single
# network — confirm whether this is still needed.
all_loss = [[],[]] # save the history of losses from two networks

# Datasets are kept so their label arrays can be read; the loaders yield
# (inputs, targets, dataset index) triples.
traindataset, trainloader = loader.run('warmup')
testdataset, test_loader = loader.run('test')

| Building net


In [None]:
# Label arrays. train_Y, noisy_Y and revised_Y all start as copies of the
# training set's noise_label; only revised_Y is reassigned by the loop below.
train_Y = np.array(traindataset.noise_label)
test_Y = np.array(testdataset.test_label)
noisy_Y = np.array(traindataset.noise_label)
revised_Y = np.array(traindataset.noise_label)

# Histories collected across epochs.
acc_list = []
# One inner list of per-batch mean losses per epoch.
loss_sep_list = [[]]
loss_train_list = []
Py_temp_list = []

# Per-sample scores, initialised with uniform random values.
score = np.random.rand(samples,)
# NOTE(review): score_uncertainty is not read in the visible cells.
score_uncertainty = np.random.rand(samples,)

# Per-sample loss of the previous epoch; random before the first epoch.
loss_before_temp = np.random.rand(samples,)

# Reference point for the "time elapsed" value printed each epoch.
start_time = time.time()

# Main training loop. Each epoch: one pass over the training loader,
# evaluation on the test loader, then (after the t_w warm-up epochs) a
# least-squares probe on the features that decides which labels to replace
# with the network's own predictions.
for epoch in range(args.num_epochs):

    net.train()
    # The loader is rebuilt every epoch: 'warmup' mode first, 'train' afterwards.
    if epoch < t_w:
        _, trainloader = loader.run('warmup')
    else:
        _, trainloader = loader.run('train')

    # Running sums for this epoch.
    loss_train = 0
    losso_train = 0
    acc_train = 0
    acc_train_ori = 0
    loss_train_ori = 0
    # Per-sample buffers, filled batch by batch through the dataset indices.
    Py_temp = np.zeros((samples,),dtype=np.float32)
    Pred_temp = np.zeros((samples,),dtype=np.float32)
    Probs_temp = np.zeros((samples,n_class),dtype=np.float32)
    Logits_temp = np.zeros((samples,n_class),dtype=np.float32)

    feature_temp = np.zeros((samples, feature_num), dtype = np.float32)

    # Samples not visited this epoch keep a random loss value.
    loss_after_temp = np.random.rand(samples,)

    # A sample gets a random training label when a uniform draw is at least
    # its min-max normalised previous-epoch loss AND a second draw is below xi.
    loss_normalized = (loss_before_temp - loss_before_temp.min()) / (loss_before_temp.max() - loss_before_temp.min())
    mask_rand = np.logical_and(np.random.rand(samples,) >= loss_normalized, np.random.rand(samples,) < args.xi)

    Y_onehot = np.eye(n_class)[revised_Y].astype(np.float32)
    Y_onehot_0 = np.eye(n_class)[noisy_Y].astype(np.float32)

    for batch_id, (X_data, targets, ind) in enumerate(trainloader):
        ind = ind.numpy()

        # Labels are looked up by dataset index; the loader's own `targets`
        # are not used.
        Y_data = np.array(revised_Y[ind]).astype(np.int64)
        Y_data_ori = np.array(train_Y[ind]).astype(np.int64)
        temp_X = X_data.cuda()
        opt.zero_grad()
        Y_GPU = torch.from_numpy(Y_data).cuda()
        Y_GPU_ori = torch.from_numpy(Y_data_ori).cuda()
        y_onehot = F.one_hot(Y_GPU.view(-1,),num_classes=n_class)

        feature, logits = forward_wf(net, temp_X)

        probs = logits.softmax(1)
        # Probability assigned to the current (revised) label.
        Py = torch.sum(y_onehot * probs, dim = -1)
        Pred = probs.argmax(-1)

        loss_o = loss_ortho(feature)

        if epoch >= args.t_w:
            # After warm-up: masked samples are trained on a uniformly random
            # label and the orthogonality loss is added with weight eta.
            mask_ = torch.from_numpy(mask_rand[ind]).cuda()
            label_rand = torch.randint(low=0, high = n_class, size=Y_GPU.size()).cuda()
            loss_array = CE(logits,torch.where(mask_, label_rand, Y_GPU))
            L = loss_array.mean()+loss_o*args.eta
        else:
            loss_array = CE(logits,Y_GPU)
            L = loss_array.mean()


        Py_temp[ind] = Py.cpu().detach().numpy()
        Pred_temp[ind] = Pred.cpu().detach().numpy()

        Probs_temp[ind] = probs.cpu().detach().numpy()
        Logits_temp[ind] = logits.cpu().detach().numpy()
        feature_temp[ind] = feature.cpu().detach().numpy()

        loss_after_temp[ind] = loss_array.cpu().detach().numpy()

        correct = (Pred == Y_GPU).sum().item()
        correct_ori = (Pred == Y_GPU_ori).sum().item()

        loss_train += loss_array.mean().item()
        losso_train += loss_o.item()
        acc_train += correct
        acc_train_ori += correct_ori

        loss_sep_list[-1].append(loss_array.mean().item())

        L.backward()
        nn.utils.clip_grad_norm_(net.parameters(), 1)
        opt.step()
    sch.step()

    loss_before_temp = loss_after_temp.copy()

    loss_train/=(batch_id+1)
    losso_train/=(batch_id+1)
    acc_train/=samples
    acc_train_ori/=samples

    print('epoch %d train complete'%epoch)
    acc_eval, lossb_eval, feature_val = test(epoch, net)

    # Small uniform jitter on both feature matrices before they are used
    # below. NOTE(review): presumably keeps constant feature columns from
    # making the least-squares system rank deficient — confirm.
    feature_temp += np.random.rand(*feature_temp.shape) * 1e-3
    feature_val += np.random.rand(*feature_val.shape) * 1e-3

    # Feature-correlation statistic on the first 10000 training rows only.
    lossb = relevant_hard_np(feature_temp[:10000])

    acc_list.append(acc_eval)
    loss_train_list.append(loss_after_temp)
    Py_temp_list.append(Py_temp)

    end_time = time.time()
    print(
        'train loss:%.4f,train losso:%.4f, train acc:%.4f, train acc ori:%.4f,lossb:%e,\
          eval acc:%.4f, lossb eval:%e, time elapsed:%.4f'
    %(loss_train,losso_train, acc_train, acc_train_ori, lossb,
     acc_eval, lossb_eval, end_time - start_time))  

    if epoch < t_w:
        # Built-in bool: the np.bool alias was removed in NumPy 1.24 and
        # raises AttributeError there.
        select = np.zeros((samples,),dtype = bool)
        score = np.random.rand(samples,)
    else:
        # Per class (by original noisy label), mark the samples whose loss
        # lies below the class's nc-quantile as "clean".
        clean_mask = np.zeros((samples,),dtype = bool)
        for j_ in range(n_class):
            class_mask = noisy_Y == j_
            c_n = class_mask.sum()
            if c_n > 1:
                # Clamp so nc == 1.0 selects the largest loss instead of
                # indexing one past the end of the sorted array.
                thres_idx = min(int(c_n * args.nc), c_n - 1)
                thres = np.sort(loss_before_temp[class_mask])[thres_idx]
                clean_mask[np.logical_and(loss_before_temp < thres, class_mask)] = True
        feature_gpu = torch.from_numpy(feature_temp[clean_mask]).cuda()
        Y_onehot_gpu = torch.from_numpy(Y_onehot_0[clean_mask],).cuda()


        # Linear probe: least-squares map from clean features to their
        # one-hot noisy labels.
        W = torch.linalg.lstsq(feature_gpu, Y_onehot_gpu).solution

        W_cpu = W.cpu().detach().numpy()

        f_prob = feature_temp @ W_cpu
        f_prob_val = feature_val @ W_cpu

        f_pred = np.argmax(f_prob, axis = -1)

        print('f_prob_l4_val : %.4f'%((np.argmax(f_prob_val, axis = -1) == test_Y).mean()))

        # Squared distance between the probe output and the noisy one-hot
        # label; large values mean the label disagrees with the features.
        score = np.sum((Y_onehot_0 - f_prob)**2, axis = -1)

        # Fraction of samples to revise grows by 0.02 per epoch, capped at nv.
        r_ = min(0.02 + 0.02 * (epoch - t_w), args.nv)

        class_thres = np.sort(score)[int(samples * (1-r_))]
        select = score >= class_thres

        # Selected samples take the network's prediction; the rest revert to
        # the original noisy label.
        revised_Y = np.where(select.ravel(), Pred_temp.ravel(), noisy_Y.ravel()).astype(int)


    # Report how many labels now differ from the originals, overall and for
    # the most affected class.
    is_noise = revised_Y != train_Y
    max_noised_class = -999
    for j_ in range(n_class):
        class_mask = train_Y == j_
        noise_n = np.logical_and(class_mask, is_noise).sum()
        if noise_n > max_noised_class:
            max_noised_class = noise_n


    print('epoch %d train cleaned, %d samples changed'%(
        epoch, np.sum(revised_Y!=noisy_Y)))
    Yt_remain_noise = np.sum(is_noise)
    print('total remain noise:%.4d, max class noise:%d'%(Yt_remain_noise, max_noised_class))

    # Start a fresh per-batch loss list for the next epoch.
    loss_sep_list.append([])

    gc.collect()