In [1]:
import sys 
sys.path.append('/home/aistudio/external-libraries')

import numpy as np
import scipy as sp

import os

import warnings
warnings.filterwarnings("ignore", category=Warning)

import paddle
import paddle.fluid as fluid
from paddle.static import InputSpec
from paddle.fluid.framework import core
from paddle.vision import image_load

from models.vgg import VGG, make_layers

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

import functools
from collections import Counter

import gc
from itertools import repeat
import tqdm
import time
import random

# !unzip -q data/data99524/raw_image.zip -d data/data99524/

  from collections import MutableMapping
  from collections import Iterable, Mapping
  from collections import Sized


In [2]:
# ---- Optimisation settings ----
learning_rate = 0.01       # initial value handed to the learning-rate scheduler
batch_size = 128           # mini-batch size used by both data loaders
training_epochs = 180      # number of passes over the training set
t_w = 3                    # epochs trained on the given labels before any label is altered

# ---- Data settings ----
samples = 50_000           # number of training samples
test_samples = 5_000       # number of evaluation samples
examples = samples         # alias of `samples`
n_class = 10               # number of target classes
image_size = 112           # images are resized to image_size x image_size

In [3]:
class Dataset(paddle.io.Dataset):
    """Image-folder dataset that yields ``(index, image)`` pairs.

    ``__getitem__`` does not return labels.  They are parsed from the file
    names (the integer before the first ``_``), kept in ``self.labels`` and
    written to ``save_path`` so that callers can look them up by the index
    returned with every sample.
    """

    def __init__(self, num_samples, dir_, save_path, transform):
        """Index the image files in ``dir_`` and persist their labels.

        Args:
            num_samples: Upper bound on the number of samples exposed by
                ``__len__``.
            dir_: Directory that directly contains the image files.
            save_path: Path the label array is written to with ``np.save``.
            transform: Callable applied to the image array, or ``None``.

        Raises:
            FileNotFoundError / NotADirectoryError: If ``dir_`` cannot be listed.
            ValueError: If a file name does not start with an integer label.
        """
        super(Dataset, self).__init__()
        self.num_samples = num_samples
        self.transform = transform
        self.dir = dir_
        # List only the files directly inside ``dir_``: ``__getitem__`` joins
        # the names with ``dir_``, so names taken from a sub-folder would give
        # wrong paths.  Sorting makes the index -> file mapping (and therefore
        # the saved label order) reproducible across runs and machines.
        self.files = sorted(
            name for name in os.listdir(dir_)
            if os.path.isfile(os.path.join(dir_, name))
        )
        self.labels = np.array([int(name.split('_')[0]) for name in self.files])
        np.save(save_path, self.labels)

    def __getitem__(self, index):
        """Load image ``index`` and return ``(index, transformed_array)``."""
        data_dir = os.path.join(self.dir, self.files[index])
        image = image_load(data_dir)
        data = np.array(image)
        if self.transform is not None:
            data = self.transform(data)
        return index, data

    def __len__(self):
        """Number of usable samples: never more than the files actually found."""
        return min(self.num_samples, len(self.files))

In [4]:
import paddle.vision.transforms as T


# Per-channel mean / std handed to T.Normalize, reshaped to (1, 1, 3).
mean = np.array(
    [131.0127508203125, 123.49685918457031, 107.42944918945312]
).reshape(1, 1, 3)
std = np.array(
    [69.10535081507089, 67.92458442633819, 70.61935597148465]
).reshape(1, 1, 3)

# NOTE(review): Transpose(order=(2, 1, 0)) reverses all three axes, so an
# H x W x C array becomes C x W x H (the spatial axes are swapped);
# order=(2, 0, 1) would give C x H x W.  Both pipelines use the same order,
# so train and test stay consistent -- confirm the swap is intended.

# Training pipeline: random horizontal flip + resize + axis reorder + normalise.
transform_robust = T.Compose([
    T.RandomHorizontalFlip(0.5),
    T.Resize((image_size, image_size)),
    T.Transpose(order=(2, 1, 0)),
    T.Normalize(mean=mean, std=std),
])

# Evaluation pipeline: same as above without the random flip.
transform_clean = T.Compose([
    T.Resize((image_size, image_size)),
    T.Transpose(order=(2, 1, 0)),
    T.Normalize(mean=mean, std=std),
])

# Building a Dataset also writes its label array to the given .npy path.
train_dataset = Dataset(
    samples,
    'data/data99524/training',
    'data/data99524/train_Y.npy',
    transform_robust,
)
test_dataset = Dataset(
    test_samples,
    'data/data99524/testing',
    'data/data99524/test_Y.npy',
    transform_clean,
)

train_loader = paddle.io.DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True, num_workers=6)
valid_loader = paddle.io.DataLoader(
    test_dataset, batch_size=batch_size, shuffle=False, num_workers=6)

# Labels are read back from the files the datasets have just written.
train_Y = np.load('data/data99524/train_Y.npy')
test_Y = np.load('data/data99524/test_Y.npy')

# Yt_list holds the label vector used at each epoch; entry 0 is the given labels.
train_Y_noised = train_Y
Yt_list = [train_Y_noised]
print('Finished.')

Finished.


In [5]:
class Orthogonal_loss(paddle.nn.Layer):
    """Feature-decorrelation statistic for a batch of feature vectors.

    For an input ``x`` of shape ``(n, m)`` every column is centred to give
    ``e``, the scatter matrix ``C = e^T e`` is formed, and the layer returns::

        (sum_i max_{j != i} C_ij^2  -  trace(C)) / (m_active * n)

    where ``m_active`` is the number of columns with at least one non-zero
    centred entry (at least 1).
    """

    def __init__(self,):
        super(Orthogonal_loss, self).__init__()

    def forward(self, x, ):
        """Compute the statistic described in the class docstring.

        Args:
            x: 2-D tensor, one row per sample and one column per feature.

        Returns:
            Tensor holding the scalar statistic.
        """
        n = x.shape[0]
        m = x.shape[1]

        I = paddle.eye(m)
        e = x - x.mean(axis=0, keepdim = True)

        # Count the columns that vary over the batch.  The plain column sum of
        # centred data is zero up to rounding for *every* column, so it cannot
        # separate active from inactive features; the sum of absolute
        # deviations is zero only when the whole column is zero after centring.
        active = paddle.cast(paddle.abs(e).sum(axis = 0) != 0, e.dtype)
        # Clamp to 1 so a batch without any active column does not divide by 0.
        m_nonz = paddle.clip(active.sum(), min = 1.0)

        cov = e.t() @ e
        cov2 = cov ** 2

        # For every row pick the largest squared off-diagonal entry ...
        select_i = paddle.argmax(cov2 - cov2 * I, axis = 1)
        cov_m = (paddle.nn.functional.one_hot(select_i, m) * cov2).sum()
        # ... and subtract the trace of the scatter matrix.
        cov_i = (I * cov).sum()

        result = (cov_m-cov_i) / (m_nonz*n)
        return result

# Layer layout passed to make_layers: integers and 'M' markers, one row per stage.
# Presumably integers are conv output widths and 'M' a pooling layer, as in the
# usual VGG configs -- see models.vgg.make_layers to confirm.
vgg19_cfg = [
    64, 64, 'M',
    128, 128, 'M',
    256, 256, 256, 256, 'M',
    512, 512, 512, 512, 'M',
    512, 512, 512, 512, 'M',
]

# Build the batch-norm VGG and initialise it from the stored checkpoint.
vgg19_features = make_layers(vgg19_cfg, batch_norm=True)
network_3 = VGG(vgg19_features)
network_3.set_state_dict(
    paddle.load(r'data/data184212/vgg19_bn_pretrain.pdparams')
)

# First dimension of the first parameter of the last classifier layer.
feature_num = network_3.classifier[6].parameters()[0].shape[0]

# Losses.
loss_fn = paddle.nn.CrossEntropyLoss()
loss_ortho = Orthogonal_loss()

# Momentum optimiser with weight decay, driven by an exponentially decaying
# learning rate (gamma = 0.93 per scheduler step).
scheduel_e = paddle.optimizer.lr.ExponentialDecay(
    learning_rate=learning_rate,
    gamma=0.93,
)
opt = paddle.optimizer.Momentum(
    learning_rate=scheduel_e,
    parameters=network_3.parameters(),
    weight_decay=1e-4,
)

W0529 14:44:56.144913  2397 gpu_resources.cc:61] Please NOTE: device: 0, GPU Compute Capability: 7.0, Driver API Version: 11.2, Runtime API Version: 11.2
W0529 14:44:56.149894  2397 gpu_resources.cc:91] device: 0, cuDNN Version: 8.2.


In [6]:
def validation():
    """Evaluate ``network_3`` on ``valid_loader`` and collect per-sample statistics.

    Reads the notebook globals ``network_3``, ``valid_loader``, ``test_Y``,
    ``test_samples``, ``n_class`` and ``loss_fn``.  The network is switched to
    eval mode and is left in that mode on return.

    Returns:
        tuple: ``(loss_eval, acc_eval, True_PP, False_PP, lossb, Feature_temp)``

        * ``loss_eval`` / ``acc_eval`` -- mean loss / accuracy, weighted by
          batch size so a smaller final batch does not skew the result.
        * ``True_PP`` / ``False_PP`` -- mean of ``sum(p ** 2)`` over the samples
          whose labelled-class probability is / is not the maximum probability.
        * ``lossb`` -- ``relevant_hard_np`` of the collected features.
        * ``Feature_temp`` -- ``(test_samples, feature_num)`` float32 array of
          the inputs to the last classifier layer.

    Raises:
        RuntimeError: If the loader yields no batches.
    """
    loss_eval = 0
    acc_eval = 0
    n_seen = 0
    feature_num = network_3.classifier[6].parameters()[0].shape[0]
    network_3.eval()
    PP_temp = np.zeros((test_samples,),dtype = np.float32)
    Py_temp = np.zeros((test_samples,),dtype = np.float32)
    Pm_temp = np.zeros((test_samples,),dtype = np.float32)
    Feature_temp = np.zeros((test_samples,feature_num),dtype = np.float32)
    with paddle.no_grad():
        for ind, X_data in valid_loader():
            # The dataset returns sample indices, not labels; look the labels up.
            ind = np.array(ind)
            n_batch = len(ind)
            Y_data = np.array(test_Y[ind]).astype(np.int64)
            temp_X = paddle.to_tensor(X_data)
            Y_GPU = paddle.to_tensor(Y_data)
            y_onehot = paddle.nn.functional.one_hot(paddle.reshape(Y_GPU,(-1,)),num_classes=n_class)

            # Run the network by hand so the input of the last classifier layer
            # (classifier[0..5] applied in order) can be captured as `feature`.
            classifier = network_3.classifier
            first = network_3.avgpool(network_3.features(temp_X))
            first = paddle.reshape(first, (first.shape[0], -1))
            feature = first
            for layer_idx in range(6):
                feature = classifier[layer_idx](feature)

            # Only the first n_class outputs of the last layer are used.
            logits = network_3.classifier[6](feature)[:,:n_class]
            probs = paddle.nn.functional.softmax(logits)
            PP = paddle.sum(probs * probs, axis = -1)
            Py = paddle.sum(y_onehot * probs, axis = -1)
            Pm = paddle.max(probs, axis = -1)
            PP_temp[ind] = PP.numpy()
            Py_temp[ind] = Py.numpy()
            Pm_temp[ind] = Pm.numpy()
            Feature_temp[ind] = feature.numpy()
            try:
                loss = loss_fn(logits, paddle.reshape(Y_GPU,(-1,1)))
            except Exception:
                # Retry with one-hot targets (presumably for a soft-label
                # loss_fn).  `Exception` rather than a bare except, so that
                # KeyboardInterrupt / SystemExit still stop the run.
                loss = loss_fn(logits, y_onehot)
            acc = paddle.metric.accuracy(logits, paddle.reshape(Y_GPU,(-1,1)))

            # loss and acc are batch means: weight them by the batch size.
            loss_eval += loss.numpy() * n_batch
            acc_eval += acc.numpy() * n_batch
            n_seen += n_batch

    if n_seen == 0:
        raise RuntimeError('validation(): valid_loader produced no batches')

    # A sample counts as correctly classified when its labelled class holds
    # the maximum probability.
    True_PP = np.mean(PP_temp[Py_temp >= Pm_temp])
    False_PP = np.mean(PP_temp[Py_temp < Pm_temp])
    loss_eval /= n_seen
    acc_eval /= n_seen
    lossb = relevant_hard_np(Feature_temp)
    return loss_eval, acc_eval, True_PP, False_PP, lossb, Feature_temp

def relevant_hard_np(x,):
    """Mean, over features, of the largest squared correlation with another feature.

    Args:
        x: Array of shape ``(n_samples, n_features)``.

    Returns:
        ``mean_i max_{j != i} corr(x[:, i], x[:, j]) ** 2``.  Undefined
        correlations (e.g. involving a constant column) count as 0, and the
        result is 0 when there is only one feature.
    """
    # Constant columns make corrcoef divide 0 by 0; those NaNs are zeroed below,
    # so silence the floating-point warnings they would raise.
    with np.errstate(divide='ignore', invalid='ignore'):
        r = np.corrcoef(x.T)
    # With a single feature corrcoef collapses to a scalar; restore matrix form
    # so the masking below works for every input width.
    r = np.atleast_2d(r)
    r[np.isnan(r)] = 0
    r = r ** 2

    # Ignore self-correlation.  Zeroing the diagonal in place avoids building
    # a dense (n_features x n_features) identity matrix.
    np.fill_diagonal(r, 0)
    return np.mean(np.max(r, axis = -1))

In [7]:
# History containers: every name is bound to its own, independent empty list.
(probs_list, Py_list, PP_list, Pm_list, UU_list,
 Pred_list, acc_r_list, error_list, acc_list, loss_list,
 TPP_list, FPP_list, Tacc_list, Tacco_list) = ([] for _ in range(14))

# Keep the notebook output free of library warnings.
warnings.filterwarnings("ignore", category=Warning)

In [None]:
# Per-epoch histories filled by the training loop; each name gets its own list.
logits_list, logits_other_list, Pred_list = [], [], []
Py_list, PP_list, Pm_list, Pm_other_list = [], [], [], []
acc_list, loss_list, False_PP_list, True_PP_list = [], [], [], []
du_p_list, du_y_list, du_prob_list = [], [], []
l3_feature_list, l4_feature_list, feature_list, feature_pred_list = [], [], [], []
noise_n_list, select_n_list = [], []
delta_L_list, anchor_score_list, delta_L_all_list = [], [], []
part_1_list, part_2_list, part_t_list = [], [], []
lossb_list = []

# Per-sample score history, seeded with uniform random values.
score_list = [np.random.rand(samples)]

# (samples, n_class) array of ones, indexed per batch inside the training loop.
Pred_onehot_temp = np.ones((samples, n_class), dtype=np.float32)

softmax = paddle.nn.Softmax(axis=0)

# Main training loop.  Every epoch:
#   1. one optimisation pass over train_loader, recording per-sample statistics;
#   2. evaluation through validation();
#   3. an anchor-based score that selects samples whose label is replaced by
#      the model prediction (labels are only changed once epoch_id >= t_w).
for epoch_id in range(training_epochs):
    network_3.train()
    feature_num = network_3.classifier[6].parameters()[0].shape[0]
    loader = train_loader()

    # Running sums of per-batch metrics; divided by the batch count after the pass.
    loss_train = 0
    losso_train = 0
    acc_train = 0
    acc_train_ori = 0
    loss_train_ori = 0
    # Per-sample buffers for this epoch, addressed by the dataset index.
    PP_temp = np.zeros((samples,),dtype=np.float32)
    Py_temp = np.zeros((samples,),dtype=np.float32)
    Pm_temp = np.zeros((samples,),dtype=np.float32)
    Pm_other_temp = np.zeros((samples,),dtype=np.float32)
    Pred_temp = np.zeros((samples,),dtype=np.float32)
    Pred_temp_other = np.zeros((samples,),dtype=np.float32)
    Probs_temp = np.zeros((samples,n_class),dtype=np.float32)
    Logits_temp = np.zeros((samples,n_class),dtype=np.float32)
    Logits_temp_other = np.zeros((samples,n_class),dtype=np.float32)
    feature_temp = np.zeros((samples, feature_num), dtype = np.float32)
    # Builtin `bool`: the `np.bool` alias was removed from NumPy.
    mask_temp = np.zeros((samples,), dtype = bool)

    weight_temp = np.zeros((samples,),dtype=np.float32)

    # NOTE(review): relabel_score1/2, PP_mean and the Py_mean set here are not
    # read again in this cell (Py_mean is recomputed after the pass).
    if len(Py_list)>0:
        relabel_score1=Py_list[-1] - PP_list[-1]
        relabel_score2=Py_list[-1] - Pm_other_list[-1]
        PP_mean = PP_list[-1]
        Py_mean = Py_list[-1]
    else:
        relabel_score1 = np.zeros((samples,))
        relabel_score2 = np.zeros((samples,))
        PP_mean = np.zeros((samples,))
        Py_mean = np.zeros((samples,))

    # NOTE(review): thres, r_mask and side_mask_rand are not read again in this
    # cell; they are kept because the np.random call advances the RNG stream.
    thres = np.sort(score_list[-1])[int(samples*0.8)]
    r_mask = score_list[-1] >= thres

    side_mask_rand = np.logical_and(r_mask,np.random.rand(samples,) < 0.2)

    # Score = labelled-class probability of the previous epoch once at least
    # two epochs are recorded, random before that.  OOD_mask flags the samples
    # whose score lies below the 1% quantile.
    if len(Py_list) > 1:
        score = Py_list[-1]
    else:
        score = np.random.rand(samples,)
    OOD_mask = score < np.sort(score)[int(samples * 0.01)]

    Y_onehot = np.eye(n_class)[Yt_list[-1]]
    Y_onehot_0 = np.eye(n_class)[Yt_list[0]]

    for batch_id, (ind, X_data) in tqdm.tqdm(enumerate(loader)):
        ind = np.array(ind)

        # Current (possibly relabelled) targets and the originally given ones.
        Y_data = np.array(Yt_list[-1][ind]).astype(np.int64)
        Y_data_ori = np.array(train_Y[ind]).astype(np.int64)
        pred_onehot_temp = Pred_onehot_temp[ind]
        temp_X = paddle.to_tensor(X_data)
        Y_GPU = paddle.to_tensor(Y_data)
        Y_GPU_ori = paddle.to_tensor(Y_data_ori)
        y_onehot = paddle.nn.functional.one_hot(paddle.reshape(Y_GPU,(-1,)),num_classes=n_class)

        # Manual forward pass so the input of the last classifier layer can be
        # captured as `feature`.
        classifier = network_3.classifier
        first = network_3.avgpool(network_3.features(temp_X))
        first = paddle.reshape(first, (first.shape[0], -1))
        feature = classifier[5](classifier[4](classifier[3](classifier[2](classifier[1](classifier[0](first))))))

        # Only the first n_class outputs of the last layer are used.
        logits = network_3.classifier[6](feature)[:,:n_class]

        probs = paddle.nn.functional.softmax(logits)
        PP = paddle.sum(probs * probs, axis = -1)
        Py = paddle.sum(y_onehot * probs, axis = -1)
        Pm = paddle.max(probs, axis = -1)
        # Largest probability / best class once the labelled class is masked out.
        Pm_other = paddle.max(probs - probs * y_onehot, axis = -1)
        Pred = paddle.argmax(probs,axis=-1)
        logits_other = logits - y_onehot*1e10
        Pred_other = paddle.argmax(logits_other,axis=-1)

        # NOTE(review): loss_o is only accumulated for logging and `alpha` is
        # unused; neither enters `loss`, so they do not affect the gradients.
        alpha = 1
        loss_o = loss_ortho(feature)
        if epoch_id < t_w:
            loss = loss_fn(logits,paddle.reshape(Y_GPU,(-1,1)))

        else:
            # After warm-up, samples flagged by OOD_mask are trained towards
            # the best class other than their current label.
            boundary_mask_ = paddle.to_tensor(OOD_mask[ind])
            Y_GPU = paddle.where(boundary_mask_, Pred_other, Y_GPU)
            loss = loss_fn(logits,paddle.reshape(Y_GPU,(-1,1)))

        PP_temp[ind] = PP.numpy()
        Py_temp[ind] = Py.numpy()
        Pm_temp[ind] = Pm.numpy()
        Pm_other_temp[ind] = Pm_other.numpy()
        Pred_temp[ind] = Pred.numpy()

        Probs_temp[ind] = probs.numpy()
        Logits_temp[ind] = logits.numpy()
        Logits_temp_other[ind] = logits_other.numpy()
        Pred_temp_other[ind] = Pred_other.numpy()
        feature_temp[ind] = feature.numpy()

        # `acc` is measured against the targets actually trained on (including
        # the replacement above); the *_ori metrics use the given labels.
        acc = paddle.metric.accuracy(logits, paddle.reshape(Y_GPU,(-1,1)))
        acc_ori = paddle.metric.accuracy(logits, paddle.reshape(Y_GPU_ori,(-1,1)))
        loss_ori = loss_fn(logits,paddle.reshape(Y_GPU_ori,(-1,1)))

        loss_train += loss.numpy()
        losso_train += loss_o.numpy()
        acc_train += acc.numpy()
        acc_train_ori += acc_ori.numpy()
        loss_train_ori += loss_ori.numpy()

        loss.backward()

        opt.step()
        opt.clear_grad()

    # One learning-rate decay step per epoch.
    scheduel_e.step()
    loss_train/=(batch_id+1)
    losso_train/=(batch_id+1)
    acc_train/=(batch_id+1)
    acc_train_ori/=(batch_id+1)
    loss_train_ori/=(batch_id+1)

    print('epoch %d train complete'%epoch_id)
    loss_eval, acc_eval, True_PP, False_PP, lossb_eval, feature_val = validation()

    loss_list.append(loss_eval)
    acc_list.append(acc_eval)
    True_PP_list.append(True_PP)
    False_PP_list.append(False_PP)
    Pred_list.append(Pred_temp)
    PP_list.append(PP_temp)
    Py_list.append(Py_temp)
    Pm_list.append(Pm_temp)
    Pm_other_list.append(Pm_other_temp)

    lossb_list.append(lossb_eval)

    logits_list.append(Logits_temp)
    logits_other_list.append(Logits_temp_other)
    # Keep the full feature matrix only every 20th epoch; other epochs get an
    # empty placeholder so the list stays aligned with the epoch index.
    if np.mod(epoch_id,20) == 0:
        feature_list.append(feature_temp)
    else:
        feature_list.append([])

    print('train loss:%.4f,train losso:%.4f, train acc:%.4f, train acc ori:%.4f,  eval loss:%.4f, eval acc:%.4f, lossb eval:%e'
    %(loss_train,losso_train, acc_train, acc_train_ori, loss_eval, acc_eval, lossb_eval))

    # ---- Anchor selection -------------------------------------------------
    # Py_mean: labelled-class probability averaged over all recorded epochs.
    anchor_points = []
    anchor_mask = np.zeros((samples,),dtype=bool)
    Py_mean = np.zeros((samples,))

    for j in range(len(Py_list)):
        Py_mean+=Py_list[j]
    Py_mean/=len(Py_list)

    # Anchors: per originally-labelled class, the samples at or above the
    # threshold that marks the top 10% of Py_mean.
    for j in range(n_class):
        class_mask = Yt_list[0] == j
        c_n = class_mask.sum()
        if c_n == 0:
            # Nothing to rank; indexing an empty sort result would raise.
            continue
        # At least one element: an index of -0 would address the *smallest*
        # value and turn the whole class into anchors.
        top_k = max(1, int(c_n * 0.10))
        c_th = np.sort(Py_mean[class_mask])[-top_k]
        anchor_points.append(np.where(np.logical_and(Py_mean>=c_th,class_mask))[0])
    anchor_points = np.concatenate(anchor_points)
    anchor_mask[anchor_points] = True

    # ---- Per-sample score against the anchors -------------------------------
    L_batch = 1000
    Y_onehot = np.eye(n_class)[Yt_list[-1]]
    zC = paddle.to_tensor(feature_temp[anchor_points])
    fC = paddle.to_tensor(Probs_temp[anchor_points])
    yC = paddle.to_tensor(Y_onehot[anchor_points])
    fCcyC = fC - yC
    # lr = opt.get_lr()/10
    lr = 1e-6
    # NOTE(review): delta_L, part2_L, anchor_score and delta_L_all are
    # allocated but never filled in this cell.
    delta_L = np.zeros((samples,))
    part1_L = np.zeros((samples,))
    part2_L = np.zeros((samples,))
    anchor_score = np.zeros((samples,))
    delta_L_all = np.zeros((samples, n_class))
    # Processed in chunks of L_batch rows to bound the size of zi @ zC^T.
    for j in range(int(np.ceil(samples/L_batch))):
        i_ind = np.arange(j*L_batch, min(samples,(j+1)*L_batch))
        zi = paddle.to_tensor(feature_temp[i_ind])
        fi = paddle.to_tensor(Probs_temp[i_ind])
        yi = paddle.to_tensor(Y_onehot[i_ind])
        zixzC = zi @ zC.transpose([1,0])
        part_1_1 = (zixzC + 1) @ fCcyC
        part1 = (part_1_1 * (yi-fi)).sum(axis=-1,keepdim=True)*4*lr/len(anchor_points)
        part1_all = part_1_1 * (fi-paddle.ones_like(yi))*4*lr/len(anchor_points)

        part1_L[i_ind] = part1.numpy().ravel()

    # ---- Relabelling ----------------------------------------------------------
    # Per currently-labelled class, select the samples at or above the
    # threshold that marks the top 1% of part1_L.
    # Builtin `bool`: the `np.bool8` alias was removed from NumPy.
    select = np.zeros((samples,),dtype=bool)
    for j in range(n_class):
        class_mask = Yt_list[-1] == j
        c_n = class_mask.sum()
        if c_n == 0:
            # A class can become empty through relabelling; skip it.
            continue
        # At least one element, for the same reason as in the anchor selection.
        top_k = max(1, int(c_n * (0.01)))
        c_th = np.sort(part1_L[class_mask])[-top_k]
        select[np.logical_and(part1_L>=c_th, class_mask)] = True
    if epoch_id < t_w:
        temp_Yt_noised = Yt_list[-1]
    else:
        # Selected samples take the model prediction as their new label.
        temp_Yt_noised=np.where(select.ravel(), Pred_temp.ravel(), Yt_list[-1].ravel()).astype(int)

    # Bookkeeping: labels that now differ from train_Y, in total and for the
    # class with the most such samples.
    is_noise = temp_Yt_noised != train_Y
    max_noised_class = -999
    for j_ in range(n_class):
        class_mask = train_Y == j_
        noise_n = np.logical_and(class_mask, is_noise).sum()
        if noise_n > max_noised_class:
            max_noised_class = noise_n
    Yt_list.append(temp_Yt_noised)
    print('epoch %d train cleaned, %d samples selected, %d samples changed'%(
        epoch_id,np.sum(Yt_list[-1]!=Yt_list[-2]), np.sum(Yt_list[-1]!=Yt_list[0])))
    Yt_remain_noise = np.sum(is_noise)
    print('total remain noise:%.4d, max class noise:%d'%(Yt_remain_noise, max_noised_class))
    noise_n_list.append(Yt_remain_noise)
    select_n_list.append(np.sum(select))
    score_list.append(score)

In [None]:
# Best evaluation accuracy recorded over all completed epochs.
best_eval_acc = max(acc_list)
print(best_eval_acc)

#### 