In [2]:
import numpy as np
import shutil
import os
import time
import datetime
import sys
import pathlib
import torch
import torchvision.datasets as datasets
import torchvision.models as models
from torchvision import transforms
from torch import nn
import torch.nn.functional as F
import torch.nn.init as init
import torch.optim as optim
!pip install torch-ema
from torch_ema import ExponentialMovingAverage
from torch.nn.utils import vector_to_parameters, parameters_to_vector

import random
import json
import matplotlib.pyplot as plt

from tqdm.notebook import tqdm #percentage meter on for-loops

# Run nvidia-smi through the IPython shell escape; the captured output is joined into one string below.
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
# The word 'failed' in the nvidia-smi output is treated as "no GPU attached to this runtime".
if gpu_info.find('failed') >= 0:
  print('Select the Runtime → "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

Collecting torch-ema
  Downloading torch_ema-0.3-py3-none-any.whl (5.5 kB)
Installing collected packages: torch-ema
Successfully installed torch-ema-0.3
Thu Dec  9 16:22:32 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.44       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   37C    P8    28W / 149W |      0MiB / 11441MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
               

In [3]:
# Mount Google Drive at /content/gdrive; later cells read files from paths under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [4]:
#symmetric noise - resample the labels of the first p fraction of samples uniformly at random
def symmetric_noise(ds,p=0.4):#this can be done in target transform
  """Inject symmetric label noise into ``ds`` in place.

  The labels of the first ``int(len(ds.targets)*p)`` samples are replaced by
  draws from ``np.random.choice`` over the number of distinct labels; the
  remaining labels are kept. A resampled label may coincide with the original
  one, in which case the sample still counts as clean.

  Args:
    ds: object exposing a ``targets`` sequence of hashable integer labels.
    p: fraction of samples (taken from the front) whose label is resampled.

  Returns:
    The same ``ds`` object, with ``ds.targets`` replaced by a list of
    ``[label, is_clean]`` pairs.
  """
  original = ds.targets
  num_classes = len(set(original))
  cutoff = int(len(original)*p)

  resampled = np.random.choice(num_classes, cutoff)
  mixed = np.concatenate((resampled, original[cutoff:]))

  # is_clean is True exactly when the (possibly resampled) label equals the original one.
  ds.targets = [[noisy, bool(noisy == clean)] for noisy, clean in zip(mixed, original)]
  return ds

#asymmetric noise - flip 40% specific categories
def asymmetric_noise(ds, cifar10=True, p=0.4):
  """Inject class-dependent (asymmetric) label noise into ``ds`` in place.

  One ``random.random()`` draw is made per sample; the sample is a flip
  candidate when the draw is below ``p``.

  * ``cifar10=True``: candidates whose label belongs to one of the pairs
    9<->1, 0<->2, 4<->7, 3<->5 are swapped to the partner class. Labels
    outside these pairs are never changed.
  * ``cifar10=False``: every candidate is moved to ``(label + 1) % 100``.

  Args:
    ds: object exposing a ``targets`` sequence of integer labels.
    cifar10: selects the CIFAR-10 pair table instead of the "next class
      modulo 100" rule.
    p: per-sample flip probability.

  Returns:
    The same ``ds`` object, with ``ds.targets`` replaced by a list of
    ``[label, is_clean]`` pairs, one per original sample.
  """
  cifar10_flips = {9: 1, 1: 9, 0: 2, 2: 0, 4: 7, 7: 4, 3: 5, 5: 3}

  noisy_labels = []
  for label in ds.targets:
    # Always draw, even for labels that cannot flip, so the random stream advances once per sample.
    flip_bool = random.random() < p
    if not flip_bool:
      noisy_labels.append([label, True])
    elif cifar10:
      if label in cifar10_flips:
        noisy_labels.append([cifar10_flips[label], False])
      else:
        noisy_labels.append([label, True])
    else:
      # Previously this branch used an undefined name and dropped unflipped samples entirely.
      noisy_labels.append([(label + 1) % 100, False])

  ds.targets = noisy_labels
  return ds



#open set noise - replace a fraction of the CIFAR-10 images with CIFAR-100 images
def open_set_noise(ds_cifar10, ds_cifar100, p=0.4):
    """Inject open-set noise into ``ds_cifar10`` in place.

    ``int(len(ds_cifar10)*p)`` distinct positions of ``ds_cifar10.data`` are
    overwritten with the same number of distinct images drawn from
    ``ds_cifar100.data``. The labels themselves are left untouched, so the
    overwritten samples now carry a label that does not describe their image.

    Args:
        ds_cifar10: dataset with ``data`` (indexable array), ``targets`` and
            ``__len__``; modified in place.
        ds_cifar100: dataset providing the replacement images through ``data``.
        p: fraction of ``ds_cifar10`` to overwrite.

    Returns:
        The new ``ds_cifar10.targets`` list of ``[label, is_clean]`` pairs.
        ``is_clean`` is False for overwritten samples, matching the convention
        of the other noise generators in this notebook.
        NOTE(review): the sibling generators return the dataset; the list
        return is kept here so existing callers keep working.

    Raises:
        ValueError: from ``np.random.choice`` if ``ds_cifar100`` holds fewer
            images than need to be replaced.
    """
    n_replace = int(len(ds_cifar10)*p)
    index1 = np.random.choice(len(ds_cifar10), n_replace, replace=False)
    # Draw exactly as many source images as there are target slots, whatever the size of ds_cifar100.
    index2 = np.random.choice(len(ds_cifar100), n_replace, replace=False)
    ds_cifar10.data[index1] = ds_cifar100.data[index2]

    # Set membership keeps the flagging pass linear in the dataset size.
    replaced = set(index1.tolist())
    new_ds = [[ds_cifar10.targets[i], i not in replaced] for i in range(len(ds_cifar10))]

    ds_cifar10.targets = new_ds
    return new_ds


#instance dependent noise - flip the samples whose best wrong-class score is highest
def idn_generation(ds,preds_path,p=0.4):#preds is (len(labels),classes)
  """Inject instance-dependent label noise into ``ds`` in place.

  For every sample the highest-scoring class other than its current label is
  picked as the candidate noisy label. The ``int(p * n_rows)`` samples with
  the largest candidate scores are switched to their candidate; all others
  keep their label.

  Args:
    ds: object exposing ``targets``, a sequence of plain integer labels.
    preds_path: path of a whitespace-separated text file readable by
      ``np.loadtxt`` holding one row of class scores per sample.
    p: fraction of rows in the score file to flip.

  Returns:
    The same ``ds`` object, with ``ds.targets`` replaced by a list of
    ``[label, is_clean]`` pairs.
  """
  preds = np.loadtxt(preds_path, ndmin=2)

  labels = ds.targets #assuming [label]
  ranked = preds.argsort() #per row, class indices from lowest to highest score
  instance_labels = []
  max_wrong_preds = []

  for i,(label,order) in enumerate(zip(labels,ranked)):
    # Use the runner-up when the top class is the current label, otherwise the top class itself.
    candidate = order[-2] if label==order[-1] else order[-1]
    instance_labels.append(candidate)
    max_wrong_preds.append(preds[i,candidate])

  n_max_preds = int(p*preds.shape[0])
  if n_max_preds > 0:
    flip_idx = set(np.argsort(max_wrong_preds)[-n_max_preds:].tolist())
  else:
    # A slice of [-0:] would select every sample, so zero flips needs its own branch.
    flip_idx = set()

  ds.targets = [
    [instance_labels[idx],False] if idx in flip_idx else [label,True]
    for idx,label in enumerate(labels)
  ]
  return ds


#index dataset to use for instance dependent noise
def index_ds(ds):
  """Pair every label of ``ds`` with its position, in place.

  Args:
    ds: object exposing a ``targets`` sequence.

  Returns:
    The same ``ds`` object, with ``ds.targets`` replaced by a list of
    ``[label, position]`` pairs.
  """
  ds.targets = [[label, position] for position, label in enumerate(ds.targets)]
  return ds


In [5]:
# Training pipeline: random 32x32 crop after 4-pixel padding, random horizontal flip,
# tensor conversion, then per-channel normalisation with the constants below.
train_transform = transforms.Compose([
            transforms.RandomCrop(32,padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
         ])

# Test pipeline: no augmentation, same normalisation constants as training.
test_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),

])

# The train and test splits use separate root folders, so the archive is downloaded once per folder.
cifar10_train = datasets.CIFAR10(train=True,root='/content/cifar10/train',download=True,transform=train_transform)
cifar10_test = datasets.CIFAR10(train=False,root='/content/cifar10/test',download=True,transform=test_transform)

# cifar100_train= datasets.CIFAR100(train=True,root='/content/cifar100/train',download=True,transform=train_transform)
# cifar100_test= datasets.CIFAR100(train=False,root='/content/cifar100/test',download=True,transform=test_transform)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /content/cifar10/train/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting /content/cifar10/train/cifar-10-python.tar.gz to /content/cifar10/train
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to /content/cifar10/test/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting /content/cifar10/test/cifar-10-python.tar.gz to /content/cifar10/test


## IDN comparison area

In [6]:
#fetching and creating IDN dataset
idn_path = "/content/gdrive/MyDrive/Colab Notebooks/dl_adv_proj/idx_pred.txt"

# NOTE(review): idn_generation rewrites ds.targets in place and returns the same object,
# so after this line cifar10_train itself carries [label, is_clean] targets.
idn_ds = idn_generation(cifar10_train,idn_path)

In [63]:
#their dataset, [true label, noisy label]

og_idn_path = "/content/gdrive/MyDrive/Colab Notebooks/dl_adv_proj/dependent0.4.csv"
og_idn = np.genfromtxt(og_idn_path,delimiter=",")

# Raw per-sample class scores from the same file idn_generation consumed (whitespace-delimited).
idn_gen = np.genfromtxt(idn_path)

In [68]:
# Compare the reference IDN labels (og_idn rows: [true label, noisy label]) with our generated
# labels (idn_ds.targets entries: [label, is_clean]), sample by sample.
og_idn_sum = 0
our_idn_sum= 0
og_vs_our_sum=0
diff_idx = {}
# First row of the CSV, excluded from the loop below via og_idn[1:] — presumably a header row; not used afterwards.
nana = og_idn[0]

diff_plot = []
for idx,(og,idn) in enumerate(zip(og_idn[1:],idn_ds.targets)):
  # our label equals their true label
  if og[0]==idn[0]: our_idn_sum+=1
  # their noisy label equals their true label
  if og[0]==og[1]: og_idn_sum+=1
  # their noisy label equals our label; disagreements are recorded by index in diff_idx
  if og[1]==idn[0]:
    og_vs_our_sum+=1
    diff_plot.append(True)
  else: 
    diff_idx[idx]=[og,idn]
    diff_plot.append(False)



# NOTE(review): the two "ratio" figures are raw counts of labels that match the true label,
# not noise ratios — consider relabelling the message.
print(f'their ratio of noise:{og_idn_sum}, our ratio:{our_idn_sum}, ours vs theirs:{og_vs_our_sum}')

their ratio of noise:30000, our ratio:30000, ours vs theirs:39407


In [70]:
# Class indices of row 3933 of the score file, ordered from lowest to highest score.
np.argsort(idn_gen[3933])

array([8, 1, 0, 9, 2, 5, 6, 4, 3, 7])

In [74]:
# Score of class 3 for row 3933.
idn_gen[3933,3]

0.06524672259840181

In [75]:
# Score of class 4 for row 3933.
idn_gen[3933,4]

0.06078417808212974

In [76]:
# Score of class 7 for row 3933.
idn_gen[3933,7]

0.8054707036545016

In [69]:
# Indices where our label differs from their noisy label, mapped to [their row, our target].
diff_idx

{13: [array([2., 2.]), [8, False]],
 18: [array([2., 3.]), [6, False]],
 19: [array([6., 1.]), [2, False]],
 29: [array([0., 8.]), [3, False]],
 33: [array([3., 3.]), [5, False]],
 37: [array([7., 5.]), [7, True]],
 38: [array([3., 8.]), [5, False]],
 42: [array([2., 0.]), [4, False]],
 50: [array([9., 0.]), [9, True]],
 52: [array([7., 7.]), [2, False]],
 57: [array([2., 0.]), [2, True]],
 67: [array([9., 8.]), [9, True]],
 70: [array([5., 3.]), [5, True]],
 71: [array([9., 1.]), [9, True]],
 72: [array([6., 9.]), [6, True]],
 75: [array([1., 1.]), [9, False]],
 80: [array([3., 0.]), [5, False]],
 81: [array([5., 2.]), [3, False]],
 84: [array([7., 4.]), [5, False]],
 85: [array([7., 2.]), [7, True]],
 90: [array([2., 6.]), [3, False]],
 91: [array([3., 3.]), [5, False]],
 93: [array([0., 0.]), [9, False]],
 98: [array([4., 4.]), [6, False]],
 101: [array([3., 3.]), [2, False]],
 107: [array([5., 7.]), [3, False]],
 113: [array([7., 2.]), [7, True]],
 121: [array([2., 2.]), [3, False]

Choose which noisy variant of the dataset to train on in the cell below.

In [None]:
# Loader settings shared by the train and test DataLoaders below.
batch_size = 128
num_workers = 2

#TODO: add clothing1M dataset if necessary

# NOTE(review): symmetric_noise rewrites cifar10_train.targets in place. If the IDN cell above
# has already run on the same object, the targets are [label, flag] lists at this point and
# symmetric_noise will fail on them — reload cifar10_train first.
cifar10_train =  symmetric_noise(cifar10_train)

c10_train_dl = torch.utils.data.DataLoader(cifar10_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          num_workers=num_workers)


c10_test_dl = torch.utils.data.DataLoader(cifar10_test,
                                          batch_size=batch_size,
                                          shuffle=False,
                                          num_workers=num_workers)


# c100_train_dl = torch.utils.data.DataLoader(cifar100_train,
#                                           batch_size=batch_size,
#                                           shuffle=True,
#                                           num_workers=num_workers)


# c100_test_dl = torch.utils.data.DataLoader(cifar10_test,
#                                           batch_size=batch_size,
#                                           shuffle=False,
#                                           num_workers=num_workers)


In [None]:
#EQ1: add noise to gradients
def loss_eq1(output, target, sigma=0.5):
  """Soft-label cross-entropy with Gaussian noise added to the scalar loss.

  Args:
    output: raw class scores; log-softmax is taken over dim 1.
    target: per-class weights multiplied element-wise with the log-softmax
      (for example one-hot rows), broadcastable against ``output``.
    sigma: standard deviation of the noise added to the scalar value.

  Returns:
    A scalar tensor: the negated, noise-perturbed mean log-likelihood.

  NOTE(review): the noise is a constant with respect to the network
  parameters, so it shifts the loss value but not its gradient.
  """
  log_likelihood = torch.mean(torch.sum(F.log_softmax(output, dim=1)*target, dim=1))
  # Sample the noise on the loss's own device/dtype instead of forcing CUDA.
  log_likelihood = log_likelihood + sigma*torch.randn_like(log_likelihood)
  return -log_likelihood

#EQ2: add noise to gradients w.r.t. model output
def loss_eq2():
  """Placeholder for the EQ2 loss; takes no arguments and always returns True."""
  return True

#EQ3: add noise to one-hot labels
def loss_eq3(output, target, sigma=0.5):
  """Soft-label cross-entropy computed against Gaussian-perturbed targets.

  Args:
    output: raw class scores; log-softmax is taken over dim 1.
    target: per-class target weights with the same shape as ``output``
      (integer one-hot tensors are accepted). Not modified.
    sigma: standard deviation of the noise added to every target entry.

  Returns:
    A scalar tensor: the negated mean over the batch of the summed
    ``log_softmax(output) * noisy_target``.
  """
  # Work on a floating-point copy so the caller's tensor is untouched and integer one-hots work.
  noisy_target = target.to(device=output.device, dtype=output.dtype)
  noisy_target = noisy_target + sigma*torch.randn_like(noisy_target)
  log_likelihood = torch.mean(torch.sum(F.log_softmax(output, dim=1)*noisy_target, dim=1))
  return -log_likelihood

In [None]:
#resnet50 = models.resnet50(pretrained=True)# Pretrained on imagenet, use for clothing1m later

In [None]:
""" Wide Resnet """


def conv3x3(in_planes, out_planes, stride=1):
    """Build a biased 3x3 convolution with padding 1 and the given stride."""
    return nn.Conv2d(
        in_channels=in_planes,
        out_channels=out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias=True,
    )

def conv_init(m):
    """Initialise a single module in place; intended for ``nn.Module.apply``.

    Modules whose class name contains ``'Conv'`` get Xavier-uniform weights
    (gain sqrt(2)) and a zero bias. Modules whose class name contains
    ``'BatchNorm'`` get weight 1 and bias 0. Every other module is left
    untouched. Missing parameters (``bias=False`` convolutions, batch norm
    with ``affine=False``) are skipped.

    Args:
        m: the module to initialise.
    """
    classname = m.__class__.__name__
    weight = getattr(m, 'weight', None)
    bias = getattr(m, 'bias', None)
    if 'Conv' in classname:
        # The trailing-underscore initialisers are the supported in-place API.
        if weight is not None:
            init.xavier_uniform_(weight, gain=np.sqrt(2))
        if bias is not None:
            init.constant_(bias, 0)
    elif 'BatchNorm' in classname:
        if weight is not None:
            init.constant_(weight, 1)
        if bias is not None:
            init.constant_(bias, 0)

class wide_basic(nn.Module):
    """Pre-activation residual block: BN-ReLU-conv, optional dropout, BN-ReLU-conv, plus shortcut.

    The first 3x3 convolution always runs at stride 1; the second one carries
    ``stride``. The shortcut is an identity unless the stride or the channel
    count changes, in which case it is a 1x1 convolution.
    """

    def __init__(self, in_planes, planes, dropout_rate, stride=1):
        """
        Args:
            in_planes: number of input channels.
            planes: number of output channels.
            dropout_rate: dropout probability between the two convolutions;
                no dropout layer is created when it is not positive.
            stride: stride of the second convolution and of the shortcut.
        """
        super().__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, padding=1, bias=True)
        self.dropout_rate = dropout_rate
        if self.dropout_rate > 0:
            self.dropout = nn.Dropout(p=dropout_rate)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, padding=1, bias=True)

        # A projection is only needed when the residual and main paths would disagree in shape.
        needs_projection = stride != 1 or in_planes != planes
        if needs_projection:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=True),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return the block output for input ``x``."""
        hidden = F.relu(self.bn1(x))
        hidden = self.conv1(hidden)
        if self.dropout_rate > 0:
            hidden = self.dropout(hidden)
        hidden = F.relu(self.bn2(hidden))
        hidden = self.conv2(hidden)
        hidden += self.shortcut(x)
        return hidden

class Wide_ResNet(nn.Module):
    """Wide residual network: a 3x3 stem, three stages of ``wide_basic`` blocks, BN-ReLU, pooling, linear head.

    The final ``avg_pool2d`` uses a fixed 8x8 window.
    """

    def __init__(self, dropout_rate=0, depth=28, widen_factor=2, num_classes=10):
        """
        Args:
            dropout_rate: dropout probability passed to every block.
            depth: total depth; must satisfy ``depth = 6n + 4``.
            widen_factor: multiplier applied to the 16/32/64 stage widths.
            num_classes: size of the linear output layer.
        """
        super().__init__()
        self.in_planes = 16

        assert ((depth-4)%6 ==0), 'Wide-resnet depth should be 6n+4'
        blocks_per_stage = (depth-4)//6

        print('| Wide-Resnet %dx%d' %(depth, widen_factor))
        stem_width = 16
        stage_widths = [base*widen_factor for base in (16, 32, 64)]

        self.conv1 = conv3x3(3, stem_width)
        self.layer1 = self._wide_layer(wide_basic, stage_widths[0], blocks_per_stage, dropout_rate, stride=1)
        self.layer2 = self._wide_layer(wide_basic, stage_widths[1], blocks_per_stage, dropout_rate, stride=2)
        self.layer3 = self._wide_layer(wide_basic, stage_widths[2], blocks_per_stage, dropout_rate, stride=2)
        self.bn1 = nn.BatchNorm2d(stage_widths[2], momentum=0.9)
        self.linear = nn.Linear(stage_widths[2], num_classes)

    def _wide_layer(self, block, planes, num_blocks, dropout_rate, stride):
        """Stack ``num_blocks`` blocks; only the first one uses ``stride``, the rest use stride 1."""
        stage = []
        for block_stride in [stride] + [1]*(num_blocks-1):
            stage.append(block(self.in_planes, planes, dropout_rate, block_stride))
            # Every block after the first one in a stage already sees the stage width as input.
            self.in_planes = planes
        return nn.Sequential(*stage)

    def forward(self, x, get_feat=False):
        """Return class scores, or the pooled feature vector when ``get_feat`` is true."""
        features = self.layer3(self.layer2(self.layer1(self.conv1(x))))
        features = F.relu(self.bn1(features))
        features = F.avg_pool2d(features, 8)
        features = features.view(features.size(0), -1)

        if get_feat:
            return features
        return self.linear(features)

In [None]:
class NoiseNet:
  """Training harness around ``Wide_ResNet`` for the label-noise experiments.

  Bundles the network, an SGD optimizer, a history dictionary
  (``cleanloss``, ``noisyloss``, ``loss``, ``test_acc``, ``epoch``) and the
  two gradient-noise helpers (``eq1_func`` on parameter gradients,
  ``eq2_func`` on the gradient w.r.t. the network output).
  All tensors are moved to whatever device the network parameters live on.
  """

  def __init__(self, name='default_net'):
    """Create the network on the GPU when one is available, otherwise on the CPU."""
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    self.net = Wide_ResNet().to(device)
    self.name = name
    self.lr = 0.001
    self.optimizer = optim.SGD(self.net.parameters(), lr=self.lr, momentum=0.9, weight_decay=5e-4)
    self.hist = {'cleanloss':[], 'noisyloss':[], 'loss':[], 'test_acc':[], 'epoch':0}
    self.CE_loss = nn.CrossEntropyLoss()#F.cross_entropy()

  def _device(self):
    """Return the device of the network parameters."""
    return next(self.net.parameters()).device

  @staticmethod
  def _num_batches(dataloader):
    """Return ``len(dataloader)``, or None when the loader has no length."""
    try:
      return len(dataloader)
    except TypeError:
      return None

  #saving and loading, FIX FOR WIDE RESNET
  def save_net(self, name='version_', path='/content/gdrive/MyDrive/dl_adv_proj/'):
    """Write the network/optimizer state to ``path+name+'net'`` and the history to ``path+name+'hist'``."""
    net_state ={'state_dict':self.net.state_dict(),'optimizer':self.optimizer.state_dict()}

    torch.save(net_state,  path+name+'net')
    torch.save(self.hist, path+name+'hist')
    print(f'saved at: {path+name}')

  def load_net(self, name='version_', path='/content/gdrive/MyDrive/dl_adv_proj/'):
    """Restore network, optimizer and history from the files written by ``save_net``."""
    # map_location lets a checkpoint saved on a GPU be restored on a CPU-only runtime.
    net_state  = torch.load(path+name+'net', map_location=self._device())
    self.net.load_state_dict(net_state['state_dict'])
    self.optimizer.load_state_dict(net_state['optimizer'])

    self.hist = torch.load(path+name+'hist')
    print(f'loaded from {path+name}')

  def eq1_func(self,sigma=1e-3):
    """Add ``sigma``-scaled standard normal noise to every parameter gradient, in place.

    Parameters without a gradient are skipped. Must be called after
    ``backward()`` and before ``optimizer.step()``. Always returns True.
    """
    for param in self.net.parameters():
      if param.grad is None:
        continue
      param.grad.add_(torch.randn_like(param.grad), alpha=sigma)

    return True

  def eq2_func(self,grad,sigma=1e-3):
    """Return ``grad`` plus Gaussian noise scaled by ``sigma / sqrt(grad.numel())``.

    Used as a backward hook on the network output, so the returned tensor
    replaces the gradient flowing back into the network.
    """
    tensor_size = torch.numel(grad)
    if tensor_size == 0:
      return grad
    noise = sigma*(torch.randn_like(grad)/(tensor_size ** 0.5))
    return grad+noise

  def test_acc(self, test_dl):
    """Return the fraction of correctly classified samples in ``test_dl``.

    ``test_dl`` yields batches whose first two items are inputs and integer
    labels. The network is switched to eval mode for the pass and back to
    train mode afterwards. Returns 0.0 for an empty loader.
    """
    device = self._device()
    self.net.eval()
    correct = 0
    total = 0

    with torch.no_grad():
      for data in test_dl:
        x, y = data[0].to(device),data[1].to(device)
        predictions = self.net(x).argmax(dim=1)
        correct += (predictions == y).sum().item()
        total += y.size(0)

    self.net.train()

    # Divide by the number of samples actually seen rather than a fixed test-set size.
    return correct / total if total else 0.0

  def train(self,dataloader, testdataloader, epochs,noise=True, ema=None,eq2=False,eq1=False, sigma=1e-3):
    """Train from ``self.hist['epoch']`` up to (excluding) ``epochs``.

    Args:
      dataloader: yields ``(x, y)``. With ``noise=True``, ``y`` is a pair
        ``(labels, is_clean)``; otherwise ``y`` is the label tensor.
      testdataloader: loader passed to ``test_acc`` after every epoch.
      epochs: epoch index at which training stops.
      noise: whether the loader carries clean/noisy indicators; enables the
        separate clean/noisy loss statistics.
      ema: optional object with an ``update()`` method, called after every step.
      eq2: add noise to the gradient w.r.t. the network output (``eq2_func``).
      eq1: add noise to the parameter gradients (``eq1_func``).
      sigma: noise scale for ``eq1``/``eq2``.

    Per epoch, the mean batch loss, the mean clean/noisy batch losses and
    the test accuracy are appended to ``self.hist``. Batches without any
    clean (or noisy) sample are left out of the respective average; if an
    epoch has none at all, NaN is recorded.
    """
    device = self._device()
    n_batches = self._num_batches(dataloader)
    epoch_range = tqdm(range(self.hist['epoch'],epochs))
    self.net.train()

    for epoch in epoch_range:
      total_loss = 0
      total_cleanl, clean_batches = 0, 0
      total_noisyl, noisy_batches = 0, 0
      batches_seen = 0
      for i,(x,y) in enumerate(dataloader):#dataloaders give us (image,label)
        x = x.to(device)
        if noise:
          target = y[0].to(device)
          clean_indicator = y[1].to(device).bool()
        else:
          target = y.to(device)

        self.optimizer.zero_grad()
        pred = self.net(x)
        loss = F.cross_entropy(pred, target, reduction='none')

        if noise:
          clean_part = torch.masked_select(loss,clean_indicator)
          noisy_part = torch.masked_select(loss,~clean_indicator)
          # The mean of an empty selection is NaN and would poison the epoch average.
          if clean_part.numel() > 0:
            total_cleanl += clean_part.mean().item()
            clean_batches += 1
          if noisy_part.numel() > 0:
            total_noisyl += noisy_part.mean().item()
            noisy_batches += 1

        loss = torch.mean(loss)

        #eq2 has to be before loss.backward()
        if eq2: pred.register_hook(lambda grad,sigma=sigma: self.eq2_func(grad,sigma=sigma))
        loss.backward()

        #eq1 has to be before optimizer.step()
        if eq1: self.eq1_func(sigma)
        self.optimizer.step()

        if ema: ema.update()

        total_loss += loss.item()
        batches_seen += 1

        progress = (i+1)/n_batches if n_batches else i+1
        epoch_range.set_postfix({'Progress in epoch': progress, ' _loss': loss.item() })

      self.hist['loss'].append(total_loss/batches_seen if batches_seen else float('nan'))
      if noise:
        self.hist['cleanloss'].append(total_cleanl/clean_batches if clean_batches else float('nan'))
        self.hist['noisyloss'].append(total_noisyl/noisy_batches if noisy_batches else float('nan'))

      test_acc = self.test_acc(testdataloader)
      print(test_acc)
      self.hist['test_acc'].append(test_acc)

      self.hist['epoch']+=1

      #self.save_net(name='--')#FOR SAVING LATER
      print(f'epoch:{epoch}')

  def instance_train(self,dataloader, testdataloader, epochs):
    """Train while accumulating the softmax output of every sample, for IDN generation.

    Args:
      dataloader: yields ``(x, y)`` where ``y`` is a pair
        ``(labels, sample_indices)`` as produced by ``index_ds``.
      testdataloader: loader passed to ``test_acc`` every fifth epoch.
      epochs: epoch index at which training stops.

    Returns:
      A ``(50000, 10)`` array holding, per sample index, the softmax output
      averaged over the epochs run in this call.

    NOTE(review): the batch loss is summed, not averaged, before
    ``backward()``; kept as in the original.
    """
    device = self._device()
    n_batches = self._num_batches(dataloader)
    start_epoch = self.hist['epoch']
    epoch_range = tqdm(range(start_epoch,epochs))
    self.net.train()
    idx_pred=np.zeros((50000,10))#cifar10 train length
    for epoch in epoch_range:
      total_loss = 0
      batches_seen = 0
      for batch_idx,(x,y) in enumerate(dataloader):#dataloaders give us (image,label)
        target = y[0].to(device)
        sample_idx = y[1].cpu().numpy()
        x=x.to(device)

        self.optimizer.zero_grad()

        pred = self.net(x)
        soft = F.softmax(pred, dim=1)
        loss = F.cross_entropy(pred, target, reduction='none')

        # Accumulate all rows of the batch at once; add.at also copes with repeated indices.
        np.add.at(idx_pred, sample_idx, soft.detach().cpu().numpy())

        loss = torch.sum(loss)

        loss.backward()
        self.optimizer.step()

        total_loss += loss.item()
        batches_seen += 1
        progress = (batch_idx+1)/n_batches if n_batches else batch_idx+1
        epoch_range.set_postfix({'Progress in epoch': progress, ' _loss': loss.item() })
      # Average over the number of batches (the batch index is no longer shadowed by an inner loop).
      self.hist['loss'].append(total_loss/batches_seen if batches_seen else float('nan'))
      if (epoch % 5) == 0:
        test_acc = self.test_acc(testdataloader)
        print(test_acc)
        self.hist['test_acc'].append(test_acc)

      self.hist['epoch']+=1

      #self.save_net(name=self.name)#FOR SAVING LATER
      print(f'epoch:{epoch}')

    # Only the epochs run in this call contributed to the accumulator.
    epochs_run = max(epochs - start_epoch, 0)
    return idx_pred/epochs_run if epochs_run else idx_pred

In [None]:
# Build a fresh network and train it for 10 epochs with EQ1 (noise on the parameter gradients).
testnet = NoiseNet()
#ema = ExponentialMovingAverage(testnet.net.parameters(), decay=0.999) #https://github.com/fadel/pytorch_ema
testnet.train(c10_train_dl, c10_test_dl, 10,eq1=True)



| Wide-Resnet 28x2


  0%|          | 0/10 [00:00<?, ?it/s]

KeyboardInterrupt: ignored

In [None]:
# Debugging aid: turns on autograd anomaly detection for everything run after this cell.
torch.autograd.set_detect_anomaly(True)

<torch.autograd.anomaly_mode.set_detect_anomaly at 0x7fed42c505d0>