In [1]:
import numpy as np
import struct
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as Data
import torchvision
from copy import deepcopy
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import time
from scipy import spatial
from util.util import mnist_noise

from trajectoryReweight.model import WeightedCrossEntropyLoss, TrajectoryReweightNN
from trajectoryReweight.baseline import StandardTrainingNN
from trajectoryReweight.gmm import GaussianMixture

# Run on the first CUDA GPU when one is visible to PyTorch; otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

def read_idx(filename):
    """Load an IDX-format file (the container format of the MNIST files) into an array.

    The file starts with a 4-byte header: two zero bytes, a one-byte element
    type code and a one-byte dimension count.  It is followed by one big-endian
    unsigned 32-bit size per dimension, then the raw elements.

    Args:
        filename: Path of the IDX file to read.

    Returns:
        numpy.ndarray: The payload reshaped to the sizes listed in the header.
        Type code 0x08 yields ``uint8`` exactly as before; the other IDX type
        codes are decoded with their own (big-endian) dtype instead of being
        misread as bytes.  The array is backed by an immutable ``bytes``
        object and is therefore read-only.

    Raises:
        ValueError: If the two leading magic bytes are not zero, the type code
            is not a known IDX code, or the payload does not fit the shape.
        struct.error: If the file is too short to contain the header.
    """
    # IDX element type codes; multi-byte elements are stored big-endian.
    idx_dtypes = {
        0x08: np.dtype(np.uint8),
        0x09: np.dtype(np.int8),
        0x0B: np.dtype('>i2'),
        0x0C: np.dtype('>i4'),
        0x0D: np.dtype('>f4'),
        0x0E: np.dtype('>f8'),
    }
    with open(filename, 'rb') as f:
        zero, data_type, dims = struct.unpack('>HBB', f.read(4))
        if zero != 0:
            raise ValueError(
                '{!r} is not an IDX file: leading magic bytes are not zero'.format(filename))
        if data_type not in idx_dtypes:
            raise ValueError(
                'unsupported IDX data type code 0x{:02X} in {!r}'.format(data_type, filename))
        # One unsigned 32-bit size per dimension, read in a single call.
        shape = struct.unpack('>{}I'.format(dims), f.read(4 * dims))
        payload = f.read()
    # frombuffer/reshape raise ValueError when the payload size does not match.
    return np.frombuffer(payload, dtype=idx_dtypes[data_type]).reshape(shape)

cuda:0


In [2]:
"""
CNN
"""
class CNN(nn.Module):
    """Small convolutional classifier: two conv/ReLU/max-pool stages, then two linear layers.

    Each stage keeps the spatial size in its convolution (5x5 kernel, padding 2,
    stride 1) and halves it in the 2x2 pooling, so the 7 * 7 * 64 input width of
    ``fc1`` corresponds to single-channel 28x28 images.  The network returns 10
    raw scores per sample; no softmax is applied here.
    """

    def __init__(self):
        super().__init__()
        self.layer1 = self._conv_stage(1, 32)
        self.layer2 = self._conv_stage(32, 64)
        self.drop_out = nn.Dropout()
        self.fc1 = nn.Linear(in_features=7 * 7 * 64, out_features=1000)
        self.fc2 = nn.Linear(in_features=1000, out_features=10)

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Build one convolution -> ReLU -> 2x2 max-pool stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

    def forward(self, x):
        """Map a batch of images to a (batch, 10) tensor of class scores."""
        features = self.layer2(self.layer1(x))
        # Flatten everything except the batch dimension before the linear layers.
        flat = features.reshape(features.size(0), -1)
        hidden = self.fc1(self.drop_out(flat))
        return self.fc2(hidden)
    
def accuracy(predict_y, test_y):
    """Return the fraction of reference labels that were predicted correctly.

    Predictions and labels are compared pairwise in order; if ``predict_y`` is
    longer than ``test_y`` (or vice versa) the surplus elements are not compared.

    Args:
        predict_y: Iterable of predicted labels.
        test_y: Sized sequence of reference labels (NumPy array, list, ...).
            Its length is the denominator of the returned ratio.

    Returns:
        float: Number of matching pairs divided by ``len(test_y)``.

    Raises:
        ValueError: If ``test_y`` is empty, since the ratio is undefined.
    """
    # len() equals shape[0] for arrays and also accepts plain lists and tuples.
    total = len(test_y)
    if total == 0:
        raise ValueError('accuracy is undefined for an empty set of labels')
    matches = sum(1 for pred, ref in zip(predict_y, test_y) if pred == ref)
    return matches / total

In [3]:
# MNIST data: load the raw IDX files and hold out a random validation split.
NUM_VALID = 1000     # samples moved from the training set into the validation set
NUM_SUBSET = 10000   # training samples kept after shrinking
NOISE_RATE = 0.1     # second argument of mnist_noise (presumably the corrupted-label fraction - confirm in util.util)

x_train = read_idx('data/train-images.idx3-ubyte')
y_train = read_idx('data/train-labels.idx1-ubyte')
x_test = read_idx('data/t10k-images.idx3-ubyte')
y_test = read_idx('data/t10k-labels.idx1-ubyte')

# Sample sizes come from the loaded arrays instead of a hard-coded 60000, so
# the split stays valid if a different training file is supplied.
valid_idx = np.random.choice(len(x_train), size=NUM_VALID, replace=False)
x_valid = x_train[valid_idx]
y_valid = y_train[valid_idx]
x_train = np.delete(x_train, valid_idx, axis=0)
y_train = np.delete(y_train, valid_idx)

# Shrink the training set so that the label noise has a significant effect.
subset_idx = np.random.choice(len(x_train), size=NUM_SUBSET, replace=False)
x_train = x_train[subset_idx]
y_train = y_train[subset_idx]

# Add noisy labels to the training data; mnist_noise also returns noise_index,
# which is later handed to TrajectoryReweightNN.fit.
y_train_noisy, noise_index = mnist_noise(y_train, NOISE_RATE)

In [4]:
# Initialization: convert the NumPy arrays into PyTorch tensors.
train_idx = np.arange(len(x_train))

# torchvision's ToTensor treats a 3-D ndarray as (H, W, C) and returns (C, H, W),
# so the sample axis is moved last here and comes back first; unsqueeze(1) then
# inserts the single channel axis.
# NOTE(review): the (2, 1, 0) permutation also swaps the two image axes, so if
# the arrays are (N, rows, cols) every image reaches the model transposed. It is
# applied identically to train/valid/test - confirm this is intended.
x_train = np.transpose(x_train,(2,1,0))
x_valid = np.transpose(x_valid,(2,1,0))
x_test = np.transpose(x_test,(2,1,0))
x_train_tensor = torchvision.transforms.ToTensor()(x_train).unsqueeze(1)
x_valid_tensor = torchvision.transforms.ToTensor()(x_valid).unsqueeze(1)
x_test_tensor = torchvision.transforms.ToTensor()(x_test).unsqueeze(1)

# np.long was removed in NumPy 1.24 and is a platform-dependent C long in
# NumPy 2.x; np.int64 always yields the 64-bit integer labels (torch.long).
y_train_tensor = torch.from_numpy(y_train.astype(np.int64))
y_train_noisy_tensor = torch.from_numpy(y_train_noisy.astype(np.int64))
y_valid_tensor = torch.from_numpy(y_valid.astype(np.int64))
y_test_tensor = torch.from_numpy(y_test.astype(np.int64))

In [5]:
# CNN without reweight: train a fresh CNN on the noisy labels with the standard trainer.
cnn = CNN().to(device)

std_train_kwargs = dict(
    batch_size=64,
    num_iter=80,
    learning_rate=1e-3,
    early_stopping=7,
    device=device,
    iprint=1,
)
stand_trainNN = StandardTrainingNN(cnn, **std_train_kwargs)
stand_trainNN.fit(
    x_train_tensor, y_train_noisy_tensor,
    x_valid_tensor, y_valid_tensor,
    x_test_tensor, y_test_tensor,
)

# Score the trained model against the test labels.
test_output_y = stand_trainNN.predict(x_test_tensor)
test_accuracy = accuracy(test_output_y, y_test_tensor.data.numpy())

print('test accuracy is {}%'.format(100 * test_accuracy))

Standard NN training...
epoch = 1 | training loss = 0.9581 | valid loss = 0.0067 | valid accuarcy = 93.8% | early stopping = 0/7 | test loss = 0.0062 | test accuarcy = 96.04% [9604/10000]
epoch = 2 | training loss = 0.7439 | valid loss = 0.0040 | valid accuarcy = 96.1% | early stopping = 0/7 | test loss = 0.0036 | test accuarcy = 97.31% [9731/10000]
epoch = 3 | training loss = 0.7070 | valid loss = 0.0037 | valid accuarcy = 96.3% | early stopping = 0/7 | test loss = 0.0032 | test accuarcy = 97.28% [9728/10000]
epoch = 4 | training loss = 0.6747 | valid loss = 0.0032 | valid accuarcy = 96.4% | early stopping = 0/7 | test loss = 0.0028 | test accuarcy = 97.48% [9748/10000]
epoch = 5 | training loss = 0.6577 | valid loss = 0.0032 | valid accuarcy = 97.2% | early stopping = 0/7 | test loss = 0.0028 | test accuarcy = 97.77% [9777/10000]
epoch = 6 | training loss = 0.6444 | valid loss = 0.0033 | valid accuarcy = 97.3% | early stopping = 1/7 | test loss = 0.0030 | test accuarcy = 98.03% [9803

In [6]:
# CNN with reweight: train a fresh CNN on the same noisy labels with the trajectory-reweighting trainer.
cnn = CNN().to(device)

reweight_kwargs = dict(
    burnin=3,
    num_cluster=10,
    batch_size=64,
    num_iter=80,
    learning_rate=1e-3,
    early_stopping=7,
    device=device,
    traj_step=1,
    iprint=2,
)
tra_weightNN = TrajectoryReweightNN(cnn, **reweight_kwargs)
# noise_index (from mnist_noise) is passed as the last positional argument of fit.
tra_weightNN.fit(
    x_train_tensor, y_train_noisy_tensor,
    x_valid_tensor, y_valid_tensor,
    x_test_tensor, y_test_tensor,
    noise_index,
)

# Score the trained model against the test labels.
test_output_y = tra_weightNN.predict(x_test_tensor)
test_accuracy = accuracy(test_output_y, y_test_tensor.data.numpy())

print('test accuracy is {}%'.format(100 * test_accuracy))

Train 3 burn-in epoch...
epoch = 1 | test loss = 0.0052 | test accuarcy = 94.84% [9484/10000]
epoch = 2 | test loss = 0.0045 | test accuarcy = 95.83% [9583/10000]
epoch = 3 | test loss = 0.0038 | test accuarcy = 96.94% [9694/10000]
Train 3 burn-in epoch complete.
------------------------------------------------------------
Trajectory clustering for burn-in epoch...
| - {0: 0, 'size': 1111, 'sim': '0.9801', 'num_spe': 0, 'spe_ratio': '0.0000'}
| - {1: 1, 'size': 566, 'sim': '-0.9384', 'num_spe': 545, 'spe_ratio': '0.9629'}
| - {2: 2, 'size': 1235, 'sim': '-0.9224', 'num_spe': 0, 'spe_ratio': '0.0000'}
| - {3: 3, 'size': 387, 'sim': '-0.9057', 'num_spe': 63, 'spe_ratio': '0.1628'}
| - {4: 4, 'size': 812, 'sim': '-0.9018', 'num_spe': 0, 'spe_ratio': '0.0000'}
| - {5: 5, 'size': 405, 'sim': '-0.9274', 'num_spe': 389, 'spe_ratio': '0.9605'}
| - {6: 6, 'size': 1236, 'sim': '-0.9229', 'num_spe': 0, 'spe_ratio': '0.0000'}
| - {7: 7, 'size': 1379, 'sim': '-0.9099', 'num_spe': 1, 'spe_ratio': '0

| - {0: 0, 'size': 305, 'sim': '0.8290', 'num_spe': 59, 'spe_ratio': '0.1934'}
| - {1: 1, 'size': 1994, 'sim': '0.9187', 'num_spe': 0, 'spe_ratio': '0.0000'}
| - {2: 2, 'size': 746, 'sim': '0.9301', 'num_spe': 0, 'spe_ratio': '0.0000'}
| - {3: 3, 'size': 635, 'sim': '-0.7738', 'num_spe': 634, 'spe_ratio': '0.9984'}
| - {4: 4, 'size': 308, 'sim': '-0.8929', 'num_spe': 305, 'spe_ratio': '0.9903'}
| - {5: 5, 'size': 1626, 'sim': '-0.8821', 'num_spe': 0, 'spe_ratio': '0.0000'}
| - {6: 6, 'size': 1694, 'sim': '-0.8802', 'num_spe': 0, 'spe_ratio': '0.0000'}
| - {7: 7, 'size': 1083, 'sim': '-0.8846', 'num_spe': 0, 'spe_ratio': '0.0000'}
| - {8: 8, 'size': 820, 'sim': '-0.8551', 'num_spe': 2, 'spe_ratio': '0.0024'}
| - {9: 9, 'size': 789, 'sim': '-0.8518', 'num_spe': 0, 'spe_ratio': '0.0000'}
epoch = 12 | training loss = 0.4567 | valid loss = 0.0026 | valid accuarcy = 97.4% | early stopping = 1/7 | test loss = 0.0024 | test accuarcy = 98.24% [9824/10000]
| - {0: 0, 'size': 1309, 'sim': '0.9910

| - {7: 7, 'size': 1903, 'sim': '-0.7508', 'num_spe': 0, 'spe_ratio': '0.0000'}
| - {8: 8, 'size': 1087, 'sim': '-0.7496', 'num_spe': 0, 'spe_ratio': '0.0000'}
| - {9: 9, 'size': 365, 'sim': '-0.8425', 'num_spe': 363, 'spe_ratio': '0.9945'}
epoch = 20 | training loss = 0.3000 | valid loss = 0.0017 | valid accuarcy = 97.7% | early stopping = 1/7 | test loss = 0.0014 | test accuarcy = 98.5% [9850/10000]
| - {0: 0, 'size': 1779, 'sim': '0.9587', 'num_spe': 0, 'spe_ratio': '0.0000'}
| - {1: 1, 'size': 968, 'sim': '0.9765', 'num_spe': 0, 'spe_ratio': '0.0000'}
| - {2: 2, 'size': 1740, 'sim': '0.9658', 'num_spe': 0, 'spe_ratio': '0.0000'}
| - {3: 3, 'size': 383, 'sim': '-0.8716', 'num_spe': 377, 'spe_ratio': '0.9843'}
| - {4: 4, 'size': 1611, 'sim': '-0.8614', 'num_spe': 0, 'spe_ratio': '0.0000'}
| - {5: 5, 'size': 578, 'sim': '-0.8268', 'num_spe': 578, 'spe_ratio': '1.0000'}
| - {6: 6, 'size': 415, 'sim': '-0.7973', 'num_spe': 45, 'spe_ratio': '0.1084'}
| - {7: 7, 'size': 766, 'sim': '-0.79

| - {2: 2, 'size': 1185, 'sim': '-0.4058', 'num_spe': 0, 'spe_ratio': '0.0000'}
| - {3: 3, 'size': 1376, 'sim': '-0.4058', 'num_spe': 0, 'spe_ratio': '0.0000'}
| - {4: 4, 'size': 435, 'sim': '-0.4057', 'num_spe': 0, 'spe_ratio': '0.0000'}
| - {5: 5, 'size': 397, 'sim': '-0.4764', 'num_spe': 393, 'spe_ratio': '0.9899'}
| - {6: 6, 'size': 1898, 'sim': '-0.4767', 'num_spe': 0, 'spe_ratio': '0.0000'}
| - {7: 7, 'size': 639, 'sim': '-0.4756', 'num_spe': 1, 'spe_ratio': '0.0016'}
| - {8: 8, 'size': 269, 'sim': '-0.4767', 'num_spe': 51, 'spe_ratio': '0.1896'}
| - {9: 9, 'size': 1715, 'sim': '-0.4770', 'num_spe': 0, 'spe_ratio': '0.0000'}
epoch = 29 | training loss = 0.0900 | valid loss = 0.0011 | valid accuarcy = 98.1% | early stopping = 1/7 | test loss = 0.0008 | test accuarcy = 98.71% [9871/10000]
| - {0: 0, 'size': 1775, 'sim': '0.9905', 'num_spe': 0, 'spe_ratio': '0.0000'}
| - {1: 1, 'size': 404, 'sim': '-0.4297', 'num_spe': 400, 'spe_ratio': '0.9901'}
| - {2: 2, 'size': 599, 'sim': '-0.4

| - {0: 0, 'size': 345, 'sim': '0.0436', 'num_spe': 341, 'spe_ratio': '0.9884'}
| - {1: 1, 'size': 1299, 'sim': '0.0436', 'num_spe': 0, 'spe_ratio': '0.0000'}
| - {2: 2, 'size': 1846, 'sim': '0.0436', 'num_spe': 0, 'spe_ratio': '0.0000'}
| - {3: 3, 'size': 2912, 'sim': '0.0436', 'num_spe': 0, 'spe_ratio': '0.0000'}
| - {4: 4, 'size': 1275, 'sim': '0.0439', 'num_spe': 0, 'spe_ratio': '0.0000'}
| - {5: 5, 'size': 518, 'sim': '0.0504', 'num_spe': 1, 'spe_ratio': '0.0019'}
| - {6: 6, 'size': 610, 'sim': '0.1365', 'num_spe': 610, 'spe_ratio': '1.0000'}
| - {7: 7, 'size': 934, 'sim': '0.1374', 'num_spe': 0, 'spe_ratio': '0.0000'}
| - {8: 8, 'size': 259, 'sim': '0.1451', 'num_spe': 48, 'spe_ratio': '0.1853'}
| - {9: 9, 'size': 2, 'sim': '0.1450', 'num_spe': 0, 'spe_ratio': '0.0000'}
epoch = 38 | training loss = 0.0404 | valid loss = 0.0007 | valid accuarcy = 98.5% | early stopping = 0/7 | test loss = 0.0006 | test accuarcy = 98.81% [9881/10000]
| - {0: 0, 'size': 339, 'sim': '-0.3576', 'num_s

In [None]:
# Different noise levels: for each level, train the standard and the
# trajectory-reweighted CNN from scratch and record both test accuracies.

noise_level = [0.1,0.2,0.4,0.6,0.8]
exp_summary = []

for level in noise_level:

    level_summary = {"noise_level": level, "std_test":0, "rewgt_test":0}

    # Seeds NumPy's global RNG from the wall clock, so the validation split
    # drawn below is not reproducible across runs.
    np.random.seed(int(time.time()))
    x_train = read_idx('data/train-images.idx3-ubyte')
    y_train = read_idx('data/train-labels.idx1-ubyte')
    x_test = read_idx('data/t10k-images.idx3-ubyte')
    y_test = read_idx('data/t10k-labels.idx1-ubyte')
    # Sample from the loaded array's length rather than a hard-coded 60000.
    valid_idx = np.random.choice(len(x_train), size=1000, replace=False)
    x_valid = x_train[valid_idx]
    y_valid = y_train[valid_idx]
    x_train = np.delete(x_train, valid_idx, axis=0)
    y_train = np.delete(y_train, valid_idx)

    # mnist_noise returns (noisy labels, noise index), as the earlier call that
    # unpacks two values shows; binding the whole tuple to y_train_noisy would
    # break the .astype call below. The index goes to a loop-local name so the
    # notebook-level noise_index is left alone.
    y_train_noisy, level_noise_index = mnist_noise(y_train, level) #apply noise level

    train_idx = np.arange(len(x_train))
    x_train = np.transpose(x_train,(2,1,0))
    x_valid = np.transpose(x_valid,(2,1,0))
    x_test = np.transpose(x_test,(2,1,0))
    x_train_tensor = torchvision.transforms.ToTensor()(x_train).unsqueeze(1)
    x_valid_tensor = torchvision.transforms.ToTensor()(x_valid).unsqueeze(1)
    x_test_tensor = torchvision.transforms.ToTensor()(x_test).unsqueeze(1)
    # np.long was removed in NumPy 1.24 (and is a platform-dependent C long in
    # NumPy 2.x); np.int64 always produces 64-bit integer label tensors.
    y_train_tensor = torch.from_numpy(y_train.astype(np.int64))
    y_train_noisy_tensor = torch.from_numpy(y_train_noisy.astype(np.int64))
    y_valid_tensor = torch.from_numpy(y_valid.astype(np.int64))
    y_test_tensor = torch.from_numpy(y_test.astype(np.int64))

    # Standard training on the noisy labels.
    cnn = CNN()
    cnn.to(device)

    stand_trainNN = StandardTrainingNN(cnn,
                                       batch_size=64,
                                       num_iter=80,
                                       learning_rate=1e-3,
                                       early_stopping=5,
                                       device=device,
                                       iprint=0)
    stand_trainNN.fit(x_train_tensor, y_train_noisy_tensor, x_valid_tensor, y_valid_tensor, x_test_tensor, y_test_tensor)

    test_output_y = stand_trainNN.predict(x_test_tensor)
    test_accuracy = accuracy(test_output_y, y_test_tensor.data.numpy())
    level_summary['std_test'] = test_accuracy

    # NOTE(review): the purpose of the pauses in this loop is not visible
    # here - confirm before removing them.
    time.sleep(10)

    # Trajectory-reweighted training with a fresh model on the same data.
    cnn = CNN()
    cnn.to(device)

    tra_weightNN = TrajectoryReweightNN(cnn,
                                        burnin=5,
                                        num_cluster=5,
                                        batch_size=64,
                                        num_iter=80,
                                        learning_rate=1e-3,
                                        early_stopping=5,
                                        device=device,
                                        traj_step = 3,
                                        iprint=0)
    tra_weightNN.fit(x_train_tensor, y_train_noisy_tensor, x_valid_tensor, y_valid_tensor,x_test_tensor, y_test_tensor)

    test_output_y = tra_weightNN.predict(x_test_tensor)
    test_accuracy = accuracy(test_output_y, y_test_tensor.data.numpy())
    level_summary['rewgt_test'] = test_accuracy

    time.sleep(10)

    exp_summary.append(level_summary)
    print('nosie level = {} finished running'.format(level))
    time.sleep(30)

In [None]:
# NOTE(review): bare comparison whose result is only shown as the cell output
# and never stored. Presumably noise_index is the NumPy array returned by
# mnist_noise, which would make this an elementwise test - confirm, or delete
# the cell if it is leftover debugging.
noise_index == False

In [None]:
# Show the per-noise-level accuracy records collected by the experiment loop.
print(repr(exp_summary))