In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from mpl_toolkits.mplot3d import Axes3D

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils import data

import random, os, pathlib, time
from tqdm import tqdm
from sklearn import datasets

In [2]:
device = torch.device("cuda:1")
# device = torch.device("cpu")

## MNIST dataset

In [3]:
import mylibrary.datasets as datasets
import mylibrary.nnlib as tnn

In [4]:
mnist = datasets.FashionMNIST()
# mnist.download_mnist()
# mnist.save_mnist()
train_data, train_label_, test_data, test_label_ = mnist.load()

train_data = train_data / 255.
test_data = test_data / 255.

# train_label = tnn.Logits.index_to_logit(train_label_)
train_size = len(train_label_)

In [5]:
## converting data to pytorch format
train_data = torch.Tensor(train_data)
test_data = torch.Tensor(test_data)
train_label = torch.LongTensor(train_label_)
test_label = torch.LongTensor(test_label_)

In [6]:
input_size = 784
output_size = 10

In [7]:
class MNIST_Dataset(data.Dataset):
    
    def __init__(self, data, label):
        self.data = data
        self.label = label
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        img, lbl = self.data[idx], self.label[idx]
        return img, lbl

In [8]:
train_dataset = MNIST_Dataset(train_data, train_label)
test_dataset = MNIST_Dataset(test_data, test_label)

In [9]:
batch_size = 50
train_loader = data.DataLoader(dataset=train_dataset,
                                    num_workers=4, 
                                    batch_size=batch_size, 
                                    shuffle=True)

test_loader = data.DataLoader(dataset=test_dataset,
                                    num_workers=4, 
                                    batch_size=batch_size, 
                                    shuffle=False)

In [14]:
class DistanceTransform(nn.Module):
    
    def __init__(self, input_dim, num_centers):
        super().__init__()
        self.input_dim = input_dim
        self.num_centers = num_centers
        
        self.centers = torch.randn(num_centers, input_dim)/3.
        self.centers = nn.Parameter(self.centers)
        
    def forward(self, x):
        x = x[:, :self.input_dim]
        dists = torch.cdist(x, self.centers)
#         print(dists.shape)
#         dists = dists/np.sqrt(self.input_dim) ### correction to make diagonal of unit square 1 in nD space
        return dists
    
    def reverse(self, x):
        ### here x is the distance, so weighted distance is the reversed mode
        ##### for a single input, y = mean(x_i * center_i : for i in range(num_centers))
        pass

In [15]:
dt = DistanceTransform(784, 784)

In [16]:
list(dt.parameters())

[Parameter containing:
 tensor([[ 0.0809, -0.5722,  0.0215,  ...,  0.5122,  0.1792, -0.5046],
         [ 0.2112,  0.9331, -0.3379,  ..., -0.0551,  0.0880,  0.4146],
         [ 0.0876,  0.1773, -0.4028,  ..., -0.4853, -0.1762, -0.2653],
         ...,
         [ 0.1757, -0.2568,  0.1863,  ...,  0.0192, -0.1767,  0.1164],
         [ 0.4840, -0.2985,  0.1743,  ..., -0.0716,  0.1156, -0.0039],
         [-0.7521, -0.0627,  0.1564,  ..., -0.0910, -0.0911, -0.3136]],
        requires_grad=True)]

In [17]:
dt(torch.randn(2, 784)).shape

torch.Size([2, 784])

In [18]:
x=train_dataset[[3,7]][0]
dists = dt(x)
x.std(), dists.std()

(tensor(0.3944), tensor(4.4122, grad_fn=<StdBackward0>))

In [19]:
dists.max(), dists.min()

(tensor(23.2282, grad_fn=<MaxBackward1>),
 tensor(12.3328, grad_fn=<MinBackward1>))

In [20]:
dt.centers.shape

torch.Size([784, 784])

In [21]:
### single variable inverse
x = torch.randn(1, 2)
x

tensor([[ 0.3183, -1.5020]])

In [22]:
centers = torch.randn(2, 2) ## output_dim / num_centers, input_dim
y = torch.cdist(x, centers.t())  
y.shape

torch.Size([1, 2])

In [23]:
y

tensor([[0.3355, 2.3012]])

In [24]:
weighted_sum = 0
weight = (1/y).sum()
for i in range(len(centers)):
    weighted_sum += centers[i]*(1/y[0,i])/weight
weighted_sum

tensor([-0.2091, -1.1646])

In [21]:
##### test for inverse
x = torch.randn(2, 4)

In [22]:
centers = torch.randn(4, 4) ## output_dim / num_centers, input_dim
y = torch.cdist(x, centers)  
y.shape

torch.Size([2, 4])

In [23]:
y

tensor([[3.2547, 2.3560, 3.5674, 2.1832],
        [1.8720, 2.1646, 1.7646, 1.8540]])

In [24]:
(y.unsqueeze(2)*centers).shape

torch.Size([2, 4, 4])

In [25]:
z = (y.unsqueeze(2)*centers.t()).sum(dim=2)/y.sum(dim=1, keepdim=True)
z.shape

torch.Size([2, 4])

In [26]:
z

tensor([[-0.8764,  0.2324, -0.2695,  0.4836],
        [-0.7481,  0.3169, -0.1978,  0.6095]])

In [27]:
x

tensor([[ 1.9518,  0.0419,  0.0577,  0.8557],
        [-0.5095,  0.8375, -0.1642, -0.9393]])

In [40]:
model = nn.Sequential(
#                 DistanceTransform(784, 784),
                nn.BatchNorm1d(784),
                nn.Linear(784, 200),
                nn.LeakyReLU(),
                nn.Linear(200, 50),
                nn.LeakyReLU(),
                nn.Linear(50, 10))
model.to(device)

Sequential(
  (0): BatchNorm1d(784, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (1): Linear(in_features=784, out_features=200, bias=True)
  (2): LeakyReLU(negative_slope=0.01)
  (3): Linear(in_features=200, out_features=50, bias=True)
  (4): LeakyReLU(negative_slope=0.01)
  (5): Linear(in_features=50, out_features=10, bias=True)
)

In [41]:
optimizer = optim.Adam(list(model.parameters()), 
                            lr=0.0003, weight_decay=1e-15)
criterion = nn.CrossEntropyLoss()

In [42]:
index = 0
train_accs, test_accs = [], []
for epoch in tqdm(list(range(40))):
    train_acc = 0
    train_count = 0
    for xx, yy in train_loader:
        xx, yy = xx.to(device), yy.to(device)
        yout = model(xx)
        loss = criterion(yout, yy)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        outputs = torch.argmax(yout, dim=1).data.cpu().numpy()
        correct = (outputs == yy.data.cpu().numpy()).astype(float).sum()
        train_acc += correct
        train_count += len(outputs)

    train_accs.append(float(train_acc)/train_count*100)
    train_acc = 0
    train_count = 0

    print(f'Epoch: {epoch}:{index},  Loss:{float(loss)}')
    test_count = 0
    test_acc = 0
    for xx, yy in test_loader:
        xx, yy = xx.to(device), yy.to(device)
        with torch.no_grad():
            yout = model(xx)
        outputs = torch.argmax(yout, dim=1).data.cpu().numpy()
        correct = (outputs == yy.data.cpu().numpy()).astype(float).sum()
        test_acc += correct
        test_count += len(xx)
    test_accs.append(float(test_acc)/test_count*100)
    print(f'Train Acc:{train_accs[-1]:.2f}%, Test Acc:{test_accs[-1]:.2f}%')
    print()

### after each class index is finished training
print(f'\t-> MAX Train Acc {max(train_accs)} ; Test Acc {max(test_accs)}')

  0%|          | 0/40 [00:00<?, ?it/s]

Epoch: 0:0,  Loss:0.29208049178123474


  2%|▎         | 1/40 [00:03<02:16,  3.49s/it]

Train Acc:81.78%, Test Acc:85.08%

Epoch: 1:0,  Loss:0.3728877902030945


  5%|▌         | 2/40 [00:06<02:12,  3.50s/it]

Train Acc:86.54%, Test Acc:85.89%

Epoch: 2:0,  Loss:0.317286878824234


  8%|▊         | 3/40 [00:10<02:03,  3.35s/it]

Train Acc:87.80%, Test Acc:86.55%

Epoch: 3:0,  Loss:0.3154541254043579


 10%|█         | 4/40 [00:13<02:01,  3.36s/it]

Train Acc:88.73%, Test Acc:87.24%

Epoch: 4:0,  Loss:0.22585216164588928


 12%|█▎        | 5/40 [00:16<01:56,  3.34s/it]

Train Acc:89.39%, Test Acc:87.06%

Epoch: 5:0,  Loss:0.312459260225296


 15%|█▌        | 6/40 [00:19<01:51,  3.27s/it]

Train Acc:89.87%, Test Acc:87.18%

Epoch: 6:0,  Loss:0.2763053774833679


 18%|█▊        | 7/40 [00:23<01:49,  3.31s/it]

Train Acc:90.62%, Test Acc:87.69%

Epoch: 7:0,  Loss:0.3330936133861542


 20%|██        | 8/40 [00:26<01:47,  3.35s/it]

Train Acc:90.98%, Test Acc:88.02%

Epoch: 8:0,  Loss:0.331051230430603


 22%|██▎       | 9/40 [00:30<01:43,  3.34s/it]

Train Acc:91.36%, Test Acc:87.94%

Epoch: 9:0,  Loss:0.225890651345253


 25%|██▌       | 10/40 [00:33<01:41,  3.38s/it]

Train Acc:91.66%, Test Acc:88.47%

Epoch: 10:0,  Loss:0.06453756988048553


 28%|██▊       | 11/40 [00:36<01:36,  3.32s/it]

Train Acc:92.14%, Test Acc:88.32%

Epoch: 11:0,  Loss:0.10818453133106232


 30%|███       | 12/40 [00:40<01:33,  3.36s/it]

Train Acc:92.47%, Test Acc:88.67%

Epoch: 12:0,  Loss:0.17215467989444733


 32%|███▎      | 13/40 [00:43<01:30,  3.34s/it]

Train Acc:92.91%, Test Acc:88.78%

Epoch: 13:0,  Loss:0.10279855132102966


 35%|███▌      | 14/40 [00:46<01:27,  3.36s/it]

Train Acc:93.15%, Test Acc:88.82%

Epoch: 14:0,  Loss:0.15734629333019257


 38%|███▊      | 15/40 [00:50<01:22,  3.31s/it]

Train Acc:93.42%, Test Acc:88.47%

Epoch: 15:0,  Loss:0.33542683720588684


 40%|████      | 16/40 [00:53<01:20,  3.37s/it]

Train Acc:93.52%, Test Acc:88.51%

Epoch: 16:0,  Loss:0.2011193335056305


 42%|████▎     | 17/40 [00:57<01:17,  3.37s/it]

Train Acc:93.90%, Test Acc:88.61%

Epoch: 17:0,  Loss:0.15027286112308502


 45%|████▌     | 18/40 [01:00<01:13,  3.34s/it]

Train Acc:94.08%, Test Acc:88.23%

Epoch: 18:0,  Loss:0.09197285771369934


 48%|████▊     | 19/40 [01:03<01:10,  3.34s/it]

Train Acc:94.40%, Test Acc:88.56%

Epoch: 19:0,  Loss:0.17155756056308746


 50%|█████     | 20/40 [01:07<01:07,  3.39s/it]

Train Acc:94.50%, Test Acc:88.68%

Epoch: 20:0,  Loss:0.10575899481773376


 52%|█████▎    | 21/40 [01:10<01:04,  3.41s/it]

Train Acc:94.73%, Test Acc:88.83%

Epoch: 21:0,  Loss:0.15065334737300873


 55%|█████▌    | 22/40 [01:14<01:01,  3.43s/it]

Train Acc:94.99%, Test Acc:88.25%

Epoch: 22:0,  Loss:0.0940486490726471


 57%|█████▊    | 23/40 [01:17<00:58,  3.45s/it]

Train Acc:95.09%, Test Acc:87.93%

Epoch: 23:0,  Loss:0.10237213969230652


 60%|██████    | 24/40 [01:20<00:54,  3.42s/it]

Train Acc:95.28%, Test Acc:88.75%

Epoch: 24:0,  Loss:0.1218610480427742


 62%|██████▎   | 25/40 [01:24<00:50,  3.37s/it]

Train Acc:95.47%, Test Acc:88.12%

Epoch: 25:0,  Loss:0.067459836602211


 65%|██████▌   | 26/40 [01:27<00:46,  3.31s/it]

Train Acc:95.71%, Test Acc:88.18%

Epoch: 26:0,  Loss:0.06179826706647873


 68%|██████▊   | 27/40 [01:30<00:42,  3.31s/it]

Train Acc:95.85%, Test Acc:88.80%

Epoch: 27:0,  Loss:0.11750123649835587


 70%|███████   | 28/40 [01:33<00:39,  3.32s/it]

Train Acc:95.99%, Test Acc:88.14%

Epoch: 28:0,  Loss:0.13930989801883698


 72%|███████▎  | 29/40 [01:37<00:35,  3.25s/it]

Train Acc:96.10%, Test Acc:88.80%

Epoch: 29:0,  Loss:0.13838665187358856


 75%|███████▌  | 30/40 [01:40<00:32,  3.27s/it]

Train Acc:96.16%, Test Acc:88.12%

Epoch: 30:0,  Loss:0.13982413709163666


 78%|███████▊  | 31/40 [01:43<00:28,  3.21s/it]

Train Acc:96.21%, Test Acc:88.66%

Epoch: 31:0,  Loss:0.032835010439157486


 80%|████████  | 32/40 [01:46<00:25,  3.21s/it]

Train Acc:96.33%, Test Acc:88.13%

Epoch: 32:0,  Loss:0.01711474172770977


 82%|████████▎ | 33/40 [01:49<00:22,  3.23s/it]

Train Acc:96.61%, Test Acc:89.01%

Epoch: 33:0,  Loss:0.10650651156902313


 85%|████████▌ | 34/40 [01:53<00:19,  3.32s/it]

Train Acc:96.63%, Test Acc:88.54%

Epoch: 34:0,  Loss:0.09793972223997116


 88%|████████▊ | 35/40 [01:56<00:16,  3.29s/it]

Train Acc:96.73%, Test Acc:88.60%

Epoch: 35:0,  Loss:0.08458631485700607


 90%|█████████ | 36/40 [01:59<00:13,  3.27s/it]

Train Acc:96.71%, Test Acc:88.67%

Epoch: 36:0,  Loss:0.03837250545620918


 92%|█████████▎| 37/40 [02:03<00:09,  3.27s/it]

Train Acc:96.81%, Test Acc:87.97%

Epoch: 37:0,  Loss:0.05911695584654808


 95%|█████████▌| 38/40 [02:06<00:06,  3.30s/it]

Train Acc:97.09%, Test Acc:88.74%

Epoch: 38:0,  Loss:0.09802291542291641


 98%|█████████▊| 39/40 [02:10<00:03,  3.37s/it]

Train Acc:97.07%, Test Acc:88.63%

Epoch: 39:0,  Loss:0.10097994655370712


100%|██████████| 40/40 [02:13<00:00,  3.33s/it]

Train Acc:97.25%, Test Acc:88.52%

	-> MAX Train Acc 97.255 ; Test Acc 89.01





In [35]:
# -> MAX Train Acc 87.678 ; Test Acc 85.87  ### after distance transformation
# -> MAX Train Acc 97.255 ; Test Acc 89.01  ### normal nn

In [36]:
model.eval()
dists = model[0](train_dataset[[0,3,7]][0].to(device))
dists = model[1](dists)
model.train()
dists.shape

torch.Size([3, 784])

In [37]:
dists.mean()

tensor(0.8673, device='cuda:1', grad_fn=<MeanBackward0>)

In [38]:
dists.std()

tensor(1.3996, device='cuda:1', grad_fn=<StdBackward>)

In [39]:
dists[0]

tensor([0.6848, 1.3316, 0.7227, 0.5346, 1.1709, 1.3659, 0.5240, 0.4794, 0.7535,
        1.0121, 1.1492, 0.5022, 0.9565, 1.4633, 0.6267, 1.2688, 0.4046, 1.2330,
        1.0712, 0.6266, 0.6519, 0.6877, 1.1010, 0.4282, 0.6463, 1.4526, 0.8620,
        0.5370, 0.5170, 0.8810, 0.4652, 0.6724, 0.5919, 0.9145, 0.7398, 1.3985,
        0.8407, 0.4311, 0.6553, 1.4876, 1.3120, 1.3471, 0.9884, 0.6859, 0.6714,
        1.5286, 1.1423, 0.7222, 0.8848, 1.4915, 0.5261, 0.6724, 0.7562, 1.3607,
        0.2688, 1.8160, 1.1118, 0.5817, 0.8911, 1.5470, 0.9284, 1.4022, 0.3211,
        0.7805, 0.3566, 0.7487, 1.1619, 1.1347, 1.3580, 1.0779, 0.9561, 0.7349,
        0.1853, 1.3280, 0.5563, 1.0959, 1.1058, 1.5512, 1.2634, 1.0623, 0.9049,
        1.0813, 0.6733, 0.6099, 0.7811, 2.0137, 0.5007, 0.4348, 1.4182, 0.7736,
        0.9781, 0.8759, 0.6814, 0.3576, 1.0184, 0.2800, 1.4223, 0.8915, 0.6941,
        1.1333, 0.8379, 1.8258, 0.5853, 1.0714, 0.9740, 0.9794, 1.4452, 1.2711,
        1.6786, 1.4298, 1.2954, 1.1850, 