## Autoencoder template

In [1]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   
os.environ["CUDA_VISIBLE_DEVICES"]="3"

In [2]:
import torch
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader

In [77]:
device = torch.device("cuda")

## get DATA

In [65]:
def int2onehot(x, n_class):
    ret = torch.zeros(n_class)
    ret[x] = 1
    return ret
int2onehot(3, 10)

tensor([0., 0., 0., 1., 0., 0., 0., 0., 0., 0.])

In [87]:
import torchvision.datasets as dset
import torchvision.transforms as transforms

def int2onehot(x, n_class):
    ret = torch.zeros(n_class)
    ret[x] = 1
    return ret
    
trans = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (1.0,))])
# if not exist, download mnist dataset
dir_s = '/home/bwlee/data'
train_set = dset.MNIST(root=dir_s, train=True, transform=trans, download=True)
test_set = dset.MNIST(root=dir_s, train=False, transform=trans, download=True)
# change 2D to 1D for MLP
# train_set
# [(tensor(batch, channel, x, y), label_one_hot)]
# train_set2
# [(tensor(batch, channel, x*y), label_one_hot)] 
n_class = 10
train_set2, test_set2 = [], []
for data in train_set:
    #train_set2.append([data[0].view(1,-1), int2onehot(data[1], n_class)])
    train_set2.append([data[0].view(-1), data[1]])
for data in test_set:
    #test_set2.append([data[0].view(1,-1), int2onehot(data[1], n_class)])
    test_set2.append([data[0].view(-1), data[1]])
    
batch_size = 512
# train_loader, test_loader has form of [data_batch, tgt_batch]
# data_batch in [batch, channel, x, y]
# data_batch in [batch]
train_loader = torch.utils.data.DataLoader(
    dataset=train_set2,
    batch_size=batch_size,
    shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_set2,
    batch_size=batch_size,
    shuffle=False
)

In [88]:
print(test_set[1][0].size())
print(test_set[1][1])
print(len(test_set))
print(test_set2[1][0].size())
print(test_set2[1][1])
print(len(test_set2))

torch.Size([1, 28, 28])
2
10000
torch.Size([784])
2
10000


In [89]:
for i, data in enumerate(test_loader):
    if i>0: break
    print(data[0].size())
    print(data[1].size())
    print(data[0][:3])
    print(data[1][:3])

torch.Size([512, 784])
torch.Size([512])
tensor([[-0.5000, -0.5000, -0.5000,  ..., -0.5000, -0.5000, -0.5000],
        [-0.5000, -0.5000, -0.5000,  ..., -0.5000, -0.5000, -0.5000],
        [-0.5000, -0.5000, -0.5000,  ..., -0.5000, -0.5000, -0.5000]])
tensor([7, 2, 1])


In [82]:
def get_MLP(n_hiddens, activation=nn.ReLU(), dropout=0.1):
    def get_a_layer(n_in, n_out, activation, dropout):
        seq = [nn.Dropout(dropout), nn.Linear(n_in, n_out),
                activation]
        return seq
    layers = [get_a_layer(n_in, n_out, activation, dropout) for 
              n_in, n_out in zip(n_hiddens, n_hiddens[1:])]
    layers = [ x for xs in layers for x in xs ]
    return nn.Sequential(*layers)

In [120]:
class Model(nn.Module):
    def __init__(self, model=None, loss=None, 
                 optimizer=None):
        super(Model, self).__init__()
        self.model = model
        self.loss = loss
        self.optimizer = optimizer
    
    def run_batch(self, i_batch, data):
        self.optimizer.zero_grad()
        data_in, tgt = data
        data_in = data_in.to(device)
        tgt = tgt.to(device)
        out = self.model(data_in)
        loss = self.loss(out, tgt)
        loss.backward()
        self.optimizer.step()
        return loss.detach().cpu().item()
    
    def run_train(self, n_epoch, data):
        self.model.train()
        for i_epoch in range(n_epoch):
            loss = 0
            n_batch = len(data)
            for i_batch, data_batch in enumerate(data):
                loss += self.run_batch(i_batch, data_batch)
            #print(i_batch, n_batch)
            loss /= 1.0*n_batch
            print('epoch', i_epoch, 'loss', loss)
            
    def run_eval(self, data):
        self.model.eval()
        loss = 0
        for i_batch, data_batch in enumerate(data):
            data_in, tgt = data_batch
            out = self.model(data_in)
            loss += self.loss(out, tgt).detach().cpu()
        loss /= 1.0*i_batch
        return out, loss

In [121]:
class Autoencoder(Model):
    def __init__(self, model=None, loss=None, 
                 optimizer=None):
        super(Autoencoder, self).__init__(model, loss, optimizer)
    
    def run_batch(self, i_batch, data):
        self.optimizer.zero_grad()
        data_in, _ = data
        data_in = data_in.to(device)
        out = self.model(data_in)
        loss = self.loss(out, data_in)
        loss.backward()
        self.optimizer.step()
        return loss.detach().cpu().item()

In [117]:
model = Model()

## TEST

In [28]:
mlp = get_MLP([100, 200, 50])

In [29]:
mlp

Sequential(
  (0): Dropout(p=0.1, inplace=False)
  (1): Linear(in_features=100, out_features=200, bias=True)
  (2): ReLU()
  (3): Dropout(p=0.1, inplace=False)
  (4): Linear(in_features=200, out_features=50, bias=True)
  (5): ReLU()
)

### Classifier example

In [None]:
dim_mnist = 784

encoder = get_MLP([784, 300, 100, 10])
#decoder = get_MLP([100, 300, 784])
#ae_model = nn.Sequential(encoder, decoder)
encoder = encoder.to(device)

loss = nn.CrossEntropyLoss()
optimizer = optim.Adam(encoder.parameters())
classifier = Model(model=encoder, 
                loss=loss, optimizer=optimizer)
classifier.run_train(20, train_loader)

### Autoencoder example

In [123]:
dim_mnist = 784
dims = [784, 300, 300]
encoder = get_MLP(dims)
decoder = get_MLP(list(reversed(dims)))
ae_model = nn.Sequential(encoder, decoder)
ae_model = ae_model.to(device)

loss = nn.MSELoss()
optimizer = optim.Adam(ae_model.parameters())
ae = Autoencoder(model=ae_model, 
                loss=loss, optimizer=optimizer)
ae.run_train(100, train_loader)

epoch 0 loss 0.23071343237060613
epoch 1 loss 0.2276317797980066
epoch 2 loss 0.22480619521969455
epoch 3 loss 0.2231647442963164
epoch 4 loss 0.22215470797934775
epoch 5 loss 0.22136260569095612
epoch 6 loss 0.22082046267844863
epoch 7 loss 0.22053612200385433
epoch 8 loss 0.2203439912300999
epoch 9 loss 0.22011182154134168
epoch 10 loss 0.21999640005119778
epoch 11 loss 0.2198753744616347
epoch 12 loss 0.21977928886979314
epoch 13 loss 0.2196949321334645
epoch 14 loss 0.21961670217372603
epoch 15 loss 0.21953157941668722
epoch 16 loss 0.21940884991722592
epoch 17 loss 0.2193568220077935
epoch 18 loss 0.21931206763295805
epoch 19 loss 0.2192240796856961
epoch 20 loss 0.21917644585088147
epoch 21 loss 0.21914299918433366
epoch 22 loss 0.21911113077806213
epoch 23 loss 0.21905980132899042
epoch 24 loss 0.21902856546438346
epoch 25 loss 0.21899219387668675
epoch 26 loss 0.21892157247511007
epoch 27 loss 0.21891247165405145
epoch 28 loss 0.21887813685304028
epoch 29 loss 0.218881076170226

In [111]:
for x in ae_model.parameters():
    print(x)

Parameter containing:
tensor([[-0.0025,  0.0034, -0.0259,  ..., -0.0353, -0.0060, -0.0130],
        [ 0.0096, -0.0023,  0.0006,  ...,  0.0057,  0.0107, -0.0239],
        [ 0.0069,  0.0345,  0.0016,  ...,  0.0049, -0.0037,  0.0266],
        ...,
        [ 0.0059, -0.0077, -0.0157,  ..., -0.0220,  0.0125,  0.0081],
        [-0.0191,  0.0032, -0.0260,  ...,  0.0027,  0.0303, -0.0320],
        [-0.0301,  0.0239,  0.0299,  ..., -0.0351,  0.0192,  0.0254]],
       device='cuda:0', requires_grad=True)
Parameter containing:
tensor([ 1.1499e-02,  3.5574e-02, -1.5282e-02, -3.4488e-02, -6.6427e-03,
        -3.0787e-02,  8.8574e-03, -2.5748e-02,  3.1820e-03, -1.5123e-02,
        -3.5610e-02,  3.1792e-03,  1.4448e-02,  1.1403e-02,  3.5713e-02,
        -2.4667e-02,  1.9531e-02,  1.6594e-02,  2.1051e-02, -3.0093e-02,
         1.0338e-02,  2.0535e-02,  2.8502e-02,  2.2742e-02, -2.7530e-02,
         7.9542e-03,  2.6036e-02,  2.7786e-02, -3.9303e-03, -1.9438e-02,
         2.2545e-02, -3.3450e-03,  2.960