In [1]:
import os, sys
import torch
import torch.nn as nn
from torch.nn import Parameter
from torch.autograd import Variable
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torch.nn.functional as F
import torch.optim as optim
from tqdm import tqdm

# Choose the GPU before any statement can initialise CUDA: the variable is only
# honoured if it is set before the CUDA runtime starts.  `setdefault` keeps
# index "1" as this notebook's default while respecting a value that was
# already exported by the user or a job scheduler.
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "1")

MNIST_ROOT = '../datasets/mnist'
batch_size = 100

# Convert images to tensors and subtract 0.5 (scale left at 1.0), so that the
# sign of a pixel can later be used to binarise it.
trans = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (1.0,))])
train_set = datasets.MNIST(MNIST_ROOT, train=True, download=True, transform=trans)
test_set = datasets.MNIST(MNIST_ROOT, train=False, download=True, transform=trans)

train_loader = torch.utils.data.DataLoader(
                 dataset=train_set,
                 batch_size=batch_size,
                 shuffle=True)

print('==>>> total trainning batch number: {}'.format(len(train_loader)))

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Processing...
Done!
==>>> total trainning batch number: 600


In [None]:
class NADE(nn.Module):
    """Autoregressive density estimator over binarised feature vectors.

    Feature ``i`` is predicted from the features before it:

        h_i = sigmoid(C + sum_{j < i} x_j * W[j])
        p_i = sigmoid(h_i . W[i] + B[i])

    The same matrix ``W`` is used for the input->hidden and the
    hidden->output projection (tied weights).

    Args:
        num_feas: number of features D per sample.
        num_hidden_dim: size H of the hidden representation.
    """

    def __init__(self, num_feas, num_hidden_dim):
        super(NADE, self).__init__()

        self.num_feas = num_feas
        self.num_hidden_dim = num_hidden_dim

        # C: hidden bias (1, H); W: per-feature weights (D, H); B: output bias (D,).
        self.C = Parameter(torch.randn(1, num_hidden_dim))
        self.W = Parameter(torch.randn(num_feas, num_hidden_dim))
        self.B = Parameter(torch.randn(num_feas))

    def forward(self, batch_x):
        """Return the training loss for a batch.

        Args:
            batch_x: tensor of shape (N, num_feas).  The raw values condition
                the later features; the targets are the binarisation
                ``batch_x > 0``.
                NOTE(review): conditioning on the raw rather than the
                binarised values is kept from the original implementation --
                confirm this is intended.

        Returns:
            0-dim tensor: for every sample the binary cross-entropy averaged
            over its features, summed over the batch.

        Raises:
            ValueError: if ``batch_x`` is not of shape (N, num_feas).
        """
        # Follow the parameters instead of hard-coding a CUDA tensor type so the
        # module works on whatever device it has been moved to.
        batch_x = batch_x.to(device=self.W.device, dtype=self.W.dtype)
        if batch_x.dim() != 2 or batch_x.size(1) != self.num_feas:
            raise ValueError(
                "expected input of shape (N, {}), got {}".format(
                    self.num_feas, tuple(batch_x.shape)))

        targets = (batch_x > 0).to(self.W.dtype)

        # contrib[n, j] = x[n, j] * W[j] is what feature j adds to the hidden
        # pre-activation of every later feature.
        contrib = batch_x.unsqueeze(-1) * self.W                       # (N, D, H)
        # Exclusive prefix sum along the feature axis: position i holds the sum
        # over j < i, so position 0 is all zeros and h_0 = sigmoid(C).
        prefix = F.pad(contrib[:, :-1].cumsum(dim=1), (0, 0, 1, 0))    # (N, D, H)
        hidden = torch.sigmoid(prefix + self.C)                        # (N, D, H)
        logits = (hidden * self.W).sum(dim=-1) + self.B                # (N, D)

        # Everything above is built out of place: writing predictions into one
        # shared buffer sample after sample invalidates tensors autograd has
        # saved and makes backward() fail.  The logits form of the loss stays
        # finite when the sigmoid saturates.
        per_feature = F.binary_cross_entropy_with_logits(
            logits, targets, reduction='none')
        return per_feature.mean(dim=1).sum()

In [None]:
# Train on the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = NADE(784, 32)
model = model.to(device)

optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)

# One pass over the training set; `i` counts the batches processed so far.
i = 0
for i, (batch_x, batch_y) in enumerate(tqdm(train_loader), start=1):
    # Flatten each image to a 784-vector.  `batch_x` itself stays on the CPU
    # (only a copy is moved to the device) because the plotting cell below
    # reads the last batch from it.
    batch_x = batch_x.view(-1, 784)

    optimizer.zero_grad()

    loss = model(batch_x.to(device))
    loss.backward()

    optimizer.step()

    if i % 100 == 0:
        # tqdm.write prints above the progress bar instead of breaking it.
        tqdm.write(str(loss.item()))

In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Show sample 15 of the last training batch as a 28x28 image, thresholded at 0
# (the same rule the model uses to binarise its targets).
digit = batch_x[15].view(28, 28)
plt.imshow(digit > 0.0, cmap='gray')