In [1]:
import os
import torch
import torch.nn as nn
from torch.autograd import Variable
import torchvision.datasets as dset
import torchvision.transforms as transforms
import torch.nn.functional as F
import torch.optim as optim
from sklearn.cluster import KMeans
## load mnist dataset
use_cuda = torch.cuda.is_available()
root = './data'
if not os.path.exists(root):
    os.mkdir(root)
trans = transforms.Compose([transforms.Resize(32), transforms.ToTensor(), transforms.Normalize((0.5,), (1.0,))])
# if not exist, download mnist dataset
train_set = dset.MNIST(root=root, train=True, transform=trans, download=True)
test_set = dset.MNIST(root=root, train=False, transform=trans, download=True)
batch_size = 100
train_loader = torch.utils.data.DataLoader(
                 dataset=train_set,
                 batch_size=batch_size,
                 shuffle=True)
test_loader = torch.utils.data.DataLoader(
                dataset=test_set,
                batch_size=batch_size,
                shuffle=False)

In [2]:
class DEC_AE(nn.Module):
    def __init__(self):
        super(DEC_AE,self).__init__()
        self.dropout = nn.Dropout(p=0.1)
        self.conv_ae1 = nn.Conv2d(1,50,4,stride=2,padding=2)
        self.conv_ae2 = nn.Conv2d(50,50,5,stride=2,padding=2)
        self.leReLU = nn.LeakyReLU()
        self.fc1 = nn.Linear(50*9*9,68)
        self.tanh = nn.Tanh()
        self.fc_de = nn.Linear(68,50*9*9)
        self.conv_de2 = nn.ConvTranspose2d(50,50,5,stride=2,padding=2)
        self.conv_de1 = nn.ConvTranspose2d(50,1,4,stride=2,padding=2)
        
        for m in self.modules():
            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
                import scipy.stats as stats
                stddev = m.stddev if hasattr(m, 'stddev') else 0.1
                X = stats.truncnorm(-2, 2, scale=stddev)
                values = torch.Tensor(X.rvs(m.weight.numel()))
                values = values.view(m.weight.size())
                m.weight.data.copy_(values)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
                
    def forward(self,x):
        # 32x32x1
        x = self.dropout(x)
        # 32x32x1
        x = self.conv_ae1(x)
        # 17x17x50
        x = self.leReLU(x)
        # 17x17x50
        x = self.dropout(x)
        # 17x17x50
        x = self.conv_ae2(x)
        # 9x9x50
        x = self.leReLU(x)
        # 9x9x50
        x = self.dropout(x)
        # 9x9x50
        x = x.view(-1, 50*9*9)
        # 1x4050
        x = self.fc1(x)
        # 1x68
        x = self.tanh(x)
        
        x_ae = x # this is the returned auto encoder
        # 1x68
        ##### auto encoder is done, followed by decoder #####
        # 1x68
        x = self.fc_de(x)
        # 1x4050
        x = self.tanh(x)
        # 1x4050
        x = x.view(-1,50,9,9)
        # 9*9*50
        x = self.conv_de2(x)
        # 17x17x50
        x = self.leReLU(x)
        # 17x17x50
        x = self.conv_de1(x)
        # 32x32x1
        x = self.tanh(x)
        x_de = x # this is the returned decoder
        
        return x_ae, x_de

In [7]:
class BasicUnit(nn.Module):
    def __init__(self):
        super(BasicUnit,self).__init__()
        self.dropout = nn.Dropout(p=0.1)
        self.conv_a1 = nn.Conv2d(1,50,4,stride=2,padding=2)
        self.conv_a2 = nn.Conv2d(50,50,5,stride=2,padding=2)
        self.leReLU = nn.LeakyReLU()
        self.fca1 = nn.Linear(50*9*9,10)
        self.softmax = nn.Softmax()
        
        for m in self.modules():
            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
                import scipy.stats as stats
                stddev = m.stddev if hasattr(m, 'stddev') else 0.1
                X = stats.truncnorm(-2, 2, scale=stddev)
                values = torch.Tensor(X.rvs(m.weight.numel()))
                values = values.view(m.weight.size())
                m.weight.data.copy_(values)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
                
    def forward(self,x):
        # 32x32x1
        x = self.dropout(x)        
        # 32x32x1
        x = self.conv_a1(x)
        # 17x17x50
        x = self.leReLU(x)
        # 17x17x50
        x = self.dropout(x)
        # 17x17x50
        x = self.conv_a2(x)
        # 9x9x50
        x = self.leReLU(x)
        # 9x9x50
        x = self.dropout(x)
        # 9x9x50
        x = x.view(-1, 50*9*9)
        # 1x4050
        x = self.fca1(x)
        # 1x68
        x = self.softmax(x)
        return x
    
class DTC(nn.Module):
    def __init__(self):
        super(DTC,self).__init__()
        self.unit_a = BasicUnit()
        self.unit_b = BasicUnit()
    
    def forward(self,x):
        x1 = self.unit_a(x)
        x2 = self.unit_b(x)
        
        return x1,x2




In [4]:
import numpy as np
from sklearn.metrics import normalized_mutual_info_score, adjusted_rand_score

nmi = normalized_mutual_info_score
ari = adjusted_rand_score


def acc(y_true, y_pred):
    """
    Calculate clustering accuracy. Require scikit-learn installed
    # Arguments
        y: true labels, numpy.array with shape `(n_samples,)`
        y_pred: predicted labels, numpy.array with shape `(n_samples,)`
    # Return
        accuracy, in [0,1]
    """
    y_true = y_true.astype(np.int64)
    assert y_pred.size == y_true.size
    D = max(y_pred.max(), y_true.max()) + 1
    w = np.zeros((D, D), dtype=np.int64)
    for i in range(y_pred.size):
        w[y_pred[i], y_true[i]] += 1
    from sklearn.utils.linear_assignment_ import linear_assignment
    ind = linear_assignment(w.max() - w)
    return sum([w[i, j] for i, j in ind]) * 1.0 / y_pred.size

In [5]:
class DEC:
    """The class for controlling the training process of DEC"""
    def pretrain(self,train_loader, test_loader, epochs):
        
        dec_ae = DEC_AE().cuda() #auto encoder
        mseloss = nn.MSELoss()
        optimizer = optim.Adam(dec_ae.parameters())
        
        for epoch in range(epochs):
            dec_ae.train()
            running_loss=0.0
            for i,data in enumerate(train_loader):
                x, label = data
                x,label=Variable(x).cuda(),Variable(label).cuda()
                optimizer.zero_grad()
                x_ae,x_de = dec_ae(x)
                loss = F.mse_loss(x_de,x).mean() #mseloss(x_de,x) # so the aim is to minimize the reconstruct error
                loss.backward()
                optimizer.step()
                # print statistics
                running_loss += loss.data.cpu().numpy()[0]
                if i % 100 == 99:    # print every 2000 mini-batches
                    print('[%d, %5d] loss: %.7f' %
                          (epoch + 1, i + 1, running_loss / 100))
                    #print('x_de:',x_de, x)
                    running_loss = 0.0
            #now we evaluate the accuracy with AE
            dec_ae.eval()
            for i,data in enumerate(test_loader):
                x, label = data
                x=Variable(x).cuda()
                x_ae,_ = dec_ae(x)
                x_ae = x_ae.data.cpu().numpy()
                label = label.cpu().numpy()
                km = KMeans(n_clusters=len(np.unique(label)), n_init=20, n_jobs=4)
                y_pred = km.fit_predict(x_ae)
                print(' '*8 + '|==>  acc: %.4f,  nmi: %.4f  <==|'
                          % (acc(label, y_pred), nmi(label, y_pred)))
                break
                

In [21]:
class DTC_trainer:
    """The class for controlling the training process of DEC"""
    
    
    def target_distribution(self,q):
        weight = q ** 2 / q.sum(0)
        return Variable(((weight.t() / weight.sum(1)).t()).data,requires_grad=False)
    
    def pretrain(self,train_loader, test_loader, epochs):
        
        dtc = DTC().cuda() #auto encoder
        mseloss = nn.MSELoss()
        optimizer = optim.Adam(dtc.parameters())
        
        for epoch in range(epochs):
            dtc.train()
            running_loss=0.0
            for i,data in enumerate(train_loader):
                x, label = data
                x,label=Variable(x).cuda(),Variable(label).cuda()
                optimizer.zero_grad()
                x_1,x_2 = dtc(x)
                td = self.target_distribution(x_1)
                loss =  F.kl_div(x_2,td,reduce=True) #mseloss(x_de,x) # so the aim is to minimize the reconstruct error
                loss.backward()
                optimizer.step()
                # print statistics
                running_loss += loss.data.cpu().numpy()[0]
                if i % 100 == 99:    # print every 2000 mini-batches
                    print('[%d, %5d] loss: %.7f' %
                          (epoch + 1, i + 1, running_loss / 100))
                    #print('x_de:',x_de, x)
                    running_loss = 0.0
            #now we evaluate the accuracy with AE
            dtc.eval()
            for i,data in enumerate(test_loader):
                x, label = data
                x=Variable(x).cuda()
                x_ae,_ = dtc(x)
                x_ae = x_ae.data.cpu().numpy()
                label = label.cpu().numpy()
                y_pred = np.argmax(x_ae,axis=1)
                print('y_pred',y_pred)
                print(' '*8 + '|==>  acc: %.4f,  nmi: %.4f  <==|'
                          % (acc(label, y_pred), nmi(label, y_pred)))
                break
import random
random.seed(7)
dtc = DTC_trainer()
dtc.pretrain(train_loader, test_loader, 20)



[1,   100] loss: -0.1554796
[1,   200] loss: -0.1554712
[1,   300] loss: -0.1556141
[1,   400] loss: -0.1555810
[1,   500] loss: -0.1560651
[1,   600] loss: -0.1554027
y_pred [0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 4 1 1 0 0 0 0 0 0 0 4 0 0 0 0 1 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 4 0 1 0 4 0 0 0 0 0 4 0 0 0 0 0 4 0 0 0
 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1]
        |==>  acc: 0.2200,  nmi: 0.1432  <==|
[2,   100] loss: -0.1554729
[2,   200] loss: -0.1555303
[2,   300] loss: -0.1559069
[2,   400] loss: -0.1553018
[2,   500] loss: -0.1555729
[2,   600] loss: -0.1557736
y_pred [0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 4 1 1 0 0 0 0 0 0 0 4 0 0 0 0 1 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 4 0 1 0 4 0 0 0 0 0 4 0 0 0 0 0 4 0 0 0
 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1]
        |==>  acc: 0.2200,  nmi: 0.1432  <==|
[3,   100] loss: -0.1558164
[3,   200] loss: -0.1556884
[3,   300] loss: -0.1558963
[3,   400] loss: -0.1556924
[3,   500] loss: -0.1560506
[3,   600]

KeyboardInterrupt: 

In [8]:
#now start training
import random
random.seed(7)
dec = DEC()
dec.pretrain(train_loader, test_loader, 20)

[1,   100] loss: 0.0645982
[1,   200] loss: 0.0546203
[1,   300] loss: 0.0410921
[1,   400] loss: 0.0157458
[1,   500] loss: 0.0102162
[1,   600] loss: 0.0079046
        |==>  acc: 0.6000,  nmi: 0.6467  <==|
[2,   100] loss: 0.0066989
[2,   200] loss: 0.0059005
[2,   300] loss: 0.0052914
[2,   400] loss: 0.0048581
[2,   500] loss: 0.0045226
[2,   600] loss: 0.0042733
        |==>  acc: 0.5700,  nmi: 0.6122  <==|
[3,   100] loss: 0.0040128
[3,   200] loss: 0.0038080
[3,   300] loss: 0.0036345
[3,   400] loss: 0.0034556
[3,   500] loss: 0.0033510
[3,   600] loss: 0.0032324
        |==>  acc: 0.5300,  nmi: 0.5939  <==|
[4,   100] loss: 0.0031155
[4,   200] loss: 0.0030300
[4,   300] loss: 0.0029553
[4,   400] loss: 0.0028632
[4,   500] loss: 0.0028077
[4,   600] loss: 0.0027246
        |==>  acc: 0.5100,  nmi: 0.5617  <==|
[5,   100] loss: 0.0026454
[5,   200] loss: 0.0026348
[5,   300] loss: 0.0025579
[5,   400] loss: 0.0025043
[5,   500] loss: 0.0024836
[5,   600] loss: 0.0024285
      

In [None]:
dtc = DTC()
