In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Fix the CPU and CUDA random number generators so runs are repeatable.
SEED = 0
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

In [3]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
import torchvision
import torchvision.transforms as transforms

# Both splits share the same storage root, tensor transform and download flag;
# only the `train` switch differs.
mnist_kwargs = dict(root="MNIST_data/", transform=transforms.ToTensor(), download=True)

train_dataset = torchvision.datasets.MNIST(train=True, **mnist_kwargs)
test_dataset = torchvision.datasets.MNIST(train=False, **mnist_kwargs)


In [6]:
batch_size = 128

# Only the training split is shuffled; the test split keeps its dataset order.
train_dataloader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_dataloader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [7]:
class Encoder1(nn.Module):
    """First-stage encoder: a single linear layer followed by a sigmoid.

    Args:
        in_features: size of the last dimension of the input (default 784).
        out_features: size of the produced code (default 256).

    The defaults reproduce the original hard-coded 784 -> 256 layer, so
    ``Encoder1()`` behaves exactly as before.
    """

    def __init__(self, in_features=784, out_features=256):
        super().__init__()
        self.linear = nn.Linear(in_features, out_features)
        self.activation = nn.Sigmoid()

    def forward(self, x):
        """Return ``sigmoid(linear(x))``; the output's last dimension is ``out_features``."""
        return self.activation(self.linear(x))

class Decoder1(nn.Module):
    """First-stage decoder: a single linear layer followed by a sigmoid.

    Args:
        in_features: size of the code fed to the decoder (default 256).
        out_features: size of the reconstruction (default 784).

    The defaults reproduce the original hard-coded 256 -> 784 layer, so
    ``Decoder1()`` behaves exactly as before.
    """

    def __init__(self, in_features=256, out_features=784):
        super().__init__()
        self.linear = nn.Linear(in_features, out_features)
        self.activation = nn.Sigmoid()

    def forward(self, x):
        """Return ``sigmoid(linear(x))``; the sigmoid keeps every output in (0, 1)."""
        return self.activation(self.linear(x))

class AutoEncoder1(nn.Module):
    """First-stage autoencoder: an ``Encoder1`` followed by a ``Decoder1``."""

    def __init__(self):
        super().__init__()
        self.encoder = Encoder1()
        self.decoder = Decoder1()

    def forward(self, x):
        """Encode ``x`` and decode it again.

        Returns:
            A ``(code, reconstruction)`` tuple: the encoder output and the
            decoder output computed from that code.
        """
        code = self.encoder(x)
        reconstruction = self.decoder(code)
        return code, reconstruction



In [8]:
class Encoder2(nn.Module):
    """Second-stage encoder: a single linear layer followed by a sigmoid.

    Args:
        in_features: size of the last dimension of the input (default 256).
        out_features: size of the produced code (default 64).

    The defaults reproduce the original hard-coded 256 -> 64 layer, so
    ``Encoder2()`` behaves exactly as before.
    """

    def __init__(self, in_features=256, out_features=64):
        super().__init__()
        self.linear = nn.Linear(in_features, out_features)
        self.activation = nn.Sigmoid()

    def forward(self, x):
        """Return ``sigmoid(linear(x))``; the output's last dimension is ``out_features``."""
        return self.activation(self.linear(x))

class Decoder2(nn.Module):
    """Second-stage decoder: a single linear layer followed by a sigmoid.

    Args:
        in_features: size of the code fed to the decoder (default 64).
        out_features: size of the reconstruction (default 256).

    The defaults reproduce the original hard-coded 64 -> 256 layer, so
    ``Decoder2()`` behaves exactly as before.
    """

    def __init__(self, in_features=64, out_features=256):
        super().__init__()
        self.linear = nn.Linear(in_features, out_features)
        self.activation = nn.Sigmoid()

    def forward(self, x):
        """Return ``sigmoid(linear(x))``; the sigmoid keeps every output in (0, 1)."""
        return self.activation(self.linear(x))

class AutoEncoder2(nn.Module):
    """Second-stage autoencoder: an ``Encoder2`` followed by a ``Decoder2``."""

    def __init__(self):
        super().__init__()
        self.encoder = Encoder2()
        self.decoder = Decoder2()

    def forward(self, x):
        """Encode ``x`` and decode it again.

        Returns:
            A ``(code, reconstruction)`` tuple: the encoder output and the
            decoder output computed from that code.
        """
        code = self.encoder(x)
        reconstruction = self.decoder(code)
        return code, reconstruction



In [9]:
# Build both autoencoders (first stage first), move them to the selected
# device and put them in training mode.
autoencoder1, autoencoder2 = (
    model_cls().to(device).train() for model_cls in (AutoEncoder1, AutoEncoder2)
)

In [10]:
# One Adam optimizer per autoencoder so each stage can be trained separately.
optimizer1 = optim.Adam(params=autoencoder1.parameters(), lr=1e-3)
optimizer2 = optim.Adam(params=autoencoder2.parameters(), lr=1e-3)

In [11]:
# Reconstruction loss: mean squared error averaged over all elements.
criterion = nn.MSELoss(reduction='mean')

In [12]:
epochs = 31
total_batch_num = len(train_dataloader)

# Stage 1: train the first autoencoder to reconstruct the flattened images.
autoencoder1.train()
for epoch in range(epochs):
    avg_cost = 0.0

    for b_x, b_y in train_dataloader:
        # Labels (b_y) are not used here: the target is the input itself.
        b_x = b_x.view(-1, 784).to(device)
        # Call the module rather than .forward() so registered hooks also run.
        z, b_x_hat = autoencoder1(b_x)
        loss = criterion(b_x_hat, b_x)

        optimizer1.zero_grad()
        loss.backward()
        optimizer1.step()

        # .item() yields a plain Python float; accumulating the tensor itself
        # would keep every batch's autograd history alive for the whole epoch.
        avg_cost += loss.item() / total_batch_num
    print(f'Epoch: {epoch + 1} / {epochs}, cost : {avg_cost}')

Epoch: 1 / 31, cost : 0.04563755542039871
Epoch: 2 / 31, cost : 0.01899774558842182
Epoch: 3 / 31, cost : 0.011343770660459995
Epoch: 4 / 31, cost : 0.00797191634774208
Epoch: 5 / 31, cost : 0.006147636566311121
Epoch: 6 / 31, cost : 0.005006956867873669
Epoch: 7 / 31, cost : 0.0042284526862204075
Epoch: 8 / 31, cost : 0.003666963893920183
Epoch: 9 / 31, cost : 0.0032454102765768766
Epoch: 10 / 31, cost : 0.0029162222053855658
Epoch: 11 / 31, cost : 0.002647258108481765
Epoch: 12 / 31, cost : 0.00242629530839622
Epoch: 13 / 31, cost : 0.002243498107418418
Epoch: 14 / 31, cost : 0.0020903004333376884
Epoch: 15 / 31, cost : 0.0019587997812777758
Epoch: 16 / 31, cost : 0.0018454494420439005
Epoch: 17 / 31, cost : 0.0017473312327638268
Epoch: 18 / 31, cost : 0.001661212183535099
Epoch: 19 / 31, cost : 0.0015869609778746963
Epoch: 20 / 31, cost : 0.0015213640872389078
Epoch: 21 / 31, cost : 0.0014626721385866404
Epoch: 22 / 31, cost : 0.0014097841922193766
Epoch: 23 / 31, cost : 0.001362742

In [13]:
epochs = 31
total_batch_num = len(train_dataloader)

# Stage 2: the first autoencoder is frozen (eval mode, no gradients) and the
# second autoencoder learns to reconstruct the first encoder's codes.
autoencoder1.eval()
autoencoder2.train()

for epoch in range(epochs):
    avg_cost = 0.0

    for b_x, b_y in train_dataloader:
        # Labels (b_y) are not used here: the target is the stage-1 code.
        b_x = b_x.view(-1, 784).to(device)
        with torch.no_grad():
            # Only the stage-1 code is needed, so the stage-1 decoder is skipped.
            z1 = autoencoder1.encoder(b_x)
        # Call the module rather than .forward() so registered hooks also run.
        z2, z1_hat = autoencoder2(z1)
        loss = criterion(z1_hat, z1)

        optimizer2.zero_grad()
        loss.backward()
        optimizer2.step()

        # .item() yields a plain Python float; accumulating the tensor itself
        # would keep every batch's autograd history alive for the whole epoch.
        avg_cost += loss.item() / total_batch_num
    print(f'Epoch: {epoch + 1} / {epochs}, cost : {avg_cost}')

Epoch: 1 / 31, cost : 0.04752444475889206
Epoch: 2 / 31, cost : 0.03200024366378784
Epoch: 3 / 31, cost : 0.02533634752035141
Epoch: 4 / 31, cost : 0.022034740075469017
Epoch: 5 / 31, cost : 0.020352469757199287
Epoch: 6 / 31, cost : 0.019559301435947418
Epoch: 7 / 31, cost : 0.019150830805301666
Epoch: 8 / 31, cost : 0.018883200362324715
Epoch: 9 / 31, cost : 0.018673012033104897
Epoch: 10 / 31, cost : 0.018504148349165916
Epoch: 11 / 31, cost : 0.01836523786187172
Epoch: 12 / 31, cost : 0.018249118700623512
Epoch: 13 / 31, cost : 0.018149780109524727
Epoch: 14 / 31, cost : 0.018066557124257088
Epoch: 15 / 31, cost : 0.017994508147239685
Epoch: 16 / 31, cost : 0.017933277413249016
Epoch: 17 / 31, cost : 0.017877468839287758
Epoch: 18 / 31, cost : 0.017827734351158142
Epoch: 19 / 31, cost : 0.017781900241971016
Epoch: 20 / 31, cost : 0.017742149531841278
Epoch: 21 / 31, cost : 0.01770501211285591
Epoch: 22 / 31, cost : 0.017669925466179848
Epoch: 23 / 31, cost : 0.01763796992599964
Epo

## Semi-Supervised Learning: fine-tuning the pretrained encoders with a classifier head

In [14]:
class Classifier(nn.Module):
    """Two-layer classification head: linear -> sigmoid -> linear.

    Args:
        in_features: size of the last dimension of the input (default 64).
        hidden_features: width of the hidden layer (default 32).
        num_classes: number of output logits (default 10).

    The defaults reproduce the original hard-coded 64 -> 32 -> 10 head, so
    ``Classifier()`` behaves exactly as before.
    """

    def __init__(self, in_features=64, hidden_features=32, num_classes=10):
        super().__init__()
        self.linear = nn.Linear(in_features, hidden_features)
        self.activation = nn.Sigmoid()
        self.cls = nn.Linear(hidden_features, num_classes)

    def forward(self, x):
        """Return raw (un-normalised) class logits; no softmax is applied here."""
        hidden = self.activation(self.linear(x))
        return self.cls(hidden)

In [15]:
# Classification head, placed on the same device as the autoencoders.
classifier = Classifier().to(device=device)

In [16]:
# Classification loss; takes raw logits and integer class targets.
cls_criterion = nn.CrossEntropyLoss(reduction='mean')

In [17]:
# A single Adam optimizer for joint training: one parameter group per module,
# each with learning rate 0.001.
optimizer = optim.Adam(
    [
        {"params": module.parameters(), "lr": 0.001}
        for module in (autoencoder1, autoencoder2, classifier)
    ]
)

In [19]:
# Fine-tuning: both pretrained encoders and the classifier are trained jointly
# on the labels with cross-entropy.
autoencoder1.train()
autoencoder2.train()
classifier.train()

total_batch_num = len(train_dataloader)
epochs = 31

for epoch in range(epochs):
    avg_cost = 0.0

    for b_x, b_y in train_dataloader:
        b_x = b_x.view(-1, 784).to(device)
        b_y = b_y.to(device)

        # Only the encoder outputs feed the classifier, so the decoders are
        # skipped; their reconstructions were computed and discarded before.
        z1 = autoencoder1.encoder(b_x)
        z2 = autoencoder2.encoder(z1)

        logits = classifier(z2)
        loss = cls_criterion(logits, b_y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # .item() yields a plain Python float; accumulating the tensor itself
        # would keep every batch's autograd history alive for the whole epoch.
        avg_cost += loss.item() / total_batch_num

    print(f'Epoch: {epoch + 1} / {epochs}, cost : {avg_cost}')

Epoch: 1 / 31, cost : 1.1965051889419556
Epoch: 2 / 31, cost : 0.31456199288368225
Epoch: 3 / 31, cost : 0.1916721761226654
Epoch: 4 / 31, cost : 0.139480859041214
Epoch: 5 / 31, cost : 0.10811206698417664
Epoch: 6 / 31, cost : 0.08731108903884888
Epoch: 7 / 31, cost : 0.0701720342040062
Epoch: 8 / 31, cost : 0.05779353156685829
Epoch: 9 / 31, cost : 0.04650409147143364
Epoch: 10 / 31, cost : 0.03868270292878151
Epoch: 11 / 31, cost : 0.030626988038420677
Epoch: 12 / 31, cost : 0.025524167343974113
Epoch: 13 / 31, cost : 0.020108507946133614
Epoch: 14 / 31, cost : 0.016009269282221794
Epoch: 15 / 31, cost : 0.01295915525406599
Epoch: 16 / 31, cost : 0.010571417398750782
Epoch: 17 / 31, cost : 0.00816864799708128
Epoch: 18 / 31, cost : 0.008442527614533901
Epoch: 19 / 31, cost : 0.005209405440837145
Epoch: 20 / 31, cost : 0.005045057274401188
Epoch: 21 / 31, cost : 0.004466630052775145
Epoch: 22 / 31, cost : 0.004558964166790247
Epoch: 23 / 31, cost : 0.004590666852891445
Epoch: 24 / 31

In [20]:
# Evaluate the encoder stack plus classifier on the test split.
classifier.eval()
autoencoder1.eval()
autoencoder2.eval()

correct = 0
total = 0

# No gradients are needed for evaluation.
with torch.no_grad():
    for b_x, b_y in test_dataloader:
        b_x = b_x.view(-1, 784).to(device)
        b_y = b_y.to(device)

        # Only the encoders are run; the decoder reconstructions were never
        # used for prediction.
        z1 = autoencoder1.encoder(b_x)
        z2 = autoencoder2.encoder(z1)
        logits = classifier(z2)

        # The predicted class is the index of the largest logit.
        predicts = torch.argmax(logits, dim=1)

        total += b_y.size(0)
        correct += (predicts == b_y).sum().item()

print(f'Accuracy of the network on test images: {100 * correct / total} %')

Accuracy of the network on test images: 97.9 %
