In [None]:
import numpy as np
import torch 
import matplotlib.pyplot as plt
import sklearn
import torchvision
from torchvision import datasets
import torch.nn as nn
from torch import optim
from torch.autograd import Variable
import pandas as pd
import random
from torchvision import transforms
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split
from torchsummary import summary
from tqdm import tqdm
import wandb


In [None]:
from autoencoder_functions import *

# Co-training autoencoder with classifier

In [None]:
## Input data formatting
# Load the MNIST train/test splits (downloaded into `data_dir` if missing).

data_dir = 'dataset'

# ToTensor is the only preprocessing step applied to either split.
train_transform = transforms.Compose([
    transforms.ToTensor(),
])

test_transform = transforms.Compose([
    transforms.ToTensor(),
])

# Hand the transform to the constructor rather than assigning the
# `.transform` attribute afterwards: the dataset is then fully configured
# from the moment it exists and is never observable in an untransformed state.
train_dataset = torchvision.datasets.MNIST(
    data_dir, train=True, download=True, transform=train_transform
)
test_dataset = torchvision.datasets.MNIST(
    data_dir, train=False, download=True, transform=test_transform
)

In [None]:
# Wrap both splits in DataLoaders so they can be consumed in mini-batches.
batch_size = 256

# Training batches are reshuffled every epoch so that the optimizer does not
# see the samples in the same fixed order on each pass.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Evaluation only reads the data, so a fixed order is used here; shuffling the
# test split brings nothing and makes per-batch results harder to compare.
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
## Loss functions
# Mean squared error for the reconstruction branch, cross-entropy for the
# classification branch.
loss_fn_reconstruction = nn.MSELoss()
loss_fn_classifier = nn.CrossEntropyLoss()

# Forwarded as `loss_p` to co_train / co_evaluate.
# NOTE(review): presumably the relative weight of the two loss terms --
# confirm against autoencoder_functions.
loss_p = 0.95


In [None]:
## Model initialization
# `d` is the size of the encoded space shared by the encoder, the decoder and
# the classifier input (`low_d`).
d = 25
encoder = Encoder_original(encoded_space_dim=d, fc2_input_dim=128)
decoder = Decoder_original(encoded_space_dim=d, fc2_input_dim=128)
classifier = MLP_Classifier(low_d=d)

# One parameter group per module, in the order encoder, decoder, classifier.
params_to_optimize = [
    {'params': module.parameters()}
    for module in (encoder, decoder, classifier)
]

In [None]:
## Optimizer and compute device
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# A single Adam optimizer updates all parameter groups in params_to_optimize.
# NOTE: the variable name `optim` rebinds the `optim` module imported from
# torch in the imports cell; the name is kept because the training cell
# passes it on as `optimizer=optim`.
lr = 0.01
optim = torch.optim.Adam(params_to_optimize, lr=lr, weight_decay=1e-05)


In [None]:
# Move every module to the selected device.
# The classifier has to be moved as well: its parameters are part of
# params_to_optimize and it is handed to co_train / co_evaluate together with
# `device`, so leaving it behind would keep it on the CPU while the encoder
# and decoder run on `device`.
encoder.to(device)
decoder.to(device)
classifier.to(device)

# Co-Training of autoencoder and classifier

In [None]:
# Joint training: each epoch runs one co_train pass over train_loader and one
# co_evaluate pass over test_loader, then plots reconstructions.
num_epochs = 10  # number of epochs
diz_loss = {'train_loss': [], 'val_loss': []}  # per-epoch loss history

# Keyword arguments that co_train and co_evaluate have in common.
shared_args = dict(
    encoder=encoder,
    decoder=decoder,
    classifier=classifier,
    device=device,
    loss_fn_classifier=loss_fn_classifier,
    loss_fn_autoencoder=loss_fn_reconstruction,
    loss_p=loss_p,
)

for epoch in range(num_epochs):
    # co_train also returns w_ae and w_cl; they are not used further in this cell.
    train_loss, w_ae, w_cl = co_train(dataloader=train_loader, optimizer=optim, **shared_args)

    # The "val" losses are computed on the test split (test_loader).
    val_loss_rec, val_loss_class, val_loss = co_evaluate(dataloader=test_loader, **shared_args)

    print('\n EPOCH {}/{} \t train loss {} \t val loss {}'.format(epoch + 1, num_epochs, train_loss, val_loss))

    # Show reconstructions produced by the current encoder/decoder.
    fig = plot_ae_outputs(test_dataset, device, encoder, decoder, n=10)
    fig.show()

    diz_loss['train_loss'].append(train_loss)
    diz_loss['val_loss'].append(val_loss)

In [None]:
# Final evaluation on the test split. co_evaluate returns three values (the
# training loop unpacks them as reconstruction, classification and combined
# loss); here they are kept together in `test_loss` and printed as returned.
# NOTE(review): loss_p is 0.999 here but 0.95 during training -- confirm that
# the different weighting is intentional.
test_loss = co_evaluate(
    dataloader=test_loader,
    encoder=encoder,
    decoder=decoder,
    classifier=classifier,
    device=device,
    loss_fn_classifier=loss_fn_classifier,
    loss_fn_autoencoder=loss_fn_reconstruction,
    loss_p=0.999,
)
print(test_loss)

# Evaluation and Visualization

In [None]:
# Plot reconstructions of test images with the trained encoder/decoder and
# display the returned object as the cell output.
rec_img = plot_ae_outputs(
    test_dataset=test_dataset,
    device=device,
    encoder=encoder,
    decoder=decoder,
)
rec_img

In [None]:
# Evaluate the classifier on the test split; co_evaluate_classifier is given
# the classifier, the test loader and the encoder.
# NOTE(review): presumably the returned value is a classification accuracy
# (going by the variable name) -- confirm in autoencoder_functions.
# NOTE(review): no `device` is passed here, unlike co_train / co_evaluate --
# verify how the helper handles device placement.
accuracy_test = co_evaluate_classifier(classifier, test_loader, encoder)
print(accuracy_test)

In [None]:
# Encode both splits with the trained encoder (train first, then test).
# The result for the test split is used below as a DataFrame with a `label`
# column and `Enc. Variable <i>` columns.
enc_samples_train, enc_samples_test = (
    embedding(split, device, encoder)
    for split in (train_dataset, test_dataset)
)

In [None]:
import plotly.express as px

# Scatter of the first two encoded variables of the test split, coloured by
# digit label. Only two of the d encoded dimensions are visible in this view.
px.scatter(
    enc_samples_test,
    x='Enc. Variable 0',
    y='Enc. Variable 1',
    color=enc_samples_test['label'].astype(str),  # str => discrete colours
    opacity=0.7,
)

In [None]:
from sklearn.manifold import TSNE
import plotly.io as pio

# 2-D t-SNE projection of the encoded test samples (all columns but `label`).
# t-SNE is stochastic: without a fixed random_state the embedding, and hence
# the figure, changes on every run. Pin the seed so that
# "Restart Kernel -> Run All" reproduces the same plot.
tsne = TSNE(n_components=2, random_state=0)
tsne_results = tsne.fit_transform(enc_samples_test.drop(['label'], axis=1))

# x=0 / y=1 select the two columns of the projected array; the `labels`
# mapping renames those axes in the figure.
fig = px.scatter(tsne_results, x=0, y=1,
                 color=enc_samples_test.label.astype(str),
                 labels={'0': 'tsne-2d-one', '1': 'tsne-2d-two'})
fig.show()



In [None]:
from sklearn.decomposition import PCA

# 2-D PCA projection of the encoded test samples (every column except `label`).
pca = PCA(n_components=2)
encoded_features = enc_samples_test.drop(columns=['label'])
pca_results = pca.fit_transform(encoded_features)

# x=0 / y=1 select the two principal components; `labels` renames the axes.
fig = px.scatter(
    pca_results,
    x=0,
    y=1,
    color=enc_samples_test['label'].astype(str),
    labels={'0': 'pca-2d-one', '1': 'pca-2d-two'},
)
fig.show()