In [None]:
import numpy as np
import torch 
import matplotlib.pyplot as plt
import sklearn
import torchvision
from torchvision import datasets
import torch.nn as nn
from torch import optim
from torch.autograd import Variable
import pandas as pd
import random
from torchvision import transforms
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split
from torchsummary import summary
from tqdm import tqdm
import wandb
import scipy.ndimage

In [None]:
from autoencoder_functions import *
from healing_mnist_functions import *

# Data Import and Processing   

In [None]:
# Seed the Python and NumPy random number generators before any data is
# generated, so that stochastic steps drawing from them are repeatable.
# NOTE(review): assumes the data-generation helpers draw from `random` /
# `np.random` -- confirm against healing_mnist_functions.
seed = 0
random.seed(seed)
np.random.seed(seed)

# Dataset-generation settings forwarded to HealingMNIST_rot_square.
ratio_rot = 0.5      # forwarded as `ratio_rot`
ratio_square = 0.5   # forwarded as `ratio_square`
square_size = 5      # NOTE(review): not referenced by any other visible cell

# NOTE: `min` and `max` rebind the Python built-ins of the same name for the
# rest of the notebook, so `min(...)` / `max(...)` calls in later cells will
# fail. The names are kept because the data-loading cell reads them.
min = 3
max = 8

In [None]:
# Build the dataset from the settings chosen in the configuration cell.
data = HealingMNIST_rot_square(
    ratio_rot=ratio_rot,
    ratio_square=ratio_square,
    min=min,
    max=max,
)

In [None]:
# Wrap the images, labels and square flags of each split with imageToTensor.
train_dataset = imageToTensor(
    data.train_images,
    data.train_labels,
    data.train_squares,
)
test_dataset = imageToTensor(
    data.test_images,
    data.test_labels,
    data.test_squares,
)

In [None]:
# Use DataLoaders to serve the datasets in mini-batches of `batch_size` samples.
batch_size = 256

# Training data is reshuffled every epoch so that consecutive mini-batches do
# not follow the (possibly structured) order in which the dataset was built.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Evaluation data keeps its original order: shuffling brings no benefit there
# and a fixed order keeps evaluation runs directly comparable.
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Autoencoder: set-up, training and evaluation

In [None]:
### Reconstruction loss: mean squared error
loss_fn = torch.nn.MSELoss()

### Hyper-parameters
lr = 0.001  # learning rate handed to Adam

### Seed torch's RNG before the networks are built
# NOTE(review): presumably this makes the weight initialisation repeatable -- confirm
torch.manual_seed(0)

d = 25  # size of the encoded space, shared by encoder and decoder

weight_decay = 0  # weight decay handed to Adam

### Build the two halves of the autoencoder (encoder first, then decoder)
encoder = Encoder_original(encoded_space_dim=d, fc2_input_dim=128)
decoder = Decoder_original(encoded_space_dim=d, fc2_input_dim=128)

### One Adam optimizer updates the parameters of both networks
params_to_optimize = [{'params': net.parameters()} for net in (encoder, decoder)]

# NOTE: the name `optim` rebinds the `optim` module imported from torch at the
# top of the notebook; it is kept because the training cell passes it on.
optim = torch.optim.Adam(params_to_optimize, lr=lr, weight_decay=weight_decay)

### Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f'Selected device: {device}')

### Move both networks to the selected device
encoder.to(device)
decoder.to(device)

In [None]:
# Loss of the untrained autoencoder on the training set, as a baseline.
initial_loss = evaluate_square(encoder, decoder, device, train_loader, loss_fn)
# `.numpy()` only works on CPU tensors; `.cpu()` is a no-op when the loss is
# already on the CPU and avoids a TypeError when it lives on the GPU.
print(initial_loss.detach().cpu().numpy())

In [None]:
# Show the layer structure of both networks, encoder first.
print(encoder, decoder, sep="\n")

In [None]:
# This cell pins training to the CPU. The networks were moved to the
# auto-selected device in the set-up cell, so move them again here: otherwise,
# on a machine with a GPU, the models would sit on "cuda" while the helpers
# are told to work on "cpu".
device = "cpu"
encoder.to(device)
decoder.to(device)

num_epochs = 25  # number of passes over the training set
diz_loss = {'train_loss': [], 'val_loss': []}  # per-epoch training and evaluation loss

for epoch in range(num_epochs):
    # Train the autoencoder on the training set.
    train_loss = train_square(encoder, decoder, device, train_loader, loss_fn, optim)
    # Evaluate the autoencoder on the test set.
    val_loss = evaluate_square(encoder, decoder, device, test_loader, loss_fn)
    print('\n EPOCH {}/{} \t train loss {} \t val loss {}'.format(epoch + 1, num_epochs, train_loss, val_loss))

    # Plot the autoencoder outputs for the test set after every epoch.
    fig = plot_ae_outputs_square(test_dataset, device, encoder, decoder, n=10)
    fig.show()

    diz_loss['train_loss'].append(train_loss)
    diz_loss['val_loss'].append(val_loss)

In [None]:
# Plot the autoencoder outputs for the test set once training has finished.
rec_img = plot_ae_outputs_square(
    test_dataset=test_dataset,
    device=device,
    encoder=encoder,
    decoder=decoder,
)

In [None]:
# Same kind of plot via the custom helper, with no class filter (class_num=None).
plot_ae_outputs_square_custom(
    test_dataset=test_dataset,
    device=device,
    encoder=encoder,
    decoder=decoder,
    n=10,
    class_num=None,
)

# Low-dimensional representation after training of convolutional autoencoder

In [None]:
## Encode every sample of both splits with the trained encoder
# (training split first, then the test split).
# NOTE(review): later cells treat the results as DataFrames with `label` and
# `square` columns -- confirm against embedding_with_square.
enc_samples_train, enc_samples_test = (
    embedding_with_square(split, device, encoder)
    for split in (train_dataset, test_dataset)
)

In [None]:
import plotly.express as px

# Scatter the first two encoded variables of the test set, coloured by label.
px.scatter(
    enc_samples_test,
    x='Enc. Variable 0',
    y='Enc. Variable 1',
    color=enc_samples_test["label"].astype(str),
    opacity=0.7,
)

In [None]:
from sklearn.manifold import TSNE
import plotly.io as pio

# Project the encoded test samples to 2-D with t-SNE. t-SNE is stochastic, so
# the random state is fixed to make the embedding (and the figure) repeatable.
tsne = TSNE(n_components=2, random_state=0)
# Only the encoded variables are embedded; `label` and `square` are metadata.
tsne_results = tsne.fit_transform(enc_samples_test.drop(['label', 'square'], axis=1))

# Colour encodes the label, marker shape encodes the square flag.
fig = px.scatter(tsne_results, x=0, y=1,
                 color=enc_samples_test.label.astype(str),
                 symbol=enc_samples_test.square.astype(str),
                 symbol_sequence=['circle', 'cross'],
                 labels={'0': 'tsne-2d-one', '1': 'tsne-2d-two'})
fig.show()



# Classifier from low-dimensional embedding: set-up, training and evaluation

In [None]:
# Column layout of the training embeddings once `square` is left out.
enc_samples_train.drop(columns="square").columns

In [None]:
# Number of samples in each split.
n_train = enc_samples_train.shape[0]
n_test = enc_samples_test.shape[0]

# From here on the embedding tables are plain float arrays
# (the original names are rebound to the converted arrays).
enc_samples_train = np.array(enc_samples_train.astype(float))
enc_samples_test = np.array(enc_samples_test.astype(float))

# Columns 0..d-1 are the classifier inputs; column d is used as the target.
# NOTE(review): this relies on the column order of the embedding table
# (encoded variables first, then the label) -- confirm.
enc_x_train = torch.Tensor(enc_samples_train[:, :d]).float().view(n_train, d)
enc_y_train = torch.Tensor(enc_samples_train[:, d]).int().view(n_train)

enc_x_test = torch.Tensor(enc_samples_test[:, :d]).float().view(n_test, d)
enc_y_test = torch.Tensor(enc_samples_test[:, d]).int().view(n_test)

In [None]:
from torch.utils.data import TensorDataset

# Pair each split's inputs with its targets so they can be batched together.
enc_train_dataset = TensorDataset(
    enc_x_train,
    enc_y_train,
)
enc_test_dataset = TensorDataset(
    enc_x_test,
    enc_y_test,
)

In [None]:
# Mini-batch size for the classifier (this rebinds the `batch_size` used for
# the autoencoder loaders).
batch_size = 32

# Reshuffle the training embeddings every epoch so mini-batches do not follow
# the order in which the samples were encoded.
enc_train_loader = DataLoader(enc_train_dataset, batch_size=batch_size, shuffle=True)

# Evaluation keeps a fixed order.
enc_test_loader = DataLoader(enc_test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Cross-entropy loss for the classifier.
enc_loss_fn = torch.nn.CrossEntropyLoss()

# MLP classifier that takes the d-dimensional embedding as input.
classifier_nn = MLP_Classifier(low_d=d)

# Adam over the classifier's parameters only.
enc_optim = torch.optim.Adam(
    classifier_nn.parameters(),
    lr=0.01,
)

In [None]:
n = 10  # number of classifier training epochs
for epoch in range(n):
    classifier_nn.train()
    train_loss = []
    for batch_idx, (inputs, labels) in enumerate(enc_train_loader):
        # CrossEntropyLoss expects int64 class indices, while the targets were
        # stored with `.int()`. Cast the existing tensor rather than
        # re-wrapping it with torch.tensor(), which warns about copy-construction.
        labels = labels.long()
        enc_optim.zero_grad()

        outputs = classifier_nn(inputs)
        loss = enc_loss_fn(outputs, labels)
        loss.backward()
        enc_optim.step()
        train_loss.append(loss.item())

        if batch_idx % 100 == 0:
            print(f'Epoch [{epoch+1}/{n}], Batch [{batch_idx+1}/{len(enc_train_loader)}], Loss: {loss.item():.4f}' )

    # Mean training loss over the epoch and accuracy on the test embeddings.
    train_loss = np.mean(train_loss)
    test_accuracy = evaluate_classifier(classifier_nn, enc_test_loader)

    # Log to Weights & Biases only when a run is active: no visible cell calls
    # wandb.init(), so an unconditional `run.log(...)` would raise NameError
    # on a fresh kernel. Without an active run, report the metrics locally.
    metrics = {"Classifier/train_loss": train_loss, "Classifier/accuracy": test_accuracy}
    if wandb.run is not None:
        wandb.log(metrics, step=epoch)
    else:
        print(f'Epoch [{epoch+1}/{n}] train loss: {train_loss:.4f}, test accuracy: {test_accuracy}')

In [None]:
# Evaluate the trained classifier on the test embeddings and keep the result.
test_accuracy = evaluate_classifier(classifier_nn, enc_test_loader)


In [None]:
# Run the class-wise evaluation helper on the test embeddings; being the last
# expression of the cell, its return value (if any) is displayed.
evaluate_classifier_classwise(classifier_nn, enc_test_loader)