In [None]:
# IPython autoreload: re-import changed modules before each cell runs,
# so edits to the local `corpy` package take effect without restarting the kernel
%load_ext autoreload
%autoreload 2

# Setup environment

This notebook and package were created with Python `3.10.6`.
<br>
It is highly recommended to create a new virtual environment with Python `3.10.6` before running the setup below.

Run the following code in your terminal to install required libraries & dependencies:
```
cd {your/file/path}/corpy-assessment
make install
```
where `{your/file/path}` is the location where you saved the `corpy-assessment` folder.

## Checking the setup

Once setup completes, run in the terminal:
```
pip list | grep corpy
```
<br>

If the terminal shows the following, then setup has been successful.
```
corpy       0.0.1
```
<br>
Continue with the rest of the notebook once setup is complete.

# Download dataset

In [None]:
from corpy.get_data.get_data import data_download

# fetch the dataset used by the rest of the notebook
# NOTE(review): the download source and destination are defined inside
# corpy.get_data and are not visible from this notebook
data_download()

# Import packages

In [None]:
# import official packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow.keras as keras

# import custom package
from corpy.ml_logic.data import load_data
from corpy.ml_logic.model import create_encoder, create_decoder, AutoEncoder

# Load training & test data

In [None]:
# data-loading configuration
img_size = 128    # side length (pixels) images are resized to
batch_size = 32   # number of images per batch

# load both splits with identical settings, training split first:
#   'train' -> data-augmented training data (250 original + 750 augmented images)
#   'test'  -> test data (180 images)
ds_train, ds_test = (
    load_data(batch_size, img_size, split) for split in ('train', 'test')
)

# Build Encoder & Decoder

In [None]:
# set dimension of latent layer
latent_dim = 128

# build encoder & decoder
# create_encoder returns the encoder plus a second value (`shape`) that is handed
# straight to create_decoder -- presumably the feature-map shape the decoder has
# to restore; TODO confirm in corpy.ml_logic.model
encoder, shape = create_encoder(img_size, latent_dim)
decoder = create_decoder(shape, latent_dim)

In [None]:
# check encoder: display its summary to verify the architecture built above
encoder.summary()

In [None]:
# check decoder: display its summary to verify the architecture built above
decoder.summary()

# Build & compile AutoEncoder

In [None]:
# wrap encoder & decoder into one AutoEncoder and compile it with Adam
ae = AutoEncoder(encoder, decoder)
ae.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-3))

# learning-rate decay callback (only takes effect when passed to `fit`):
# when the training loss has not improved for 10 epochs, multiply the learning
# rate by 0.2, never going below 1e-5; after each reduction wait 100 epochs
# before monitoring resumes
lr_decay = keras.callbacks.ReduceLROnPlateau(
    monitor='loss',
    factor=0.2,
    patience=10,
    cooldown=100,
    min_lr=1e-5,
    verbose=1,
)

# Train AutoEncoder

Uncomment the next cell to train the AutoEncoder from scratch.
<br>
To skip training, leave it commented out and run the cell after it, which loads the weights of a pre-trained model.

In [None]:
# Optional: uncomment the line below to train from scratch (250 epochs, using the lr_decay callback).
# NOTE: the next cell loads pre-trained weights into `ae`; skip that cell to keep freshly trained weights.
# history = ae.fit(ds_train, epochs=250, verbose=1, callbacks=[lr_decay])

In [None]:
# skip training and load pre-trained weights
# build with the same image size the encoder was created with (img_size) rather
# than a hard-coded 128, so the two values cannot drift apart
# NOTE(review): the saved weights presumably correspond to img_size = 128 and
# latent_dim = 128; loading them with other settings is expected to fail -- confirm
ae.build(input_shape=(None, img_size, img_size, 3))
ae.load_weights('trained/ae_mse_weights.h5')

In [None]:
# read the saved training history and plot its loss curve
mse_history = pd.read_csv('trained/ae_mse_history.csv')
mse_history[['loss']].plot()

# Prediction on test images

In [None]:
# load test images & predictions
# Iterate over the dataset exactly once: materialising it with list() for every
# batch would re-read the whole dataset on each iteration and, if the dataset is
# reshuffled on every pass, could repeat or miss batches.
# assumes each element of ds_test is a single batch of images (not an
# (image, label) tuple) -- TODO confirm against load_data
original = []
reconstructed = []
for images in ds_test:
    original.extend(images)
    reconstructed.extend(ae(images))

print(f"[INFO] number of test images: {len(original)}")


# calculate mse between original and reconstructed images (one value per image)
error = [np.mean((o - r) ** 2) for o, r in zip(original, reconstructed)]


# compute the q-th quantile of the mses to be threshold to identify anomalies
# any reconstructed image with mse >= threshold will be defined as an anomaly
anomaly_quantile = 0.90
thresh = np.quantile(error, anomaly_quantile)
print(f"[INFO] mse threshold: {thresh}")


# count anomalies found
idxs = np.where(np.array(error) >= thresh)[0]
print(f"[INFO] {len(idxs)} anomalies found")

# retrieve original & reconstructed images where mse >= threshold
rows = []
for i in idxs:
    orig = original[i]
    recon = reconstructed[i]
    # the difference is doubled, presumably to make it easier to see
    # NOTE(review): negative differences may be clipped when displayed --
    # consider np.abs(orig - recon) if both directions should be visible
    diff = (orig - recon) * 2

    # stack the original, reconstructed, & difference mask images side-by-side
    rows.append(np.hstack([orig, recon, diff]))

# vertically stack all rows in one call instead of re-copying the growing
# array on every iteration; stays None when nothing was selected
# (e.g. when the errors contain NaN)
outputs = np.vstack(rows) if rows else None

# show the output
if outputs is not None:
    plt.figure(figsize=(12, 50))
    plt.imshow(outputs)
else:
    print("[INFO] no anomalies to display")