In [None]:
# A denoising autoencoder for CIFAR dataset(s)
# Reference: https://codahead.com/blog/a-denoising-autoencoder-for-cifar-datasets

In [None]:
import torch

# Fail fast unless a usable, non-K80 CUDA GPU is attached to this runtime.
# CUDA availability is queried explicitly instead of comparing the text of a
# RuntimeError: the exact wording of that message is not a stable interface,
# and a mismatch used to let the notebook silently continue on CPU.
if not torch.cuda.is_available():
    print("You are training with CPU! "
          "Please restart!")
    exit()  # Restart the session
else:
    # Kept in an else-branch so nothing below runs after exit() is requested
    # (in a notebook kernel, exit() may not stop the current cell immediately).
    try:
        # Get GPU name, check if it's K80
        GPU_name = torch.cuda.get_device_name()
    except RuntimeError as e:
        # CUDA reported as available but the device could not be queried.
        print("What's wrong here?")
        print("Error message: \n", e)
    else:
        if GPU_name.endswith("K80"):
            print("Get K80! :'( RESTART!")
            exit()  # Restart the session
        else:
            print("Your GPU is {}!".format(GPU_name))
            print("Great! Keep going~")

Your GPU is Tesla P100-PCIE-16GB!
Great! Keep going~


In [None]:
import os

from google.colab import drive

drive.mount('/content/gdrive')

# your workspace in your drive
workspace = 'Course_ML2021Spring/HW08'

# `workspace` is a nested path, so os.makedirs is used: it also creates the
# missing parent folder, which os.mkdir cannot do. exist_ok=True makes the
# cell safe to re-run, and no exception is swallowed by a bare `except`.
workspace_dir = os.path.join('/content/gdrive/My Drive/', workspace)
os.makedirs(workspace_dir, exist_ok=True)
os.chdir(workspace_dir)

Mounted at /content/gdrive


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Conv2D, Input, Dense, Reshape, Conv2DTranspose,\
   Activation, BatchNormalization, ReLU, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint

In [None]:
def _load_scaled_images(npy_path):
    """Load the array stored at ``npy_path``, cast it to float32 and divide by 255."""
    # NOTE(review): allow_pickle=True lets a crafted .npy file execute arbitrary
    # code on load -- only use it with data files from a trusted source.
    raw = np.load(npy_path, allow_pickle=True)
    return raw.astype('float32') / 255.


train_data_clean = _load_scaled_images('data-bin/trainingset.npy')
test_data_clean = _load_scaled_images('data-bin/testingset.npy')
# Tip: to release memory held by large intermediates, `import gc` and call gc.collect().


In [None]:
def add_noise_and_clip_data(data, scale=0.1):
    """Return ``data`` plus zero-mean Gaussian noise, clipped to [0, 1].

    Args:
        data: numpy array of image values (presumably already scaled to
            [0, 1] by the loading cell -- verify if the loader changes).
        scale: standard deviation of the Gaussian noise.

    Returns:
        A new array with the same shape as ``data``. ``data`` itself is not
        modified.
    """
    noise = np.random.normal(loc=0.0, scale=scale, size=data.shape)
    return np.clip(data + noise, 0., 1.)


train_data_noisy = add_noise_and_clip_data(train_data_clean)
# The inference cell passes `test_data_noisy` to dae.predict, so it has to be
# created here; leaving it commented out raises NameError on a fresh kernel.
test_data_noisy = add_noise_and_clip_data(test_data_clean)

In [None]:
import matplotlib.pyplot as plt

idx = 1

# Show one training sample next to its noisy counterpart.
fig, (ax_clean, ax_noisy) = plt.subplots(1, 2)
ax_clean.imshow(train_data_clean[idx])
ax_clean.set_title('Original image')
ax_noisy.imshow(train_data_noisy[idx])
ax_noisy.set_title('Image with noise')
plt.show()

In [None]:
def conv_block(x, filters, kernel_size, strides=2):
    """Apply Conv2D ('same' padding) -> BatchNormalization -> ReLU to ``x``.

    Args:
        x: input Keras tensor.
        filters: number of output filters of the convolution.
        kernel_size: convolution kernel size.
        strides: convolution stride (defaults to 2).

    Returns:
        The output tensor of the final ReLU layer.
    """
    convolution = Conv2D(filters=filters,
                         kernel_size=kernel_size,
                         strides=strides,
                         padding='same')
    features = convolution(x)
    normalized = BatchNormalization()(features)
    return ReLU()(normalized)

In [None]:
def deconv_block(x, filters, kernel_size):
    """Apply Conv2DTranspose (stride 2, 'same' padding) -> BatchNormalization -> ReLU.

    Args:
        x: input Keras tensor.
        filters: number of output filters of the transposed convolution.
        kernel_size: transposed-convolution kernel size.

    Returns:
        The output tensor of the final ReLU layer.
    """
    upsampling = Conv2DTranspose(filters=filters,
                                 kernel_size=kernel_size,
                                 strides=2,
                                 padding='same')
    features = upsampling(x)
    normalized = BatchNormalization()(features)
    return ReLU()(normalized)

In [None]:
def denoising_autoencoder():
    """Build the denoising autoencoder as a Keras ``Model`` named 'dae'.

    The encoder stacks four stride-2 ``conv_block``s (64, 128, 256, 512
    filters) followed by one stride-1 block with 512 filters. The decoder
    stacks four ``deconv_block``s (512, 256, 128, 64 filters); the outputs of
    the first three are concatenated with the third, second and first encoder
    outputs respectively. A final 3-filter transposed convolution with a
    sigmoid activation produces the output.

    Returns:
        An uncompiled Model mapping a (64, 64, 3) input to the sigmoid output.
    """
    dae_inputs = Input(shape=(64, 64, 3), name='dae_input')

    # Encoder: remember every stride-2 output so the decoder can reuse them.
    encoder_outputs = []
    tensor = dae_inputs
    for n_filters in (64, 128, 256, 512):
        tensor = conv_block(tensor, n_filters, 3)
        encoder_outputs.append(tensor)
    tensor = conv_block(tensor, 512, 3, 1)

    # Decoder: each upsampled tensor is merged with the encoder output that
    # was produced one level above the deepest stride-2 block, walking
    # upwards (third, second, then first encoder output).
    skip_tensors = reversed(encoder_outputs[:-1])
    for n_filters, skip in zip((512, 256, 128), skip_tensors):
        tensor = deconv_block(tensor, n_filters, 3)
        tensor = Concatenate()([tensor, skip])
    tensor = deconv_block(tensor, 64, 3)

    # Project to 3 channels and apply a sigmoid.
    tensor = Conv2DTranspose(filters=3, kernel_size=3, padding='same')(tensor)
    dae_outputs = Activation('sigmoid', name='dae_output')(tensor)

    return Model(dae_inputs, dae_outputs, name='dae')

In [None]:
# Build and compile the model: pixel-wise MSE between the prediction and the
# clean image, optimised with Adam.
dae = denoising_autoencoder()
dae.compile(loss='mse', optimizer='adam')
epochs = 30

# Create the checkpoint folder up front instead of relying on the callback or
# the HDF5 writer to create it (`os` is imported in the Drive-mount cell).
checkpoint_dir = './TFoutput'
os.makedirs(checkpoint_dir, exist_ok=True)

# Save the weights after every epoch; files are named by zero-padded epoch number.
checkpoint = ModelCheckpoint(filepath=checkpoint_dir + '/{epoch:04d}.h5',
                             verbose=1,
                             save_best_only=False,
                             save_weights_only=True)

# Train the network to map noisy images back to their clean versions.
dae.fit(train_data_noisy,
        train_data_clean,
        epochs=epochs,
        batch_size=128,
        callbacks=[checkpoint])

In [None]:
# Inference for anomaly detection.
# Load the weights saved after the last training epoch. The file name is
# derived from `epochs` (set in the training cell) so that changing the number
# of epochs does not leave a stale hard-coded path here.
final_weights_path = './TFoutput/{:04d}.h5'.format(epochs)
dae.load_weights(final_weights_path)
test_data_denoised = dae.predict(test_data_noisy)

In [None]:
# Sanity check: display the shape of the model output.
np.shape(test_data_denoised)

In [None]:
# Squared reconstruction error between the clean test images and the model
# output, summed (not averaged) over the last axis.
anomality = np.sum(np.square(test_data_clean - test_data_denoised), axis=-1)

In [None]:
# One score per sample: add up the squared errors over axes 1 and 2, then take
# the square root of the total.
anomalityScore = np.sqrt(np.sum(anomality, axis=(1, 2)))
# Append a trailing axis of length 1 so each score becomes its own row.
AnomalityScore = anomalityScore[..., np.newaxis]
AnomalityScore[:10]

In [None]:
import pandas as pd

# Write the scores to CSV with an 'Id' index column and a 'Predicted' column.
out_file = 'PREDICTION_FILE0.csv'
df = pd.DataFrame(data=AnomalityScore, columns=['Predicted'])
df.rename_axis('Id').to_csv(out_file)