<a href="https://colab.research.google.com/github/yskuchi/wf_denoising/blob/master/denoising5_tf2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Waveform denoising 'denoising5_tf2'

A denoising convolutional autoencoder, implemented with TensorFlow 2.x,
applied to waveform data.

Noise from data is added to MC signal data.
You need datasets of signal and noise, separately, in pickle format.

See [Bitbucket repository](https://bitbucket.org/meg_ilc_tokyo/wf_denoising/src/master/) or 
[GitHub repository](https://github.com/yskuchi/wf_denoising)

## Setting

### Comet ML

In [0]:
! pip install comet-ml
#! [ ! -z "$COLAB_GPU" ] && pip install comet-ml

In [0]:
#! pip install typing-extensions==3.7.4

In [0]:
# comet_ml is imported here, ahead of the cell that imports tensorflow
from comet_ml import Experiment

# Create the experiment object that later cells log parameters and figures to.
#
# The API key and workspace are intentionally NOT written in the notebook:
# a key committed to a shared/public notebook can be used by anyone who reads it.
# They are expected to come from the .comet.config file (or from the
# COMET_API_KEY / COMET_WORKSPACE environment variables).
# NOTE: any key that was committed in an earlier revision of this notebook
# should be regenerated in the Comet account settings.
experiment = Experiment(project_name="wf_denoising")

### Other packages

In [0]:
import os, sys
import numpy as np
import pandas as pd
import json

import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Conv1D, MaxPooling1D, UpSampling1D
from tensorflow.keras import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint

### Parameters

In [0]:
# Run-time switches (the "arguments" of this notebook)
# load_weights: False -> train the model, then save history and weights;
#               True  -> skip training and load previously saved weights and history.
load_weights = False
# plot_data: True -> figures are shown; False -> the non-interactive "Agg" backend is used.
plot_data = True 
# Base name of every file the notebook writes or reads (checkpoints, history, weights, model).
filename = "denoising5_tf2"

# The backend is selected before pyplot is imported.
import matplotlib
if not plot_data:
    matplotlib.use("Agg") # this is necessary when using plt without display (batch)
import matplotlib.pyplot as plt

In [0]:
# A recorded waveform has 1024 sample-points; only the last `npoints` of them are used.
npoints = 1024  # could be reduced, e.g. to 256

# Waveforms are mapped to (x + offset) * scale before being clipped to [0, 1].
scale = 5
offset = 0.05  # 50 mV

# Pickle files holding the signal waveforms and the noise waveforms, respectively
signal_dataset_file, noise_dataset_file = 'wf11100.pkl', 'wf328469.pkl'

#### Hyper-parameters

In [0]:
# Settings passed to compile() and fit()
params = dict(
    optimizer='adam',
    loss='mse',           # alternative: 'binary_crossentropy'
    epochs=10,            # alternative: 20
    batch_size=256,
)

# Settings describing the layer activations and the data preparation
params2 = dict(
    conv_activation='relu',
    output_activation='linear',   # alternative: 'sigmoid'
    signal_dataset_file=signal_dataset_file,
    noise_dataset_file=noise_dataset_file,
    npoints=npoints,
    scale=scale,
    offset=offset,
)

# Only the additional parameters are sent to Comet explicitly
experiment.log_parameters(params2)

## Prepare datasets
On Google Colab, the data is loaded via Google Drive.
The files are expected to be in `/content/drive/My Drive/ML/data`.

### Mount Google Drive

In [0]:
# Google Colab only: mount Google Drive under /content/drive
from google.colab import drive
drive.mount('/content/drive')
# Directory that holds the pickle datasets. The trailing slash is required because
# the file names are appended to it by plain string concatenation.
data_dir = '/content/drive/My Drive/ML/data/'

### Load pickle files

In [0]:
# Read the signal and noise waveforms and convert them to NumPy arrays
signal_path = data_dir + signal_dataset_file
noise_path = data_dir + noise_dataset_file
x_original = pd.read_pickle(signal_path).to_numpy()
x_noise = pd.read_pickle(noise_path).to_numpy()

# Truncate both sets to the length of the shorter one so that they can be
# added waveform-by-waveform
nsamples = min(len(x_original), len(x_noise))
x_original, x_noise = x_original[:nsamples], x_noise[:nsamples]

### Shape data into the appropriate format and add noise

In [0]:
def _prepare_for_network(waveforms):
    """Scale and offset the waveforms, clip them to [0, 1] and add a channel axis.

    The result has shape (number of waveforms, npoints, 1), i.e. the input
    shape expected by Conv1D with a single channel.
    """
    shifted = waveforms * scale + offset * scale  # offset is 50 mV
    clipped = np.clip(shifted, 0, 1)
    return clipped.reshape(len(clipped), npoints, 1)


# Work in float32 and keep only the last `npoints` sample-points of each waveform
x_original = x_original.astype('float32')[..., -npoints:]
x_noise = x_noise.astype('float32')[..., -npoints:]

# The noisy input is signal + noise, added before scale and offset are applied
x_train_noisy = _prepare_for_network(x_original + x_noise)
x_original = _prepare_for_network(x_original)

## Build model with functional API

In [0]:
conv_activation = params2['conv_activation']

# Encoder: three (Conv1D -> MaxPooling1D) stages; each pooling halves the length
input_img = Input(shape=(npoints, 1))
x = input_img
for n_filters in (64, 32, 32):
    x = Conv1D(n_filters, 5, padding='same', activation=conv_activation)(x)
    x = MaxPooling1D(2, padding='same')(x)
encoded = x

# Decoder: three (Conv1D -> UpSampling1D) stages; each upsampling doubles the length
for n_filters in (32, 32, 64):
    x = Conv1D(n_filters, 5, padding='same', activation=conv_activation)(x)
    x = UpSampling1D(2)(x)

# Final convolution maps back to a single channel
decoded = Conv1D(1, 5, padding='same', activation=params2['output_activation'])(x)

autoencoder = Model(inputs=input_img, outputs=decoded)
autoencoder.compile(optimizer=params['optimizer'], loss=params['loss'])
autoencoder.summary()

## Fit

In [0]:
# Training history as a dict {metric name: list of per-epoch values}.
# It is filled either by training below or by loading the file of a previous run.
history = {}
if not load_weights:

    # Callback for model checkpoints, written to "<filename>-<epoch>.h5"
    checkpoint = ModelCheckpoint(
        filepath = filename + "-{epoch:02d}.h5",
        save_best_only=True)

    # Input: the noisy waveforms; target ('labels'): the same waveforms without noise
    hist = autoencoder.fit(x_train_noisy, x_original,
                           epochs=params['epochs'],
                           batch_size=params['batch_size'],
                           shuffle=True,
                           validation_split=0.1,
                           callbacks=[checkpoint])

    # Convert every value to a plain Python float so that the history can always be
    # written as JSON (json.dump raises TypeError on NumPy scalar types such as float32)
    history = {key: [float(value) for value in values]
               for key, values in hist.history.items()}

    # Save history
    with open(filename + '_hist.json', 'w') as f:
        json.dump(history, f)

    # Save the weights
    autoencoder.save_weights(filename + '_weights.h5')
else:
    # Load weights
    autoencoder.load_weights(f'{filename}_weights.h5')

    # Load history
    with open(f'{filename}_hist.json', 'r') as f:
        history = json.load(f)

    # Export the full model (without optimizer state).
    # NOTE(review): this export happens only when weights are loaded, not after
    # training — confirm that this is intended.
    autoencoder.save(filename + '.h5', include_optimizer=False)

# Plot training history
plt.plot(history['loss'], linewidth=3, label='train')
plt.plot(history['val_loss'], linewidth=3, label='valid')
plt.grid()
plt.legend()
plt.xlabel('epoch')
plt.ylabel('loss')
# Choose the y-range according to the configured loss. Previously both ranges were
# set one after the other, so the first call had no effect.
if params['loss'] == 'mse':
    plt.ylim(1e-5, 1e-3)
else:
    plt.ylim(1e-2, 0.1)  # presumably meant for binary_crossentropy — TODO confirm
plt.show()

## Test

In [0]:
# Use the last 11 waveforms for a visual check of the denoising.
# The slices are copied: basic slicing returns views, so without .copy() the in-place
# reverts below would also modify the last 11 rows of x_original / x_train_noisy,
# and every re-run of this cell would shift and rescale them once more.
x_test = x_original[-11:].copy()
x_test_noisy = x_train_noisy[-11:].copy()
decoded_imgs = autoencoder.predict(x_test_noisy)

# Revert scale and offset so that the waveforms are back in their original units
x_test -= scale * offset
x_test /= scale
x_test_noisy -= scale * offset
x_test_noisy /= scale
decoded_imgs -= scale * offset
decoded_imgs /= scale


# How many waveforms to be displayed (all are drawn into the same figure)
n = 1
plt.figure(figsize=(20, 6))
for i in range(n):
    plt.plot(x_test[i], label="original")
    plt.plot(x_test_noisy[i], label="noisy")
    plt.plot(decoded_imgs[i], label="decoded")
    plt.legend()

# Send this plot to comet
experiment.log_figure(figure=plt)

if plot_data:
    plt.show()