In [1]:
import argparse
import os
import sys
from functools import partial

import numpy as np
import tensorflow as tf
from keras import backend as K
from keras import regularizers
from keras.callbacks import ModelCheckpoint
from keras.layers import Conv2D
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers import Input
from keras.layers import LSTM
from keras.layers import Reshape
from keras.losses import mean_absolute_error,mean_squared_error
from keras.models import Model
from keras.models import Sequential
from keras.optimizers import Adam
from keras.preprocessing.sequence import pad_sequences
from keras.utils.training_utils import multi_gpu_model
from skimage.util import view_as_blocks

from utilities.config_handler import get_config
from utilities.learning import   split_train_validation,  train_model, predict_ae_error_vectors
from utilities.preprocessing import  add_noise,load_fft_test_data ,load_fft_train_data, reshape_to_blocks

Using TensorFlow backend.


In [None]:
from matplotlib import pyplot as plt
%matplotlib inline

# Argument parsing

In [None]:
# Emulate a command line inside the notebook. The list holds options only
# (no program name) because the parser cell passes sys.argv to parse_args as is.
sys.argv = [
    '-m', 'train',
    '-n', 'iq_data/gps_new/norm',
    '--weights-save-path', 'model/ae/ae_model.hdf5',
]

In [None]:
# Build the command-line style parser that selects train/test mode and the
# data / weights locations used by the rest of the notebook.
parser = argparse.ArgumentParser(
    prog='Spectrum Anomaly Detection',
    description='Use this command parser for training or testing the anomaly detector',
)
# --mode is required: without it every mode-specific check below would be
# skipped and the notebook would silently fall through to test mode.
parser.add_argument('-m', '--mode', help='train or test mode', choices=['train', 'test'], required=True)
parser.add_argument('-n', '--normal-data-dir', help='normal I/Q recording directory (for train mode)')
parser.add_argument('-a', '--anomaly-data-dir', help='anomaly I/Q recording directory (for test mode)')
parser.add_argument('-s', '--weights-save-path', help='path for trained weights (for train mode)')
parser.add_argument('-l', '--weights-load-path', help='path for loading weights (for test mode)')

# sys.argv is parsed in full, so it must contain options only (no leading
# program name) by the time this cell runs.
namespace = parser.parse_args(sys.argv)

# Options that must accompany each mode: (namespace attribute, short flag).
_required_by_mode = {
    'train': (('normal_data_dir', '-n'), ('weights_save_path', '-s')),
    'test': (('anomaly_data_dir', '-a'), ('weights_load_path', '-l')),
}
for _attr, _flag in _required_by_mode[namespace.mode]:
    if not getattr(namespace, _attr):
        # parser.error prints the usage message and exits.
        parser.error('the {} arg must be present when mode is {}'.format(_flag, namespace.mode))

# Hyperparameters

In [None]:
# Pull every tunable value out of the project configuration once, so the
# remaining cells only deal with plain module-level names.
conf = get_config()
gpus = conf['gpus']
lr = conf['learning']['lr']

# All autoencoder preprocessing settings live under the same sub-section.
# (assumes conf is a plain nested mapping, so one lookup can be reused)
_ae_conf = conf['preprocessing']['ae']
use_noise = _ae_conf['use_noise']
feature_names = _ae_conf['feature_names']
rbw_l = _ae_conf['rbw_l']
rbw_s = _ae_conf['rbw_s']
block_size = _ae_conf['block_size']

# Scaler / ZCA paths, one pair per rbw setting (large and small).
train_scaler_ae_large_path = _ae_conf['train_scaler_path_large']
train_scaler_ae_small_path = _ae_conf['train_scaler_path_small']
train_zca_ae_path_large = _ae_conf['train_zca_path_large']
train_zca_ae_path_small = _ae_conf['train_zca_path_small']

# Values taken from the parsed command line.
normal_data_dir = namespace.normal_data_dir
anomaly_data_dir = namespace.anomaly_data_dir
train = (namespace.mode == 'train')

# Adam optimizer instance configured with the learning rate from the config.
opt = Adam(lr=lr)

In [None]:
# Sanity-check whichever data directories were supplied on the command line.
# NOTE(review): for a string, being truthy already implies a non-zero length,
# so this assert cannot fail as written -- confirm which check was intended.
for _data_dir in (normal_data_dir, anomaly_data_dir):
    if not _data_dir:
        continue
    assert len(_data_dir) != 0

In [None]:
def get_conv_autoencoder_model(input_shape):
    """Build, compile and summarize the convolutional autoencoder.

    The network is: Conv2D -> Flatten -> Dense('hidden1') -> Dense('hidden2')
    -> Reshape(input_shape) -> Conv2D. Both convolutions have a single filter,
    linear activation and 'same' padding; both dense layers use a sigmoid.

    Args:
        input_shape: shape of one sample (without the batch axis). Entry 0 is
            used as the number of features and entry 1 as the block length.
            Presumably (num_features, block_length, 1), since 'hidden2' has
            num_features * block_length units and is reshaped to input_shape
            -- TODO confirm.

    Returns:
        The Keras Model, compiled with the 'adam' optimizer and 'mse' loss.
        The model summary is printed as a side effect.
    """
    num_features, block_length = input_shape[0], input_shape[1]
    # Both convolutions derive one kernel dimension from half the block length.
    half_block_kernel = int(block_length / 2) - 4

    inputs = Input(shape=input_shape, name='input')
    encoded = Conv2D(1, (num_features, half_block_kernel),
                     activation='linear', padding='same')(inputs)
    encoded_flat = Flatten()(encoded)

    # The l1(0) activity regularizers are kept as placeholders with zero weight.
    hidden1 = Dense(int(block_length),
                    activation=K.sigmoid,
                    activity_regularizer=regularizers.l1(0),
                    name='hidden1')(encoded_flat)
    hidden2 = Dense(block_length * num_features,
                    activation=K.sigmoid,
                    activity_regularizer=regularizers.l1(0),
                    name='hidden2')(hidden1)

    # Restore the sample layout before the final convolution.
    restored = Reshape(input_shape)(hidden2)
    outputs = Conv2D(1, (half_block_kernel, 1),
                     activation='linear', padding='same')(restored)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam', loss='mse')
    model.summary()
    return model

# Loading, whitening, scaling, FFT

In [None]:
# Load the FFT data for the active mode and cut it into blocks of block_size.
if train:
    weights_save_path = namespace.weights_save_path
    # The same normal recordings are loaded twice, once per configured rbw
    # value (rbw_l / rbw_s), each with its own scaler and ZCA paths.
    fft_train_large = load_fft_train_data(normal_data_dir , rbw_l , train_scaler_ae_large_path,train_zca_ae_path_large)
    fft_train_small = load_fft_train_data(normal_data_dir,rbw_s , train_scaler_ae_small_path,train_zca_ae_path_small)
    #TODO infer block size
    X_train_large = reshape_to_blocks(fft_train_large,block_size)
    X_train_small = reshape_to_blocks(fft_train_small,block_size)
    # Autoencoder setup: the blocks are passed as both inputs and targets, and
    # only the first and third returned values (train / validation) are kept.
    (X_train_large, _, X_val_large, _) = split_train_validation(X_train_large, X_train_large)
    (X_train_small, _, X_val_small, _) = split_train_validation(X_train_small, X_train_small)
else:
    #TODO also make it generic
    weights_load_path = namespace.weights_load_path
    # Test data is loaded with the "large" rbw value and the large scaler / ZCA paths only.
    fft_test = load_fft_test_data(anomaly_data_dir , rbw_l ,train_scaler_ae_large_path , train_zca_ae_path_large)
    X_test = reshape_to_blocks(fft_test,block_size)


In [None]:
# Show the first block of the data set loaded for the active mode.
# The original cell plotted `fft_blocks[0]`, a name that no cell defines.
# NOTE(review): the blocked arrays are assumed to be the intended replacement.
_first_block = X_train_large[0] if train else X_test[0]
# Drop singleton axes so imshow receives a 2-D image
# (assumes a block looks like (features, block_length, 1) -- TODO confirm).
_first_block = np.atleast_2d(np.squeeze(_first_block))

fig, ax = plt.subplots(figsize=(20, 20))
# aspect='auto' keeps a short-and-wide block readable instead of a thin strip.
ax.imshow(_first_block, aspect='auto')
ax.set_title('First FFT block')
plt.show()

In [None]:
# Plot the FFT data loaded with the "large" rbw value. `fft_train_large` only
# exists in train mode; in test mode the test data (also loaded with rbw_l) is
# shown instead so the cell runs in both modes.
_fft_large = fft_train_large if train else fft_test

fig, ax = plt.subplots(figsize=(50, 50))
ax.imshow(_fft_large)
ax.set_title('FFT data (rbw_l) - {}'.format('train' if train else 'test'))
plt.show()

In [None]:
# Plot the FFT data loaded with the "small" rbw value. It is only loaded in
# train mode, so the cell is skipped in test mode instead of raising NameError.
if train:
    fig, ax = plt.subplots(figsize=(40, 40))
    ax.imshow(fft_train_small)
    ax.set_title('FFT data (rbw_s) - train')
    plt.show()

In [None]:
# Train mode: fit one autoencoder per rbw setting, save both sets of weights
# and compute the reconstruction errors on the (large) training set.
# Test mode: rebuild the model, load saved weights and compute test errors.
if train:
    conv_model_large = get_conv_autoencoder_model(X_train_large.shape[1:])
    conv_model_small = get_conv_autoencoder_model(X_train_small.shape[1:])

    # Optionally corrupt the inputs; the clean blocks always stay the targets.
    if use_noise:
        X_train_large_noisy = add_noise(X_train_large)
        X_train_small_noisy = add_noise(X_train_small)
        _inputs_large, _inputs_small = X_train_large_noisy, X_train_small_noisy
    else:
        _inputs_large, _inputs_small = X_train_large, X_train_small

    # The large model keeps the user-supplied path, which is the one test mode
    # loads. It is saved before the small model is trained so nothing that
    # happens afterwards can affect the stored weights.
    train_model(conv_model_large, _inputs_large, X_train_large, X_val=X_val_large, Y_val=X_val_large)
    conv_model_large.save_weights(weights_save_path)

    # Previously the small model was never trained and its weights were written
    # to the same file, overwriting the trained large model. It is now trained
    # and stored next to it with a '_small' suffix (file extension preserved).
    train_model(conv_model_small, _inputs_small, X_train_small, X_val=X_val_small, Y_val=X_val_small)
    _weights_root, _weights_ext = os.path.splitext(weights_save_path)
    weights_save_path_small = _weights_root + '_small' + _weights_ext
    conv_model_small.save_weights(weights_save_path_small)

    train_errors = predict_ae_error_vectors(X_train_large, X_train_large, conv_model_large)
else:
    #TODO also make it generic
    weights_load_path = namespace.weights_load_path
    conv_model = get_conv_autoencoder_model(X_test.shape[1:])
    conv_model.load_weights(weights_load_path)
    test_errors = predict_ae_error_vectors(X_test, X_test, conv_model)

In [None]:
def plot_prediction_sample(x,ax,title=None):
    """Draw the first two rows of a sample as two curves on ``ax``.

    Args:
        x: array-like sample; singleton axes are removed before plotting
           (e.g. a (1, 2, 128, 1) array becomes (2, 128)).
        ax: axes-like object providing ``plot`` and ``set_title``.
        title: optional title; when given it is set with font size 35.
    """
    channels = np.squeeze(x)
    # Index both rows up front, then plot them in order (row 0, then row 1).
    for channel in (channels[0], channels[1]):
        ax.plot(channel)
    if not title:
        return
    ax.set_title(title,fontsize=35)

In [None]:
# Compare one sample with its reconstruction.
# The original cell used `model` (never defined) and `X_test` (test mode only).
# Use the model / data that exist for the active mode instead: the large model
# on its validation blocks in train mode, the loaded model on X_test otherwise.
if train:
    _sample_model, _sample_source = conv_model_large, X_val_large
else:
    _sample_model, _sample_source = conv_model, X_test
assert len(_sample_source) > 0, 'no samples available to plot'

# Clamp the index so small data sets do not raise IndexError.
sample_index = min(120, len(_sample_source) - 1)
sample = _sample_source[sample_index]
# predict expects a batch axis, so pred has one more leading axis than sample.
pred = _sample_model.predict(np.expand_dims(sample, 0))

f, (ax1, ax2) = plt.subplots(1, 2, figsize=(40, 15), sharey=True)
for _ax in (ax1, ax2):
    # tick_params also applies to tick labels created after plotting.
    _ax.tick_params(labelsize=25)

# plot_prediction_sample squeezes its input and only uses rows 0 and 1, so the
# arrays are passed whole.
plot_prediction_sample(sample, ax1, 'real signal')
plot_prediction_sample(pred, ax2, 'reconstruction')

# L2 norm of the reconstruction error, divided by the batch length of pred.
print(np.sqrt(np.sum(np.square(pred - sample)))/len(pred))


# Reconstruction error analysis

In [None]:
def get_batch(data, batch_size):
    """Yield consecutive slices of ``data`` with at most ``batch_size`` items each."""
    total = len(data)
    for start in range(0, total, batch_size):
        # Slicing already clamps at the end of the data.
        yield data[start:start + batch_size]


def predict_error_vectors(X, Y, model, batch_size=32, error_fn=None):
    """Compute one reconstruction error per sample, batch by batch.

    Args:
        X: model inputs, sliceable along the first axis.
        Y: targets aligned with ``X`` (must have the same length).
        model: object providing ``predict_on_batch(batch)``.
        batch_size: number of samples per prediction batch. The original
            default referenced a global ``batch_size`` that is not defined in
            this notebook; a literal default is used so the function can be
            defined on a fresh kernel.
        error_fn: callable ``(batch_Y, Y_pred) -> per-sample errors``. Defaults
            to ``compute_batch_error``, looked up when the function is called.
            NOTE(review): ``compute_batch_error`` is not defined or imported in
            the visible cells -- confirm where it comes from.

    Returns:
        1-D numpy array with ``len(X)`` error values.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1 or ``X`` and ``Y``
            differ in length.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer')
    if len(X) != len(Y):
        raise ValueError('X and Y must contain the same number of samples')
    if error_fn is None:
        error_fn = compute_batch_error

    n_samples = len(X)
    n_batches = -(-n_samples // batch_size)  # ceiling division
    # Sized from X itself (the original used the unrelated global X_train).
    errors = np.empty(n_samples)

    batches = zip(get_batch(X, batch_size), get_batch(Y, batch_size))
    for i, (batch_X, batch_Y) in enumerate(batches, start=1):
        Y_pred = model.predict_on_batch(batch_X)
        start = (i - 1) * batch_size
        errors[start:start + len(batch_X)] = error_fn(batch_Y, Y_pred)
        if i % 50 == 0:
            print('Prediction batch {:d} / {:d}'.format(i, n_batches))
    return errors

In [None]:
# Compute the reconstruction errors for the data set of the active mode.
# The original cell referenced `X_train` and `model`, which no cell defines,
# and needed train and test data at the same time although a single run only
# loads one of them. The imported helper is used because it is the one the
# training cell already relies on.
if train:
    train_errors = predict_ae_error_vectors(X_train_large, X_train_large, conv_model_large)
else:
    test_errors = predict_ae_error_vectors(X_test, X_test, conv_model)

In [None]:
# Training errors only exist after a train-mode run; show nothing otherwise
# instead of raising NameError.
globals().get('train_errors')

In [None]:
# Test errors only exist after a test-mode run; show nothing otherwise
# instead of raising NameError.
globals().get('test_errors')

In [None]:
# Histogram of the reconstruction errors. A single run produces either the
# train or the test errors, so plot whichever vectors exist in the kernel
# (both are drawn when both modes were run in the same session).
fig, ax = plt.subplots()
for _name in ('train_errors', 'test_errors'):
    if _name in globals():
        # Semi-transparent bars keep overlapping distributions readable.
        ax.hist(globals()[_name], bins=100, alpha=0.6, label=_name)
ax.set_xlabel('reconstruction error')
ax.set_ylabel('count')
ax.legend()
plt.show()