In [None]:
import sys
import argparse
import os
import numpy as np
from skimage.util import view_as_windows
from utilities.config_handler import get_config
from utilities.learning import split_train_validation, train_model, predict_ae_error_vectors
from utilities.detection import detect_reconstruction_anomalies_median,plot_spectogram_anomalies
from utilities.preprocessing import  add_noise,load_fft_test_data ,load_fft_train_data,  reshape_to_blocks,persist_object\
    ,load_object,persist_val_stat, load_val_stat ,get_xhdr_sample_rate , compute_fft_train_data , load_raw_data , compute_fft_train_data \
, compute_fft_test_data,trim_iq_basic_block, complex2power
compute_fft_train_data
import matplotlib.patches as patches
import matplotlib.pyplot as plt
from sklearn import mixture
from sklearn.cluster import MiniBatchKMeans, KMeans
from sklearn.metrics.pairwise import pairwise_distances_argmin_min
import pandas as pd
from utilities.visualization import plot_spectogram
from scipy.stats import entropy
%matplotlib inline

In [None]:
# Recording names used when the notebook runs in test mode without an explicit -d argument.
normal_records = [
    'CELL_NORM_0',
    'CELL_NORM_3',
    'CELL_NORM_4',
]
anomal_records = [
    'CELL_SWP_18MHz_50us_0dB',
    'CELL_SWP_18MHz_50us_10dB',
    'CELL_SWP_18MHz_100us_0dB',
    'CELL_SWP_18MHz_100us_10dB',
]

# Parent directories that the record names above are joined onto.
normal_path = 'iq_data/CELL/normal'
anomal_path = 'iq_data/CELL/anomal'

In [None]:
# Command-line style configuration: the notebook emulates a CLI by overriding
# sys.argv with a hard-coded argument string and parsing it with argparse.
parser = argparse.ArgumentParser(
    prog='Spectrum Anomaly Detection',
    description='Use this command parser for training or testing the anomaly detector',
)
parser.add_argument('-m', '--mode', choices=['train', 'test'], help='train or test mode')
parser.add_argument('-d', '--data-dir', nargs='?', help='I/Q recording directory')
parser.add_argument('-w', '--weights-path', help='path for trained weights')

# Active run configuration. The list holds no program name, which is why it is
# handed to parse_args() explicitly below. Alternative invocations:
# sys.argv = "-m test -d iq_data/CELL/anomal/CELL_NORM_4 -w model/baseline_kmeans".split()
# sys.argv = "-m train -d iq_data/CELL/normal/CELL_NORM_2 -w model/baseline_kmeans".split()
sys.argv = "-m test -d -w model/baseline_kmeans".split()

namespace = parser.parse_args(sys.argv)

if namespace.mode == 'train':
    # Training needs both a recording to learn from and a place to store the model.
    if not namespace.data_dir:
        parser.error('the -d arg must be present when mode is train')
    if not namespace.weights_path:
        parser.error('the -w arg must be present when mode is train')
elif namespace.mode == 'test':
    # -d is deliberately optional in test mode (the check is disabled):
    # if not namespace.data_dir:
    #     parser.error('the -d arg must be present when mode is test')
    if not namespace.weights_path:
        parser.error('the -w arg must be present when mode is test')

train = namespace.mode == 'train'

# Hyperparameters

In [None]:
# Values read from the project configuration returned by get_config().
conf = get_config()
gpus = conf['gpus']

# The 'ae' learning section is kept whole and also unpacked into individual settings.
train_params = conf['learning']['ae']
lr = train_params['lr']
validation_split = train_params['validation_split']

ae_preprocessing = conf['preprocessing']['ae']
rbw_set = ae_preprocessing['rbw_set']
feature_names = ae_preprocessing['feature_names']

train = namespace.mode == 'train'

# Settings fixed in the notebook rather than in the configuration.
atom_height = 1                      # rows per spectrogram atom
num_clusters_set = [250, 500, 750]   # k values looped over by the train/test cell
cv_types = ['diag']

In [None]:
def split_spectogram_to_atoms(spectogram,atom_height,stride=1):
    """Cut a 2-D spectrogram into full-width sliding windows ("atoms").

    Each atom spans `atom_height` rows and every column of `spectogram`.

    Args:
        spectogram: 2-D array; the second dimension becomes the atom width.
        atom_height: number of rows in each atom.
        stride: step passed to `view_as_windows` between successive windows.

    Returns:
        Array reshaped to (number_of_atoms, atom_height, spectogram.shape[1]).
    """
    atom_shape = (atom_height, spectogram.shape[1])
    windows = view_as_windows(spectogram, atom_shape, step=stride)
    # Collapse the leading window-grid axes into a single atom axis.
    return windows.reshape((-1,) + atom_shape)

In [None]:
def split_spectogram_by_fraction(spectogram,frac):
    """Return the leading part of `spectogram` covering `frac` of its length.

    Args:
        spectogram: sliceable sequence or array (sliced along its first axis).
        frac: fraction of the length to keep; the cut index is truncated with int().

    Returns:
        `spectogram[:cut]` where `cut = int(frac * len(spectogram))`.
    """
    cut = int(len(spectogram) * frac)
    return spectogram[:cut]

In [None]:
def consecutive(data, stepsize=1):
    """Group a 1-D sequence into runs whose neighbours differ by exactly `stepsize`.

    Args:
        data: 1-D sequence or array of numbers.
        stepsize: difference between neighbours that keeps them in the same run.

    Returns:
        A list of lists, one per run, in the original order.
    """
    # A run ends wherever the difference to the next element is not `stepsize`;
    # +1 turns the diff position into the index where the next run starts.
    run_starts = np.where(np.diff(data) != stepsize)[0] + 1
    runs = np.split(data, run_starts)
    return [list(run) for run in runs]

In [None]:
def train_k_means(data_dir,rbw,num_clusters):
    """Fit k-means on the spectrogram atoms of one recording and persist the results.

    Three objects are written under `<namespace.weights_path>/<dataset>_<rbw>/`,
    each file name tagged with `num_clusters` and the global `atom_height`:
    the fitted model, the per-atom (cluster, distance) table, and the
    per-cluster maximum distance.

    Args:
        data_dir: '/'-separated recording directory; its second path component
            is used as the dataset name.
        rbw: value forwarded to `compute_fft_train_data` and used in the
            weights directory name.
        num_clusters: number of clusters forwarded to `fit_kmeans`.

    NOTE(review): `fit_kmeans` is not defined or imported in the visible part of
    this notebook; it must exist in the kernel before this function is called.
    """
    assert len(data_dir) != 0
    dataset_name = data_dir.split('/')[1]
    weights_path = os.path.join(namespace.weights_path, dataset_name + '_' + str(rbw))

    def artifact_path(name_template):
        # All persisted files share the same directory and k / atom-height tag.
        return os.path.join(weights_path, name_template.format(num_clusters, atom_height))

    # Raw I/Q samples -> trimmed samples -> training spectrogram.
    sample_rate = get_xhdr_sample_rate(data_dir)
    iq_data = trim_iq_basic_block(load_raw_data(data_dir), sample_rate)
    freqs, time, fft_train = compute_fft_train_data(iq_data, sample_rate, rbw, weights_path)
    print("Spectrogram length: {}".format(len(fft_train)))

    # One flat feature vector per atom.
    atoms = split_spectogram_to_atoms(fft_train, atom_height)
    atom_vectors = atoms.reshape(len(atoms), -1)

    k_means = fit_kmeans(atom_vectors, num_clusters)
    nearest_cluster, nearest_distance = pairwise_distances_argmin_min(
        atom_vectors, k_means.cluster_centers_, metric='euclidean')

    train_clusters_distances_df = pd.DataFrame(
        {'cluster': nearest_cluster, 'distance': nearest_distance})
    # Largest training distance seen in each cluster.
    max_cluster_distance_train = train_clusters_distances_df.groupby('cluster').max()

    persist_object(k_means, artifact_path('k_means_k={}_a={}.pkl'))
    persist_object(train_clusters_distances_df,
                   artifact_path('train_clusters_distances_k={}_a={}.pkl'))
    persist_object(max_cluster_distance_train,
                   artifact_path('max_cluster_distance_k={}_a={}.pkl'))

In [None]:
def test_k_means(data_dir,rbw,num_clusters):
    """Score one recording against a trained k-means model and save two plots.

    Each spectrogram atom is assigned to its nearest cluster centre and flagged
    as anomalous when its distance exceeds the largest distance seen for that
    cluster during training. Two PNG files are written into `data_dir`:
    the sorted nearest-centre distances, and the spectrogram with anomalous
    row ranges shaded in red (saved even when no anomaly is found).

    Args:
        data_dir: '/'-separated recording directory; its second path component
            is the dataset name and its last component the recording name.
        rbw: value forwarded to `compute_fft_test_data` and used to locate the
            weights directory `<namespace.weights_path>/<dataset>_<rbw>`.
        num_clusters: selects which persisted model / threshold files to load.

    Raises:
        AssertionError: if `data_dir` is empty.
    """
    assert len(data_dir) != 0
    dataset_name = str.split(data_dir, '/')[1]
    recording_name = str.split(data_dir,'/')[-1]
    weights_dir = "_".join((dataset_name, str(rbw)))
    weights_path = os.path.join(namespace.weights_path, weights_dir)
    sample_rate = get_xhdr_sample_rate(data_dir)

    iq_data = load_raw_data(data_dir)
    iq_data = trim_iq_basic_block(iq_data , sample_rate)
    _, _, fft_test = compute_fft_test_data(iq_data,sample_rate,rbw,weights_path)

    atom_fft_test = split_spectogram_to_atoms(fft_test,atom_height)
    flatten_atom_fft_test = atom_fft_test.reshape(len(atom_fft_test),-1)

    # NOTE(review): load_object presumably unpickles; only load weights from trusted paths.
    k_means = load_object(os.path.join(weights_path,'k_means_k={}_a={}.pkl'.format(num_clusters,atom_height)))
    max_cluster_distance_train = load_object(os.path.join(weights_path,\
                                             'max_cluster_distance_k={}_a={}.pkl'.format(num_clusters,atom_height)))

    (min_clusters_test,min_distances_test) = pairwise_distances_argmin_min(flatten_atom_fft_test,\
                                                                           k_means.cluster_centers_,\
                                                                            metric='euclidean')

    test_clusters_distances_df = pd.DataFrame({'cluster':min_clusters_test , 'distance':min_distances_test})

    # The threshold table is indexed by cluster label (it comes from
    # groupby('cluster').max()), so look thresholds up by label, not by position.
    thresholds = max_cluster_distance_train['distance'].reindex(min_clusters_test).to_numpy()
    # NOTE(review): a cluster that received no training atom has no threshold (NaN);
    # atoms assigned to such a cluster are flagged as anomalous - confirm this policy.
    anomaly_mask = np.isnan(thresholds) | (min_distances_test > thresholds)
    test_clusters_distances_df['anomaly'] = anomaly_mask

    # Always a 1-D index array, including the zero- and one-anomaly cases.
    anomalies_indices = np.flatnonzero(anomaly_mask)

    f, ax = plt.subplots(figsize=(10,10))
    percent_anomalies = len(anomalies_indices) / len(test_clusters_distances_df) * 100
    ax.plot(np.sort(min_distances_test))

    ax.set_title('Test minimum distances, num clusters {0:d} ,atom height {1}, num anomolous atoms {2:.2f}%'.\
                  format(num_clusters,atom_height,percent_anomalies))

    distances_plot_path = os.path.join(data_dir ,'cluster_distances_k={}_a={}_{}.png'.\
                                       format(num_clusters,atom_height,recording_name))
    f.savefig(distances_plot_path)

    fig, ax = plt.subplots(figsize=(20,20))
    ax.imshow(fft_test,aspect='auto', origin='upper')
    # Built unconditionally so the spectrogram is saved even without anomalies.
    anomalies_plot_path = os.path.join(data_dir ,'cluster_anomalies_k={}_a={}_{}.png'.\
                                       format(num_clusters,atom_height,recording_name))
    if len(anomalies_indices) > 0:
        # Shade each run of consecutive anomalous atom indices as one rectangle.
        for block in consecutive(anomalies_indices):
            x_cord = 0
            y_cord = block[0]
            block_height , block_width = len(block) + 1,fft_test.shape[1]
            rect = patches.Rectangle((x_cord,y_cord),block_width-1,block_height,edgecolor='r',\
                                     facecolor='r',fill=True,alpha=0.5,linewidth=0.1,rasterized=True)
            ax.add_patch(rect)

    # NOTE(review): both figures stay open so the inline backend can display them;
    # many calls in one cell keep all of those figures in memory.
    fig.savefig(anomalies_plot_path)

In [None]:
# Entry point: run training or testing for every (rbw, num_clusters) combination.
if train:
    data_dir = namespace.data_dir
    for rbw in rbw_set:
        for num_clusters in num_clusters_set:
            train_k_means(data_dir, rbw, num_clusters)

elif namespace.data_dir:
    # A single recording was given explicitly with -d.
    data_dir = namespace.data_dir
    assert len(data_dir) != 0
    for rbw in rbw_set:
        for num_clusters in num_clusters_set:
            test_k_means(data_dir, rbw, num_clusters)

else:
    # No -d given: evaluate every predefined normal recording, then every anomalous one.
    for records_root, records in ((normal_path, normal_records), (anomal_path, anomal_records)):
        for r in records:
            data_dir = os.path.join(records_root, r)
            for rbw in rbw_set:
                for num_clusters in num_clusters_set:
                    test_k_means(data_dir, rbw, num_clusters)

# Spectrum visualization