In [1]:
from data_loader import *
import keras
from keras.models import Model, load_model
from keras import backend as K
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA

from bokeh.plotting import figure, output_notebook, show, ColumnDataSource
from bokeh.models import HoverTool

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
CLASSES_24 = ['16PSK', '2FSK_5KHz', '2FSK_75KHz', '8PSK', 'AM_DSB', 'AM_SSB', 'APSK16_c34',
 'APSK32_c34', 'BPSK', 'CPFSK_5KHz', 'CPFSK_75KHz', 'FM_NB', 'FM_WB',
 'GFSK_5KHz', 'GFSK_75KHz', 'GMSK', 'MSK', 'NOISE', 'OQPSK', 'PI4QPSK', 'QAM16',
 'QAM32', 'QAM64', 'QPSK']

COLORS_24 = {
    '16PSK': '#FF8800', 
    '2FSK_5KHz': '#3cb44b', 
    '2FSK_75KHz': '#ffe119', 
    '8PSK': '#C2BEA1', 
    'AM_DSB': '#0082c8', 
    'AM_SSB':'#911eb4', 
    'APSK16_c34': '#46f0f0',
    'APSK32_c34': '#f032e6', 
    'BPSK': '#d2f53c', 
    'CPFSK_5KHz': '#fabebe', 
    'CPFSK_75KHz': '#008080', 
    'FM_NB': '#e6beff', 
    'FM_WB': '#aa6e28',
    'GFSK_5KHz': '#fffac8', 
    'GFSK_75KHz': '#800000', 
    'GMSK': '#aaffc3', 
    'MSK': '#808000', 
    'NOISE': '#ffd8b1', 
    'OQPSK': '#000080', 
    'PI4QPSK': '#808080', 
    'QAM16': '#0AC0E0',
    'QAM32': '#000000', 
    'QAM64': '#E00A19', 
    'QPSK': '#3BF703'}
BOOKEH_COLORS = {
    '16PSK': 'aqua', 
    '16PSK_TS1': 'aqua', 
    '2FSK_5KHz': 'aquamarine', 
    '2FSK_5KHz_TS1': 'aquamarine', 
    '2FSK_75KHz': 'bisque', 
    '2FSK_75KHz_TS1': 'bisque', 
    '8PSK': 'black', 
    '8PSK_TS1': 'black', 
    'AM_DSB': 'blue', 
    'AM_DSB_TS1': 'blue', 
    'AM_SSB':'blueviolet', 
    'AM_SSB_TS1':'blueviolet', 
    'APSK16_c34': 'brown',
    'APSK16_c34_TS1': 'brown',
    'APSK32_c34': 'burlywood', 
    'APSK32_c34_TS1': 'burlywood', 
    'BPSK': 'cadetblue', 
    'BPSK_TS1': 'cadetblue', 
    'CPFSK_5KHz': 'chartreuse', 
    'CPFSK_5KHz_TS1': 'chartreuse', 
    'CPFSK_75KHz': 'chocolate', 
    'CPFSK_75KHz_TS1': 'chocolate', 
    'FM_NB': 'cornflowerblue', 
    'FM_NB_TS1': 'cornflowerblue', 
    'FM_WB': 'crimson',
    'FM_WB_TS1': 'crimson',
    'GFSK_5KHz': 'darkcyan', 
    'GFSK_5KHz_TS1': 'darkcyan', 
    'GFSK_75KHz': 'darkgoldenrod', 
    'GFSK_75KHz_TS1': 'darkgoldenrod', 
    'GMSK': 'darkgray', 
    'GMSK_TS1': 'darkgray', 
    'MSK': 'darkgreen', 
    'MSK_TS1': 'darkgreen', 
    'NOISE': 'darkorange', 
    'NOISE_TS1': 'darkorange', 
    'OQPSK': 'deeppink', 
    'OQPSK_TS1': 'deeppink', 
    'PI4QPSK': 'fuchsia', 
    'PI4QPSK_TS1': 'fuchsia', 
    'QAM16': 'gold',
    'QAM16_TS1': 'gold',
    'QAM32': 'lightblue', 
    'QAM32_TS1': 'lightblue', 
    'QAM64': 'magenta', 
    'QAM64_TS1': 'magenta', 
    'QPSK': 'plum',
    'QPSK_TS1': 'plum'
}


BOOKEH_SHAPES = {

    '16PSK':1,
    '16PSK_TS1':2, 
    '2FSK_5KHz':1,
    '2FSK_5KHz_TS1':2, 
    '2FSK_75KHz':1,
    '2FSK_75KHz_TS1':2, 
    '8PSK':1,
    '8PSK_TS1':2, 
    'AM_DSB':1,
    'AM_DSB_TS1':2, 
    'AM_SSB':1,
    'AM_SSB_TS1':2, 
    'APSK16_c34':1,
    'APSK16_c34_TS1':2,
    'APSK32_c34':1,
    'APSK32_c34_TS1':2, 
    'BPSK':1,
    'BPSK_TS1':2, 
    'CPFSK_5KHz':1,
    'CPFSK_5KHz_TS1':2, 
    'CPFSK_75KHz':1,
    'CPFSK_75KHz_TS1':2, 
    'FM_NB':1,
    'FM_NB_TS1':2, 
    'FM_WB':1,
    'FM_WB_TS1':2,
    'GFSK_5KHz':1,
    'GFSK_5KHz_TS1':2, 
    'GFSK_75KHz':1,
    'GFSK_75KHz_TS1':2, 
    'GMSK':1,
    'GMSK_TS1':2, 
    'MSK':1,
    'MSK_TS1':2, 
    'NOISE':1,
    'NOISE_TS1':2, 
    'OQPSK':1,
    'OQPSK_TS1':2, 
    'PI4QPSK':1,
    'PI4QPSK_TS1':2, 
    'QAM16':1,
    'QAM16_TS1':2,
    'QAM32':1,
    'QAM32_TS1':2, 
    'QAM64':1,
    'QAM64_TS1':2, 
    'QPSK':1,
    'QPSK_TS1':2,
}

In [3]:
def load_training_data(data_file,num_samples=100, mods = None, spectrum=False):
    testdata = LoadModRecData(data_file, 1., 0., 0., load_snrs=[10], num_samples_per_key=num_samples, load_mods = mods,spectrum=spectrum)
    train_data = testdata.signalData
    train_labels = testdata.signalLabels[:,0]
    return train_data, train_labels


def open_test_file(test_file, snr_model, filter_snr = True):
    # opens a testfile and if filter_snr is set to true then it will filter 
    # out only samples that have been predicted to have snr 10dB
    f = open(test_file, 'rb')
    testdata = pickle.load(f, encoding='latin1')
    testdata = np.asarray([testdata[i+1] for i in range(len(testdata.keys()))])

    if filter_snr:
        snr_probs = snr_model.predict(testdata)
        snr_preds = np.asarray([np.argmax(snr_prob) for snr_prob in snr_probs])
        testdata = testdata[np.where(snr_preds == 5)]
    
    return testdata

def get_labels(model, data, one_hot_index = True, confidence_thresh=None):
    # take data and predict label, returns int array; to get modulation of index i 
    # one_hot_index = False
    preds = model.predict(data)
    
    if confidence_thresh:
        probs = np.array([max(pred) for pred in preds])
        idx = np.where(probs > confidence_thresh)
        preds = preds[idx]
        data = data[idx]
    
    if one_hot_index:
        return data, np.asarray([np.argmax(pred) for pred in preds])
    else:
        return data, np.asarray([CLASSES[np.argmax(pred)] for pred in preds])

def get_mods_from_test_data(testdata1, test1_labels, mod_indices,classes, num_test_points=100):
    # gives us a specified number of data samples that we predict to be
    # of the specified modulation
    
    np.random.seed(0)
    batch, labels = [],[]
    for i in mod_indices:
        test1_filter = np.where(test1_labels == i)
        filtered1 = testdata1[test1_filter]
        filtered1 = filtered1[np.random.choice(filtered1.shape[0], num_test_points, replace=False)]
        labels1 = [(classes[i],1)]*num_test_points
        batch.append(filtered1)
        labels.extend(labels1)
        
    batch = np.concatenate(batch)
    labels = np.asarray(labels)
    return batch, labels

def tsne_model(model, data, pca_dim=50, tsne_dim=2, preds = False, layer_index = -2):
    """
    Does tsne reduction
    Parameters:
    model (keras model)
    data (np array): input data for the model
    layer_name: name of the output layer to do tsne on. 'None' will use 2nd to last layer, before final dense layer
 
    pca_dim (int): first dimensions are reduced using PCA to this number of dimensions
    tsne_dim (int): final desired dimension after doing tsne reduction
    
    Returns:
    np.array: Shape (data.shape[0], tsne_reduc); sample points in reduced space
    """
    
    layer_name = model.layers[layer_index].name
    
    intermediate_layer_model = Model(inputs=model.input,
                                 outputs=model.get_layer(layer_name).output)
    
    intermediate_output = intermediate_layer_model.predict(data)

    print(intermediate_output.shape[1])
    pca = PCA(n_components= pca_dim, random_state = 214853)    
    output_pca_reduced = pca.fit_transform(intermediate_output)
    
    tsne = TSNE(n_components=tsne_dim, random_state=214853)
    intermediates_tsne = tsne.fit_transform(output_pca_reduced)
    
    if preds:
        return intermediates_tsne, model.predict(data)
    return intermediates_tsne

def separate_labels(tsne_output, labels, preds,num_points):
    # splits tsne output, labels, and preds into subarrays that have the same labels
    unique_labels = np.unique(labels)
    
    tsne_subar = [0] * unique_labels.shape[0]
    label_subar = [0] * unique_labels.shape[0]
    preds_subar = [0] * unique_labels.shape[0]
    indices = [0] * unique_labels.shape[0]
    for i,label in enumerate(unique_labels):
        idx = np.where(labels==label)
        
        tsne_subar[i] = tsne_output[idx][:num_points]
        label_subar[i] = labels[idx][:num_points]
        preds_subar[i] = preds[idx][:num_points]
        indices[i] = idx[0][:num_points]
        
    return tsne_subar, label_subar, indices, preds_subar
        
      
def get_mods_test(model, data, mods):
    # takes in test set returns those data samples that classify in the group of mods by model
    classes = ['16PSK', '2FSK_5KHz', '2FSK_75KHz', '8PSK', 'AM_DSB', 'AM_SSB', 'APSK16_c34',
             'APSK32_c34', 'BPSK', 'CPFSK_5KHz', 'CPFSK_75KHz', 'FM_NB', 'FM_WB',
             'GFSK_5KHz', 'GFSK_75KHz', 'GMSK', 'MSK', 'NOISE', 'OQPSK', 'PI4QPSK', 'QAM16',
             'QAM32', 'QAM64', 'QPSK']

    preds = model.predict(data)
    preds = np.asarray([np.argmax(pred) for pred in preds])
    labels = np.asarray([classes[pred] for pred in preds])

    idx = []
    for i,labl in enumerate(preds):
        if labl in mods:
            idx.append(i)
    idx = np.asarray(idx)
    
    return data[idx]
    

In [4]:
snr_model = load_model("../../snr2.h5")
# model = load_model('../../mod_group0_val_loss5754_copy.h5')
model = load_model('../../mod_group0_val_loss0546.h5')



In [5]:
all_mods = [np.arange(24), np.array([1,9,10,11,12,13]), 
            np.array([4,5]), np.array([1,9]), np.array([6,7,20,21,22]), np.array([0,3]), np.array([0,3,6,7,20,21,22])]

mod_group=6
mods = all_mods[mod_group]
train_file = "/datax/yzhang/training_data/training_data_chunk_14.pkl"
train_file = "/datax/yzhang/army_challenge/training_data/training_data_chunk_14.pkl"

train_data, train_labels = load_training_data(train_file,num_samples=100,mods = [CLASSES_24[i] for i in mods], spectrum=False)


[Data Loader] - Loading Datafile,  /datax/yzhang/army_challenge/training_data/training_data_chunk_14.pkl (time series)
[Data Loader] - Counting Number of Examples in Dataset...
[Data Loader] - Number of Examples in Dataset: 700
[Data Loader] - [Modulation Dataset] Adding Collects for: 16PSK
[Data Loader] - [Modulation Dataset] Adding Collects for: 8PSK
[Data Loader] - [Modulation Dataset] Adding Collects for: APSK16_c34
[Data Loader] - [Modulation Dataset] Adding Collects for: APSK32_c34
[Data Loader] - [Modulation Dataset] Adding Collects for: QAM16
[Data Loader] - [Modulation Dataset] Adding Collects for: QAM32
[Data Loader] - [Modulation Dataset] Adding Collects for: QAM64
[Data Loader] - Converting to numpy arrays...
[Data Loader] - Shuffling Data...
[Data Loader] - Splitting Data...
[Data Loader] - Train Size: 700 Validation Size: 0 Test Size: 0
[Data Loader] - Done.



In [6]:
test_file = "../../Test_Set_1_Army_Signal_Challenge.pkl"
testdata1_filtered_snr = open_test_file(test_file, snr_model, filter_snr = True)

In [7]:
test_data_ = get_mods_test(model, testdata1_filtered_snr, mods)

In [8]:
# embed_model = load_model('/datax/yzhang/army_challenge/models/domain_thresh85_32hidden/model0.h5')
# embed_model = load_model('mod_classifier19.h5')
# embed_model = load_model('../mod4_noise02_time_1242/model0.h5')
# embed_model = load_model('../../mod_group4_emb1.h5')
# embed_model = load_model('../../model0fft_83.h5')
# embed_model = load_model('../thr80warm5noise4_1121rmodel0.h5')
# embed_model = load_model('/datax/yzhang/army_challenge/models/domain_thresh85_32hidden/model0.h5')
# embed_model = load_model('../../mod_group5_pretrained3_model0.h5')

#####
# embed_model = load_model('../../mod_group1_val_loss4071.h5').layers[-2]
# embed_model = Model(embed_model.layers[0].get_input_at(0),embed_model.layers[-1].get_output_at(0))
####

embed_model = load_model('../../tmp_gp6_da/model_fancydishhd.h5')

In [9]:
embed_model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 2, 1024)      0                                            
__________________________________________________________________________________________________
reshape_1 (Reshape)             (None, 2, 1024, 1)   0           input_1[0][0]                    
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 1, 512, 64)   960         reshape_1[0][0]                  
__________________________________________________________________________________________________
max_pooling2d_1 (MaxPooling2D)  (None, 1, 256, 64)   0           conv2d_1[0][0]                   
__________________________________________________________________________________________________
conv2d_2 (

In [10]:
# preds_old = model.predict(test_data)
test_data = np.copy(test_data_)
preds = embed_model.predict(test_data)
preds = np.asarray([np.argmax(pred) for pred in preds])
    
test_labels = np.asarray([CLASSES_24[mods[pred]]+"_TS1" for pred in preds])




In [11]:
total = 0
inter_data, inter_labels = [], []
for labl in np.unique(test_labels):
    idx = np.where(test_labels == labl)
    total += len(idx)
    inter_data.append(test_data[idx][:100])
    inter_labels.append(test_labels[idx][:100])
    
#     print(inter_data[-1].shape, inter_labels[-1].shape)

print(test_data.shape, test_labels.shape, total)
test_data = np.concatenate(inter_data)
test_labels = np.concatenate(inter_labels)
print(test_data.shape, test_labels.shape)


(5330, 2, 1024) (5330,) 7
(700, 2, 1024) (700,)


In [12]:
data = np.concatenate((train_data, test_data))
labels = np.concatenate((train_labels, test_labels))

print(train_data.shape, train_labels.shape)
print(test_data.shape, test_labels.shape)
print(data.shape, labels.shape)

(700, 2, 1024) (700,)
(700, 2, 1024) (700,)
(1400, 2, 1024) (1400,)


In [13]:
tsne_output, preds = tsne_model(model=embed_model,data=data, preds=True, layer_index=-6)
p2 = np.copy(preds)

4096


In [14]:
preds = np.copy(p2)

# round to 3 decimals
preds = np.around(preds, decimals=3)

# turn each prediction into a list of strings
preds = np.asarray([[CLASSES_24[mods[i]] +": " + str(pred[i]) for i in range(len(pred))] for pred in preds])



In [3]:
tsne_sub, labels_sub, indices, preds_sub = separate_labels(tsne_output, labels, preds, 100)

NameError: name 'separate_labels' is not defined

In [2]:
tooltips = [("p", "(@x, @y)"),("label", "@label"),("index", "@index"),("pred", "@pred")]

hover_tsne = HoverTool(tooltips = tooltips) 
tools_tsne = [hover_tsne, 'pan', 'wheel_zoom', 'reset','box_zoom','save']
p = figure(plot_width=700, plot_height=700, tools=tools_tsne)
output_notebook()

for i in range(len(tsne_sub)):
    tsne_ = tsne_sub[i]
    labels_ = labels_sub[i]
    indices_ = indices[i]
    preds_ = preds_sub[i]
    labl = labels_[0]

    source_train = ColumnDataSource(
        data=dict(
            x = tsne_[:,0],
            y = tsne_[:,1],
            index = indices_,
            label = labels_,
            pred = preds_
        )
    )
    

    shape = BOOKEH_SHAPES[labl]
    if shape == 1:
        p.circle('x', 'y', size=7, fill_color=BOOKEH_COLORS[labl], 
                 alpha=0.9, line_width=0, source=source_train, name="test")#, legend='label')
    elif shape == 2:
        p.diamond('x', 'y', size=7, fill_color=BOOKEH_COLORS[labl], 
                 alpha=0.9, line_width=0, source=source_train, name="test")#, legend='label')
    elif shape == 3:
        p.cross('x', 'y', size=7, fill_color=BOOKEH_COLORS[labl], 
                 alpha=0.9, line_width=0, source=source_train, name="test")#, legend='label')

p.legend.click_policy="hide"

show(p)

NameError: name 'HoverTool' is not defined