In [7]:
# Write a C array definition, `uint16_t dummy_table[3000]`, whose i-th entry
# is simply i. The context manager guarantees the file is flushed and closed
# even if one of the writes raises.
with open("table3000.txt", "w") as f:
    f.write("uint16_t dummy_table[3000] = {")
    for i in range(3000):
        f.write(f"{i}, ")
        # Break the line after every 30th value so each row holds exactly 30
        # entries (testing `i % 30 == 0` left a lone "0, " on the first row).
        if (i + 1) % 30 == 0:
            f.write("\n")
    f.write("};")

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os 
import glob

from keras.callbacks import EarlyStopping
from keras.models import Model
from keras.layers import (Input, Dense)
from smartula_analyze import save_to_file, is_affected

from sklearn import preprocessing
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA

Using TensorFlow backend.


## Load data into a list of dicts

In [2]:
# Number of leading samples kept from every recording; recordings that yield
# a different number of values are reported and skipped.
audio_len = 1500

# Directory holding the per-recording CSV files. The SMARTULA_CSV_DIR
# environment variable, when set, overrides the machine-specific defaults.
if os.name == "nt":
    default_path_name = "C:\\Users\\tymons\\001.Projects\\003.eul\\workspace\\smartula-analysis\\core\\csv\\"
else:
    default_path_name = "/home/tymons/Projects/003.eul/workspace/smartula-analysis/csv/"
path_name = os.environ.get("SMARTULA_CSV_DIR", default_path_name)

# glob yields files in arbitrary order; sorting makes the later index-based
# train/validation split reproducible between runs and machines.
all_filenames = sorted(glob.glob(os.path.join(path_name, "*.csv")))

# (start, end) timestamp pairs handed to is_affected for labelling.
# NOTE(review): presumably a recording is flagged when its timestamp falls
# inside one of these periods — confirm against smartula_analyze.is_affected.
affected_periods = [("2019-06-04T18-22-00", "2019-06-04T20-30-00"),
                    ("2019-06-05T20-46-00", "2019-06-05T23-48-00"),
                    ("2019-06-06T22-23-00", "2019-06-07T05-52-00")]

list_of_audios = []
for filename in all_filenames:
    # Keep the first audio_len rows and flatten them into a 1-D float vector.
    samples = pd.read_csv(filename, header=None).values[:audio_len].astype(float)
    samples = samples.reshape(samples.size)

    # Reject recordings of the wrong length before doing any further work.
    if samples.size != audio_len:
        print("Error with: " + filename +". Got length:" + str(samples.size))
        continue

    # Remove the DC offset of the recording.
    samples = samples - samples.mean()

    # The file name (without directory and extension) is the timestamp.
    timestamp = os.path.basename(filename).split(".")[0]

    list_of_audios.append({"timestamp" : timestamp,
                           "samples" : samples,
                           "class" : is_affected(timestamp, affected_periods)})


Error with: C:\Users\tymons\001.Projects\003.eul\workspace\smartula-analysis\core\csv\2019-06-06T03-23-11.csv. Got length:1000
Error with: C:\Users\tymons\001.Projects\003.eul\workspace\smartula-analysis\core\csv\2019-06-07T03-09-11.csv. Got length:1000


## Model

In [3]:
# Width of the bottleneck (latent) layer produced by the encoder.
encoding_dim = 32

# The input and output widths follow audio_len (set in the data-loading cell)
# so the network always matches the length of the loaded recordings instead
# of repeating the literal 1500.
sound_input = Input(shape=(audio_len,))

# Encoder: progressively narrower dense layers down to the latent vector.
encoded = Dense(784, activation='relu')(sound_input)
encoded = Dense(128, activation='relu')(encoded)
encoded = Dense(64, activation='relu')(encoded)
latent = Dense(encoding_dim, activation='relu')(encoded)

# Decoder: mirror image of the encoder. The sigmoid keeps reconstructions in
# (0, 1), the same range the inputs are min-max scaled to before training.
decoded = Dense(64, activation='relu')(latent)
decoded = Dense(128, activation='relu')(decoded)
decoded = Dense(784, activation='relu')(decoded)
decoded = Dense(audio_len, activation='sigmoid')(decoded)

# autoencoder maps an input to its reconstruction; encoder shares the same
# layers but stops at the latent vector.
autoencoder = Model(sound_input, decoded)
encoder = Model(sound_input, latent)

autoencoder.compile(optimizer='adam', loss='binary_crossentropy')

Instructions for updating:
Colocations handled automatically by placer.


## Normalization and Standardization


In [4]:
# Fraction of the recordings used for training; the rest is held out.
k = 0.8

# Standardise the stacked recordings with sklearn's `scale` (default settings).
data_one = preprocessing.scale([audio["samples"] for audio in list_of_audios])

# Squash the standardised values into [0, 1].
# NOTE(review): both scaling steps are fitted on the full data set, i.e.
# before the train/validation split below — confirm this is intended.
scaler = preprocessing.MinMaxScaler(feature_range=(0, 1), copy=True)
data = scaler.fit_transform(data_one)

# Order-preserving split: the first k of the rows train, the remainder validates.
index = int(k * len(data))
x_train, x_test = data[:index], data[index:]

In [5]:
# Early stopping watches the validation loss: an improvement must be at least
# 1e-06 to count, and training halts after 3 epochs without one.
es = [
    EarlyStopping(monitor='val_loss', patience=3, min_delta=1e-06),
]

# The autoencoder is trained to reproduce its own input, so the inputs double
# as targets for both the training and the validation data.
autoencoder.fit(
    x=x_train,
    y=x_train,
    batch_size=20,
    epochs=500,
    shuffle=True,
    validation_data=(x_test, x_test),
    callbacks=es,
)


Instructions for updating:
Use tf.cast instead.
Train on 855 samples, validate on 214 samples
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500

KeyboardInterrupt: 

In [None]:
# Run every scaled recording through the encoder to get its latent vector.
predictions = encoder.predict(data)

# Class flag of each recording, and the matching plot colour
# (red when the flag is truthy, green otherwise).
classes = [entry['class'] for entry in list_of_audios]
colors = ["green" if not label else "red" for label in classes]

# One (latent vector, class, colour) record per recording.
pred_class_col = [*zip(predictions, classes, colors)]


In [None]:
# Embed the latent vectors in 2-D with t-SNE. t-SNE is stochastic, so a fixed
# random_state is supplied to make the embedding reproducible across runs.
# NOTE(review): the PCA cell that follows assigns to the same name, so running
# both cells in order discards this result.
features_embedded = TSNE(
    n_components=2,
    perplexity=5,
    learning_rate=500,
    n_iter=2500,
    random_state=42,
    verbose=1,
).fit_transform([elem[0] for elem in pred_class_col])

In [None]:
# Project the latent vectors onto their first two principal components.
# This rebinds features_embedded, replacing whatever the t-SNE cell stored.
pca = PCA(n_components=2)
features_embedded = pca.fit_transform(
    [latent for latent, _, _ in pred_class_col]
)

In [None]:
# One (2-D point, class, colour, timestamp) record per recording, in load order.
features_class_col = list(
    zip(
        features_embedded,
        classes,
        colors,
        (audio["timestamp"] for audio in list_of_audios),
    )
)

In [None]:
%matplotlib inline

# Static scatter plot of the 2-D embedding, one marker per recording,
# coloured with the per-record colour stored in features_class_col.
fig, ax = plt.subplots()

# Unpack with comprehensions so that no loop variable leaks into the notebook
# namespace: iterating with a variable called `data` used to overwrite the
# scaled data set that the encoder cell reads.
embedded_points = np.asarray(
    [point for point, _, _, _ in features_class_col], dtype=float
).reshape(-1, 2)
point_colors = [color for _, _, color, _ in features_class_col]

# A single vectorised call draws every marker at once.
ax.scatter(embedded_points[:, 0], embedded_points[:, 1],
           alpha=0.8, c=point_colors, edgecolors='none', s=30)
ax.set(title="2-D embedding of the encoder output",
       xlabel="component 1", ylabel="component 2")

plt.show()


In [None]:
from bokeh.plotting import figure, show, output_file
from bokeh.models import ColumnDataSource

# Object-array view of the records, kept because the following cell indexes it.
# dtype=object is required: every record mixes an ndarray, a class flag and
# strings, and recent NumPy releases refuse to build such a ragged array
# implicitly.
np_features_class_col_t = np.asarray(features_class_col, dtype=object)

# Build the plotting table straight from the records, one column per field.
# The colour uses the same truthiness test as the matplotlib colour list.
# NOTE(review): the palette here (blue = truthy class, red = otherwise) differs
# from the red/green one used for the matplotlib plot — confirm it is intended.
data_frame = pd.DataFrame({
    'colors': ["#003399" if group else "#ff0000" for _, group, _, _ in features_class_col],
    'timestamp': [timestamp for _, _, _, timestamp in features_class_col],
    'group': [group for _, group, _, _ in features_class_col],
    'x': [point[0] for point, _, _, _ in features_class_col],
    'y': [point[1] for point, _, _, _ in features_class_col],
})

source = ColumnDataSource(data=data_frame)

# Comma-separated bokeh tool names (no trailing empty entry).
tools = "hover,pan,wheel_zoom,zoom_in,zoom_out,box_zoom,undo,redo,reset,tap,save,box_select," \
        "poly_select,lasso_select"
# Hover tooltip: show the recording's timestamp and its class flag.
tooltips = [
    ("timestamp", "@timestamp"),
    ("class", "@group")
]
p = figure(tools=tools, tooltips=tooltips)
p.scatter(x='x', y='y', fill_color='colors', fill_alpha=0.4, source=source, size=15, line_color=None)
output_file("color_scatter.html", title="color_scatter.py example")
show(p)  # open a browser

In [None]:
# Display the embedded points (column 0) of the first two records.
np_features_class_col_t[0:2, 0]