In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os 
import glob

from keras.callbacks import EarlyStopping
from keras.models import Model
from keras.layers import (Input, Dense)
from smartula_analyze import save_to_file, is_affected
from sklearn import preprocessing
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA

Using TensorFlow backend.


## Load data into a list of dicts

In [18]:
audio_len = 3000
hives_sns = ["1300001", "1300002"]

if os.name == "nt":
    path_name = "C:\\Users\\tymons\\001.Projects\\003.eul\\workspace\\smartula-analysis\\core\\measurements\\"
else:
    path_name = "/home/tymons/Projects/003.eul/workspace/smartula-analysis/measurements/"


def load_hive_audios(base_path, hive_ids, expected_len):
    """Load the `sound-*.csv` recordings of every hive below `base_path`.

    Each CSV is read with its first row as header, truncated to
    `expected_len` rows, flattened to 1-D and mean-centred.

    Parameters
    ----------
    base_path : str
        Directory that contains one sub-directory per hive id.
    hive_ids : iterable of str
        Hive sub-directory names; also stored as the "class" of each record.
    expected_len : int
        Number of samples every recording must have after truncation.

    Returns
    -------
    list of dict
        One dict per accepted recording with the keys "timestamp",
        "samples" (1-D float array) and "class". Recordings with a different
        number of samples are reported on stdout and skipped.
    """
    audios = []
    for hive_id in hive_ids:
        pattern = os.path.join(base_path, hive_id, "sound-*.csv")
        # glob returns files in arbitrary order; sort so the dataset order
        # (and everything derived from it) is the same on every run.
        for filename in sorted(glob.glob(pattern)):
            raw = pd.read_csv(filename, header=0).values[:expected_len].astype(float)
            samples = raw.reshape(raw.size)
            if samples.size != expected_len:
                print("Error with: " + filename + ". Got length:" + str(samples.size))
                continue
            # Remove the DC offset of the recording.
            samples = samples - samples.mean()
            timestamp = os.path.basename(filename).split(".")[0].replace("sound-", "")
            audios.append({"timestamp": timestamp, "samples": samples, "class": hive_id})
    return audios


list_of_audios = load_hive_audios(path_name, hives_sns, audio_len)

## Normalization and Standardization


In [29]:
# Fraction of the recordings used for training; the rest is used for validation.
k = 0.8
# Seed for the train/validation shuffle so the split is reproducible.
SPLIT_SEED = 42

scaler = preprocessing.MinMaxScaler(copy=True, feature_range=(0, 1))

if not list_of_audios:
    raise ValueError("No recordings were loaded - check `path_name` and `hives_sns`.")

# `data` keeps the order of `list_of_audios`: later cells pair its rows with
# the class and timestamp of each recording.
data = np.asarray([audio["samples"] for audio in list_of_audios])
# Standarize (per feature, i.e. per sample position across recordings)
data = preprocessing.scale(data)
# Normalize every feature to [0, 1], the range the sigmoid output layer can reproduce
data = scaler.fit_transform(data)

index = int(k * len(data))
# The recordings are stored hive by hive, so a plain head/tail split would
# validate on the last hive only. Split on a seeded permutation instead.
shuffled_rows = np.random.RandomState(SPLIT_SEED).permutation(len(data))
x_train = data[shuffled_rows[:index]]
x_test = data[shuffled_rows[index:]]

## MFCC model




## Autoencoder model

In [30]:
# Size of the latent (bottleneck) representation.
encoding_dim = 32

# Take the layer width from the training data instead of hard-coding it, so
# the network always matches the recording length used when loading.
input_dim = x_train.shape[1]

# Encoder: input -> 784 -> 128 -> 64 -> latent
sound_input = Input(shape=(input_dim,))
encoded = Dense(784, activation='relu')(sound_input)
encoded = Dense(128, activation='relu')(encoded)
encoded = Dense(64, activation='relu')(encoded)
latent = Dense(encoding_dim, activation='relu')(encoded)

# Decoder: mirror of the encoder; the sigmoid output matches inputs scaled to [0, 1]
decoded = Dense(64, activation='relu')(latent)
decoded = Dense(128, activation='relu')(decoded)
decoded = Dense(784, activation='relu')(decoded)
decoded = Dense(input_dim, activation='sigmoid')(decoded)

# this model maps an input to its reconstruction
autoencoder = Model(sound_input, decoded)
# this model maps an input to its latent representation
encoder = Model(sound_input, latent)

autoencoder.compile(optimizer='adam', loss='binary_crossentropy')

W0717 14:15:00.032352 140376280762112 deprecation_wrapper.py:119] From /home/tymons/Projects/003.eul/workspace/smartula-analysis/venv/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0717 14:15:00.928751 140376280762112 deprecation_wrapper.py:119] From /home/tymons/Projects/003.eul/workspace/smartula-analysis/venv/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0717 14:15:01.092972 140376280762112 deprecation_wrapper.py:119] From /home/tymons/Projects/003.eul/workspace/smartula-analysis/venv/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0717 14:15:01.330157 140376280762112 deprecation_wrapper.py:119] From /home/tymons/Projects/003.eul/workspace/smartula-analysis

In [5]:
# Early stopping watches the validation loss (min_delta=1e-06, patience=3).
es = [EarlyStopping(monitor='val_loss', min_delta=1e-06, patience=3)]

# The autoencoder is trained to reproduce its own input, so the inputs are
# also the targets for both the training and the validation set.
fit_options = dict(
    epochs=500,
    shuffle=True,
    batch_size=20,
    validation_data=(x_test, x_test),
    callbacks=es,
)
autoencoder.fit(x_train, x_train, **fit_options)


Instructions for updating:
Use tf.cast instead.
Train on 855 samples, validate on 214 samples
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500

KeyboardInterrupt: 

In [None]:
# Latent representation of every recording, in the order of `list_of_audios`.
predictions = encoder.predict(data)

classes = [audio_dict['class'] for audio_dict in list_of_audios]

# Classes are hive serial numbers (non-empty strings), so a truthiness test
# cannot tell them apart. Give every distinct class its own colour instead;
# the palette is reused cyclically if there are more classes than colours.
class_palette = ["green", "red"]
color_by_class = {
    class_name: class_palette[position % len(class_palette)]
    for position, class_name in enumerate(sorted(set(classes)))
}
colors = [color_by_class[category] for category in classes]

# One (latent vector, class, colour) tuple per recording.
pred_class_col = list(zip(predictions, classes, colors))


In [None]:
# t-SNE is stochastic: fix `random_state` so the 2-D embedding is the same on
# every run of the notebook.
tsne = TSNE(n_components=2, perplexity=5, learning_rate=500, n_iter=2500,
            random_state=42, verbose=1)
features_embedded = tsne.fit_transform([elem[0] for elem in pred_class_col])

In [None]:
# Project the latent vectors onto their first two principal components.
# This rebinds `features_embedded`, replacing any embedding computed earlier.
latent_vectors = [elem[0] for elem in pred_class_col]
pca = PCA(n_components=2)
features_embedded = pca.fit_transform(latent_vectors)

In [None]:
# One (2-D point, class, colour, timestamp) tuple per recording.
timestamps = [audio["timestamp"] for audio in list_of_audios]
features_class_col = list(zip(features_embedded, classes, colors, timestamps))

In [None]:
%matplotlib inline

# # Create plot
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

for data, group, color, timestamp in features_class_col:
    x, y = data
    ax.scatter(x, y, alpha=0.8, c=color, edgecolors='none', s=30, label=str(group))
    
plt.show()


In [None]:
from bokeh.plotting import figure, show, output_file
from bokeh.models import ColumnDataSource

# Object table with one row per recording and the columns
# (2-D point, class, colour, timestamp). It is filled cell by cell because
# the first column holds arrays: letting NumPy infer the shape from such
# ragged tuples is rejected by recent NumPy versions.
np_features_class_col_t = np.empty((len(features_class_col), 4), dtype=object)
for row_idx, record in enumerate(features_class_col):
    for col_idx, value in enumerate(record):
        np_features_class_col_t[row_idx, col_idx] = value

groups = np_features_class_col_t[:, 1]

# Classes are strings, so comparing them with `True` never matches. Assign
# one colour per distinct class instead (palette reused cyclically).
bokeh_palette = ["#003399", "#ff0000"]
color_by_group = {
    group: bokeh_palette[position % len(bokeh_palette)]
    for position, group in enumerate(sorted(set(groups)))
}

data_frame = pd.DataFrame()
data_frame['colors'] = [color_by_group[group] for group in groups]
data_frame['timestamp'] = np_features_class_col_t[:, 3]
data_frame['group'] = groups
data_frame['x'] = [point[0] for point in np_features_class_col_t[:, 0]]
data_frame['y'] = [point[1] for point in np_features_class_col_t[:, 0]]

source = ColumnDataSource(data=data_frame)

tools = "hover,pan,wheel_zoom,zoom_in,zoom_out,box_zoom,undo,redo,reset,tap,save,box_select," \
        "poly_select,lasso_select, "
# Shown when hovering over a point.
tooltips = [
    ("timestamp", "@timestamp"),
    ("class", "@group")
]
p = figure(tools=tools, tooltips=tooltips)
p.scatter(x='x', y='y', fill_color='colors', fill_alpha=0.4, source=source, size=15, line_color=None)
output_file("color_scatter.html", title="color_scatter.py example")
show(p)  # open a browser

In [None]:
# Peek at the first column (the 2-D points) of the first two rows.
np_features_class_col_t[0:2, 0]