In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os 
import glob

from keras.callbacks import EarlyStopping
from keras.models import Model
from keras.layers import (Input, Dense)

from sklearn import preprocessing

Using TensorFlow backend.


## Load data to tuple list

In [2]:
audio_len = 1500

if os.name == "nt":
    os.chdir("C:\\Users\\tymons\\001.Projects\\003.eul\\workspace\\smartula-analysis\\core\\csv")
else:
    os.chdir("/home/tymons/Projects/003.eul/workspace/smartula-analysis/csv")
    

all_filenames = [i for i in glob.glob("*.{}".format("csv"))]
#all_filenames = all_filenames[:600]
list_of_audios = []
for filename in all_filenames:
    samples = np.ravel(pd.read_csv(filename, header=None))
    array = np.array(samples[:audio_len]).astype(float)
    samples = array - array.mean()
    audio_tuple = (filename, samples)
    if len(samples) == audio_len:
        list_of_audios.append(audio_tuple)
    else:
        print("Error with: " + filename +". Got length:" + str(len(samples)))


Error with: 2019-06-06T03-23-11.csv. Got length:1000
Error with: 2019-06-07T03-09-11.csv. Got length:1000


## Model

In [3]:
encoding_dim = 32  # 32 floats -> compression of factor 24.5, assuming the input is 784 floats

sound_input = Input(shape=(1500,))
encoded = Dense(784, activation='relu')(sound_input)
encoded = Dense(128, activation='relu')(encoded)
encoded = Dense(64, activation='relu')(encoded)
latent = Dense(encoding_dim, activation='relu')(encoded)

decoded = Dense(64, activation='relu')(latent)
decoded = Dense(128, activation='relu')(decoded)
decoded = Dense(784, activation='relu')(decoded)
decoded = Dense(1500, activation='sigmoid')(decoded)

# this model maps an input to its reconstruction
autoencoder = Model(sound_input, decoded)
encoder = Model(sound_input, latent)

Instructions for updating:
Colocations handled automatically by placer.


In [4]:
autoencoder.compile(optimizer='adadelta', loss='mse')

## Normalization and Standarization


In [5]:
k = 0.8

data = np.array([audio[1] for audio in list_of_audios])
data = preprocessing.scale(data)
data = preprocessing.normalize(data)

index = int(k*len(data))
x_train = data[:index]
x_test = data[index:]



In [6]:
es = [EarlyStopping(monitor='val_loss', min_delta=1e-05, patience=2)]

autoencoder.fit(x_train, x_train,
                epochs=500,
                shuffle=True,
                batch_size = 100,
                validation_data=(x_test, x_test),
                callbacks = es)


Instructions for updating:
Use tf.cast instead.
Train on 855 samples, validate on 214 samples
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500


<keras.callbacks.History at 0x21b5c029240>

array([[ 0.00072173, -0.03094156,  0.03503067, ...,  0.02777295,
         0.00894691, -0.01498   ],
       [-0.02954881, -0.00932382, -0.0392583 , ...,  0.04015046,
         0.03383381,  0.00238051],
       [ 0.05133195,  0.0271058 , -0.03766019, ...,  0.04997449,
        -0.01698882, -0.02845899],
       ...,
       [-0.00414083, -0.03682335,  0.01097654, ...,  0.04732382,
        -0.01997022, -0.01941049],
       [-0.03299856,  0.02043938, -0.03232971, ...,  0.02335959,
         0.04058617,  0.01333566],
       [-0.0094582 , -0.04848468,  0.0090522 , ..., -0.01397355,
         0.0470532 ,  0.01798497]], dtype=float32)

In [None]:
autoencoder.summary()

In [None]:
# encode and decode some digits
# note that we take them from the *test* set
encoded = encoder.predict(x_test)




In [None]:
%matplotlib inline

n = 10  # how many digits we will display
plt.figure(figsize=(20, 4))
for i in range(n):
    # display original
    ax = plt.subplot(2, n, i + 1)
    plt.imshow(x_test[i].reshape(28, 28))
    plt.gray()
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

    # display reconstruction
    ax = plt.subplot(2, n, i + 1 + n)
    plt.imshow(decoded_imgs[i].reshape(28, 28))
    plt.gray()
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
plt.show()