In [None]:
import tensorflow_datasets as tfds
import tensorflow as tf

from tensorflow.keras.layers import (
    Input,
    Dense,
)
from tensorflow.keras.models import Model

import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

import warnings
# NOTE(review): with no category or module argument this suppresses every
# warning process-wide, including deprecation notices from the libraries
# imported above. Consider a narrower filter while debugging.
warnings.filterwarnings('ignore')

In [None]:
# Record the library versions this notebook ran against, one per line.
for lib_name, lib in (
    ("Tensorflow datasets", tfds),
    ("Tensorflow", tf),
    ("Numpy", np),
    ("Pandas", pd),
    ("Matplotlib", matplotlib),
):
    print(f"{lib_name}: {lib.__version__}")

In [None]:
# NOTE(review): `pump` is never referenced by name below; presumably importing
# it registers the "pump" builder with tensorflow_datasets -- confirm.
import pump

data_dir = "../dataset"

# Request both splits in one call; `info` carries the dataset metadata.
splits, info = tfds.load(
    "pump",
    split=["train", "test"],
    data_dir=data_dir,
    with_info=True,
)
train, test = splits

In [None]:
def spectral_centroid(item, frame_length=1024, frame_step=512):
    """Turn one dataset example into its per-frame spectral centroid.

    The centroid of a frame is the magnitude-weighted mean of the STFT bin
    indices, so it is expressed in bin units (not Hz).

    Args:
        item: mapping with an "audio" entry holding the waveform samples.
        frame_length: STFT window length in samples (default 1024, the value
            previously hard-coded).
        frame_step: STFT hop in samples (default 512, the value previously
            hard-coded).

    Returns:
        A `(centroid, centroid)` tuple holding the same tensor twice, with one
        value per STFT frame. The dataset built from this function is passed
        to `fit`, where the pair acts as (input, target) for the autoencoder.
    """
    waveform = tf.cast(item["audio"], dtype=tf.float32)
    # Scale by 2**15 -- presumably the samples are 16-bit PCM; confirm against
    # the dataset definition.
    waveform = waveform / 2**15
    magnitude = tf.abs(
        tf.signal.stft(waveform, frame_length=frame_length, frame_step=frame_step)
    )
    # Derive the bin indices from the spectrum itself instead of hard-coding
    # 513, so the function stays correct if frame_length changes.
    bin_index = tf.cast(tf.range(tf.shape(magnitude)[-1]), magnitude.dtype)
    # Reduce over the last (frequency) axis; identical to axis=1 for a single
    # waveform and also valid if a leading batch axis is present.
    weighted_sum = tf.reduce_sum(magnitude * bin_index, axis=-1)
    total_magnitude = tf.reduce_sum(magnitude, axis=-1)
    # An all-zero (silent) frame has zero total magnitude; a plain division
    # would yield NaN and poison the training loss. divide_no_nan gives 0.
    centroid = tf.math.divide_no_nan(weighted_sum, total_magnitude)
    return centroid, centroid

In [None]:
def spectral_centroid_test(item, frame_length=1024, frame_step=512):
    """Replace an example's waveform with its per-frame spectral centroid.

    Unlike the training variant, every other entry of the example (such as
    "label") is kept, so the result can still be filtered or inspected.

    Args:
        item: mapping with an "audio" entry holding the waveform samples.
        frame_length: STFT window length in samples (default 1024, the value
            previously hard-coded).
        frame_step: STFT hop in samples (default 512, the value previously
            hard-coded).

    Returns:
        A new dict with the same keys as `item`, where "audio" now holds one
        centroid value (in STFT-bin units) per frame.
    """
    waveform = tf.cast(item["audio"], dtype=tf.float32)
    # Scale by 2**15 -- presumably the samples are 16-bit PCM; confirm against
    # the dataset definition.
    waveform = waveform / 2**15
    magnitude = tf.abs(
        tf.signal.stft(waveform, frame_length=frame_length, frame_step=frame_step)
    )
    # Derive the bin indices from the spectrum itself instead of hard-coding
    # 513, so the function stays correct if frame_length changes.
    bin_index = tf.cast(tf.range(tf.shape(magnitude)[-1]), magnitude.dtype)
    weighted_sum = tf.reduce_sum(magnitude * bin_index, axis=-1)
    total_magnitude = tf.reduce_sum(magnitude, axis=-1)
    # Silent frames have zero total magnitude; divide_no_nan yields 0 there
    # instead of the NaN a plain division would produce.
    centroid = tf.math.divide_no_nan(weighted_sum, total_magnitude)
    # Build a fresh mapping rather than mutating the caller's argument.
    features = dict(item)
    features["audio"] = centroid
    return features

In [None]:
BATCH_SIZE = 128
SHUFFLE_BUFFER_SIZE = 32


def _test_batches_with_label(label_value):
    """Return batched centroid features of the test examples whose label equals `label_value`."""
    subset = test.filter(lambda item: item["label"] == label_value)
    return subset.map(spectral_centroid_test).batch(BATCH_SIZE)


# Training inputs as (features, target) pairs, plus the raw labels on their own.
audio_train = train.map(spectral_centroid).batch(BATCH_SIZE)
label_train = train.map(lambda item: item["label"])

# The notebook treats label 0 as the normal class and label 1 as the anomalous one.
normal_test = _test_batches_with_label(0)
anomaly_test = _test_batches_with_label(1)

In [None]:
class AnomalyDetector(Model):
    """Fully connected autoencoder used to score inputs by reconstruction error.

    The encoder narrows the input through 128 -> 64 -> 16 ReLU units and the
    decoder mirrors it back through 64 -> 128 units to a linear output layer of
    the original width.

    Args:
        input_dim: length of each input feature vector. Defaults to 311, the
            width that was previously hard-coded in both the input and output
            layers.
    """

    def __init__(self, input_dim=311):
        super().__init__()
        self.encoder = tf.keras.Sequential(
            [
                # Pass the shape as a tuple: a bare integer is rejected by
                # newer Keras releases, while a tuple works everywhere.
                Input(shape=(input_dim,)),
                Dense(units=128, activation='relu'),
                Dense(units=64, activation='relu'),
                Dense(units=16, activation="relu"),
            ]
        )
        self.decoder = tf.keras.Sequential(
            [
                Dense(units=64, activation='relu'),
                Dense(units=128, activation='relu'),
                # No activation: the reconstruction is a regression target.
                Dense(input_dim),
            ]
        )

    def call(self, x):
        """Encode `x` to the bottleneck and return its reconstruction."""
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded


autoencoder = AnomalyDetector()

In [None]:
# Optimise the mean squared reconstruction error with Adam.
autoencoder.compile(
    loss='mean_squared_error',
    optimizer='adam',
)

In [None]:
# Keras ignores `shuffle=True` when the input is a tf.data.Dataset, so the
# original call never shuffled the training examples. Shuffle inside the input
# pipeline instead. A separate dataset is built so that `audio_train` keeps its
# fixed order for the cells that pair inputs with their predictions.
# SHUFFLE_BUFFER_SIZE is smaller than one batch; raise it for a more thorough
# shuffle.
shuffled_train = (
    train.map(spectral_centroid)
    .shuffle(SHUFFLE_BUFFER_SIZE)
    .batch(BATCH_SIZE)
)
history = autoencoder.fit(shuffled_train, epochs=10)

In [None]:
# Print the layer/parameter summary. This runs after fit(), by which point the
# subclassed model has been called on data and its weights exist.
autoencoder.summary()

In [None]:
# Reconstruct every training example (the target half of each pair is unused
# by predict).
reconstruction = autoencoder.predict(audio_train)

# Collect the original feature vectors as an (examples, features) array in the
# same order predict() saw them. The vectors are materialised into a list
# first: a Dataset is an iterable, not a sequence, and passing a non-sequence
# to np.stack is deprecated since NumPy 1.16 and a TypeError on newer releases.
train2 = np.stack(
    [features.numpy() for features, _ in audio_train.unbatch()],
    axis=0,
)

# Mean absolute error per example (reduced over the feature axis).
train_loss = tf.keras.losses.mae(train2, reconstruction)

fig, ax = plt.subplots()
ax.hist(train_loss, bins=20)
ax.set_xlabel("Train loss")
ax.set_ylabel("No of examples")
plt.show()

In [None]:
# Summary statistics of the per-example training reconstruction error.
df = pd.Series(train_loss.numpy(), name="error").to_frame()
df.describe()

In [None]:
# Threshold = mean training error plus one standard deviation.
# NOTE(review): presumably errors above this value are meant to be treated as
# anomalous; the cells shown here never apply it -- confirm.
train_errors = train_loss.numpy()
threshold = train_errors.mean() + train_errors.std()
print(threshold)

In [None]:
# Reconstruct the normal-labelled test examples from their centroid features.
reconstruction = autoencoder.predict(normal_test.map(lambda item: item["audio"]))

# Collect the original feature vectors as an (examples, features) array in the
# same order predict() saw them. The vectors are materialised into a list
# first: a Dataset is an iterable, not a sequence, and passing a non-sequence
# to np.stack is deprecated since NumPy 1.16 and a TypeError on newer releases.
test2 = np.stack(
    [item["audio"].numpy() for item in normal_test.unbatch()],
    axis=0,
)

# Mean absolute error per example (reduced over the feature axis).
test_loss = tf.keras.losses.mae(test2, reconstruction)

fig, ax = plt.subplots()
ax.hist(test_loss, bins=10)
ax.set_xlabel("Test loss")
ax.set_ylabel("No of examples")
plt.show()

In [None]:
# Summary statistics of the reconstruction error held in `test_loss`.
df = pd.Series(test_loss.numpy(), name="error").to_frame()
df.describe()

In [None]:
# Reconstruct the anomaly-labelled test examples from their centroid features.
reconstruction = autoencoder.predict(anomaly_test.map(lambda item: item["audio"]))

# Collect the original feature vectors as an (examples, features) array in the
# same order predict() saw them. The vectors are materialised into a list
# first: a Dataset is an iterable, not a sequence, and passing a non-sequence
# to np.stack is deprecated since NumPy 1.16 and a TypeError on newer releases.
test2 = np.stack(
    [item["audio"].numpy() for item in anomaly_test.unbatch()],
    axis=0,
)

# Mean absolute error per example (reduced over the feature axis).
test_loss = tf.keras.losses.mae(test2, reconstruction)

fig, ax = plt.subplots()
ax.hist(test_loss, bins=10)
ax.set_xlabel("Test loss")
ax.set_ylabel("No of examples")
plt.show()

In [None]:
# Summary statistics of the reconstruction error held in `test_loss`.
df = pd.Series(test_loss.numpy(), name="error").to_frame()
df.describe()