In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
import glob

hives_ids = ["smrpiclient7", "smrpiclient6", "smrpiclient3"]

In [None]:
import matplotlib.pyplot as plt

%matplotlib widget

def plot_spectrogram(frequency, time_x, spectrocgram, title):
    fig = plt.figure(figsize=(6,4))
    plt.title(title)
    plt.pcolormesh(time_x, frequency, spectrocgram)
    plt.ylabel('Frequency [Hz]')
    plt.xlabel('Time [sec]')
    plt.show()

In [2]:
import torch.nn as nn
import torch.nn.functional as F

# define the NN architecture
class ConvAutoencoder(nn.Module):
    def __init__(self):
        super(ConvAutoencoder, self).__init__()
        ## encoder layers ##
        # conv layer (depth from 1 --> 16), 3x3 kernels
        self.conv1 = nn.Conv2d(1, 16, 3, padding=1)  
        # conv layer (depth from 16 --> 4), 3x3 kernels
        self.conv2 = nn.Conv2d(16, 4, 3, padding=1)
        # pooling layer to reduce x-y dims by two; kernel and stride of 2
        self.pool = nn.MaxPool2d(2, 2)
        
        ## decoder layers ##
        ## a kernel of 2 and a stride of 2 will increase the spatial dims by 2
        self.t_conv1 = nn.ConvTranspose2d(4, 16, 2, stride=2)
        self.t_conv2 = nn.ConvTranspose2d(16, 1, 2, stride=2)


    def forward(self, x):
        ## encode ##
        # add hidden layers with relu activation function
        # and maxpooling after
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        # add second hidden layer
        x = F.relu(self.conv2(x))
        x = self.pool(x)  # compressed representation
        ## decode ##
        # add transpose conv layers, with relu activation function
        x = F.relu(self.t_conv1(x))
        # output layer (with sigmoid for scaling from 0 to 1)
        x = F.sigmoid(self.t_conv2(x))
        return x

In [None]:
import torch
from torch import nn

class autoencoder_basic(nn.Module):
    def __init__(self):
        super(autoencoder_basic, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(1499, 512),
            nn.SELU(True),
            nn.Linear(512, 128),
            nn.SELU(True),
            nn.Linear(128, 64),
            nn.SELU(True))
        self.decoder = nn.Sequential(
            nn.Linear(64, 128),
            nn.SELU(True),
            nn.Linear(128, 512),
            nn.SELU(True),
            nn.Linear(128, 1499),
            nn.Sigmoid())

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x


## Load train data samples

In [3]:
import struct
import math
import numpy as np
import librosa
import librosa.display

from tqdm import tqdm
from datetime import datetime
from scipy.io import wavfile
from scipy import signal
from scipy.fftpack import fft

import matplotlib.pyplot as plt
%matplotlib widget

sound_time_ms = 2000
# ~93 ms for fft window
nfft = 4096
# ~34% overlapping
hop_len = (nfft//3) + 30
# This can be manipulated to adjust number of bins for conv layer
fmax = 2750

hives_data = []
max_to_norm = 0
for idx, hive_id in enumerate(hives_ids):
    sound_files = [f for f in glob.glob(f"..\\measurements\\smartulav2\\{hive_id}_*\\*.wav")]
    print(f"Sound data preparation for hive: {hive_id} which has {len(sound_files)} recordings...", end=' ', flush=True)
    for file in tqdm(sound_files):
        sample_rate, sound_samples = wavfile.read(file)
        sound_samples = sound_samples.T[0]/(2.0**31)
    
        spectrogram = librosa.core.stft(sound_samples, n_fft=nfft, hop_length=hop_len)
        spectrogram_magnitude = np.abs(spectrogram)
        spectrogram_phase = np.angle(spectrogram)
        spectrogram_db = librosa.amplitude_to_db(spectrogram_magnitude, ref=np.max)
        frequencies = librosa.fft_frequencies(sr=sample_rate, n_fft=nfft)
        times = (np.arange(0, spectrogram_magnitude.shape[1])*hop_len)/sample_rate
        
        freq_slice = np.where((frequencies < fmax))
        frequencies = frequencies[freq_slice]
        spectrogram_db = spectrogram_db[freq_slice, :][0]    
    
        filename = file.rsplit('\\', 1)[-1]
        timestamp = filename[filename.index('-')+1:].rsplit(".wav")[0]
        datetime = datetime.strptime(timestamp, '%Y-%m-%dT%H-%M-%S')
        hives_data.append([datetime, hive_id, sound_samples, [frequencies, times, spectrogram_db]])
    print(" done.")

print(f"Got {len(hives_data)} sound samples")

Sound data preparation for hive: smrpiclient7 which has 3367 recordings... 

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3367/3367 [00:40<00:00, 82.84it/s]

 done.
Sound data preparation for hive: smrpiclient6 which has 3172 recordings... 


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3172/3172 [00:37<00:00, 85.21it/s]

 done.
Sound data preparation for hive: smrpiclient3 which has 602 recordings... 


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 602/602 [00:06<00:00, 86.46it/s]

 done.
Got 7141 sound samples





## Prepare data for autoencoder train.

In [None]:
import random 

random_idx = random.randint(0, len(hives_data) - 1)

plot_spectrogram(hives_data[random_idx][3][0],
                 hives_data[random_idx][3][1],
                 hives_data[random_idx][3][2],
                 f"hive: {hives_data[random_idx][1]}, time: {hives_data[random_idx][0]}, idx: {random_idx}") 

## Train basic AE

In [None]:
from scipy import signal as sig
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader
from torch.utils import data as tdata

# Divide data to test, validation and train
train_stop_idx = int(pd_ae_data.shape[0]*90/100)

pd_ae_data_train = pd_ae_data[:train_stop_idx]
pd_ae_data_test = pd_ae_data[train_stop_idx:]

print(f'Train data size: {pd_ae_data_train.shape[0]}')
print(f'Test data size: {pd_ae_data_test.shape[0]}')

tensor_train = torch.Tensor(pd_ae_data_train['periodogram'].values.tolist())
tensor_test = torch.Tensor(pd_ae_data_test['periodogram'].values.tolist())

train_dataset = tdata.TensorDataset(tensor_train)
test_dataset = tdata.TensorDataset(tensor_test)

dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
dataloader_test = DataLoader(test_dataset, batch_size=32, shuffle=True)

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
from torch.autograd import Variable
from torchvision import transforms
import torch.nn.functional as F

num_epochs = 1000
learning_rate = 1e-3

model = autoencoder_basic().to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)

for epoch in range(num_epochs):
    loss = 0
    for data in dataloader:
        periodogram = data[0].to(device)
        # ===================forward=====================
        output = model(periodogram)
        #train_loss = criterion(output, periodogram)
        train_loss = F.binary_cross_entropy(output, periodogram)
        # ===================backward====================
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        loss += train_loss.item()
    loss = loss / len(dataloader)
    if (epoch+1) % (num_epochs/10) == 0:
        print(f'epoch {epoch + 1}/{num_epochs}, loss:{loss}')

In [None]:
import matplotlib.pyplot as plt

counter = 0
with torch.no_grad():
    loss_test = 0
    for data in dataloader_test:
        periodograms_test = data[0].to(device)
        output = model(periodograms_test)
        for idx, i in enumerate(output):
            #loss_test += nn.MSELoss()(periodograms_test[idx], i)
            loss_test += F.binary_cross_entropy(periodograms_test[idx], i)

loss_test = loss_test/len(pd_ae_data_test)
print(f'Final test loss: {loss_test}')


## Train CONV AE

In [11]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

sc = StandardScaler()
mm = MinMaxScaler()

spectrogram_ae_data = [hive_data[3][2] for hive_data in hives_data]
standarized_ae_data = [sc.fit_transform(spec.T).T for spec in spectrogram_ae_data]
scaled_ae_data = [mm.fit_transform(stan.T).T for stan in standarized_ae_data]

print(f"Got dataset of size: {len(scaled_ae_data)}")

Got dataset of size: 7141


In [12]:
import torch 
from torch.utils import data as tdata

train_data_size = len(scaled_ae_data)*80//100
test_data_size = (len(scaled_ae_data) - train_data_size) // 2
val_data_size = len(scaled_ae_data) - train_data_size - test_data_size

dataset_tensor = torch.Tensor(scaled_ae_data)
print(f"Dataset shape: {dataset_tensor.shape}")
print(f"Train set size: {train_data_size}")
print(f"Test set size: {test_data_size}")
print(f"Validation set size: {val_data_size}")

# add one extra dimension as it is required for conv layer
dataset_tensor = dataset_tensor[:, None, :, :] 
dataset = tdata.TensorDataset(dataset_tensor)
train_set, test_set, val_set = torch.utils.data.random_split(dataset, [train_data_size, test_data_size, val_data_size])

dataloader_train = tdata.DataLoader(train_set, batch_size=32, shuffle=True)
dataloader_test = tdata.DataLoader(test_set, batch_size=32, shuffle=True)
dataloader_val = tdata.DataLoader(val_set, batch_size=32, shuffle=True)

Dataset shape: torch.Size([7141, 256, 64])
Train set size: 5712
Test set size: 714
Validation set size: 715


In [15]:
num_epochs = 50
learning_rate = 1e-3

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

modelConvAE = ConvAutoencoder().to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(modelConvAE.parameters(), lr=learning_rate, weight_decay=1e-5)

In [16]:
%matplotlib widget

import matplotlib.pyplot as plt

# monitor training loss per batch
train_loss = []
# monitor validation loss per batch
val_loss = []
# save avg train losses for early stopping visualization
avg_train_loss = []
# save avg train losses for early stopping visualization
avg_val_loss = [] 
    
for epoch in range(1, num_epochs+1):    
    ###################
    # train the model #
    ###################
    modelConvAE.train()
    for data in dataloader_train:
        # transfer data to device
        periodogram = data[0].to(device)
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass
        outputs = modelConvAE(periodogram)
        # calculate the loss
        loss = criterion(outputs, periodogram)
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer.step()
        # update running training loss
        train_loss.append(loss.item())
        
    ###################
    # val the model   #
    ###################
    modelConvAE.eval()
    for val_data in dataloader_val:
        # transfer data to device
        periodogram = data[0].to(device)
        # forward pass
        outputs = modelConvAE(periodogram)
        # calculate the loss
        loss = criterion(outputs, periodogram)
        # update running val loss
        val_loss.append(loss.item())
    
    # print training/validation statistics 
    # calculate average loss over an epoch
    train_loss = np.average(train_loss)
    val_loss = np.average(val_loss)
    avg_train_loss.append(train_loss)
    avg_val_loss.append(val_loss)
    
    epoch_len = len(str(num_epochs))
    # print avg training statistics 
    print(f'[{epoch:>{epoch_len}}/{num_epochs:>{epoch_len}}] train_loss: {train_loss:.5f} valid_loss: {val_loss:.5f}')
    
    # clear batch losses
    train_loss = []
    val_loss = []


fig = plt.figure(figsize=(6,8))
plt.plot(avg_train_loss, 'r', label="train loss")
plt.plot(avg_val_loss, 'b', label="validation loss")
plt.legend(loc='best')
plt.grid(True)
plt.show()

[ 1/50] train_loss: 0.10458 valid_loss: 0.03559
[ 2/50] train_loss: 0.03472 valid_loss: 0.03722
[ 3/50] train_loss: 0.03350 valid_loss: 0.03620
[ 4/50] train_loss: 0.03220 valid_loss: 0.03345
[ 5/50] train_loss: 0.03072 valid_loss: 0.03188
[ 6/50] train_loss: 0.02984 valid_loss: 0.02684
[ 7/50] train_loss: 0.02934 valid_loss: 0.02951
[ 8/50] train_loss: 0.02898 valid_loss: 0.02875
[ 9/50] train_loss: 0.02872 valid_loss: 0.02679
[10/50] train_loss: 0.02822 valid_loss: 0.02948
[11/50] train_loss: 0.02701 valid_loss: 0.02636
[12/50] train_loss: 0.02668 valid_loss: 0.02404
[13/50] train_loss: 0.02650 valid_loss: 0.02652
[14/50] train_loss: 0.02634 valid_loss: 0.02483
[15/50] train_loss: 0.02621 valid_loss: 0.02849
[16/50] train_loss: 0.02606 valid_loss: 0.02200
[17/50] train_loss: 0.02594 valid_loss: 0.02921
[18/50] train_loss: 0.02581 valid_loss: 0.02512
[19/50] train_loss: 0.02572 valid_loss: 0.02785
[20/50] train_loss: 0.02561 valid_loss: 0.02215
[21/50] train_loss: 0.02554 valid_loss: 

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [None]:
avg_train_loss

In [None]:
import matplotlib.pyplot as plt

counter = 0
with torch.no_grad():
    loss_test = 0
    for data in dataloader_test:
        periodograms_test = data[0].to(device)
        output = modelConvAE(periodograms_test)
        for idx, output_elem in enumerate(output):
            loss_test += criterion(periodograms_test[idx], output_elem)

loss_test = loss_test/len(scaled_ae_data)
print(f'Final test loss: {loss_test}')

# Plot and visualize (MFCC vs AE)

In [None]:
import matplotlib.pyplot as plt

with torch.no_grad():
    encode_data = pd_ae_data['periodogram'].values.tolist()
    encode_data_tensor = torch.Tensor(encode_data).to(device)
    output = [model.encoder(encode_data_tensor).cpu().numpy()][0]


In [None]:
%matplotlib widget
import matplotlib.pyplot as plt
import random

idx = random.randint(0, len(hives_data) - 1)
with torch.no_grad():
    fig, axs = plt.subplots(2, 1)
    frequencies = librosa.fft_frequencies(sr=sample_rate, n_fft=nfft)
    freq_slice = np.where((frequencies < fmax))
    frequencies = frequencies[freq_slice]
    times = (np.arange(0, spectrogram_magnitude.shape[1])*hop_len)/sample_rate    
    
    elem = scaled_ae_data[idx]
    elem = elem[None, None,: ,:]
    elem = torch.Tensor(elem)

    axs[0].pcolormesh(times, frequencies, scaled_ae_data[idx])
    axs[1].pcolormesh(times, frequencies, modelConvAE(elem.to(device)).cpu().numpy().squeeze())

# Dimension reduction - now we perform t-SNE and PCA to visualize the data

In [None]:
from sklearn.manifold import TSNE

reduced_ae_tsne = TSNE(n_components=2, perplexity=100, learning_rate=500, verbose=1).fit_transform(output)
reduced_mfcc_tsne = TSNE(n_components=2, perplexity=100, learning_rate=500, verbose=1).fit_transform(pd_ae_data['mfcc'].values.tolist())

In [None]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

mfccs_standarized = StandardScaler().fit_transform(pd_ae_data['mfcc'].values.tolist())
ae_standarized = StandardScaler().fit_transform(output)
reduced_ae_pca = PCA(n_components = 2).fit_transform(ae_standarized)
reduced_mfcc_pca = PCA(n_components = 2).fit_transform(mfccs_standarized)

In [None]:
import matplotlib.pyplot as plt

pca = PCA().fit(ae_standarized)

plt.figure()
plt.plot(np.cumsum(pca.explained_variance_ratio_))
plt.xlabel('number of components')
plt.ylabel('cumulative explained variance');

In [None]:
%matplotlib widget

import matplotlib.pyplot as plt
import numpy as np
from scipy.fftpack import fft, fftfreq

RECORD_TIME = 2
SAMPLE_RATE = 44100

data = hives_data[-10][2][:, 0]/(2.0**31)
datetime = hives_data[-1][0]
fft_data = abs(fft(data))
freqs = fftfreq(int(len(fft_data)/2), 1/SAMPLE_RATE)

fig, axs = plt.subplots(2)
fig.tight_layout(pad=3.0)
axs[0].set_title(f"Sound recording at {datetime} ({RECORD_TIME}s)")
axs[0].grid()
axs[0].set_xlabel('Time [sec]')
axs[0].plot(np.linspace(0, 2, len(data)), data)

axs[1].set_title("Periodogram")
axs[1].set_xticks(np.arange(0, (freqs.size/2), step=100))
axs[1].set_xticklabels(np.arange(0, (freqs.size/2), step=100, dtype=int), rotation=45)
axs[1].grid()
axs[1].set_xlabel('Frequency [Hz]')
axs[1].plot(freqs[1:1500], fft_data[1:1500], 'r')