In [7]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
import glob

# Client identifiers of the hives to analyse; each id is interpolated into the
# glob pattern that locates that hive's .wav recordings.
hives_ids = ["smrpiclient7", "smrpiclient6", "smrpiclient3"]

In [2]:
import torch.nn as nn
import torch.nn.functional as F

# define the NN architecture
class ConvAutoencoder(nn.Module):
    """Small convolutional autoencoder for single-channel 2-D inputs.

    Encoder: two 3x3 convolutions (1 -> 16 -> 4 channels), each followed by
    ReLU and a 2x2 max-pool, so height and width are halved twice.
    Decoder: two stride-2 transposed convolutions (4 -> 16 -> 1 channels) that
    double height and width twice; a final sigmoid squashes outputs to [0, 1].
    """

    def __init__(self):
        super().__init__()
        # Encoder convolutions; padding=1 keeps H and W unchanged for 3x3 kernels.
        self.conv1 = nn.Conv2d(1, 16, 3, padding=1)
        self.conv2 = nn.Conv2d(16, 4, 3, padding=1)
        # One pooling module reused after both convolutions (kernel 2, stride 2).
        self.pool = nn.MaxPool2d(2, 2)
        # Decoder: kernel 2 with stride 2 doubles the spatial dimensions.
        self.t_conv1 = nn.ConvTranspose2d(4, 16, 2, stride=2)
        self.t_conv2 = nn.ConvTranspose2d(16, 1, 2, stride=2)

    def forward(self, x):
        """Return the reconstruction of ``x`` after an encode/decode round trip."""
        # Encode: conv -> ReLU -> pool, twice.
        encoded = self.pool(F.relu(self.conv1(x)))
        encoded = self.pool(F.relu(self.conv2(encoded)))  # compressed representation

        # Decode: ReLU on the hidden layer, sigmoid on the output layer.
        decoded = F.relu(self.t_conv1(encoded))
        return F.sigmoid(self.t_conv2(decoded))

In [3]:
import torch
from torch import nn

class autoencoder_basic(nn.Module):
    """Fully connected autoencoder for flat feature vectors.

    The encoder compresses ``input_dim`` features down to a 64-dimensional
    code (input_dim -> 512 -> 128 -> 64); the decoder mirrors it
    (64 -> 128 -> 512 -> input_dim). Hidden layers use in-place SELU and the
    output layer uses a sigmoid, so reconstructions lie in [0, 1].

    Args:
        input_dim: Length of each input vector. Defaults to 1499, the size
            that was hard-coded originally.
    """

    def __init__(self, input_dim=1499):
        super(autoencoder_basic, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 512),
            nn.SELU(True),
            nn.Linear(512, 128),
            nn.SELU(True),
            nn.Linear(128, 64),
            nn.SELU(True))
        self.decoder = nn.Sequential(
            nn.Linear(64, 128),
            nn.SELU(True),
            nn.Linear(128, 512),
            nn.SELU(True),
            # Must consume the 512 features produced by the layer above;
            # an in_features of 128 here makes forward() fail with a shape error.
            nn.Linear(512, input_dim),
            nn.Sigmoid())

    def forward(self, x):
        """Encode ``x`` to the 64-d code and decode it back to ``input_dim`` values."""
        x = self.encoder(x)
        x = self.decoder(x)
        return x


## Load training data samples

In [105]:
import struct
import math
import numpy as np

from tqdm import tqdm
from datetime import datetime
from scipy.io import wavfile
from scipy import signal
from scipy.fftpack import fft

import matplotlib.pyplot as plt
%matplotlib widget

# Spectrogram settings. The segment length passed to the spectrogram is
# segment_time_ms / sound_time_ms of each recording's sample count, and
# frequency bins above fmax are discarded.
sound_time_ms = 2000
segment_time_ms = 200
fmax = 3000

# Accumulator for one record per recording.
hives_data = []
# Assigned here but not read anywhere in this cell.
max_to_norm = 0
for idx, hive_id in enumerate(hives_ids):
    # NOTE(review): the pattern uses Windows path separators, so it presumably
    # only matches on Windows — confirm before running elsewhere.
    sound_files = [f for f in glob.glob(f"..\\measurements\\smartulav2\\{hive_id}_*\\*.wav")]
    print(f"Sound data preparation for hive: {hive_id} which has {len(sound_files)} recordings...", end=' ', flush=True)
    for file in tqdm(sound_files):
        sample_rate, sound_samples = wavfile.read(file)
        # Take the first row of the transposed array and divide by 2**31.
        # NOTE(review): presumably selects the first channel of 32-bit integer
        # PCM and scales it to [-1, 1) — verify against the recordings.
        sound_samples = sound_samples.T[0]/(2.0**31)
        print(f"Sound samples len: {len(sound_samples)} with samples rate: {sample_rate}")
        print(f"Number of samples per segment: {math.floor(segment_time_ms*len(sound_samples)/sound_time_ms)}")
        frequencies, times, spectrogram = signal.spectrogram(sound_samples, sample_rate,
                                                             nperseg=math.floor(segment_time_ms*len(sound_samples)/sound_time_ms),
                                                             window=('hamming'))
        # Keep only the bins at or below fmax. np.where returns a tuple, and
        # indexing with it adds a leading axis that the trailing [0] removes.
        freq_slice = np.where((frequencies <= fmax))
        frequencies = frequencies[freq_slice]
        spectrogram = spectrogram[freq_slice, :][0]
        print(len(frequencies))
        print((spectrogram.shape))

        # NOTE(review): this break stops after the first file, so the four
        # statements below never run and hives_data stays empty. They also
        # reference `fft_data`, which this cell never defines, and rebind the
        # name `datetime`, which would shadow the imported class on the next
        # iteration — fix both before removing the break.
        break
        filename = file.rsplit('\\', 1)[-1]
        timestamp = filename[filename.index('-')+1:].rsplit(".wav")[0]
        datetime = datetime.strptime(timestamp, '%Y-%m-%dT%H-%M-%S')
        hives_data.append([datetime, hive_id, sound_samples, fft_data])
    # Plot the spectrogram left over from the last processed file of this hive.
    plt.pcolormesh(times, frequencies, spectrogram)
    plt.ylabel('Frequency [Hz]')
    plt.xlabel('Time [sec]')
    plt.show()
    # NOTE(review): second break — only the first hive is visited and the
    # " done." message below is never printed.
    break
    print(" done.")

print(f"Got {len(hives_data)} sound samples ")
# # Normalize
# hives_data = [[data[0], data[1], data[2]] for data in hives_data]

Sound data preparation for hive: smrpiclient7 which has 3367 recordings... 

  0%|                                                                                                                                                                                                               | 0/3367 [00:00<?, ?it/s]

Sound samples len: 88200 with samples rate: 44100
Number of samples per segment: 8820
601
(601, 11)





Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Got 0 sound samples 


  plt.pcolormesh(times, frequencies, spectrogram)


## Prepare data for autoencoder training

In [15]:
import librosa
import pandas as pd

from scipy import signal as sig
from sklearn.preprocessing import MinMaxScaler
                      
# One row per recording. Each hives_data entry is read positionally:
# [1] hive id, [2] sound samples, [3] periodogram.
pd_data = pd.DataFrame(
    {
        'samples': [record[2] for record in hives_data],
        'periodogram': [record[3] for record in hives_data],
        'sn': [record[1] for record in hives_data],
    }
)

# MinMaxScaler scales column-wise, so the matrix is transposed on the way in
# and out: every periodogram ends up scaled to [0, 1] on its own.
periodogram_scaler = MinMaxScaler()
pd_data['periodogram'] = list(
    periodogram_scaler.fit_transform(np.array(pd_data['periodogram'].tolist()).T).T
)

# Optional MFCC feature column (disabled):
# pd_data['mfcc'] = list([np.mean(librosa.feature.mfcc(sample, sr=3000, n_fft=150, hop_length=75, n_mfcc=14), axis=1)
#                            for sample in tpd_data['samples'].to_numpy()])

# Optional row filters (disabled); currently every hive is kept:
# pd_ae_data = pd_data[(pd_data['sn'] == 'smrpiclient7')]
# pd_ae_data = pd_data[(pd_data['sn'] == 1300001)
#                         | (pd_data['sn'] == 1400001)
#                         | (pd_data['sn'] == 1400002)]

# Shuffle all rows (unseeded) and renumber the index from zero.
pd_ae_data = pd_data.sample(frac=1).reset_index(drop=True)

In [14]:
# Display the last rows of the shuffled frame as a quick sanity check.
pd_ae_data.tail()

Unnamed: 0,samples,periodogram,sn
7136,"[-0.14960726769641042, -0.1493848362006247, -0...","[0.3205124081023819, 0.15923276278448897, 0.11...",smrpiclient6
7137,"[-0.14167435513809323, -0.141726931091398, -0....","[1.0, 0.4366336158776749, 0.31181935933731103,...",smrpiclient7
7138,"[-0.15721441665664315, -0.15710320137441158, -...","[0.9438849481011324, 0.4754277805104857, 0.479...",smrpiclient6
7139,"[-0.1556573980487883, -0.1556573980487883, -0....","[1.0, 0.4542248835710911, 0.3304965097732763, ...",smrpiclient6
7140,"[-0.1603951840661466, -0.16035069758072495, -0...","[0.3139936408310009, 0.18356474968882697, 0.08...",smrpiclient6


In [17]:
from scipy import signal as sig
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader
from torch.utils import data as tdata

# Split the shuffled rows 90 % / 10 % into train and test sets
# (no separate validation set is created).
train_stop_idx = len(pd_ae_data) * 90 // 100

pd_ae_data_train, pd_ae_data_test = pd_ae_data[:train_stop_idx], pd_ae_data[train_stop_idx:]

print(f'Train data size: {len(pd_ae_data_train)}')
print(f'Test data size: {len(pd_ae_data_test)}')

# Stack each split's periodograms into one float tensor.
tensor_train, tensor_test = (
    torch.Tensor(split['periodogram'].values.tolist())
    for split in (pd_ae_data_train, pd_ae_data_test)
)

# Single-tensor datasets: every batch is a one-element sequence.
train_dataset = tdata.TensorDataset(tensor_train)
test_dataset = tdata.TensorDataset(tensor_test)

loader_options = dict(batch_size=32, shuffle=True)
dataloader = DataLoader(train_dataset, **loader_options)
dataloader_test = DataLoader(test_dataset, **loader_options)

Train data size: 6426
Test data size: 715


## Train basic AE

In [19]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
from torch.autograd import Variable
from torchvision import transforms
import torch.nn.functional as F

# Hyper-parameters for the fully connected autoencoder.
num_epochs = 1000
learning_rate = 1e-3

model = autoencoder_basic().to(device)
criterion = nn.MSELoss()  # defined but unused below; the loop optimises BCE
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)

for epoch in range(num_epochs):
    batch_losses = []
    for batch in dataloader:
        periodogram = batch[0].to(device)
        # Forward pass: reconstruct the batch and score it against itself.
        reconstruction = model(periodogram)
        train_loss = F.binary_cross_entropy(reconstruction, periodogram)
        # Backward pass and parameter update.
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        batch_losses.append(train_loss.item())
    # Mean of the per-batch losses for this epoch.
    loss = sum(batch_losses) / len(dataloader)
    # Report whenever the epoch number is a multiple of a tenth of the run.
    if (epoch+1) % (num_epochs/10) == 0:
        print(f'epoch {epoch + 1}/{num_epochs}, loss:{loss}')

## Train CONV AE

In [21]:
# Hyper-parameters for the convolutional autoencoder.
num_epochs = 30
learning_rate = 1e-3

modelConvAE = ConvAutoencoder().to(device)
criterion = nn.MSELoss()
# The optimizer has to be built from the ConvAE's own parameters; binding it to
# `model` (the fully connected autoencoder) would leave modelConvAE untrained.
optimizer = torch.optim.Adam(modelConvAE.parameters(), lr=learning_rate, weight_decay=1e-5)

In [57]:
# Training loop for modelConvAE, currently short-circuited for debugging: it
# prints the shape of the first batch and then leaves both loops.
for epoch in range(1, num_epochs+1):
    # Running loss for this epoch.
    train_loss = 0.0

    ###################
    # train the model #
    ###################
    for data in dataloader:
        # Debug probe: show the batch shape, then stop.
        # NOTE(review): everything below this break is unreachable. The shape
        # printed for this loader is [32, 1499], which is not a 4-D
        # (batch, channel, height, width) input — presumably the ConvAE needs
        # spectrogram batches instead; confirm before removing the break.
        print(data[0].shape)
        break
        periodogram = data[0].to(device)
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        outputs = modelConvAE(periodogram)
        # calculate the loss
        loss = criterion(outputs, periodogram)
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer.step()
        # accumulate the batch loss weighted by the batch size
        train_loss += loss.item()*periodogram.size(0)
    # NOTE(review): second debug break — the statistics below are never printed.
    break
    # NOTE(review): the sum above is weighted per sample but divided by the
    # number of batches here; verify which average is intended.
    train_loss = train_loss/len(dataloader)
    print('Epoch: {} \tTraining Loss: {:.6f}'.format(epoch, train_loss))

torch.Size([32, 1499])


In [29]:
import torch
import numpy as np
from torchvision import datasets
import torchvision.transforms as transforms

# convert data to torch.FloatTensor
# ToTensor turns every image into a torch.FloatTensor.
transform = transforms.ToTensor()
# MNIST training split, downloaded into ../MNIST-data when not already there.
train_data = datasets.MNIST(
    root='../MNIST-data',
    train=True,
    download=True,
    transform=transform,
)
# Batches of 20 images, loaded in the main process (no worker processes).
train_loader = torch.utils.data.DataLoader(
    train_data,
    batch_size=20,
    num_workers=0,
)

In [54]:
# Fetch only the first batch to see the tensor layout the loader yields.
first_batch = next(iter(train_loader), None)
if first_batch is not None:
    print((first_batch[0]).shape)

torch.Size([20, 1, 28, 28])


## Test

In [None]:
import matplotlib.pyplot as plt

# Evaluate the trained fully connected autoencoder on the held-out periodograms.
model.eval()
loss_test = 0.0
with torch.no_grad():
    for data in dataloader_test:
        periodograms_test = data[0].to(device)
        output = model(periodograms_test)
        # binary_cross_entropy takes (input, target): the reconstruction goes
        # first and the ground truth second, the same order used in training.
        batch_loss = F.binary_cross_entropy(output, periodograms_test)
        # All samples have the same length, so the batch mean times the batch
        # size equals the sum of the per-sample mean losses.
        loss_test += batch_loss.item() * periodograms_test.size(0)

# Average over the number of test samples.
loss_test = loss_test/len(pd_ae_data_test)
print(f'Final test loss: {loss_test}')


Plot and visualize (MFCC vs AE)

In [None]:
import matplotlib.pyplot as plt

# Encode every periodogram with the trained encoder. Gradients are not needed,
# and the codes are moved back to the CPU as a NumPy array.
encode_data = pd_ae_data['periodogram'].values.tolist()
with torch.no_grad():
    encode_data_tensor = torch.Tensor(encode_data).to(device)
    output = model.encoder(encode_data_tensor).cpu().numpy()


In [None]:
%matplotlib widget
import matplotlib.pyplot as plt

# Index of the sample whose reconstruction is inspected.
idx = 708

# Decode the stored 64-d code of that sample back into a periodogram.
with torch.no_grad():
    reconstruction = model.decoder(torch.Tensor(output[idx]).to(device)).cpu().numpy()

# One figure only: calling plt.figure() twice would leave an extra empty canvas.
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(encode_data[idx], 'r', label='input periodogram')
ax.plot(reconstruction, 'b', label='AE reconstruction')
ax.set_xlabel('Periodogram bin')
ax.set_ylabel('Scaled magnitude')
ax.set_title(f'Autoencoder reconstruction of sample {idx}')
ax.legend();

Dimension reduction - now we perform t-SNE and PCA to visualize the data

In [None]:
from sklearn.manifold import TSNE

# Identical t-SNE settings for both feature sets so the embeddings are comparable.
tsne_settings = dict(n_components=2, perplexity=100, learning_rate=500, verbose=1)

reduced_ae_tsne = TSNE(**tsne_settings).fit_transform(output)
# NOTE(review): the only visible code that creates the 'mfcc' column is
# commented out, so this lookup presumably relies on stale kernel state — confirm.
reduced_mfcc_tsne = TSNE(**tsne_settings).fit_transform(pd_ae_data['mfcc'].values.tolist())

In [None]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# Standardise both feature sets (zero mean, unit variance per feature).
mfccs_standarized, ae_standarized = (
    StandardScaler().fit_transform(features)
    for features in (pd_ae_data['mfcc'].values.tolist(), output)
)

# Project each standardised set onto its first two principal components.
reduced_ae_pca, reduced_mfcc_pca = (
    PCA(n_components=2).fit_transform(standardised)
    for standardised in (ae_standarized, mfccs_standarized)
)

In [None]:
import matplotlib.pyplot as plt

# Fit a full PCA on the standardised codes and plot how much variance the
# first k components explain together.
pca = PCA().fit(ae_standarized)
cumulative_variance = np.cumsum(pca.explained_variance_ratio_)

plt.figure()
plt.plot(cumulative_variance)
plt.xlabel('number of components')
plt.ylabel('cumulative explained variance');

In [None]:
from sklearn.ensemble import IsolationForest

# Isolation forest on the 2-D PCA projection of the autoencoder codes;
# contamination is the expected fraction of outliers (0.5 %).
# The former `behaviour` keyword is intentionally not passed: it has been
# removed from scikit-learn and raises a TypeError on current releases.
clf = IsolationForest(
    n_estimators=100,
    max_samples='auto',
    contamination=0.005,
    max_features=1.0,
    bootstrap=False,
    n_jobs=-1,
    random_state=43,
    verbose=0,
)

clf.fit(reduced_ae_pca)
clf.get_params(deep=True)

In [None]:
# Label every projected sample with the fitted forest; the scatter plot below
# highlights the entries equal to -1.
output_forest = clf.predict(reduced_ae_pca)

In [None]:
%matplotlib notebook

import matplotlib.pyplot as plt

# 2x2 comparison grid: t-SNE projections in the top row, PCA projections in the
# bottom row; autoencoder codes on the left, MFCC features on the right.
fig, axs = plt.subplots(2, 2, figsize=(10,8))
fig.subplots_adjust(hspace=0.4)

# Row indices of pd_data per serial number.
# NOTE(review): the 'sn' values displayed earlier in this notebook are strings
# such as 'smrpiclient6', so these integer comparisons presumably select
# nothing — confirm which identifiers are intended.
# NOTE(review): the projections are computed from pd_ae_data (a shuffled,
# re-indexed copy), while these indices come from pd_data — verify that the
# row order really lines up.
idx_1300001 = pd_data[pd_data['sn'] == 1300001].index.values.tolist()
idx_1300002 = pd_data[pd_data['sn'] == 1300002].index.values.tolist()
idx_1400001 = pd_data[pd_data['sn'] == 1400001].index.values.tolist()
idx_1400002 = pd_data[pd_data['sn'] == 1400002].index.values.tolist()

# Top-left: t-SNE of the autoencoder codes, restricted to idx_1300001.
l1_ae_tsne = axs[0][0].scatter([data[0] for idx, data in enumerate(reduced_ae_tsne) if idx in idx_1300001],
                 [data[1] for idx, data in enumerate(reduced_ae_tsne) if idx in idx_1300001], c='r', alpha = 0.3)
# l2_ae_tsne = axs[0][0].scatter([data[0] for idx, data in enumerate(reduced_ae_tsne) if idx in idx_1300002],
#                  [data[1] for idx, data in enumerate(reduced_ae_tsne) if idx in idx_1300002], c='b', alpha = 0.3)
# l3_ae_tsne = axs[0][0].scatter([data[0] for idx, data in enumerate(reduced_ae_tsne) if idx in idx_1400001],
#                  [data[1] for idx, data in enumerate(reduced_ae_tsne) if idx in idx_1400001], c='g', alpha = 0.3)
# l4_ae_tsne = axs[0][0].scatter([data[0] for idx, data in enumerate(reduced_ae_tsne) if idx in idx_1400002],
#                  [data[1] for idx, data in enumerate(reduced_ae_tsne) if idx in idx_1400002], c='y', alpha = 0.3)

# Top-right: t-SNE of the MFCC features, restricted to idx_1300001.
l1_mfcc_tsne = axs[0][1].scatter([data[0] for idx, data in enumerate(reduced_mfcc_tsne) if idx in idx_1300001],
                 [data[1] for idx, data in enumerate(reduced_mfcc_tsne) if idx in idx_1300001], c='r', alpha = 0.3)
# l2_mfcc_tsne = axs[0][1].scatter([data[0] for idx, data in enumerate(reduced_mfcc_tsne) if idx in idx_1300002],
#                  [data[1] for idx, data in enumerate(reduced_mfcc_tsne) if idx in idx_1300002], c='b', alpha = 0.3)
# l3_mfcc_tsne = axs[0][1].scatter([data[0] for idx, data in enumerate(reduced_mfcc_tsne) if idx in idx_1400001],
#                  [data[1] for idx, data in enumerate(reduced_mfcc_tsne) if idx in idx_1400001], c='g', alpha = 0.3)
# l4_mfcc_tsne = axs[0][1].scatter([data[0] for idx, data in enumerate(reduced_mfcc_tsne) if idx in idx_1400002],
#                  [data[1] for idx, data in enumerate(reduced_mfcc_tsne) if idx in idx_1400002], c='y', alpha = 0.3)

# axs[0][0].legend((l1_ae_tsne, l2_ae_tsne, l3_ae_tsne, l4_ae_tsne), ('Sikorki', 'Not Hive', 'Sulmin 1', 'Sulmin 2'),
#                  loc='upper right')
axs[0][0].set_xlabel('DIM1')
axs[0][0].set_ylabel('DIM2')
axs[0][0].set_title('Hives - AE sounds with TSNE')

# axs[0][1].legend((l1_mfcc_tsne, l2_mfcc_tsne, l3_mfcc_tsne, l4_mfcc_tsne), ('Sikorki', 'Not Hive', 'Sulmin 1', 'Sulmin 2'),
#                  loc='upper right')
axs[0][1].set_xlabel('DIM1')
axs[0][1].set_ylabel('DIM2')
axs[0][1].set_title('Hives - MFCC sounds with TSNE')

# Bottom-left: PCA of the autoencoder codes, restricted to idx_1300001 ...
l1_ae_pca = axs[1][0].scatter([data[0] for idx, data in enumerate(reduced_ae_pca) if idx in idx_1300001],
                 [data[1] for idx, data in enumerate(reduced_ae_pca) if idx in idx_1300001], c='r', alpha = 0.3)
# ... with every sample that the isolation forest labelled -1 drawn on top in blue.
markers = axs[1][0].scatter([data[0] for idx, data in enumerate(reduced_ae_pca) if output_forest[idx] == -1],
                           [data[1] for idx, data in enumerate(reduced_ae_pca) if output_forest[idx] == -1], c='b', alpha = 0.9)
# l2_ae_pca = axs[1][0].scatter([data[0] for idx, data in enumerate(reduced_ae_pca) if idx in idx_1300002],
#                  [data[1] for idx, data in enumerate(reduced_ae_pca) if idx in idx_1300002], c='b', alpha = 0.3)
# l3_ae_pca = axs[1][0].scatter([data[0] for idx, data in enumerate(reduced_ae_pca) if idx in idx_1400001],
#                  [data[1] for idx, data in enumerate(reduced_ae_pca) if idx in idx_1400001], c='g', alpha = 0.3)
# l4_ae_pca = axs[1][0].scatter([data[0] for idx, data in enumerate(reduced_ae_pca) if idx in idx_1400002],
#                  [data[1] for idx, data in enumerate(reduced_ae_pca) if idx in idx_1400002], c='y', alpha = 0.3)

# Bottom-right: PCA of the MFCC features, restricted to idx_1300001.
l1_mfcc_pca = axs[1][1].scatter([data[0] for idx, data in enumerate(reduced_mfcc_pca) if idx in idx_1300001],
                 [data[1] for idx, data in enumerate(reduced_mfcc_pca) if idx in idx_1300001], c='r', alpha = 0.3)
# l2_mfcc_pca = axs[1][1].scatter([data[0] for idx, data in enumerate(reduced_mfcc_pca) if idx in idx_1300002],
#                  [data[1] for idx, data in enumerate(reduced_mfcc_pca) if idx in idx_1300002], c='b', alpha = 0.3)
# l3_mfcc_pca = axs[1][1].scatter([data[0] for idx, data in enumerate(reduced_mfcc_pca) if idx in idx_1400001],
#                  [data[1] for idx, data in enumerate(reduced_mfcc_pca) if idx in idx_1400001], c='g', alpha = 0.3)
# l4_mfcc_pca = axs[1][1].scatter([data[0] for idx, data in enumerate(reduced_mfcc_pca) if idx in idx_1400002],
#                  [data[1] for idx, data in enumerate(reduced_mfcc_pca) if idx in idx_1400002], c='y', alpha = 0.3)

# axs[1][0].legend((l1_ae_pca, l2_ae_pca, l3_ae_pca, l4_ae_pca), ('Sikorki', 'Not Hive', 'Sulmin 1', 'Sulmin 2'),
#                  loc='upper right')
axs[1][0].set_xlabel('PC1')
axs[1][0].set_ylabel('PC2')
axs[1][0].set_title('Hives - AE sounds with PCA')

# axs[1][1].legend((l1_mfcc_pca, l2_mfcc_pca, l3_mfcc_pca, l4_mfcc_pca), ('Sikorki', 'Not Hive', 'Sulmin 1', 'Sulmin 2'),
#                   loc='upper right')
axs[1][1].set_xlabel('PC1')
axs[1][1].set_ylabel('PC2')
axs[1][1].set_title('Hives - MFCC sounds with PCA')



In [None]:
# Save pyplot's current figure to disk.
# NOTE(review): this relies on the figure from the preceding cell still being
# the current one and on the output-ae/ directory already existing — confirm.
plt.savefig('output-ae/basic-2warstwy-128-32.png')

In [None]:
%matplotlib widget

import matplotlib.pyplot as plt
import numpy as np
from scipy.fftpack import fft, fftfreq

RECORD_TIME = 2       # duration of one recording in seconds
SAMPLE_RATE = 44100   # sampling frequency in Hz
RECORD_IDX = -10      # entry of hives_data to inspect
N_PLOT_BINS = 1500    # plot FFT bins 1 .. N_PLOT_BINS-1 (the DC bin is skipped)

# Read the waveform and its timestamp from the SAME record, so the title
# describes the data that is actually plotted.
# NOTE(review): assumes hives_data[i][2] is the raw 2-D sample array whose first
# column is taken and scaled by 2**31 — TODO confirm against the loading cell.
record = hives_data[RECORD_IDX]
data = record[2][:, 0]/(2.0**31)
# Named recorded_at so the imported `datetime` class is not shadowed.
recorded_at = record[0]
fft_data = abs(fft(data))
# Bin k of an N-point FFT lies at k * SAMPLE_RATE / N, so fftfreq needs the full
# signal length; passing half of it doubles every frequency on the axis.
freqs = fftfreq(len(data), 1/SAMPLE_RATE)

# Stay inside the non-negative half of the spectrum for short signals.
n_bins = min(N_PLOT_BINS, len(data)//2)
plot_freqs = freqs[1:n_bins]

fig, axs = plt.subplots(2)
fig.tight_layout(pad=3.0)
axs[0].set_title(f"Sound recording at {recorded_at} ({RECORD_TIME}s)")
axs[0].grid()
axs[0].set_xlabel('Time [sec]')
axs[0].plot(np.linspace(0, RECORD_TIME, len(data)), data)

axs[1].set_title("Periodogram")
# Ticks every 100 Hz across the plotted range only; ticks beyond the data would
# stretch the axis limits far past the curve.
tick_positions = np.arange(0, (plot_freqs[-1] if plot_freqs.size else 0) + 1, step=100)
axs[1].set_xticks(tick_positions)
axs[1].set_xticklabels(tick_positions.astype(int), rotation=45)
axs[1].grid()
axs[1].set_xlabel('Frequency [Hz]')
axs[1].plot(plot_freqs, fft_data[1:n_bins], 'r')