In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
import glob

hives_ids = ["smrpiclient7", "smrpiclient6", "smrpiclient3"]


Here we load train data sound samples and prepare spectrogram

In [2]:
import struct
import math
import numpy as np
import librosa
import librosa.display

from tqdm import tqdm
from datetime import datetime
from scipy.io import wavfile
from scipy import signal
from scipy.fftpack import fft

import matplotlib.pyplot as plt
%matplotlib widget

sound_time_ms = 2000
# ~93 ms for fft window
nfft = 4096
# ~34% overlapping
hop_len = (nfft//3) + 30
# This can be manipulated to adjust number of bins for conv layer
fmax = 2750

hives_data = []
max_to_norm = 0
for idx, hive_id in enumerate(hives_ids):
    sound_files = [f for f in glob.glob(f"..\\measurements\\smartulav2\\{hive_id}_*\\*.wav")]
    print(f"Sound data preparation for hive: {hive_id} which has {len(sound_files)} recordings...", end=' ', flush=True)
    for file in tqdm(sound_files):
        sample_rate, sound_samples = wavfile.read(file)
        sound_samples = sound_samples.T[0]/(2.0**31)
    
        spectrogram = librosa.core.stft(sound_samples, n_fft=nfft, hop_length=hop_len)
        spectrogram_magnitude = np.abs(spectrogram)
        spectrogram_phase = np.angle(spectrogram)
        spectrogram_db = librosa.amplitude_to_db(spectrogram_magnitude, ref=np.max)
        frequencies = librosa.fft_frequencies(sr=sample_rate, n_fft=nfft)
        times = (np.arange(0, spectrogram_magnitude.shape[1])*hop_len)/sample_rate
        
        freq_slice = np.where((frequencies < fmax))
        frequencies = frequencies[freq_slice]
        spectrogram_db = spectrogram_db[freq_slice, :][0]    
    
        filename = file.rsplit('\\', 1)[-1]
        utc_timestamp = filename[filename.index('-')+1:].rsplit(".wav")[0]
        utc_datetime = datetime.strptime(utc_timestamp, '%Y-%m-%dT%H-%M-%S')
        hives_data.append([utc_datetime, hive_id, sound_samples, [frequencies, times, spectrogram_db]])
    print(" done.")

print(f"Got {len(hives_data)} sound samples")

Sound data preparation for hive: smrpiclient7 which has 3367 recordings... 

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3367/3367 [00:42<00:00, 78.38it/s]

 done.
Sound data preparation for hive: smrpiclient6 which has 3172 recordings... 


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3172/3172 [00:41<00:00, 76.46it/s]

 done.
Sound data preparation for hive: smrpiclient3 which has 602 recordings... 


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 602/602 [00:07<00:00, 75.72it/s]

 done.
Got 7141 sound samples





In [3]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

sc = StandardScaler()
mm = MinMaxScaler()

spectrogram_ae_data = [hive_data[3][2] for hive_data in hives_data]
standarized_ae_data = [sc.fit_transform(spec.T).T for spec in spectrogram_ae_data]
scaled_ae_data = [mm.fit_transform(stan.T).T for stan in standarized_ae_data]
utc_timestamps = [pd.Timestamp(hive_data[0]) for hive_data in hives_data]
names = [hive_data[1] for hive_data in hives_data]

data_to_analyze = list(zip(scaled_ae_data, utc_timestamps, names))

print(f"Got dataset of size: {len(scaled_ae_data)}")

Got dataset of size: 7141


## Prepare data for autoencoder train.

In [None]:
import random 

random_idx = random.randint(0, len(hives_data) - 1)

plot_spectrogram(hives_data[random_idx][3][0],
                 hives_data[random_idx][3][1],
                 hives_data[random_idx][3][2],
                 f"hive: {hives_data[random_idx][1]}, time: {hives_data[random_idx][0]}, idx: {random_idx}") 

## Train basic AE

In [None]:
from scipy import signal as sig
from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader
from torch.utils import data as tdata

# Divide data to test, validation and train
train_stop_idx = int(pd_ae_data.shape[0]*90/100)

pd_ae_data_train = pd_ae_data[:train_stop_idx]
pd_ae_data_test = pd_ae_data[train_stop_idx:]

print(f'Train data size: {pd_ae_data_train.shape[0]}')
print(f'Test data size: {pd_ae_data_test.shape[0]}')

tensor_train = torch.Tensor(pd_ae_data_train['periodogram'].values.tolist())
tensor_test = torch.Tensor(pd_ae_data_test['periodogram'].values.tolist())

train_dataset = tdata.TensorDataset(tensor_train)
test_dataset = tdata.TensorDataset(tensor_test)

dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
dataloader_test = DataLoader(test_dataset, batch_size=32, shuffle=True)

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
from torch.autograd import Variable
from torchvision import transforms
import torch.nn.functional as F

num_epochs = 1000
learning_rate = 1e-3

model = autoencoder_basic().to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5)

for epoch in range(num_epochs):
    loss = 0
    for data in dataloader:
        periodogram = data[0].to(device)
        # ===================forward=====================
        output = model(periodogram)
        #train_loss = criterion(output, periodogram)
        train_loss = F.binary_cross_entropy(output, periodogram)
        # ===================backward====================
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        loss += train_loss.item()
    loss = loss / len(dataloader)
    if (epoch+1) % (num_epochs/10) == 0:
        print(f'epoch {epoch + 1}/{num_epochs}, loss:{loss}')

In [None]:
import matplotlib.pyplot as plt

counter = 0
with torch.no_grad():
    loss_test = 0
    for data in dataloader_test:
        periodograms_test = data[0].to(device)
        output = model(periodograms_test)
        for idx, i in enumerate(output):
            #loss_test += nn.MSELoss()(periodograms_test[idx], i)
            loss_test += F.binary_cross_entropy(periodograms_test[idx], i)

loss_test = loss_test/len(pd_ae_data_test)
print(f'Final test loss: {loss_test}')


## Train CONV AE

In [62]:
import torch 
from torch.utils import data as tdata

train_data_size = len(scaled_ae_data)*80//100
val_data_size = len(scaled_ae_data) - train_data_size

dataset_tensor = torch.Tensor(scaled_ae_data)
print(f"Dataset shape: {dataset_tensor.shape}")
print(f"Train set size: {train_data_size}")
print(f"Validation set size: {val_data_size}")

# add one extra dimension as it is required for conv layer
dataset_tensor = dataset_tensor[:, None, :, :] 
dataset = tdata.TensorDataset(dataset_tensor)
train_set, val_set = torch.utils.data.random_split(dataset, [train_data_size, val_data_size])

dataloader_train = tdata.DataLoader(train_set, batch_size=32, shuffle=True)
dataloader_val = tdata.DataLoader(val_set, batch_size=32, shuffle=True)

Dataset shape: torch.Size([7141, 256, 64])
Train set size: 5712
Validation set size: 1429


In [61]:
del modelConvAE

In [63]:
%matplotlib widget

import sys
import matplotlib.pyplot as plt

num_epochs = 50
learning_rate = 1e-3

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# del modelConvAE

modelConvAE = ConvAutoencoder().to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(modelConvAE.parameters(), lr=learning_rate, weight_decay=1e-6)

# monitor training loss per batch
train_loss = []
# monitor validation loss per batch
val_loss = []
# save avg train losses for early stopping visualization
avg_train_loss = []
# save avg train losses for early stopping visualization
avg_val_loss = [] 
# patience when stop training
patience = 10
# counter for patience in early sotpping
patience_counter = 0
# best validation score
best_val_loss = -1
# model checkpoint filename
checkpoint_filename = 'checkpoint.pth'
# early stopping epoch
win_epoch = 0
    
for epoch in range(1, num_epochs+1):    
    ###################
    # train the model #
    ###################
    modelConvAE.train()
    for data in dataloader_train:
        # transfer data to device
        periodogram = data[0].to(device)
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass
        outputs = modelConvAE(periodogram)
        # calculate the loss
        loss = criterion(outputs, periodogram)
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer.step()
        # update running training loss
        train_loss.append(float(loss.item()))
        
    ###################
    # val the model   #
    ###################
    modelConvAE.eval()
    for val_data in dataloader_val:
        # transfer data to device
        periodogram_val = val_data[0].to(device)
        # forward pass
        voutputs = modelConvAE(periodogram_val)
        # calculate the loss
        vloss = criterion(voutputs, periodogram_val)
        # update running val loss
        val_loss.append(float(vloss.item()))
    
    # print training/validation statistics 
    # calculate average loss over an epoch
    train_loss = np.average(train_loss)
    val_loss = np.average(val_loss)
    avg_train_loss.append(train_loss)
    avg_val_loss.append(val_loss)
    
    epoch_len = len(str(num_epochs))
    # print avg training statistics 
    print(f'[{epoch:>{epoch_len}}/{num_epochs:>{epoch_len}}] train_loss: {train_loss:.5f} valid_loss: {val_loss:.5f}', end=' ', flush=True)
    
    if val_loss < best_val_loss or best_val_loss == -1:
        # new checkpoint
        print("checkpoint!")
        best_val_loss = val_loss
        patience_counter = 0
        torch.save(modelConvAE.state_dict(), checkpoint_filename)
        win_epoch = epoch
    elif patience_counter >= patience:
        print("early stopping.")
        print(f"=> loading checkpoint {checkpoint_filename}")
        device = torch.device("cuda")
        modelConvAE.load_state_dict(torch.load(checkpoint_filename))
        break
    else:
        print(".")
        patience_counter = patience_counter + 1
        
    # clear batch losses
    train_loss = []
    val_loss = []

fig = plt.figure(figsize=(10,5))
plt.plot(np.arange(1, epoch + 1), avg_train_loss, 'r', label="train loss")
plt.plot(np.arange(1, epoch + 1), avg_val_loss, 'b', label="validation loss")
plt.axvline(win_epoch, linestyle='--', color='g',label='Early Stopping Checkpoint')
plt.legend(loc='best')
plt.grid(True)
plt.show()

[ 1/50] train_loss: 0.06397 valid_loss: 0.04382 checkpoint!
[ 2/50] train_loss: 0.04381 valid_loss: 0.04170 checkpoint!
[ 3/50] train_loss: 0.04192 valid_loss: 0.04072 checkpoint!
[ 4/50] train_loss: 0.04101 valid_loss: 0.04069 checkpoint!
[ 5/50] train_loss: 0.04027 valid_loss: 0.04006 checkpoint!
[ 6/50] train_loss: 0.03974 valid_loss: 0.04149 .
[ 7/50] train_loss: 0.03951 valid_loss: 0.04138 .
[ 8/50] train_loss: 0.03924 valid_loss: 0.03971 checkpoint!
[ 9/50] train_loss: 0.03897 valid_loss: 0.04038 .
[10/50] train_loss: 0.03886 valid_loss: 0.03971 checkpoint!
[11/50] train_loss: 0.03869 valid_loss: 0.04031 .
[12/50] train_loss: 0.03858 valid_loss: 0.04115 .
[13/50] train_loss: 0.03848 valid_loss: 0.03912 checkpoint!
[14/50] train_loss: 0.03839 valid_loss: 0.03866 checkpoint!
[15/50] train_loss: 0.03841 valid_loss: 0.04030 .
[16/50] train_loss: 0.03818 valid_loss: 0.03941 .
[17/50] train_loss: 0.03821 valid_loss: 0.03934 .
[18/50] train_loss: 0.03810 valid_loss: 0.03980 .
[19/50] tr

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

# Encode data

In [None]:
encoded_data = conv2d_encode(modelConvAE, scaled_ae_data)

In [None]:
%matplotlib widget
import matplotlib.pyplot as plt
import random

idx = random.randint(0, len(hives_data) - 1)
with torch.no_grad():
    fig, axs = plt.subplots(2, 1)
    frequencies = librosa.fft_frequencies(sr=sample_rate, n_fft=nfft)
    freq_slice = np.where((frequencies < fmax))
    frequencies = frequencies[freq_slice]
    times = (np.arange(0, spectrogram_magnitude.shape[1])*hop_len)/sample_rate    
    
    elem = scaled_ae_data[idx]
    elem = elem[None, None,: ,:]
    elem = torch.Tensor(elem)

    axs[0].pcolormesh(times, frequencies, scaled_ae_data[idx])
    axs[1].pcolormesh(times, frequencies, modelConvAE(elem.to(device)).cpu().numpy().squeeze())

# Add temperature/humidity/gas

In [209]:
hive_under_analysis = hives_ids[0]
start_time = '2020-08-10 00:00:00'
end_time = '2020-09-16 00:00:00'
print(f"extracting data for hive under analysis: {hive_under_analysis} from {start_time} to {end_time}...")

df_hives_sound = pd.DataFrame(data_to_analyze, columns=['spectrogram', 'utc_timestamp', 'name'])
df_hive_sound_ua = df_hives_sound[(df_hives_sound['name'] == hive_under_analysis)
                                 & (df_hives_sound['utc_timestamp'] > start_time)
                                 & (df_hives_sound['utc_timestamp'] < end_time)]
df_hive_sound_ua.set_index('utc_timestamp', inplace=True)
print(f"-> prepared base of {df_hive_sound_ua.count()['spectrogram']} numer of sound spectrum <-")

df_hive_temperature_ua = read_sensor_data('..//measurements//smartulav2//sulmin-10082020-15092020-inside-temperature.csv', hive_under_analysis, start_time, end_time, 'temperature')
df_hive_humidity_ua = read_sensor_data('..//measurements//smartulav2//sulmin-10082020-15092020-inside-humidity.csv', hive_under_analysis, start_time, end_time, 'humidity')
df_hive_alcohol_ua = read_sensor_data('..//measurements//smartulav2//sulmin-10082020-15092020-inside-alcohol.csv', hive_under_analysis, start_time, end_time, 'alcohol')
df_hive_aceton_ua = read_sensor_data('..//measurements//smartulav2//sulmin-10082020-15092020-inside-aceton.csv', hive_under_analysis, start_time, end_time, 'aceton')
df_hive_amon_ua = read_sensor_data('..//measurements//smartulav2//sulmin-10082020-15092020-inside-jon-amonowy.csv', hive_under_analysis, start_time, end_time, 'jon-amonowy')
df_hive_toluen_ua = read_sensor_data('..//measurements//smartulav2//sulmin-10082020-15092020-inside-toluen.csv', hive_under_analysis, start_time, end_time, 'toluen')
df_hive_co2_ua = read_sensor_data('..//measurements//smartulav2//sulmin-10082020-15092020-inside-co2.csv', hive_under_analysis, start_time, end_time, 'co2')
df_hive_siarkowodor_ua = read_sensor_data('..//measurements//smartulav2//sulmin-10082020-15092020-inside-siarkowodor.csv', hive_under_analysis, start_time, end_time, 'siarkowodor')
df_hive_metanotiol_ua = read_sensor_data('..//measurements//smartulav2//sulmin-10082020-15092020-inside-metanotiol.csv', hive_under_analysis, start_time, end_time, 'metanotiol')
df_hive_trimetyloamina_ua = read_sensor_data('..//measurements//smartulav2//sulmin-10082020-15092020-inside-trimetyloamina.csv', hive_under_analysis, start_time, end_time, 'trimetyloamina')
df_hive_wodor_ua = read_sensor_data('..//measurements//smartulav2//sulmin-10082020-15092020-inside-wodor.csv', hive_under_analysis, start_time, end_time, 'wodor')
df_hive_co_ua = read_sensor_data('..//measurements//smartulav2//sulmin-10082020-15092020-inside-co.csv', hive_under_analysis, start_time, end_time, 'co')

print("merging data to sound samples...")
df_hive_data = merge_dataframes_ontimestamp(df_hive_sound_ua,
                                            df_hive_temperature_ua, df_hive_humidity_ua,
                                            df_hive_alcohol_ua, df_hive_aceton_ua, df_hive_amon_ua, df_hive_toluen_ua, df_hive_co2_ua,
                                            df_hive_siarkowodor_ua, df_hive_metanotiol_ua, df_hive_trimetyloamina_ua, df_hive_wodor_ua,
                                            df_hive_co_ua)

print(f"encoding sound samples with autoencoder...", end=' ')
df_hive_data['conv_ae'] = conv2d_encode(modelConvAE, df_hive_data['spectrogram'].to_list())
print(f"finish!")

df_hive_data['feature_vector'] = merge_columns(df_hive_data, ['humidity', 'temperature',
                                                'alcohol', 'aceton', 'jon-amonowy', 'toluen', 'co2', 
                                                'siarkowodor', 'metanotiol', 'trimetyloamina', 'wodor',
                                                'co',
                                                'conv_ae'])

extracting data for hive under analysis: smrpiclient7 from 2020-08-10 00:00:00 to 2020-09-16 00:00:00...
-> prepared base of 3367 numer of sound spectrum <-
got 3488 of temperature samples
got 3488 of humidity samples
got 5152 of alcohol samples
got 5152 of aceton samples
got 5152 of jon-amonowy samples
got 5152 of toluen samples
got 5152 of co2 samples
got 5141 of siarkowodor samples
got 5141 of metanotiol samples
got 5141 of trimetyloamina samples
got 5141 of wodor samples
got 5124 of co samples
merging data to sound samples...
encoding sound samples with autoencoder... finish!


# SVM classification 

In [345]:
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

night_start = 23
night_end = 3

df_svm = pd.DataFrame(df_hive_data[['feature_vector', 'name']])
is_night_array = (df_svm.index.hour <= night_end) | (df_svm.index.hour >= night_start)
df_svm['is_night'] = is_night_array
df_svm['feature_vector'] = (pd.Series(StandardScaler().fit_transform(df_svm['feature_vector'].values.tolist()).tolist())).values

svc = SVC(kernel='rbf', class_weight='balanced', gamma='auto')
svc.fit(df_svm['feature_vector'].values.tolist(), df_svm['is_night'].values.tolist())
predicted = svc.predict(df_svm['feature_vector'].values.tolist())

predicted_list = np.array(list(map(int, predicted)))
initial_list = np.array(list(map(int, df_svm['is_night'].values.tolist())))

accuracy = 100 - (np.sum(np.abs(predicted_list-initial_list))/len(predicted)*100)
print(f"Total accuracy: {accuracy}")

Total accuracy: 73.98277398277398


In [348]:
df_svm.reset_index()

Unnamed: 0,utc_timestamp,feature_vector,name,is_night
0,2020-08-10 00:03:48,"[-1.333153987296419, 0.5980583360639828, 0.650...",smrpiclient7,True
1,2020-08-10 00:18:52,"[-1.1558990588515046, 0.5717782793435893, 0.59...",smrpiclient7,True
2,2020-08-10 00:33:56,"[-1.093002148758148, 0.3202405935912525, 0.590...",smrpiclient7,True
3,2020-08-10 00:49:01,"[-1.1406513230712971, 0.37092356012343913, 0.5...",smrpiclient7,True
4,2020-08-10 01:04:05,"[-1.38461509555462, 0.4553951710104187, 0.5303...",smrpiclient7,True
...,...,...,...,...
3362,2020-09-15 16:43:20,"[-0.9500546258187007, 1.5084745867347567, 2.27...",smrpiclient7,False
3363,2020-09-15 16:58:24,"[-1.0472589414175244, 1.4033543598531826, 1.37...",smrpiclient7,False
3364,2020-09-15 17:13:28,"[-1.073942479032888, 1.2118853751760308, 1.251...",smrpiclient7,False
3365,2020-09-15 17:28:33,"[-1.1101558515108816, 1.0373107126762742, 1.01...",smrpiclient7,False


In [351]:
df_svm.to_csv('..//measurements//temp2.csv')

# Dimension reduction - now we perform t-SNE and PCA to visualize the data

In [None]:
from sklearn.manifold import TSNE

reduced_ae_tsne = TSNE(n_components=2, perplexity=100, learning_rate=500, verbose=1).fit_transform(output)
reduced_mfcc_tsne = TSNE(n_components=2, perplexity=100, learning_rate=500, verbose=1).fit_transform(pd_ae_data['mfcc'].values.tolist())

In [None]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

mfccs_standarized = StandardScaler().fit_transform(pd_ae_data['mfcc'].values.tolist())
ae_standarized = StandardScaler().fit_transform(output)
reduced_ae_pca = PCA(n_components = 2).fit_transform(ae_standarized)
reduced_mfcc_pca = PCA(n_components = 2).fit_transform(mfccs_standarized)

In [None]:
import matplotlib.pyplot as plt

pca = PCA().fit(ae_standarized)

plt.figure()
plt.plot(np.cumsum(pca.explained_variance_ratio_))
plt.xlabel('number of components')
plt.ylabel('cumulative explained variance');

In [None]:
%matplotlib widget

import matplotlib.pyplot as plt
import numpy as np
from scipy.fftpack import fft, fftfreq

RECORD_TIME = 2
SAMPLE_RATE = 44100

data = hives_data[-10][2][:, 0]/(2.0**31)
datetime = hives_data[-1][0]
fft_data = abs(fft(data))
freqs = fftfreq(int(len(fft_data)/2), 1/SAMPLE_RATE)

fig, axs = plt.subplots(2)
fig.tight_layout(pad=3.0)
axs[0].set_title(f"Sound recording at {datetime} ({RECORD_TIME}s)")
axs[0].grid()
axs[0].set_xlabel('Time [sec]')
axs[0].plot(np.linspace(0, 2, len(data)), data)

axs[1].set_title("Periodogram")
axs[1].set_xticks(np.arange(0, (freqs.size/2), step=100))
axs[1].set_xticklabels(np.arange(0, (freqs.size/2), step=100, dtype=int), rotation=45)
axs[1].grid()
axs[1].set_xlabel('Frequency [Hz]')
axs[1].plot(freqs[1:1500], fft_data[1:1500], 'r')

# ----- Functions/Classes -----

plotting sepctrogram by fucntion 

In [None]:
import matplotlib.pyplot as plt

%matplotlib widget

def plot_spectrogram(frequency, time_x, spectrocgram, title):
    fig = plt.figure(figsize=(6,4))
    plt.title(title)
    plt.pcolormesh(time_x, frequency, spectrocgram)
    plt.ylabel('Frequency [Hz]')
    plt.xlabel('Time [sec]')
    plt.show()

convolutional autoencoder

In [54]:
import torch.nn as nn
import torch.nn.functional as F

class View(nn.Module):
    """ Function for nn.Sequentional to reshape data """
    def __init__(self, shape):
        super(View, self).__init__()
        self.shape = shape

    def forward(self, x):
        return x.view(*self.shape)

def conv2d_block(in_f, out_f, *args, **kwargs):
    """ Function for building convolutional block

        Attributes
            in_f - number of input features
            out_f - number of output features
    """
    return nn.Sequential(
        nn.Conv2d(in_f, out_f, *args, **kwargs),
        nn.BatchNorm2d(out_f),
        nn.ReLU(),
        nn.Dropout2d(p=0.2)
    )

def conv2d_transpose_block(in_f, out_f, *args, **kwargs):
    """ Function for building transpose convolutional block
        
        Attributes
            in_f - number of input features
            out_f - number of output features
    """
    return nn.Sequential(
        nn.ConvTranspose2d(in_f, out_f, *args, **kwargs),
        nn.BatchNorm2d(out_f),
        nn.ReLU(),
        nn.Dropout2d(p=0.2)
    )

######################################
#                                    #
#   Main convolutional autoencoder   #
#                                    #
######################################
class ConvAutoencoder(nn.Module):
    def __init__(self):
        super(ConvAutoencoder, self).__init__()
        
        ## encoder layers ##
        self.encoder = nn.Sequential(
            # [1x256x64] => [64x256x64]
            conv2d_block(1, 64, kernel_size=3, padding=1),
            # [64x256x64] => [64x128x32]
            nn.MaxPool2d(2, 2),
            # [64x128x32] => [32x128x32]
            conv2d_block(64, 32, kernel_size=3, padding=1),
            # [32x128x32] => [32x64x16]
            nn.MaxPool2d(2, 2),
            # [32x64x16] => [16x64x16]
            conv2d_block(32, 16, kernel_size=3, padding=1),
            # [16x64x16] => [16x32x8]
            nn.MaxPool2d(2, 2),
            # [16x32x8] => [4x32x8]
            conv2d_block(16, 4, kernel_size=3, padding=1),
            # [4x32x8] => [4x16x4]
            nn.MaxPool2d(2, 2),
            # [4x16x4] => [1x256]
            nn.Flatten(),
            # [1x256] => [1x64]
            nn.Linear(256, 64),
            nn.ReLU()
        )
        
        ## decoder layers ##
        self.decoder = nn.Sequential(
            # [1x64] => [1x256]
            nn.Linear(64, 256),
            nn.ReLU(),
            # [1x256] => [4x16x4]
            View([-1, 4, 16, 4]),
            # [4x16x4] => [16x32x8]
            conv2d_transpose_block(4, 16, kernel_size=2, stride=2),
            # [16x32x8] => [32x64x16]
            conv2d_transpose_block(16, 32, kernel_size=2, stride=2),
            # [32x64x16] => [64x128x32]
            conv2d_transpose_block(32, 64, kernel_size=2, stride=2),
            # [64x128x32] => [1x256x64]
            nn.ConvTranspose2d(64, 1, kernel_size=2, stride=2),
            nn.Sigmoid()
        )


    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        
        return x

basic fully connected autoencoder

In [None]:
import torch
from torch import nn

class autoencoder_basic(nn.Module):
    def __init__(self):
        super(autoencoder_basic, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(1499, 512),
            nn.SELU(True),
            nn.Linear(512, 128),
            nn.SELU(True),
            nn.Linear(128, 64),
            nn.SELU(True))
        self.decoder = nn.Sequential(
            nn.Linear(64, 128),
            nn.SELU(True),
            nn.Linear(128, 512),
            nn.SELU(True),
            nn.Linear(128, 1499),
            nn.Sigmoid())

    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

function for extracting ecoded data from trained model

In [65]:
def conv2d_encode(model, data_intput):
    """ Function for encoding data and returning encoded """
    dataset_tensor = torch.Tensor(data_intput)
    dataset_tensor = dataset_tensor[:, None, :, :]
    dataset_tensor = tdata.TensorDataset(dataset_tensor)
    dataset = tdata.DataLoader(dataset_tensor, batch_size=32, shuffle=True)
    encoded_data = []
    
    model.eval()
    with torch.no_grad():
        for data in dataset:
            periodograms = data[0].to(device)
            output = modelConvAE.encoder(periodograms).cpu().numpy().squeeze()
            encoded_data.extend(output)
    
    return encoded_data

In [34]:
import pandas as pd

def read_sensor_data(filename, hive_sn, start_time, end_time, sensor_column_name):
    """ Function for reading smartula sensor file (from grafana) and build pandas dataframe """
    df_sensor_data = pd.read_csv(filename, skiprows=1, sep=";")
    
    if hive_sn not in hives_ids:
        print(f"Hive {hive_sn} is not in hives_ids set! Returning empty dataframe")
        return pd.DataFrame()
    
    # change series column to be coherent with sounds
    for hive in hives_ids:
        df_sensor_data.loc[df_sensor_data['Series'].str.contains(hive[2:]), 'Series'] = hive

    # change column names to match sound
    df_sensor_data.columns = ['name', 'utc_timestamp', sensor_column_name]
    # convert timestamp to pandas timestamp
    df_sensor_data['utc_timestamp'] = pd.to_datetime(df_sensor_data['utc_timestamp'].astype(str).str[:-6], format='%Y-%m-%dT%H:%M:%S')
    
    df_sensor_data = df_sensor_data[(df_sensor_data['name'] == hive_sn) & (df_sensor_data['utc_timestamp'] > start_time) & (df_sensor_data['utc_timestamp'] < end_time)]
    df_sensor_data.set_index('utc_timestamp', inplace=True)
    print(f"got {df_sensor_data[sensor_column_name].count()} of {sensor_column_name} samples")
    
    return df_sensor_data

def merge_dataframes_ontimestamp(df_merge_to, *args):
    """ Merging dataframes to df_merge_to """
    df_hive_data_ua = df_merge_to
    for dataframe in args:
        df_hive_data_ua = pd.merge(df_hive_data_ua, dataframe.reindex(df_hive_data_ua.index, method='nearest'), on=['utc_timestamp', 'name'])
        
    return df_hive_data_ua

flatten util

In [208]:
import collections

def flatten(x):
    if isinstance(x, collections.abc.Iterable):
        return [a for i in x for a in flatten(i)]
    else:
        return [x]
    
def merge_columns(dataframe, column_names):
    """ Function for merging columns with irregular size """
    return [flatten(val) for val in  dataframe[column_names].values.tolist()]
    