# Atmospheric Data - preparation 

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Identifiers of the hives to process; each one names a folder under measurements/.
hives_ids = [
    1300001,
    1300002,
    1400001,
    1400002,
]
# Gate for the atmospheric loading cell: it only runs when this flag is truthy.
DATA_INIT = True

In [3]:
if DATA_INIT:
    # Raw per-hive sensor exports, one DataFrame per entry of hives_ids.
    dfh_hives = [pd.read_csv(f"measurements/{hive_id}/humidity.csv") for hive_id in hives_ids]
    dft_hives = [pd.read_csv(f"measurements/{hive_id}/temperature.csv") for hive_id in hives_ids]

    # keep=False discards *every* row whose timestamp occurs more than once
    # (not just the repeats), so ambiguous readings are removed entirely.
    # NOTE(review): confirm that dropping all copies is intended.
    dfh_hivesWithoutDuplicates = [dfh_hive.drop_duplicates(subset=['timestamp'], keep=False) for dfh_hive in dfh_hives]
    dft_hivesWithoutDuplicates = [dft_hive.drop_duplicates(subset=['timestamp'], keep=False) for dft_hive in dft_hives]

    for hive_id, dfh_hive, dft_hive in zip(hives_ids, dfh_hivesWithoutDuplicates, dft_hivesWithoutDuplicates):
        print(f"Hive no. {hive_id} | humidity temperature dataset size : {dfh_hive.shape} {dft_hive.shape}")

    # Inner join on timestamp: only instants present in both sensors survive.
    df_hive = [
        pd.merge(dfh_hive, dft_hive, on='timestamp',
                 suffixes=(f"_humidity_{hive_id}", f"_temperature_{hive_id}"))
        for hive_id, dfh_hive, dft_hive in zip(hives_ids, dfh_hivesWithoutDuplicates, dft_hivesWithoutDuplicates)
    ]

    total = 0
    for atmosphere_data in df_hive:
        atmosphere_data['timestamp'] = pd.to_datetime(atmosphere_data['timestamp'], format='%Y-%m-%dT%H-%M-%S')
        atmosphere_data.set_index('timestamp', inplace=True)
        # Sort the frame itself. Assigning a sorted Series back to a column is
        # re-aligned on the row index by pandas and therefore reorders nothing.
        atmosphere_data.sort_index(inplace=True)
        print(f"Atmospheric data after merge: {atmosphere_data.shape}")
        total += atmosphere_data.shape[0]

    print(f"Total atmoshpere dataset size: {total}")


Hive no. 1300001 | humidity temperature dataset size : (6776, 2) (6786, 2)
Hive no. 1300002 | humidity temperature dataset size : (6865, 2) (6867, 2)
Hive no. 1400001 | humidity temperature dataset size : (2444, 2) (2457, 2)
Hive no. 1400002 | humidity temperature dataset size : (1948, 2) (1949, 2)
Atmospheric data after merge: (6696, 2)
Atmospheric data after merge: (6778, 2)
Atmospheric data after merge: (2418, 2)
Atmospheric data after merge: (1936, 2)
Total atmoshpere dataset size: 17828


# Sound Data - preparation

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import glob
import pandas as pd
from tqdm import tqdm
from sklearn import preprocessing

In [None]:
def _load_sound_samples(pattern):
    """Read every CSV matching ``pattern`` and return the usable recordings.

    A recording is kept only when it has exactly 3000 rows and the largest
    value in its 'samples' column is below 4500.

    Args:
        pattern: glob pattern of the sound CSV files to read.

    Returns:
        A list with one 'samples' value array per accepted file.
    """
    recordings = []
    for file in tqdm(glob.glob(pattern)):
        samples = pd.read_csv(file)['samples'].values
        if len(samples) == 3000 and samples.max() < 4500:
            recordings.append(samples)
    return recordings


scaler = preprocessing.MinMaxScaler()

# Forward slashes keep the patterns portable (the atmospheric cell uses them
# too); backslash-separated patterns only match on Windows.
# NOTE(review): the scaler is re-fitted for each source, so the two classes are
# normalised with different statistics - confirm this is intended.
sound_hive_list = _load_sound_samples("measurements/1300001/sound*.csv")
hiveSound = np.array(scaler.fit_transform(sound_hive_list), dtype="float32")
# Label [1, 0] marks a recording taken from source 1300001.
hiveSound = list(zip(hiveSound, [np.eye(2)[0] for _ in sound_hive_list]))

sound_outdoor_list = _load_sound_samples("measurements/1300002/sound*.csv")
outdoorSound = np.array(scaler.fit_transform(sound_outdoor_list), dtype="float32")
# Label [0, 1] marks a recording taken from source 1300002.
outdoorSound = list(zip(outdoorSound, [np.eye(2)[1] for _ in sound_outdoor_list]))

sound_labeled = hiveSound + outdoorSound

print("Size of hive sound data: ", len(hiveSound))
print("Size of outdoor sound data: ", len(outdoorSound))

np.random.shuffle(sound_labeled)
# Each entry pairs a 3000-value vector with a 2-value label, which is ragged.
# Recent NumPy refuses to build such an array implicitly, so request an object
# array explicitly before saving.
np.save("sound_data_training.npy", np.array(sound_labeled, dtype=object))

In [None]:
# Reload the shuffled (samples, one-hot label) pairs written by the preparation cell.
# allow_pickle=True lets NumPy restore arrays that hold Python objects; unpickling
# can execute arbitrary code, so only load files produced by this notebook.
training_data_sound = np.load("sound_data_training.npy", allow_pickle=True)

# AUTOENCODER - BASIC

In [None]:
# Pick the GPU when PyTorch reports CUDA as available, otherwise the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# Hyperparameters read by the autoencoder training cell.
num_epochs, learning_rate = 50, 1e-4

class autoencoder(nn.Module):
    """Fully connected autoencoder for 3000-value inputs.

    The encoder narrows 3000 -> 2048 -> 1024 -> 512 -> 256 and the decoder
    mirrors it back to 3000, finishing with Tanh so outputs lie in [-1, 1].
    NOTE(review): the sound data is min-max scaled upstream; confirm Tanh
    (rather than a [0, 1]-bounded activation) is the intended output range.
    """

    @staticmethod
    def _dense_stack(widths):
        """Return Linear layers for consecutive widths, joined by in-place ReLU."""
        layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            if layers:
                # Activation sits between Linear layers, never after the last one.
                layers.append(nn.ReLU(True))
            layers.append(nn.Linear(fan_in, fan_out))
        return layers

    def __init__(self):
        super().__init__()
        encoder_widths = [3000, 2048, 1024, 512, 256]
        decoder_widths = encoder_widths[::-1]
        self.encoder = nn.Sequential(*self._dense_stack(encoder_widths))
        self.decoder = nn.Sequential(*self._dense_stack(decoder_widths), nn.Tanh())

    def forward(self, x):
        """Encode ``x`` to the 256-value code and decode it back to 3000 values."""
        return self.decoder(self.encoder(x))

In [None]:
import torch.utils.data as utils

# Stack the per-recording arrays into one ndarray before handing them to torch.
# Building a tensor straight from a Python list of ndarrays converts element by
# element and is very slow.
sound_features = torch.as_tensor(np.stack([x[0] for x in training_data_sound]), dtype=torch.float32)
sound_labels = torch.as_tensor(np.stack([x[1] for x in training_data_sound]), dtype=torch.float32)
sound_dataset = utils.TensorDataset(sound_features, sound_labels)

print("Length of complete sound dataset is", len(sound_dataset))
# Mini-batches of 20 recordings, reshuffled every epoch.
sound_trainset = utils.DataLoader(sound_dataset, batch_size=20, shuffle=True)

In [None]:
# Train the autoencoder to reproduce its own input (labels are ignored).
model = autoencoder().to(device)  # run on the device selected above
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-6)

for epoch in range(num_epochs):
    epoch_loss_sum = 0.0
    samples_seen = 0
    for X, y in tqdm(sound_trainset):
        X = X.to(device)
        optimizer.zero_grad()
        output = model(X)
        loss = criterion(output, X)
        loss.backward()
        optimizer.step()
        # Weight by batch size so a smaller final batch does not skew the mean.
        epoch_loss_sum += loss.item() * X.size(0)
        samples_seen += X.size(0)
    # ===================log========================
    # Report the mean over the whole epoch (not just the last batch), 1-based.
    print(f"epoch [{epoch + 1}/{num_epochs}], loss:{epoch_loss_sum / max(samples_seen, 1)}")

# Return the weights to the CPU: the other cells feed CPU tensors to `model`.
model.cpu()

In [None]:
# Persist only the model's parameters (its state_dict), not the module object.
torch.save(model.state_dict(), 'autoencoder-basic-model.pth')

### Evaluation

In [None]:
# Restore the saved parameters into the existing model. map_location='cpu'
# lets a checkpoint written from GPU tensors load on a machine without CUDA;
# load_state_dict then copies the values onto whatever device `model` is on.
model.load_state_dict(torch.load('autoencoder-basic-model.pth', map_location='cpu'))

In [None]:
# The autoencoder outputs a 3000-value reconstruction, not class scores, so
# comparing argmax(output) with the one-hot label cannot work. Score the model
# by its reconstruction error instead, using the same MSE it was trained on.
eval_device = next(model.parameters()).device
squared_error_sum = 0.0
values_seen = 0

with torch.no_grad():
    for X, y in tqdm(sound_trainset):
        X = X.to(eval_device)
        output = model(X)
        # Sum over all elements here and divide once at the end, so batches
        # of different sizes are weighted correctly.
        squared_error_sum += F.mse_loss(output, X, reduction='sum').item()
        values_seen += X.numel()

print("Mean reconstruction MSE on training data: ", squared_error_sum / max(values_seen, 1))

# AUTOENCODER CNN

In [None]:
# Scratch check of tail slicing: print the last three items of the list.
a = [1, 2, 3, 4, 5, 6, 7, 7, 8, 98, 90, 0]
print(a[len(a) - 3:])

# PyTorch

# ---------------


In [None]:
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms, datasets

In [None]:
# Build the MNIST training split first, then the test split. Both download on
# demand into the "" root and convert every image with ToTensor.
train, test = (
    datasets.MNIST("", train=is_train, download=True,
                   transform=transforms.Compose([transforms.ToTensor()]))
    for is_train in (True, False)
)

In [None]:
# Print the training dataset's string representation as a quick sanity check.
print(train)

In [None]:
# Wrap each split in a loader that yields shuffled mini-batches of 20 samples.
trainset = torch.utils.data.DataLoader(
    train,
    batch_size=20,
    shuffle=True,
)
testset = torch.utils.data.DataLoader(
    test,
    batch_size=20,
    shuffle=True,
)

In [None]:
class Net(nn.Module):
    """Fully connected classifier: 784 inputs -> 64 -> 64 -> 64 -> 10 log-probabilities."""

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(28*28, 64)
        self.fc2 = nn.Linear(64, 64)
        self.fc3 = nn.Linear(64, 64)
        self.fc4 = nn.Linear(64, 10)

    def forward(self, x):
        """Return per-class log-probabilities for a (batch, 784) input."""
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return F.log_softmax(x, dim=1)

    def weight_reset(self):
        """Re-initialise every nn.Linear layer reachable from this module.

        Called as ``net.weight_reset()`` the receiver is the whole network, so
        testing only the receiver's own type never matched and nothing was
        reset. Walking ``modules()`` resets all linear layers, and still works
        when the receiver is itself a single ``nn.Linear``.
        """
        for module in self.modules():
            if isinstance(module, nn.Linear):
                module.reset_parameters()


net = Net()


In [None]:
import torch.optim as optim

# Adam over all of net's parameters; train for at most EPOCHS passes.
optimizer = optim.Adam(net.parameters(), lr=1e-3)
EPOCHS = 6

# Ask the network to re-initialise itself before training starts.
net.weight_reset()

for epoch in range(EPOCHS):
    for X, y in trainset:
        # The optimizer holds exactly net's parameters, so this clears the same gradients.
        optimizer.zero_grad()
        # Flatten each 28x28 image into a 784-value row.
        output = net(X.view(-1, 28 * 28))
        loss = F.nll_loss(output, y)
        loss.backward()
        optimizer.step()
    # `loss` is the loss of the last mini-batch of this epoch.
    print("Current loss is: ", loss.item())
    if loss < 1e-5:
        print("Loss threshold obtained!")
        break
        

In [None]:
correct = 0
total = 0

# Scoring only: no gradients are needed.
with torch.no_grad():
    for images, labels in testset:
        # Predicted class = index of the largest log-probability in each row.
        predicted = net(images.view(-1, 784)).argmax(dim=1)
        correct += (predicted == labels).sum().item()
        total += len(predicted)

print("Accuracy on test data: ", round(correct/total, 3))

In [None]:
correct = 0
total = 0

# Same scoring as for the test split, applied to the training loader.
with torch.no_grad():
    for images, labels in trainset:
        # Take the arg-max class per row and count matches for the whole batch at once.
        predicted = net(images.view(-1, 784)).argmax(dim=1)
        correct += (predicted == labels).sum().item()
        total += len(predicted)

print("Accuracy on train data: ", round(correct/total, 3))