# Pytorch Autoencoder

### Bibliotheken

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split

### Datenaufbereitung

In [None]:
# Download the dataset
dataframe = pd.read_csv('http://storage.googleapis.com/download.tensorflow.org/data/ecg.csv', header=None)
print(dataframe.head(10))
labels = dataframe.iloc[:, -1].values
data = dataframe.iloc[:, 0:-1].values
# Train-, Test-Splitt
train_data, test_data, train_labels, test_labels = train_test_split(data, labels, test_size=0.3, random_state=21)
# Torch-Array
train_labels = torch.from_numpy(np.array(train_labels, dtype='int64')).ravel()
train_data = torch.from_numpy(np.array(train_data, dtype='float32'))
test_labels = torch.from_numpy(np.array(test_labels, dtype='int64')).ravel()
test_data = torch.from_numpy(np.array(test_data, dtype='float32'))
# Min-, Max-Scaler
min_val = torch.min(train_data)
max_val = torch.max(train_data)

train_data = (train_data - min_val) / (max_val - min_val)
test_data = (test_data - min_val) / (max_val - min_val)

        0         1         2         3         4         5         6    \
0 -0.112522 -2.827204 -3.773897 -4.349751 -4.376041 -3.474986 -2.181408   
1 -1.100878 -3.996840 -4.285843 -4.506579 -4.022377 -3.234368 -1.566126   
2 -0.567088 -2.593450 -3.874230 -4.584095 -4.187449 -3.151462 -1.742940   
3  0.490473 -1.914407 -3.616364 -4.318823 -4.268016 -3.881110 -2.993280   
4  0.800232 -0.874252 -2.384761 -3.973292 -4.338224 -3.802422 -2.534510   
5 -1.507674 -3.574550 -4.478011 -4.408275 -3.321242 -2.105171 -1.481048   
6 -0.297161 -2.766635 -4.102185 -4.589669 -4.219357 -3.650443 -2.300518   
7  0.446769 -1.507397 -3.187468 -4.507462 -4.604201 -3.636115 -2.311604   
8  0.087631 -1.753490 -3.304473 -4.704657 -4.686415 -3.611817 -2.267268   
9 -0.832281 -1.700368 -2.257301 -2.853671 -2.853301 -2.701487 -2.285726   

        7         8         9    ...       131       132       133       134  \
0 -1.818286 -1.250522 -0.477492  ...  0.792168  0.933541  0.796958  0.578621   
1 -0.992258 -0

### Normal-, Anomalous-Datenset

In [None]:
train_labels = train_labels.bool()                 # True and False
test_labels = test_labels.bool()                   # True and False

normal_train_data = train_data[train_labels]       # nur data
normal_test_data = test_data[test_labels]          # nur data

anomalous_train_data = train_data[~train_labels]
anomalous_test_data = test_data[~test_labels]

print(train_labels)
print(train_labels.shape)
print(normal_train_data.shape)
print(anomalous_train_data.shape)
print(normal_train_data)
print(anomalous_train_data)

tensor([False,  True,  True,  ...,  True,  True,  True])
torch.Size([3498])
torch.Size([2059, 140])
torch.Size([1439, 140])
tensor([[0.6904, 0.5696, 0.4886,  ..., 0.6001, 0.6144, 0.5490],
        [0.5479, 0.3175, 0.1942,  ..., 0.6879, 0.6435, 0.4178],
        [0.4297, 0.2905, 0.2481,  ..., 0.4669, 0.3874, 0.2770],
        ...,
        [0.5028, 0.2960, 0.1607,  ..., 0.6465, 0.6927, 0.6155],
        [0.6613, 0.5073, 0.3443,  ..., 0.6281, 0.5354, 0.5423],
        [0.3631, 0.1959, 0.2313,  ..., 0.6587, 0.6686, 0.5002]])
tensor([[0.5103, 0.3682, 0.2967,  ..., 0.5061, 0.5475, 0.5130],
        [0.5290, 0.4899, 0.4836,  ..., 0.2317, 0.2502, 0.4504],
        [0.5202, 0.4174, 0.3648,  ..., 0.4513, 0.4966, 0.5709],
        ...,
        [0.5324, 0.4437, 0.4283,  ..., 0.3895, 0.4198, 0.6029],
        [0.5899, 0.5213, 0.4700,  ..., 0.3557, 0.4057, 0.5473],
        [0.5239, 0.4297, 0.3870,  ..., 0.3893, 0.4072, 0.5136]])


### Modell

In [None]:
# Datenset
train_tensor = TensorDataset(train_data, train_data)
test_tensor = TensorDataset(test_data, test_data)
# DataLoader
train_dataloader = DataLoader(train_tensor, batch_size=32)
test_dataloader = DataLoader(test_tensor, batch_size=32)
# Model
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        # self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(140, 64),
            nn.Tanh(),
            nn.Linear(64, 32),
            nn.Tanh(),
            nn.Linear(32, 16),
            nn.Tanh(),
            #nn.Linear(16, 2),
            #nn.Tanh(),
            #nn.Linear(2, 16),
            nn.Tanh(),
            nn.Linear(16, 32),
            nn.Tanh(),
            nn.Linear(32, 64),
            nn.Tanh(),
            nn.Linear(64, 140)
        )

    def forward(self, x):
        # x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

model = NeuralNetwork()
model

NeuralNetwork(
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=140, out_features=64, bias=True)
    (1): Tanh()
    (2): Linear(in_features=64, out_features=32, bias=True)
    (3): Tanh()
    (4): Linear(in_features=32, out_features=16, bias=True)
    (5): Tanh()
    (6): Tanh()
    (7): Linear(in_features=16, out_features=32, bias=True)
    (8): Tanh()
    (9): Linear(in_features=32, out_features=64, bias=True)
    (10): Tanh()
    (11): Linear(in_features=64, out_features=140, bias=True)
  )
)

### Training

In [None]:
def train_loop(train_dataloader, model, loss_fn, optimizer):
    size = len(train_dataloader.dataset)
    for batch, (X, y) in enumerate(train_dataloader):
        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)
        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss, Train: {loss:>7f}  [{current:>5d}/{size:>5d}]")

### Test

In [None]:
def test_loop(test_dataloader, model, loss_fn):
    size = len(test_dataloader.dataset)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for batch, (X, y) in enumerate(test_dataloader):
            pred = model(X)
            test_loss += loss_fn(pred, y).item()

    test_loss /= size
    print(f"Avg loss, Test: {test_loss:>8f} \n")

### Parameter

In [None]:
learning_rate = 1e-02
epochs = 200

loss_fn = nn.L1Loss() # MAE
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

### Run Modell

In [None]:
# Start Modell
for t in range(epochs):                                            # Anzahl Epochen aus "epochs"
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)        # Funktion train_loop mit Parameter
    test_loop(test_dataloader, model, loss_fn)                     # Funktion test_loop mit Parameter
print("Done!")

Epoch 1
-------------------------------
loss, Train: 0.565128  [    0/ 3498]
loss, Train: 0.045342  [ 3200/ 3498]
Avg loss, Test: 0.001271 

Epoch 2
-------------------------------
loss, Train: 0.044513  [    0/ 3498]
loss, Train: 0.036781  [ 3200/ 3498]
Avg loss, Test: 0.000929 

Epoch 3
-------------------------------
loss, Train: 0.032480  [    0/ 3498]
loss, Train: 0.031211  [ 3200/ 3498]
Avg loss, Test: 0.000829 

Epoch 4
-------------------------------
loss, Train: 0.029345  [    0/ 3498]
loss, Train: 0.031933  [ 3200/ 3498]
Avg loss, Test: 0.000819 

Epoch 5
-------------------------------
loss, Train: 0.029056  [    0/ 3498]
loss, Train: 0.029294  [ 3200/ 3498]
Avg loss, Test: 0.000829 

Epoch 6
-------------------------------
loss, Train: 0.028988  [    0/ 3498]
loss, Train: 0.027896  [ 3200/ 3498]
Avg loss, Test: 0.000751 

Epoch 7
-------------------------------
loss, Train: 0.026422  [    0/ 3498]
loss, Train: 0.028471  [ 3200/ 3498]
Avg loss, Test: 0.000732 

Epoch 8
-----

### Modell reconstructions

In [None]:
loss_fn = nn.L1Loss(reduction='none') # mean standard

with torch.no_grad():
  reconstructions = model(train_data)
  #train_loss = torch.abs(reconstructions - train_data).data
  train_loss = loss_fn(train_data.data, reconstructions)
  print(train_loss)

tensor([[0.0144, 0.0465, 0.0135,  ..., 0.0427, 0.0606, 0.1442],
        [0.1080, 0.0804, 0.0065,  ..., 0.0263, 0.0173, 0.0117],
        [0.0137, 0.0233, 0.0393,  ..., 0.0574, 0.0111, 0.0926],
        ...,
        [0.0056, 0.0114, 0.0437,  ..., 0.0301, 0.0067, 0.0756],
        [0.0127, 0.0286, 0.0344,  ..., 0.0605, 0.0549, 0.0336],
        [0.0356, 0.0457, 0.0188,  ..., 0.0039, 0.0369, 0.0101]])


In [None]:
threshold = torch.mean(train_loss) + torch.std(train_loss)   # Threshold mean + std aus Training
print("Threshold: ", threshold)

Threshold:  tensor(0.0268)


In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score

def predict(model, data, threshold):
  reconstructions = model(data)
  loss_fn = nn.L1Loss(reduction='none')
  loss = torch.mean(loss_fn(reconstructions, data), dim=1)
  print(loss.shape)
  return torch.less(loss, threshold).long()

def print_stats(predictions, labels):
  print("Accuracy = {}".format(accuracy_score(labels, preds)))   # preds = return aus def predict(), labels = labels
  print("Precision = {}".format(precision_score(labels, preds)))
  print("Recall = {}".format(recall_score(labels, preds)))

https://developers.google.com/machine-learning/crash-course/classification/precision-and-recall

In [None]:
preds = predict(model, test_data, threshold) #threshold 0.099
print_stats(preds, test_labels)

torch.Size([1500])
Accuracy = 0.5833333333333334
Precision = 0.5806451612903226
Recall = 0.9837209302325581
