In [1]:
import csv
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
from sklearn.model_selection import train_test_split
import seaborn as sns 
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import torch
from torch.utils.data import TensorDataset, DataLoader ,random_split
import torch.nn as nn
import torch.optim as optim
from tqdm.auto import tqdm
from sklearn.preprocessing import MinMaxScaler


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def create_torch_dataset(data_dir='../PHM09_competition_1', num_runs=560, num_rows=122824):
    """Load the per-run CSV files into one min-max-normalized float32 tensor.

    Files are read as ``{data_dir}/Run_1.csv`` ... ``{data_dir}/Run_{num_runs}.csv``.
    Each file is parsed as three header-less columns, truncated to at most
    ``num_rows`` rows, and scaled to [0, 1] column-wise *per run* (the scaler is
    re-fitted on every file).

    Args:
        data_dir: Directory containing the ``Run_<i>.csv`` files.
        num_runs: Number of runs to load, starting at ``Run_1.csv``.
        num_rows: Maximum number of rows read from each file (``None`` reads all).

    Returns:
        torch.Tensor of shape ``(num_runs, 1, rows, 3)`` and dtype float32.

    Raises:
        ValueError: If ``num_runs`` is smaller than 1, or if a run does not have
            the same shape as the first run (it could not be stacked).
    """
    if num_runs < 1:
        raise ValueError(f"num_runs must be >= 1, got {num_runs}")

    columns = ['Input Voltage', 'Output Voltage', 'Tachometer']
    scaler = MinMaxScaler()
    all_data = []
    expected_shape = None

    for i in range(1, num_runs + 1):
        path = f'{data_dir}/Run_{i}.csv'
        temp_df = pd.read_csv(path, names=columns, nrows=num_rows)

        # Fail early, naming the offending file, instead of letting torch.stack
        # raise an opaque size-mismatch error after everything has been read.
        if expected_shape is None:
            expected_shape = temp_df.shape
        elif temp_df.shape != expected_shape:
            raise ValueError(
                f"{path} has shape {temp_df.shape}, expected {expected_shape}"
            )

        # fit_transform re-fits the scaler, so normalization is independent per run.
        normalized_data = scaler.fit_transform(temp_df)
        temp_tensor = torch.tensor(normalized_data, dtype=torch.float32)
        all_data.append(temp_tensor.unsqueeze(dim=0))  # Adding a channel dimension

    return torch.stack(all_data, dim=0)

# Load every run into a single tensor (one entry along dim 0 per Run_<i>.csv file).
tensor_t = create_torch_dataset()

In [3]:
# Sanity check: dimensions are (runs, channel, rows per run, signal columns).
tensor_t.shape

torch.Size([560, 1, 122824, 3])

In [4]:
class Autoencoder(nn.Module):
    """1-D convolutional autoencoder for sequences shaped (batch, channels, length).

    The encoder halves the sequence length three times (stride-2 convolutions,
    rounding up each time); the decoder doubles it three times with transposed
    convolutions. The raw decoder output is therefore the input length rounded
    up to a multiple of 8, so ``forward`` crops it back to the input length.

    Args:
        in_channels: Number of channels of the input (and of the reconstruction).
            Defaults to 1.
    """

    def __init__(self, in_channels=1):
        super().__init__()
        # Encoder: channels in_channels -> 16 -> 32 -> 64, length roughly / 8.
        self.encoder = nn.Sequential(
            nn.Conv1d(in_channels, 16, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv1d(16, 32, kernel_size=3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv1d(32, 64, kernel_size=3, stride=2, padding=1),
            nn.ReLU()
        )
        # Decoder: mirror of the encoder; each layer exactly doubles the length.
        self.decoder = nn.Sequential(
            nn.ConvTranspose1d(64, 32, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose1d(32, 16, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose1d(16, in_channels, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.Sigmoid()  # Output in (0, 1); assumes the input is normalized to [0, 1]
        )

    def forward(self, x):
        """Return the reconstruction of ``x`` with exactly the same shape as ``x``."""
        input_length = x.shape[-1]
        x = self.encoder(x)
        x = self.decoder(x)
        # The decoder yields 8 * ceil(length / 8) samples; drop the surplus tail so
        # the reconstruction can be compared element-wise with the input. This is
        # a no-op when the input length is already a multiple of 8.
        return x[..., :input_length]


# Model, optimizer and reconstruction loss.
# NOTE(review): these three names are assigned again further down in this cell,
# right before the training loop, so the objects created here are never trained.
model = Autoencoder()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_fn = nn.MSELoss()

def train_step(model, data, loss_fn, optim):
    """Run one training epoch of a reconstruction model and report the mean loss.

    Args:
        model: Module mapping a batch to its reconstruction.
        data: Iterable of batches. A batch is either a tensor or a list/tuple
            whose first element is the tensor (as produced by a DataLoader
            over a TensorDataset).
        loss_fn: Callable ``loss_fn(prediction, target)`` returning a scalar tensor.
        optim: Optimizer that updates ``model``'s parameters.

    Returns:
        float: Loss averaged over the batches seen (also printed).

    Raises:
        ValueError: If ``data`` yields no batches.
    """
    model.train()
    total_loss = 0.0
    num_batches = 0  # counted manually so iterables without __len__ also work

    for X in data:
        # Accept (tensor,) batches the same way eval_model does.
        if isinstance(X, (list, tuple)):
            X = X[0]

        y_pred = model(X)
        loss = loss_fn(y_pred, X)  # the input is its own target

        optim.zero_grad()
        loss.backward()
        optim.step()

        total_loss += loss.item()
        num_batches += 1

    if num_batches == 0:
        raise ValueError("train_step received no batches")

    train_loss = total_loss / num_batches
    print(f"Train loss: {train_loss:.5f}")
    return train_loss



# Prepare DataLoaders
SEED = 42  # fixed so the split, weight initialization and shuffling are reproducible
torch.manual_seed(SEED)

# Flatten every run into a single-channel sequence: (runs, 1, rows * columns).
# NOTE(review): because the columns are the innermost dimension, the three
# signals end up interleaved sample-by-sample in that sequence - confirm this is
# the intended input layout for the Conv1d layers.
tensor_t = tensor_t.view(tensor_t.shape[0], 1, -1)  # Reshape tensor

# 80 / 20 train / validation split over runs.
train_size = int(0.8 * len(tensor_t))
val_size = len(tensor_t) - train_size
train_dataset, val_dataset = random_split(
    tensor_t,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SEED),
)

train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=32)

# Initialize the Autoencoder model
model = Autoencoder()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_fn = nn.MSELoss()

# Training loop
epochs = 3

for epoch in tqdm(range(epochs), desc="Training Epochs"):
    print(f"Epoch: {epoch + 1}\n---------")
    train_step(model=model, data=train_dataloader, loss_fn=loss_fn, optim=optimizer)

# After training, the model is used to compute per-run reconstruction errors,
# and a threshold on those errors classifies runs as normal or anomalous.
# That is the post-training analysis in the next cell.

Training Epochs:   0%|                                                                           | 0/3 [00:00<?, ?it/s]

Epoch: 1
---------


Training Epochs:  33%|██████████████████████                                            | 1/3 [03:15<06:30, 195.02s/it]

Train loss: 0.07175
Epoch: 2
---------


Training Epochs:  67%|████████████████████████████████████████████                      | 2/3 [05:26<02:37, 157.63s/it]

Train loss: 0.06435
Epoch: 3
---------


Training Epochs: 100%|██████████████████████████████████████████████████████████████████| 3/3 [07:45<00:00, 155.04s/it]

Train loss: 0.04714





In [5]:
def calculate_threshold(model, data_loader):
    """Derive an anomaly threshold from reconstruction errors.

    For every sample the error is the mean absolute difference between the
    input and its reconstruction (averaged over dims 1 and 2). The threshold
    is ``mean(errors) + std(errors)`` over all samples in ``data_loader``.

    Args:
        model: Module mapping a (batch, channels, length) tensor to a
            reconstruction of the same shape.
        data_loader: Iterable of batches. A batch is either a tensor or a
            list/tuple whose first element is the tensor.

    Returns:
        float: The threshold (also printed). It is NaN when only a single
        sample is available, because the sample standard deviation is
        undefined in that case.

    Raises:
        ValueError: If ``data_loader`` yields no batches.
    """
    model.eval()
    batch_errors = []
    with torch.no_grad():
        for X in data_loader:
            # Accept (tensor,) batches the same way eval_model does.
            if isinstance(X, (list, tuple)):
                X = X[0]

            reconstructed = model(X)
            batch_errors.append(torch.mean(torch.abs(X - reconstructed), dim=[1, 2]))

    if not batch_errors:
        raise ValueError("calculate_threshold received no batches")

    errors = torch.cat(batch_errors)
    threshold = (errors.mean() + errors.std()).item()
    print(threshold)
    return threshold

def eval_model(model, data_loader, threshold):
    """Flag each sample whose reconstruction error exceeds ``threshold``.

    The error of a sample is the mean absolute difference between the input
    and its reconstruction (averaged over dims 1 and 2). One result row is
    produced per sample, in the order the loader yields them, so the result
    does not depend on the loader's batch size.

    Args:
        model: Module mapping a (batch, channels, length) tensor to a
            reconstruction of the same shape.
        data_loader: Iterable of batches. A batch is either a tensor or a
            list/tuple whose first element is the tensor.
        threshold: Error value above which a sample counts as an anomaly.

    Returns:
        pandas.DataFrame with columns ``Run_id`` (1-based position of the
        sample in the loader) and ``Condition`` (True when anomalous).
    """
    model.eval()
    run_conditions = []
    with torch.no_grad():
        for X in data_loader:
            # If X is a list or tuple, get the first element as the data tensor
            if isinstance(X, (list, tuple)):
                X = X[0]

            reconstructed = model(X)
            error = torch.mean(torch.abs(X - reconstructed), dim=[1, 2])
            # One boolean per sample rather than one per batch: collapsing a
            # batch with .any() mislabels runs as soon as batch_size > 1.
            run_conditions.extend((error > threshold).tolist())

    # Create DataFrame with run_id and condition
    return pd.DataFrame({
        'Run_id': range(1, len(run_conditions) + 1),
        'Condition': run_conditions
    })

    
# Loader over every run (training and validation runs alike), one run per batch
# and unshuffled. TensorDataset yields 1-tuples, which eval_model unwraps.
full_data_loader = DataLoader(TensorDataset(tensor_t), batch_size=1)

# Compute the threshold (mean + 1 std of the validation reconstruction errors)
threshold = calculate_threshold(model, val_dataloader)

# Evaluate the model: one row per run, Condition is True when the run's error exceeds the threshold
result_df = eval_model(model, full_data_loader, threshold)
result_df

0.1599484086036682


Unnamed: 0,Run_id,Condition
0,1,False
1,2,False
2,3,False
3,4,False
4,5,False
...,...,...
555,556,False
556,557,False
557,558,False
558,559,False


In [6]:
# How many runs were flagged as anomalous (True) versus normal (False).
result_df['Condition'].value_counts()

Condition
False    452
True     108
Name: count, dtype: int64

In [7]:
# Sanity check: there should be one result row per run.
len(result_df)

560

In [8]:
# Shape after the flattening view applied in the training cell: (runs, 1, rows * columns).
tensor_t.shape

torch.Size([560, 1, 368472])