In [1]:
!wget -O fan_data.zip "https://zenodo.org/records/3384388/files/0_dB_fan.zip?download=1"
!unzip fan_data.zip -d ./fan_data


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: ./fan_data/fan/id_00/normal/00000152.wav  
  inflating: ./fan_data/fan/id_00/normal/00000153.wav  
  inflating: ./fan_data/fan/id_00/normal/00000154.wav  
  inflating: ./fan_data/fan/id_00/normal/00000155.wav  
  inflating: ./fan_data/fan/id_00/normal/00000156.wav  
  inflating: ./fan_data/fan/id_00/normal/00000157.wav  
  inflating: ./fan_data/fan/id_00/normal/00000158.wav  
  inflating: ./fan_data/fan/id_00/normal/00000159.wav  
  inflating: ./fan_data/fan/id_00/normal/00000160.wav  
  inflating: ./fan_data/fan/id_00/normal/00000161.wav  
  inflating: ./fan_data/fan/id_00/normal/00000162.wav  
  inflating: ./fan_data/fan/id_00/normal/00000163.wav  
  inflating: ./fan_data/fan/id_00/normal/00000164.wav  
  inflating: ./fan_data/fan/id_00/normal/00000165.wav  
  inflating: ./fan_data/fan/id_00/normal/00000166.wav  
  inflating: ./fan_data/fan/id_00/normal/00000167.wav  
  inflating: ./fan_data/fan/id_00/norma

In [2]:
import os
for root, dirs, files in os.walk('./fan_data'):
    print(f"Found {len(files)} files in {root}")


Found 0 files in ./fan_data
Found 0 files in ./fan_data/fan
Found 0 files in ./fan_data/fan/id_02
Found 359 files in ./fan_data/fan/id_02/abnormal
Found 1016 files in ./fan_data/fan/id_02/normal
Found 0 files in ./fan_data/fan/id_04
Found 348 files in ./fan_data/fan/id_04/abnormal
Found 1033 files in ./fan_data/fan/id_04/normal
Found 0 files in ./fan_data/fan/id_06
Found 361 files in ./fan_data/fan/id_06/abnormal
Found 1015 files in ./fan_data/fan/id_06/normal
Found 0 files in ./fan_data/fan/id_00
Found 407 files in ./fan_data/fan/id_00/abnormal
Found 1011 files in ./fan_data/fan/id_00/normal


In [4]:
import os
import librosa
import numpy as np

def extract_features(audio_file, n_mels=64, frames=5, n_fft=1024, hop_length=512, power=2.0):
    """
    Extract features from an audio file by converting it into a log-mel spectrogram.
    """
    # Load the audio file using librosa
    y, sr = librosa.load(audio_file, sr=None)  # 'sr=None' ensures we use the native sampling rate

    # Compute the mel-spectrogram
    mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length,
                                                     n_mels=n_mels, power=power)

    # Convert the mel-spectrogram to log scale (log-mel-spectrogram)
    log_mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)

    # Stack frames to create feature vector
    feature_vector = np.hstack([log_mel_spectrogram[:, i:i+frames].flatten()
                                for i in range(log_mel_spectrogram.shape[1] - frames + 1)])

    return feature_vector


def load_data_by_model(root_path, models, n_mels=64, frames=5):
    """
    Load data for each fan model, with normal and abnormal labels, and extract features.

    Parameters:
    - root_path: Root directory where the 'fan' and model subfolders are located.
    - models: List of model IDs to load ('id_00', 'id_02', etc.).
    - n_mels: Number of Mel frequency bins for the mel-spectrogram.
    - frames: Number of frames to stack for feature vector.

    Returns:
    - all_features: List of all extracted features across all models.
    - all_labels: List of corresponding labels (0 for normal, 1 for abnormal).
    """
    all_features = []
    all_labels = []

    # Iterate through each model in the dataset
    for model in models:
        model_folder = os.path.join(root_path, "fan", model)

        # Check if the model folder exists
        if not os.path.exists(model_folder):
            continue

        # For each model, we load normal and abnormal files separately
        for condition in ["normal", "abnormal"]:
            condition_folder = os.path.join(model_folder, condition)

            # Check if the condition folder exists and is not empty
            if os.path.exists(condition_folder) and len(os.listdir(condition_folder)) > 0:
                for file in os.listdir(condition_folder):
                    if file.endswith(".wav"):
                        file_path = os.path.join(condition_folder, file)

                        # Extract features from the audio file
                        feature_vector = extract_features(file_path, n_mels=n_mels, frames=frames)

                        # Append the feature vector and label to the lists
                        all_features.append(feature_vector)
                        all_labels.append(0 if condition == "normal" else 1)

    return np.array(all_features), np.array(all_labels)


# Example usage:
root_path = "./fan_data"  # Root directory where fan data is stored
models = ["id_00", "id_02", "id_04", "id_06"]  # List of model IDs to load
features, labels = load_data_by_model(root_path, models)
print(f"Loaded {len(features)} samples.")


Loaded 5550 samples.


In [9]:
from sklearn.model_selection import train_test_split

def split_data(features, labels, test_size=0.2, random_state=42):
    """
    Split the data into training and testing sets.

    Parameters:
    - features: Extracted feature vectors.
    - labels: Corresponding labels (0 for normal, 1 for abnormal).
    - test_size: Proportion of the data to be used as the test set.
    - random_state: Seed for reproducibility.

    Returns:
    - X_train, X_test: Training and testing feature sets.
    - y_train, y_test: Training and testing labels.
    """
    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=test_size, random_state=random_state)

    # Use only normal data for training
    X_train = X_train[y_train == 0]
    y_train = y_train[y_train == 0]

    return X_train, X_test, y_train, y_test


# Example usage:
X_train, X_test, y_train, y_test = split_data(features, labels)
print(f"Training data: {X_train.shape[0]} samples, Testing data: {X_test.shape[0]} samples")


Training data: 3271 samples, Testing data: 1110 samples


In [6]:
import tensorflow as tf
from tensorflow.keras import layers, models

def build_autoencoder(input_dim):
    """
    Build and compile the autoencoder model.

    Parameters:
    - input_dim: The dimensionality of the input features (flattened log-mel spectrogram).

    Returns:
    - autoencoder: The compiled autoencoder model.
    """
    # Encoder
    input_layer = layers.Input(shape=(input_dim,))
    encoded = layers.Dense(128, activation='relu')(input_layer)
    encoded = layers.Dense(64, activation='relu')(encoded)

    # Latent space
    latent_space = layers.Dense(32, activation='relu')(encoded)

    # Decoder
    decoded = layers.Dense(64, activation='relu')(latent_space)
    decoded = layers.Dense(128, activation='relu')(decoded)
    output_layer = layers.Dense(input_dim, activation='sigmoid')(decoded)

    # Autoencoder model
    autoencoder = models.Model(input_layer, output_layer)

    # Compile the model
    autoencoder.compile(optimizer='adam', loss='mse')

    return autoencoder

# Build and train the autoencoder
input_dim = X_train.shape[1]  # Number of features in each sample (after feature extraction)
autoencoder = build_autoencoder(input_dim)

# Train the autoencoder
autoencoder.fit(X_train, X_train, epochs=10, batch_size=32, validation_split=0.2)


Epoch 1/10
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 703ms/step - loss: 883.5201 - val_loss: 893.4330
Epoch 2/10
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 636ms/step - loss: 883.9385 - val_loss: 893.4330
Epoch 3/10
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m44s[0m 537ms/step - loss: 882.2955 - val_loss: 893.4330
Epoch 4/10
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m83s[0m 547ms/step - loss: 883.9573 - val_loss: 893.4330
Epoch 5/10
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m45s[0m 552ms/step - loss: 885.2971 - val_loss: 893.4330
Epoch 6/10
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m52s[0m 632ms/step - loss: 879.9868 - val_loss: 893.4330
Epoch 7/10
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 547ms/step - loss: 882.1144 - val_loss: 893.4330
Epoch 8/10
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m81s[0m 534ms/step - loss: 880.3286 - val_loss: 893.4330


<keras.src.callbacks.history.History at 0x7c62fb7ac610>

In [7]:
def calculate_reconstruction_error(autoencoder, X_test):
    """
    Calculate the reconstruction error for each sample in the test set.

    Parameters:
    - autoencoder: The trained autoencoder model.
    - X_test: The testing feature set.

    Returns:
    - errors: The reconstruction errors for each test sample.
    """
    # Get the reconstructed output from the autoencoder
    reconstructed = autoencoder.predict(X_test)

    # Calculate the mean squared error for each sample
    errors = np.mean(np.square(X_test - reconstructed), axis=1)

    return errors

# Calculate the reconstruction errors for the test set
errors = calculate_reconstruction_error(autoencoder, X_test)

# Set a threshold based on the 95th percentile of the reconstruction errors
threshold = np.percentile(errors, 95)
print(f"Reconstruction error threshold: {threshold}")


[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 95ms/step
Reconstruction error threshold: 1381.7453674316403


In [8]:
def classify_samples(errors, threshold):
    """
    Classify samples as normal or abnormal based on the reconstruction error threshold.

    Parameters:
    - errors: The reconstruction errors for each test sample.
    - threshold: The threshold for classification (error > threshold => abnormal).

    Returns:
    - predictions: List of predicted labels (0 for normal, 1 for abnormal).
    """
    predictions = (errors > threshold).astype(int)
    return predictions

# Classify the test samples
predictions = classify_samples(errors, threshold)

# Evaluate the classification performance
from sklearn.metrics import accuracy_score, confusion_matrix

accuracy = accuracy_score(y_test, predictions)
conf_matrix = confusion_matrix(y_test, predictions)

print(f"Accuracy: {accuracy:.4f}")
print(f"Confusion Matrix:\n{conf_matrix}")


Accuracy: 0.7387
Confusion Matrix:
[[784  20]
 [270  36]]
