# Unsupervised Machine Sound Anomaly Detection

We use the Isolation Forest algorithm (scikit-learn's `IsolationForest`) to detect anomalous machine sounds.

In [None]:
# We’ll pull a small subset (≈15 normal + 10 anomaly WAV files) from the larger dataset for quick experimentation.

import os
import shutil
import zipfile
from glob import glob

import requests
from tqdm import tqdm

url = "https://huggingface.co/datasets/ejlok1/toyadmos-mini/resolve/main/toyadmos_mini.zip"
dataset_path = "../datasets/audio/toyadmos_mini.zip"
subset_path = "../datasets/audio/toyadmos_subset"

# Download only if the extracted folder is missing from the working directory.
if not os.path.exists("toyadmos_mini"):
    print("📥 Downloading ToyADMOS-mini (~90 MB)...")
    # The archive's parent folder may not exist yet; opening the file for
    # writing would fail with FileNotFoundError without it.
    os.makedirs(os.path.dirname(dataset_path), exist_ok=True)
    # The context manager releases the streamed connection even on error;
    # the timeout keeps a stalled server from hanging the notebook forever.
    with requests.get(url, stream=True, timeout=60) as r:
        # Fail loudly on 4xx/5xx instead of saving an error page as the zip.
        r.raise_for_status()
        with open(dataset_path, "wb") as f:
            for chunk in tqdm(r.iter_content(chunk_size=1024 * 1024)):
                if chunk:
                    f.write(chunk)
    print("Extracting...")
    with zipfile.ZipFile(dataset_path, "r") as z:
        z.extractall(".")
    print("✅ Dataset extracted.")

# Create subset folders
os.makedirs(f"{subset_path}/normal", exist_ok=True)
os.makedirs(f"{subset_path}/anomaly", exist_ok=True)

# Copy only a few normal/anomaly files into the subset.
# sorted() keeps the selection reproducible across runs and platforms.
normal_files = sorted(glob("toyadmos_mini/normal/*.wav"))[:15]
anomaly_files = sorted(glob("toyadmos_mini/anomaly/*.wav"))[:10]

if not normal_files or not anomaly_files:
    # An empty match usually means the archive unpacked to a different layout.
    print("⚠️ No WAV files matched under toyadmos_mini/ — check the extracted folder layout.")

for wav_file in normal_files:
    shutil.copy(wav_file, f"{subset_path}/normal/")
for wav_file in anomaly_files:
    shutil.copy(wav_file, f"{subset_path}/anomaly/")

print(f"✅ Subset ready: {len(normal_files)} normal, {len(anomaly_files)} anomaly files.")


📂 Folder structure after running the cell above (created under `../datasets/audio/`):

toyadmos_subset/<br>
 ├── normal/   (≈15 files)<br>
 └── anomaly/  (≈10 files)<br>


Import Libraries

In [None]:
import librosa
import numpy as np
import os
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")


Feature Extraction Function

In [None]:
# We’ll extract several kinds of audio features, each describing a different acoustic property.

def extract_features(file_path, *, duration=3, offset=0.5, n_mfcc=13):
    """Return a 1-D feature vector summarising one audio clip.

    Loads at most ``duration`` seconds of audio starting ``offset`` seconds
    into the file, then concatenates the per-coefficient mean of the MFCCs
    with four scalar summaries: the mean spectral centroid, spectral
    bandwidth, spectral contrast and zero-crossing rate.

    Args:
        file_path: Path to an audio file readable by ``librosa.load``.
        duration: Maximum number of seconds of audio to load.
        offset: Number of seconds to skip at the start of the file.
        n_mfcc: Number of MFCC coefficients to compute.

    Returns:
        A 1-D NumPy array of length ``n_mfcc + 4``.

    Raises:
        ValueError: If no samples remain after applying ``offset``.
    """
    y, sr = librosa.load(file_path, duration=duration, offset=offset)
    if y.size == 0:
        # Without this guard the failure surfaces deep inside the spectral
        # routines, with a message that does not mention the file.
        raise ValueError(
            f"No audio samples in {file_path!r} after skipping {offset} s"
        )

    # MFCCs are averaged over time, leaving one value per coefficient.
    mfcc = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc).T, axis=0)
    # The remaining descriptors are each collapsed to a single scalar;
    # spectral contrast is averaged over all of its values, not per band.
    centroid = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
    bandwidth = np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr))
    contrast = np.mean(librosa.feature.spectral_contrast(y=y, sr=sr))
    zcr = np.mean(librosa.feature.zero_crossing_rate(y))
    return np.hstack([mfcc, centroid, bandwidth, contrast, zcr])

Load Normal Data (Training)

In [None]:
# Must match the subset folder created by the download cell
# (subset_path = "../datasets/audio/toyadmos_subset").
TRAIN_PATH = "../datasets/audio/toyadmos_subset/normal"
X_train = []

# os.listdir returns entries in arbitrary order. Sorting fixes the row order
# of X_train, which keeps the seeded model reproducible between runs.
for file in tqdm(sorted(os.listdir(TRAIN_PATH)), desc="Loading normal sounds"):
    if file.endswith(".wav"):
        path = os.path.join(TRAIN_PATH, file)
        X_train.append(extract_features(path))

# One row per WAV file, one column per extracted feature.
X_train = np.array(X_train)
print("✅ Normal training samples:", X_train.shape)


Data preprocessing

Scaling

In [None]:
# Learn per-feature mean and variance from the normal training data only,
# then apply that same standardisation to the training matrix.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)

Train IsolationForest

In [None]:
# Fit the detector on the scaled normal-only features. fit() returns the
# estimator itself, so construction and training can be chained.
# NOTE(review): contamination=0.1 sets the decision threshold from the
# training scores, so some training clips will themselves be labelled
# anomalous — confirm this is the intended operating point.
model = IsolationForest(random_state=42, contamination=0.1).fit(X_train_scaled)

print("✅ IsolationForest model trained on normal data.")

Test on Normal + Anomaly data

In [None]:
# Folders must match the subset created by the download cell
# (subset_path = "../datasets/audio/toyadmos_subset").
# NOTE(review): the "normal" folder appears to hold the same recordings the
# model was trained on, so the normal-class results are likely optimistic.
TEST_PATHS = {
    "normal": "../datasets/audio/toyadmos_subset/normal",
    "anomaly": "../datasets/audio/toyadmos_subset/anomaly",
}

# One dict per evaluated file: file name, ground-truth label, predicted label.
results = []

for label, folder in TEST_PATHS.items():
    # Sorted so the rows of the results table come out in a stable order.
    for file in sorted(os.listdir(folder)):
        if file.endswith(".wav"):
            path = os.path.join(folder, file)
            feat = extract_features(path)
            # The scaler expects a 2-D array, so wrap the single sample.
            feat_scaled = scaler.transform([feat])
            pred = model.predict(feat_scaled)
            results.append({
                "file": file,
                "true_label": label,
                # IsolationForest.predict returns -1 for outliers, +1 for inliers.
                "pred_label": "anomaly" if pred[0] == -1 else "normal",
            })


Evaluate Performance

In [None]:
import pandas as pd
from sklearn.metrics import confusion_matrix

# Tabulate the per-file predictions and compute the share of exact matches.
df = pd.DataFrame(results)
acc = df["true_label"].eq(df["pred_label"]).mean()

print(df.head())
print(f"\n✅ Detection Accuracy: {acc*100:.2f}%")

# Rows are true labels and columns are predictions, both ordered normal -> anomaly.
cm = confusion_matrix(
    df["true_label"],
    df["pred_label"],
    labels=["normal", "anomaly"],
)
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=["Pred Normal", "Pred Anomaly"],
    yticklabels=["True Normal", "True Anomaly"],
)
plt.title("IsolationForest Anomaly Detection (Subset)")
plt.show()

Test a Single Audio File

In [None]:
def predict_sound(file_path):
    """Classify a single audio file with the fitted scaler and model.

    Extracts the features of ``file_path``, scales them with the
    module-level ``scaler`` and returns a display string for the
    prediction of the module-level ``model``.
    """
    features = extract_features(file_path)
    # Wrap the sample in a list to get the 2-D shape the scaler expects.
    verdict = model.predict(scaler.transform([features]))[0]
    if verdict == -1:
        return "⚠️ Anomalous"
    return "✅ Normal"

# Points into the subset folder created by the download cell
# (subset_path = "../datasets/audio/toyadmos_subset").
# NOTE(review): confirm that sample_00010.wav is among the copied anomaly files.
test_file = "../datasets/audio/toyadmos_subset/anomaly/sample_00010.wav"
print("🎯 Prediction:", predict_sound(test_file))
