In [8]:
import os
import librosa
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

import mlflow
import mlflow.sklearn
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier
from sklearn.model_selection import cross_validate, cross_val_predict, StratifiedKFold
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.preprocessing import LabelEncoder

# Mandatory per the assignment instructions: select the MLflow experiment
# under which the runs started later in this notebook are recorded.
mlflow.set_experiment("Baseline_Audio_Tree")

<Experiment: artifact_location='file:d:/CAWU 4 LESGO/M L/LAGU/OneDrive_2026-02-19 (1)/Songs raw (MP3)/mlruns/1', creation_time=1771657861315, experiment_id='1', last_update_time=1771657861315, lifecycle_stage='active', name='Baseline_Audio_Tree', tags={}>

In [9]:
# Root folder of the dataset; each class is read from its own sub-folder of this path.
data_path = 'dataset'
# Class names. Each one is used both as the sub-folder name and as the label of
# every sample found in that sub-folder.
# NOTE: the labels are later encoded with LabelEncoder, which assigns integer
# codes in sorted label order -- the codes therefore do NOT follow the order of
# this list ('Relaxed' is encoded before 'Sad').
classes = ['Angry', 'Happy', 'Sad', 'Relaxed']

def extract_simple_features(file_path, duration=30):
    """Extract two summary features from an audio file.

    The features are the mean of the frame-wise RMS energy and the mean of the
    frame-wise zero-crossing rate, both computed with librosa.

    Args:
        file_path: Path of the audio file; passed unchanged to ``librosa.load``.
        duration: Maximum number of seconds to load from the file. Defaults to
            30 so that extraction stays fast; ``None`` makes ``librosa.load``
            read the whole file.

    Returns:
        list: ``[rms, zcr]`` -- mean RMS energy and mean zero-crossing rate.

    Raises:
        Any exception raised by ``librosa.load`` or the feature extractors
        (for example when the file cannot be decoded) propagates to the caller.
    """
    # Neither feature needs the sample rate, so it is discarded.
    signal, _ = librosa.load(file_path, duration=duration)
    rms = np.mean(librosa.feature.rms(y=signal))
    zcr = np.mean(librosa.feature.zero_crossing_rate(y=signal))
    return [rms, zcr]

# Build the feature matrix X and the encoded label vector y by walking one
# sub-folder per class under `data_path`.
features = []
labels = []
skipped = []  # (path, exception) for every file that could not be processed

print(f"Memulai ekstraksi data dari folder: {os.path.abspath(data_path)}")

for class_name in classes:
    class_path = os.path.join(data_path, class_name)
    # isdir (not exists): a regular file with the class name would make
    # os.listdir fail below.
    if not os.path.isdir(class_path):
        print(f"⚠️ Folder {class_name} tidak ditemukan!")
        continue

    # os.listdir returns entries in arbitrary order. Sorting makes the sample
    # order -- and therefore the shuffled cross-validation folds built from it --
    # reproducible across machines and runs.
    files = sorted(
        name for name in os.listdir(class_path)
        if name.lower().endswith(('.wav', '.mp3'))
    )
    print(f"Memproses {len(files)} file di kelas {class_name}...")

    for file_name in tqdm(files, desc=f"Progress {class_name}"):
        path = os.path.join(class_path, file_name)
        try:
            feat = extract_simple_features(path)
        except Exception as exc:
            # Best effort: one unreadable file must not abort the whole run,
            # but it is recorded instead of being silently dropped.
            # Catching Exception (not a bare except) lets KeyboardInterrupt
            # still stop the loop.
            skipped.append((path, exc))
            continue
        features.append(feat)
        labels.append(class_name)

if skipped:
    print(f"⚠️ {len(skipped)} file dilewati karena gagal diproses:")
    for path, exc in skipped:
        print(f"   - {path}: {exc!r}")

# Validation so the later cells do not hit the "0 samples" error again.
if len(features) > 0:
    X = np.array(features)
    le = LabelEncoder()
    y = le.fit_transform(labels)
    print(f"✅ Setup Berhasil! Total data: {len(X)} sampel.")
else:
    # X, y and le are left unassigned in this branch.
    print("❌ ERROR: Data masih kosong. Cek apakah folder 'dataset' benar-benar berisi file audio.")

Memulai ekstraksi data dari folder: d:\CAWU 4 LESGO\M L\LAGU\OneDrive_2026-02-19 (1)\Songs raw (MP3)\dataset
Memproses 25 file di kelas Angry...


Progress Angry: 100%|██████████| 25/25 [00:03<00:00,  6.27it/s]


Memproses 25 file di kelas Happy...


Progress Happy: 100%|██████████| 25/25 [00:02<00:00, 12.11it/s]


Memproses 25 file di kelas Sad...


Progress Sad: 100%|██████████| 25/25 [00:02<00:00, 10.72it/s]


Memproses 25 file di kelas Relaxed...


Progress Relaxed: 100%|██████████| 25/25 [00:02<00:00, 11.85it/s]

✅ Setup Berhasil! Total data: 100 sampel.





In [10]:
# Baseline model definitions (tree ensembles with a fixed seed for reproducibility)
models = {
    "Random_Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "Gradient_Boosting": GradientBoostingClassifier(n_estimators=100, random_state=42),
    "Extra_Trees": ExtraTreesClassifier(n_estimators=100, random_state=42)
}

# 5-fold stratified cross-validation. The fixed random_state means every call
# that receives `skf` evaluates on exactly the same folds.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Scorers evaluated in cross-validation, and the MLflow metric name each one is
# logged under ("Accuracy" and "F1_Score" keep their original names).
scoring = ['accuracy', 'precision_macro', 'recall_macro', 'f1_macro']
metric_names = {
    'accuracy': "Accuracy",
    'precision_macro': "Precision",
    'recall_macro': "Recall",
    'f1_macro': "F1_Score",
}

for model_name, model in models.items():
    with mlflow.start_run(run_name=model_name):
        print(f"Menjalankan Eksperimen: {model_name}")

        # Cross-validated scores
        cv_results = cross_validate(model, X, y, cv=skf, scoring=scoring)

        # Log the parameter and the mean of every computed metric
        mlflow.log_param("model_type", model_name)
        for scorer in scoring:
            mlflow.log_metric(metric_names[scorer], np.mean(cv_results[f"test_{scorer}"]))

        # Confusion matrix from out-of-fold predictions: each sample is
        # predicted by a model that never saw it during training. Predicting on
        # the training data itself would give a near-perfect, misleading matrix.
        y_pred = cross_val_predict(model, X, y, cv=skf)
        cm = confusion_matrix(y, y_pred)

        fig, ax = plt.subplots(figsize=(8, 6))
        # y holds LabelEncoder codes, so row/column i of `cm` corresponds to
        # le.classes_[i] (sorted order), not to the i-th entry of `classes`.
        disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=le.classes_)
        disp.plot(cmap='viridis', ax=ax)
        ax.set_title(f"Baseline {model_name}")

        # Save the figure and attach it to the run as an artifact
        img_name = f"cm_{model_name}.png"
        fig.savefig(img_name)
        mlflow.log_artifact(img_name)
        plt.close(fig)

        # Log the model: cross-validation only fits clones, so the estimator is
        # fitted on the full dataset before it is stored.
        model.fit(X, y)
        mlflow.sklearn.log_model(model, "model")
        print(f"Berhasil Log {model_name} ke MLflow.")

print("\n--- SEMUA TUGAS SELESAI ---")

Menjalankan Eksperimen: Random_Forest


  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


Berhasil Log Random_Forest ke MLflow.
Menjalankan Eksperimen: Gradient_Boosting




Berhasil Log Gradient_Boosting ke MLflow.
Menjalankan Eksperimen: Extra_Trees




Berhasil Log Extra_Trees ke MLflow.

--- SEMUA TUGAS SELESAI ---
