In [5]:
import os
from pathlib import Path

import librosa
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

import mlflow
import mlflow.sklearn
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier
from sklearn.model_selection import cross_validate, cross_val_predict, StratifiedKFold
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.preprocessing import LabelEncoder

# Point MLflow at the shared mlruns folder under machineLearning/ (do not create a separate store).
ML_DIR = os.path.abspath(os.path.join(os.getcwd(), '..', '..', '..'))
# Build the file URI with Path.as_uri() instead of formatting 'file:///' + path by hand:
# the hand-built form keeps backslashes in the URI on Windows and yields four leading
# slashes on POSIX, while as_uri() emits forward slashes and percent-encodes special
# characters. ML_DIR is absolute, which as_uri() requires.
mlflow.set_tracking_uri(Path(ML_DIR, "mlruns").as_uri())

# REQUIRED: the experiment name is fixed by the assignment instructions.
mlflow.set_experiment("Baseline_Audio_Tree")

2026/02/21 21:12:01 INFO mlflow.tracking.fluent: Experiment with name 'Baseline_Audio_Tree' does not exist. Creating a new experiment.


<Experiment: artifact_location='file:///c:\\CAWU4GROUP3\\projects\\projectRoodio\\machineLearning\\mlruns/143684679773775941', creation_time=1771683121329, experiment_id='143684679773775941', last_update_time=1771683121329, lifecycle_stage='active', name='Baseline_Audio_Tree', tags={}>

In [6]:
# Locate the main data folders: machineLearning/data/raw and machineLearning/data/raw2.
# ML_DIR is three directory levels above the current working directory.
ML_DIR = os.path.abspath(os.path.join(os.getcwd(), os.pardir, os.pardir, os.pardir))
SOURCE_DIRS = [os.path.join(ML_DIR, 'data', folder) for folder in ('raw', 'raw2')]
# Class names; each one is also the name of a sub-folder inside every source directory.
classes = ['angry', 'happy', 'sad', 'relaxed']

def extract_simple_features(file_path):
    """Compute two summary features for a single audio file.

    At most the first 30 seconds of the file are loaded, to keep extraction fast.

    Args:
        file_path: Path of the audio file, passed straight to ``librosa.load``.

    Returns:
        A two-element list ``[rms, zcr]``: the mean of librosa's RMS feature and
        the mean of its zero-crossing-rate feature over the loaded signal.
    """
    signal, _sample_rate = librosa.load(file_path, duration=30)
    frame_rms = librosa.feature.rms(y=signal)
    frame_zcr = librosa.feature.zero_crossing_rate(y=signal)
    return [np.mean(frame_rms), np.mean(frame_zcr)]

features = []
labels = []
skipped = []  # (path, exception) pairs for audio files that could not be processed

for data_path in SOURCE_DIRS:
    print(f"Memulai ekstraksi data dari folder: {data_path}")
    for c in classes:
        class_path = os.path.join(data_path, c)
        if not os.path.isdir(class_path):
            print(f"\u26a0\ufe0f Folder {c} tidak ditemukan di {data_path}!")
            continue

        # os.listdir returns entries in arbitrary order; sorting keeps the sample order
        # (and therefore the shuffled cross-validation folds built from it) reproducible.
        files = sorted(f for f in os.listdir(class_path) if f.lower().endswith(('.wav', '.mp3')))
        print(f"Memproses {len(files)} file di kelas {c}...")

        for f in tqdm(files, desc=f"Progress {c}"):
            path = os.path.join(class_path, f)
            try:
                feat = extract_simple_features(path)
            except Exception as exc:
                # Best effort: one unreadable file must not abort the whole run, but it is
                # recorded so it can be reported. Catching Exception (not a bare except)
                # lets KeyboardInterrupt / SystemExit still stop the cell.
                skipped.append((path, exc))
                continue
            features.append(feat)
            labels.append(c)

# Report the files that were left out so a silently shrinking dataset is visible.
if skipped:
    print(f"\u26a0\ufe0f {len(skipped)} file dilewati karena gagal diproses:")
    for skipped_path, skipped_exc in skipped:
        print(f"  - {skipped_path}: {skipped_exc!r}")

# Stop here when nothing was extracted: continuing would leave X / y undefined (or stale
# from an earlier execution of this cell) for the training cell below.
if not features:
    raise RuntimeError("\u274c ERROR: Data masih kosong. Cek apakah folder data/raw & data/raw2 berisi file audio.")

X = np.array(features)
le = LabelEncoder()
y = le.fit_transform(labels)
print(f"\u2705 Setup Berhasil! Total data: {len(X)} sampel.")

Memulai ekstraksi data dari folder: c:\CAWU4GROUP3\projects\projectRoodio\machineLearning\data\raw
Memproses 25 file di kelas angry...


Progress angry: 100%|██████████| 25/25 [00:04<00:00,  5.83it/s]


Memproses 25 file di kelas happy...


Progress happy: 100%|██████████| 25/25 [00:01<00:00, 14.64it/s]


Memproses 25 file di kelas sad...


Progress sad: 100%|██████████| 25/25 [00:01<00:00, 14.55it/s]


Memproses 25 file di kelas relaxed...


Progress relaxed: 100%|██████████| 25/25 [00:01<00:00, 14.21it/s]


Memulai ekstraksi data dari folder: c:\CAWU4GROUP3\projects\projectRoodio\machineLearning\data\raw2
Memproses 25 file di kelas angry...


Progress angry: 100%|██████████| 25/25 [00:01<00:00, 13.71it/s]


Memproses 25 file di kelas happy...


Progress happy: 100%|██████████| 25/25 [00:01<00:00, 15.03it/s]


Memproses 25 file di kelas sad...


Progress sad: 100%|██████████| 25/25 [00:01<00:00, 14.51it/s]


Memproses 25 file di kelas relaxed...


Progress relaxed: 100%|██████████| 25/25 [00:01<00:00, 13.94it/s]

✅ Setup Berhasil! Total data: 200 sampel.





In [7]:
# Baseline model definitions
models = {
    "Random_Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "Gradient_Boosting": GradientBoostingClassifier(n_estimators=100, random_state=42),
    "Extra_Trees": ExtraTreesClassifier(n_estimators=100, random_state=42)
}

# 5-fold stratified cross-validation; the fixed seed yields the same folds on every split call.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Scorers evaluated on every fold (loop-invariant, so defined once).
scoring = ['accuracy', 'precision_macro', 'recall_macro', 'f1_macro']
# MLflow metric name -> key of the matching per-fold scores in the cross_validate result.
logged_metrics = {
    "Accuracy": "test_accuracy",
    "Precision": "test_precision_macro",
    "Recall": "test_recall_macro",
    "F1_Score": "test_f1_macro",
}

# LabelEncoder assigns integer codes following le.classes_ (sorted label order), which is
# NOT the order of the `classes` list. Axis labels must come from the encoder, otherwise
# the 'sad' and 'relaxed' rows/columns of the confusion matrix are swapped.
class_names = list(le.classes_)
class_ids = np.arange(len(class_names))

for model_name, model in models.items():
    with mlflow.start_run(run_name=model_name):
        print(f"Menjalankan Eksperimen: {model_name}")

        # Cross-validated scores
        cv_results = cross_validate(model, X, y, cv=skf, scoring=scoring)

        # Log the run parameter and the mean of each metric across folds
        mlflow.log_param("model_type", model_name)
        for metric_name, result_key in logged_metrics.items():
            mlflow.log_metric(metric_name, np.mean(cv_results[result_key]))

        # Confusion matrix from out-of-fold predictions: every sample is predicted by a
        # model that did not see it during training, so the matrix is consistent with the
        # cross-validated metrics above instead of showing training-set performance.
        y_pred = cross_val_predict(model, X, y, cv=skf)
        cm = confusion_matrix(y, y_pred, labels=class_ids)

        fig, ax = plt.subplots(figsize=(8, 6))
        disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
        disp.plot(cmap='viridis', ax=ax)
        ax.set_title(f"Baseline {model_name}")

        # Save the figure and attach it to the run as an artifact
        img_name = f"cm_{model_name}.png"
        fig.savefig(img_name)
        mlflow.log_artifact(img_name)
        plt.close(fig)  # release the figure; one is created per model

        # cross_validate / cross_val_predict work on clones, so `model` itself is still
        # unfitted here. Fit it on the full dataset before logging it.
        model.fit(X, y)
        mlflow.sklearn.log_model(model, "model")
        print(f"Berhasil Log {model_name} ke MLflow.")

print("\n--- SEMUA TUGAS SELESAI ---")

Menjalankan Eksperimen: Random_Forest




Berhasil Log Random_Forest ke MLflow.
Menjalankan Eksperimen: Gradient_Boosting




Berhasil Log Gradient_Boosting ke MLflow.
Menjalankan Eksperimen: Extra_Trees




Berhasil Log Extra_Trees ke MLflow.

--- SEMUA TUGAS SELESAI ---
