<a href="https://colab.research.google.com/github/trisskmasarahh/Machine-Learning_Ganjil_2025/blob/main/tugas_praktikum_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Mount Google Drive inside the Colab runtime at /content/drive.
# The dataset path used by the next cell is located under this mount point,
# so this cell has to run before the CSV is read.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [4]:
# ==========================================================
# KLASIFIKASI SUARA (voice.csv) DENGAN SVM
# ==========================================================
from pathlib import Path
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score


# ==========================================================
# 1. Load the dataset
# ==========================================================
# The CSV is read from the mounted Google Drive. When the file is absent the
# cell stops immediately with an error that shows the fully resolved path.
file_path = Path("/content/drive/MyDrive/Dataset_mesin/praktikum11/voice.csv")
if file_path.exists():
    voice_df = pd.read_csv(file_path)
else:
    raise FileNotFoundError(f"File tidak ditemukan: {file_path.resolve()}")

# ==========================================================
# 2. Data preprocessing
# ==========================================================
# The target column must be named 'label'; stop early otherwise.
if 'label' not in voice_df.columns:
    raise KeyError("Kolom target 'label' tidak ditemukan di dataset voice.csv")

# Every column except the target is treated as a feature.
feature_cols = [col for col in voice_df.columns if col != 'label']

# Coerce the features to numeric (cells that cannot be parsed become NaN) and
# drop every row that contains a NaN in any column. The number of discarded
# rows is reported so that data loss is never silent.
rows_before = len(voice_df)
voice_df[feature_cols] = voice_df[feature_cols].apply(pd.to_numeric, errors='coerce')
# Re-bind instead of inplace=True; .copy() gives an independent frame so the
# column assignment below does not trigger a chained-assignment warning.
voice_df = voice_df.dropna().copy()
rows_dropped = rows_before - len(voice_df)
if rows_dropped:
    print(f"Peringatan: {rows_dropped} baris dibuang karena berisi nilai kosong atau non-numerik.")

# Fail here with a clear message rather than later inside train_test_split.
if voice_df.empty:
    raise ValueError("Tidak ada baris tersisa setelah pembersihan data. Periksa kembali file CSV-nya.")

# Encode the target as integers (female=1, male=0). The text is normalised
# first so that variants such as 'Male' or ' female ' are still recognised.
mapping = {'female': 1, 'male': 0}
voice_df['label'] = voice_df['label'].astype(str).str.strip().str.lower().map(mapping)

# Any value that is still unmapped is a genuinely unknown class.
if voice_df['label'].isna().any():
    raise ValueError("Ada label selain 'male' dan 'female'. Periksa kembali file CSV-nya.")

# Split into the feature matrix and the target vector (NumPy arrays).
X = voice_df[feature_cols].values
y = voice_df['label'].astype(int).values

# ==========================================================
# 3. SVM experiment setup
# ==========================================================
# Fractions passed as test_size: 0.3 -> 70:30 split, 0.2 -> 80:20 split.
split_ratios = [0.3, 0.2]

# One entry per kernel under comparison. "params" is forwarded verbatim to
# SVC(**params); only the polynomial kernel carries an extra setting (degree).
kernel_configs = [
    {"name": kernel_name, "params": {"kernel": kernel_name, **extra_params, "random_state": 42}}
    for kernel_name, extra_params in (
        ("linear", {}),
        ("poly", {"degree": 3}),
        ("rbf", {}),
    )
]

# ==========================================================
# 4. Model training & evaluation
# ==========================================================
def _split_label(test_ratio):
    """Return a 'train:test' percentage label (e.g. '70:30') for a test fraction.

    round() is used instead of int(): a product such as 0.29 * 100 evaluates
    to 28.999999999999996 in binary floating point, which int() would truncate
    to 28. The train share is taken as the complement of the rounded test
    share so that the two parts always add up to 100.
    """
    test_pct = round(test_ratio * 100)
    return f"{100 - test_pct}:{test_pct}"


# One record per (split, kernel) combination; turned into a DataFrame later.
hasil = []
for ratio in split_ratios:
    # Stratified split keeps the class balance equal in train and test;
    # the fixed seed makes the partition reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=ratio, stratify=y, random_state=42
    )
    # The label depends only on the ratio, so build it once per split.
    split_label = _split_label(ratio)

    for kernel in kernel_configs:
        # Scaling lives inside the pipeline, so the scaler is fitted on the
        # training portion only and then applied to the test portion.
        model = make_pipeline(StandardScaler(), SVC(**kernel["params"]))
        model.fit(X_train, y_train)

        acc_train = accuracy_score(y_train, model.predict(X_train))
        acc_test = accuracy_score(y_test, model.predict(X_test))

        hasil.append({
            "split": split_label,
            "kernel": kernel["name"],
            "train_acc": acc_train,
            "test_acc": acc_test
        })

# ==========================================================
# 5. Show the summary
# ==========================================================
# Long-format table: one row per (split, kernel), ordered for stable output.
result_df = pd.DataFrame(hasil).sort_values(by=["split", "kernel"]).reset_index(drop=True)
print("\n=== RINGKASAN HASIL KLASIFIKASI VOICE (SVM) ===")
print(result_df)

# (Optional) kernel x split matrix of test accuracy for a quick comparison.
# This view is best-effort, but failures are reported instead of being
# silently swallowed: pivot() raises ValueError when a (kernel, split) pair
# occurs more than once, and KeyError when an expected column is missing.
try:
    pivot_df = result_df.pivot(index="kernel", columns="split", values="test_acc")
except (KeyError, ValueError) as err:
    print(f"Tabel pivot tidak dapat dibuat: {err}")
else:
    try:
        # display() is injected by IPython/Jupyter and renders a rich table.
        display(pivot_df)
    except NameError:
        # Plain Python interpreter: fall back to the text representation.
        print(pivot_df)


=== RINGKASAN HASIL KLASIFIKASI VOICE (SVM) ===
   split  kernel  train_acc  test_acc
0  70:30  linear   0.977898  0.969506
1  70:30    poly   0.967975  0.949527
2  70:30     rbf   0.984664  0.981073
3  80:20  linear   0.977901  0.968454
4  80:20    poly   0.968035  0.952681
5  80:20     rbf   0.986188  0.979495


split,70:30,80:20
kernel,Unnamed: 1_level_1,Unnamed: 2_level_1
linear,0.969506,0.968454
poly,0.949527,0.952681
rbf,0.981073,0.979495
