In [None]:
### Load data IMU CSV dan dibersihkan tahap 1###
import pandas as pd
import emoji

# Read the raw IMU recording. The source is an Excel workbook; only the
# cleaned output written at the end of this cell is a CSV file.
df = pd.read_excel('imu_data_baru.xlsx')

# 1. Show a general overview of the data.
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() would emit an extra "None" line.
df.info()
print(df.head())

# 2. Drop irrelevant columns (replace 'kolom_tidak_dipakai' as needed)
# df = df.drop(columns=['kolom_tidak_dipakai'])

# 3. Handle missing values
# Examples: fill with the median, drop the rows, or use another method
# df = df.dropna()  # to discard every row that contains a NaN
# df['kolom_angka'] = df['kolom_angka'].fillna(df['kolom_angka'].median())

# 4. Clean string data, e.g. strip whitespace or convert to lowercase
# df['nama_kolom'] = df['nama_kolom'].str.strip().str.lower()

# 5. Convert data types where needed
# df['tanggal'] = pd.to_datetime(df['tanggal'], errors='coerce')
# df['angka'] = pd.to_numeric(df['angka'], errors='coerce')

# 6. Remove duplicate rows (reassignment instead of inplace=True keeps the
#    step explicit and chainable)
df = df.drop_duplicates()

# 7. Save the cleaned data for the next stage
df.to_csv('data_imu_clean1.csv', index=False)
print("Data berhasil dibersihkan dan disimpan sebagai 'data_imu_clean1.csv'")


In [None]:
### Pembersihan data tahap 2 
import pandas as pd

# Load the stage-1 CSV; every column is forced to a numeric dtype below.
df = pd.read_csv("data_imu_clean1.csv")

# For every column: strip leading/trailing whitespace from string cells, then
# convert to numeric, turning anything unparseable into NaN.
# Series.map is used instead of DataFrame.applymap, which is deprecated since
# pandas 2.1; the per-cell behaviour is the same.
# NOTE(review): this also coerces the label column, so it presumably holds
# numeric class ids — rows with text labels would be dropped below. Confirm.
for col in df.columns:
    stripped = df[col].map(lambda x: x.strip() if isinstance(x, str) else x)
    df[col] = pd.to_numeric(stripped, errors='coerce')

# Drop rows that contain NaN (missing values or failed parsing)
df = df.dropna()
df.to_csv("data_imu_oke.csv", index=False)
print(" File bersih disimpan sebagai: data_imu_oke.csv")

print(" Data berhasil dibersihkan dari nilai yang tidak bisa dikonversi.")

In [None]:
### Visualisasi data IMU
import pandas as pd
import matplotlib.pyplot as plt

# Load the cleaned IMU samples
df = pd.read_csv("data_imu_oke.csv")

# Report which labels are present
print("Label yang ditemukan:", df['label'].unique())

# One figure per label: accelerometer on the top panel, gyroscope below.
# Each entry is (y-axis caption, ((column, legend text), ...)).
panel_specs = (
    ('Accelerometer (g)', (('ax', 'Ax'), ('ay', 'Ay'), ('az', 'Az'))),
    ('Gyroscope (°/s)', (('gx', 'Gx'), ('gy', 'Gy'), ('gz', 'Gz'))),
)

for label in sorted(df['label'].unique()):
    # Re-index so the x-axis is the sample position within this label
    subset = df.loc[df['label'] == label].reset_index(drop=True)

    fig, axs = plt.subplots(nrows=2, ncols=1, figsize=(14, 6), sharex=True)
    fig.suptitle(f'Sinyal IMU - Label {label}', fontsize=16)

    for panel, (y_caption, channels) in zip(axs, panel_specs):
        for column, legend_text in channels:
            panel.plot(subset[column], label=legend_text, alpha=0.7)
        panel.set_ylabel(y_caption)
        panel.legend()
        panel.grid(True)

    # Only the bottom panel carries the shared x-axis caption
    axs[1].set_xlabel('Index Sampel')

    plt.tight_layout()
    plt.show()


In [None]:
# Keep only the first 300 samples (adjust as appropriate).
# NOTE(review): `subset` is not defined in this cell; it appears to be the
# variable left over from the last iteration of the plotting loop — confirm.
subset = subset.head(300)


In [None]:
# Draw the three accelerometer axes on the first panel with fixed colours.
# NOTE(review): `axs` and `subset` are not defined in this cell; they appear to
# come from the plotting loop in an earlier cell — confirm.
for column_name, legend_text, line_color in (
    ('ax', 'Ax', 'red'),
    ('ay', 'Ay', 'green'),
    ('az', 'Az', 'blue'),
):
    axs[0].plot(subset[column_name], label=legend_text, color=line_color)

In [None]:
#### Ekstraksi Fitur per window IMU
import pandas as pd
import numpy as np
from collections import Counter

def _axis_window_features(signal, axis):
    """Return the eight summary statistics of one axis in one window, keyed '<axis>_<stat>'."""
    mean = signal.mean()
    peak = signal.max()
    trough = signal.min()
    squared = signal ** 2
    return {
        f'{axis}_mean': mean,
        f'{axis}_std': signal.std(),
        f'{axis}_max': peak,
        f'{axis}_min': trough,
        f'{axis}_range': peak - trough,
        f'{axis}_rms': np.sqrt(np.mean(squared)),
        f'{axis}_energy': np.sum(squared),
        f'{axis}_mad': np.mean(np.abs(signal - mean)),
    }


def extract_features_from_imu(
    filepath,
    fs=50,
    window_duration=2,
    stride_ratio=1.0,
    axis_cols=('ax', 'ay', 'az', 'gx', 'gy', 'gz'),
    label_col='label'
):
    """Slice an IMU CSV into fixed-size windows and compute per-axis statistics.

    Parameters
    ----------
    filepath : path of the CSV file to read.
    fs : multiplied by ``window_duration`` to get the window length in rows.
    window_duration : see ``fs``; ``int(fs * window_duration)`` rows per window.
    stride_ratio : step between window starts as a fraction of the window
        length (1.0 = no overlap, 0.5 = 50% overlap).
    axis_cols : signal columns to summarise.
    label_col : column holding the per-row label.

    Returns
    -------
    pandas.DataFrame with one row per window: for every axis the columns
    ``<axis>_mean/_std/_max/_min/_range/_rms/_energy/_mad`` followed by a
    ``label`` column holding the most frequent label in the window. When the
    data is shorter than one window the frame is empty but still has these
    columns.

    Raises
    ------
    ValueError : if the window length is below one row or ``stride_ratio``
        is not positive.
    """
    window_size = int(fs * window_duration)
    if window_size < 1:
        raise ValueError(
            f"fs * window_duration must give at least one row per window, got {window_size}"
        )
    if stride_ratio <= 0:
        raise ValueError(f"stride_ratio must be positive, got {stride_ratio}")
    # A small ratio can truncate to 0, which range() rejects; step by at
    # least one row instead.
    stride = max(1, int(window_size * stride_ratio))

    axis_cols = list(axis_cols)
    needed_cols = axis_cols + [label_col]

    df = pd.read_csv(filepath)
    df = df.dropna()

    # Coerce to numeric and discard rows that fail to parse
    for col in needed_cols:
        df[col] = pd.to_numeric(df[col], errors='coerce')
    df = df.dropna(subset=needed_cols)

    features = []
    for start in range(0, len(df) - window_size + 1, stride):
        window = df.iloc[start:start + window_size]
        row = {}
        for axis in axis_cols:
            row.update(_axis_window_features(window[axis], axis))

        # Majority label of the window (ties go to the label seen first)
        row['label'] = Counter(window[label_col]).most_common(1)[0][0]
        features.append(row)

    # Passing the column list keeps the schema stable even with zero windows,
    # so callers can still select or drop 'label'.
    stat_names = ('mean', 'std', 'max', 'min', 'range', 'rms', 'energy', 'mad')
    columns = [f'{axis}_{stat}' for axis in axis_cols for stat in stat_names]
    columns.append('label')
    return pd.DataFrame(features, columns=columns)


In [None]:
### Jalankan fungsi Ekstraksi Fitur per Window
# Build the per-window feature table: 50 * 2 = 100-row windows, each starting
# half a window after the previous one (50% overlap).
features_df = extract_features_from_imu(
    'data_imu_oke.csv', fs=50, window_duration=2, stride_ratio=0.5
)

print(features_df.head())

In [None]:
### Proses SVM _ RBF_ ConMatrix
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# Separate features and labels
X = features_df.drop(columns=['label'])
y = features_df['label']

# Features standardised on the whole dataset. Kept only because later cells
# read `X_scaled`; it is NOT used for the hold-out evaluation below, since
# statistics fitted on all rows would leak test-set information.
X_scaled = StandardScaler().fit_transform(X)

# Split the raw features first (no stratify, in case label counts are uneven)
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Standardise using training-split statistics only, then apply to the test split
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Train the SVM with an RBF kernel
model = SVC(kernel='rbf', C=1.0, gamma='scale')
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Evaluate
print(" Classification Report:")
print(classification_report(y_test, y_pred, zero_division=0))

# Confusion matrix. The label list is the sorted union of true and predicted
# classes (the same set confusion_matrix uses by default); building it
# explicitly lets the heatmap ticks show class labels instead of positions.
class_labels = sorted(set(y_test) | set(y_pred))
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
plt.figure(figsize=(6, 4))
sns.heatmap(
    cm, annot=True, fmt='d', cmap='Blues',
    xticklabels=class_labels, yticklabels=class_labels
)
plt.title("Confusion Matrix - SVM RBF")
plt.xlabel("Prediksi")
plt.ylabel("Label Sebenarnya")
plt.tight_layout()
plt.show()


In [None]:
import time
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score, StratifiedKFold, LeaveOneOut
from sklearn.metrics import make_scorer, accuracy_score, f1_score

def run_cv_pipeline(X, y, model_desc="SVM RBF", C=1.0, gamma='scale', cv_strategy=None):
    """Cross-validate an RBF-kernel SVC on (X, y) and print a short summary.

    When ``cv_strategy`` is None, Leave-One-Out is used for fewer than 10
    samples and a shuffled 5-fold StratifiedKFold (random_state=42) otherwise.
    Accuracy and macro F1 are collected per fold.

    Returns the dictionary produced by ``cross_validate``.
    """
    from sklearn.model_selection import cross_validate

    # Resolve the splitter and a human-readable name for the report
    if cv_strategy is not None:
        cv_name = str(cv_strategy)
    elif len(y) < 10:
        cv_strategy, cv_name = LeaveOneOut(), "Leave-One-Out"
    else:
        cv_strategy = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
        cv_name = "Stratified 5-Fold"

    scoring = dict(
        accuracy=make_scorer(accuracy_score),
        f1_macro=make_scorer(f1_score, average='macro'),
    )
    clf = SVC(kernel='rbf', C=C, gamma=gamma)

    # Wall-clock time of the whole cross-validation run
    start = time.time()
    scores = cross_validate(
        clf, X, y,
        cv=cv_strategy,
        scoring=scoring,
        return_train_score=False,
    )
    end = time.time()

    summary_lines = (
        f" Model: {model_desc}",
        f" Evaluasi: {cv_name}",
        f" Akurasi Rerata: {scores['test_accuracy'].mean():.4f} ± {scores['test_accuracy'].std():.4f}",
        f" F1-score Macro: {scores['test_f1_macro'].mean():.4f}",
        f" Durasi Evaluasi: {end - start:.2f} detik",
    )
    for line in summary_lines:
        print(line)

    return scores


In [None]:
# Cross-validate on the full scaled feature matrix derived from features_df
X_all = X_scaled
scores_all = run_cv_pipeline(X=X_all, y=y, model_desc="SVM Semua Fitur")


In [None]:
# 1. Overall size: (rows, columns)
print(" Ukuran dataset:", features_df.shape)

# 2. Number of windows per label
print(" Distribusi label:", features_df['label'].value_counts(), sep="\n")

# 3. Total count of missing values across the whole table
print(" Jumlah nilai kosong:", features_df.isna().sum().sum())

# 4. Number of feature columns (everything except the label)
print(" Jumlah fitur:", len(features_df.drop(columns=['label']).columns))


In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
import numpy as np

def fitness_function(params, X, y, cv=3):
    """Cost function for the swarm: one value per (C, gamma) row of ``params``.

    Each cost is the negated mean ``cross_val_score`` of an RBF SVC built with
    that row's C and gamma, because the optimizer minimises.
    Returns the costs as a NumPy array in the order of ``params``.
    """
    from sklearn.model_selection import cross_val_score
    from sklearn.svm import SVC

    def _negated_cv_score(C, gamma):
        candidate = SVC(C=C, gamma=gamma, kernel='rbf')
        return -cross_val_score(candidate, X, y, cv=cv).mean()

    return np.array([_negated_cv_score(C, gamma) for C, gamma in params])


def run_pso(X, y, n_particles=20, n_iter=50, cv=3):
    """Search C and gamma for an RBF SVC with a global-best particle swarm.

    The swarm minimises ``fitness_function`` over two dimensions (C, gamma)
    for ``n_iter`` iterations. Prints and returns the best (C, gamma) found.
    """
    import pyswarms as ps

    # Search box, each list ordered as [C, gamma]
    lower_limits = [1e-3, 1e-5]
    upper_limits = [100, 10]
    swarm_options = {'c1': 0.5, 'c2': 0.3, 'w': 0.9}

    optimizer = ps.single.GlobalBestPSO(
        n_particles=n_particles,
        dimensions=2,
        options=swarm_options,
        bounds=(lower_limits, upper_limits),
    )

    # The best cost is returned too but only the position is needed here
    _, best_pos = optimizer.optimize(fitness_function, n_iter, X=X, y=y, cv=cv)

    C_opt, gamma_opt = best_pos
    print(f" Optimal C: {C_opt:.4f}, gamma: {gamma_opt:.6f}")
    return C_opt, gamma_opt


In [None]:
# Rebuild the per-window feature table with 100-row windows and 50% overlap
extraction_settings = dict(fs=50, window_duration=2, stride_ratio=0.5)
features_df = extract_features_from_imu(filepath='data_imu_oke.csv', **extraction_settings)

print(features_df.head())

In [None]:
### Mulai PSO
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

def fitness_function(params, X, y, cv=3):
    """Return one cost per (C, gamma) row of ``params`` for the swarm optimizer.

    The cost is the negated mean ``cross_val_score`` of an RBF SVC using that
    row's C and gamma; negation is needed because the optimizer minimises.
    """
    costs = [
        -cross_val_score(SVC(C=C, gamma=gamma, kernel='rbf'), X, y, cv=cv).mean()
        for C, gamma in params
    ]
    return np.array(costs)

def run_svm_pso_pipeline(features_df, n_particles=20, n_iter=50, cv=3, save_csv=False, csv_path="svm_pso_log.csv"):
    """Tune an RBF SVC with particle swarm optimisation, then evaluate it with stratified CV.

    Parameters
    ----------
    features_df : DataFrame with feature columns plus a 'label' column.
    n_particles, n_iter : swarm size and number of PSO iterations.
    cv : number of folds, used both inside the PSO fitness function and for
        the final shuffled StratifiedKFold evaluation.
    save_csv, csv_path : when ``save_csv`` is true, per-fold accuracies are
        written to ``csv_path``.

    Returns
    -------
    dict with keys "C", "gamma", "scores" (per-fold accuracy), "reports"
    (per-fold classification_report dicts) and "conf_matrices" (per-fold
    confusion matrices, all over the same label set).

    NOTE(review): the hyperparameters are tuned on the same data that is then
    evaluated, so the reported accuracy is likely optimistic; a held-out set or
    nested CV would be needed for an unbiased estimate.
    """
    import pyswarms as ps

    # Step 1: Prepare features and labels
    X = features_df.drop(columns=['label']).values
    y = features_df['label'].values
    # Fixed label order so every fold's confusion matrix has the same shape
    class_labels = np.unique(y)

    # The PSO fitness function builds a bare SVC, so it still receives features
    # scaled on the full dataset. The final evaluation below re-scales per fold.
    X_scaled = StandardScaler().fit_transform(X)

    # Step 2: PSO optimization
    bounds = ([1e-3, 1e-5], [100, 10])  # lower, upper for C and gamma
    optimizer = ps.single.GlobalBestPSO(
        n_particles=n_particles,
        dimensions=2,
        options={'c1': 0.5, 'c2': 0.3, 'w': 0.9},
        bounds=bounds
    )
    print(" Menjalankan PSO untuk optimasi hyperparameter SVM...")
    best_cost, best_pos = optimizer.optimize(
        fitness_function, n_iter, X=X_scaled, y=y, cv=cv
    )
    C_opt, gamma_opt = best_pos
    print(f" Optimal C: {C_opt:.4f}, gamma: {gamma_opt:.6f}")

    # Step 3: Evaluate model
    skf = StratifiedKFold(n_splits=cv, shuffle=True, random_state=42)

    all_scores = []
    all_reports = []
    all_conf_matrices = []

    for fold, (train_idx, test_idx) in enumerate(skf.split(X, y), 1):
        y_train_fold, y_test_fold = y[train_idx], y[test_idx]

        # Fit the scaler on the training part only, so the test part of the
        # fold does not influence the standardisation statistics.
        fold_scaler = StandardScaler().fit(X[train_idx])
        X_train_fold = fold_scaler.transform(X[train_idx])
        X_test_fold = fold_scaler.transform(X[test_idx])

        clf = SVC(kernel='rbf', C=C_opt, gamma=gamma_opt)
        clf.fit(X_train_fold, y_train_fold)
        y_pred = clf.predict(X_test_fold)

        # Accuracy from the predictions already computed (same value as
        # clf.score, without predicting a second time)
        score = float(np.mean(y_pred == y_test_fold))
        report = classification_report(y_test_fold, y_pred, zero_division=0, output_dict=True)
        cm = confusion_matrix(y_test_fold, y_pred, labels=class_labels)

        all_scores.append(score)
        all_reports.append(report)
        all_conf_matrices.append(cm)

        print(f"\n Fold {fold} Report:")
        print(classification_report(y_test_fold, y_pred, zero_division=0))

        # Confusion Matrix plot, with class labels on both axes
        plt.figure(figsize=(5,4))
        sns.heatmap(
            cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_labels, yticklabels=class_labels
        )
        plt.title(f"Confusion Matrix - Fold {fold}")
        plt.xlabel("Prediksi")
        plt.ylabel("Label Sebenarnya")
        plt.tight_layout()
        plt.show()

    # Logging
    result_df = pd.DataFrame({
        "Fold": list(range(1, len(all_scores) + 1)),
        "Accuracy": all_scores
    })
    print(f"\n Akurasi Rata-rata CV: {np.mean(all_scores):.4f} ± {np.std(all_scores):.4f}")

    if save_csv:
        result_df.to_csv(csv_path, index=False)
        print(f"Log hasil disimpan ke: {csv_path}")

    return {
        "C": C_opt,
        "gamma": gamma_opt,
        "scores": all_scores,
        "reports": all_reports,
        "conf_matrices": all_conf_matrices
    }


In [None]:
# Run the PSO-tuned SVM pipeline on the feature table and save per-fold accuracies
result = run_svm_pso_pipeline(
    features_df,
    n_particles=20,
    n_iter=50,
    cv=3,
    save_csv=True,
)
