<a href="https://colab.research.google.com/github/windyrahayu45/ML-PadangPanjang/blob/main/Deteksi_Anomali_Data_Penduduk_(Fraud_Bansos).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder, StandardScaler

# Flag suspicious household records with an Isolation Forest and save the
# labelled dataset.

# 1. Load the dataset produced by the scoring + segmentation step.
df = pd.read_csv("dtsen_with_segments.csv")

# 2. Encode the categorical variables as numeric codes.
#    OrdinalEncoder is scikit-learn's encoder for feature columns
#    (LabelEncoder is documented for target labels only). It sorts each
#    column's categories independently, just like the per-column LabelEncoder
#    loop did, so the resulting codes are the same.
cat_cols = ["kepemilikan_lahan", "kepemilikan_kendaraan",
            "kepemilikan_tabungan", "penerima_bansos"]
df_enc = df.copy()
df_enc[cat_cols] = OrdinalEncoder().fit_transform(df_enc[cat_cols].astype(str))

# 3. Select the features used for anomaly detection: the two monetary
#    columns followed by every encoded categorical column.
features = ["pendapatan_per_bulan", "pengeluaran_per_bulan", *cat_cols]

# 4. Standardise the features (zero mean, unit variance per column).
scaler = StandardScaler()
X_scaled = scaler.fit_transform(df_enc[features])

# 5. Fit the Isolation Forest. contamination=0.05 sets the expected share of
#    anomalous rows; random_state pins the result for reproducibility.
iso = IsolationForest(contamination=0.05, random_state=42)
df["anomaly"] = iso.fit_predict(X_scaled)

# fit_predict returns -1 for anomalies and 1 for normal rows; add a readable
# label next to the raw value.
df["anomaly_label"] = df["anomaly"].map({1: "Normal", -1: "Anomali"})

# 6. Save the result (original columns plus the two anomaly columns).
df.to_csv("dtsen_with_anomalies.csv", index=False)
print("✅ Dataset dengan deteksi anomali disimpan: dtsen_with_anomalies.csv")

# 7. Show the first 10 suspicious records with their identifying columns.
preview_cols = [
    "nik_kepala_keluarga",
    "nama_kepala_keluarga",
    "pendapatan_per_bulan",
    "penerima_bansos",
    "anomaly_label",
]
print(df.loc[df["anomaly_label"] == "Anomali", preview_cols].head(10))


✅ Dataset dengan deteksi anomali disimpan: dtsen_with_anomalies.csv
     nik_kepala_keluarga nama_kepala_keluarga  pendapatan_per_bulan  \
2       3201000000000002         Dewi Hidayat               5868707   
5       3201000000000005          Budi Wijaya                560321   
10      3201000000000010          Ayu Saputra                707923   
27      3201000000000027         Fitri Wijaya               5612787   
28      3201000000000028         Tono Hidayat                684042   
53      3201000000000053         Agus Hidayat               5585681   
54      3201000000000054          Siti Wijaya               5303067   
68      3201000000000068         Rina Saputra                748518   
78      3201000000000078         Siti Saputra               5815254   
111     3201000000000111         Budi Saputra                536386   

    penerima_bansos anomaly_label  
2             Tidak       Anomali  
5                Ya       Anomali  
10            Tidak       Anomali  
27    