In [None]:
import streamlit as st
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# =========================
# APPLICATION TITLE
# =========================
# Page heading followed by a short description of the app. The heading emoji
# is written as the real Unicode character: the previous literal was mojibake
# (the UTF-8 bytes of the emoji decoded with the wrong codec), which rendered
# as stray accented letters in the browser.
st.title("📊 Clustering Kelayakan Pendidikan Indonesia")
st.write(
    "Aplikasi ini mengelompokkan provinsi di Indonesia berdasarkan "
    "kondisi kelayakan pendidikan menggunakan metode K-Means Clustering."
)

# =========================
# LOAD DATA
# =========================
@st.cache_data
def load_data(
    path: str = "kelayakan-pendidikan-indonesia-cleaned.xlsx",
) -> pd.DataFrame:
    """Read the cleaned education dataset from an Excel workbook.

    The function is wrapped in ``st.cache_data`` so Streamlit can reuse the
    parsed table across script reruns instead of re-reading the file every
    time a widget changes.

    Args:
        path: Location of the workbook. A relative path is resolved by
            ``pandas.read_excel`` against the current working directory.
            Defaults to the file name the app has always used, so calling
            ``load_data()`` with no arguments behaves as before.

    Returns:
        The DataFrame produced by ``pandas.read_excel`` for ``path``.

    Raises:
        FileNotFoundError: If ``path`` does not exist (raised by pandas).
    """
    return pd.read_excel(path)

df = load_data()

# Show the full table so the user can inspect the data that is clustered.
# The emoji is the real Unicode character; the previous literal was mojibake.
st.subheader("📄 Dataset Setelah Data Cleaning")
st.dataframe(df)

# =========================
# SELECT FEATURES
# =========================
# Columns fed to the clustering model. The names must match the workbook
# headers exactly (note that "Putus Sekolah" contains a space).
features = [
    "Siswa",
    "Putus Sekolah",
    "Guru_Kepsek_S1_Keatas",
    "Ruang_Kelas_Baik",
]

# Fail with a readable message instead of a raw KeyError traceback when the
# workbook does not have the expected columns.
missing_columns = [name for name in features if name not in df.columns]
if missing_columns:
    st.error(
        "Kolom berikut tidak ditemukan pada dataset: "
        + ", ".join(missing_columns)
    )
    st.stop()

X = df[features]

# KMeans rejects NaN input, so stop here with an explanation rather than
# letting the fit further down raise a ValueError.
if X.isna().any().any():
    st.error(
        "Dataset masih memiliki nilai kosong pada kolom fitur. "
        "Lengkapi atau bersihkan data terlebih dahulu."
    )
    st.stop()

# =========================
# STANDARDISATION
# =========================
# Scale every feature to zero mean / unit variance so that columns with large
# magnitudes do not dominate the Euclidean distances used by K-Means.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# =========================
# CHOOSE NUMBER OF CLUSTERS
# =========================
# Sidebar control: k ranges from 2 to 5 and starts at 3. The gear emoji is
# the real Unicode character; the previous literal was mojibake.
st.sidebar.header("⚙️ Pengaturan Model")
k = st.sidebar.slider("Jumlah Cluster", 2, 5, 3)

# =========================
# TRAIN K-MEANS MODEL
# =========================
# n_init is pinned explicitly: scikit-learn changed its default from 10 to
# "auto" (and emitted a FutureWarning in between), so leaving it out makes the
# resulting clusters depend on the installed version. random_state keeps the
# centroid initialisation reproducible between reruns.
kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
df["Cluster"] = kmeans.fit_predict(X_scaled)

# =========================
# SHOW CLUSTER RESULTS
# =========================
# One row per province with the cluster label assigned by the model. The
# emoji literals in this section are the real Unicode characters; the
# previous ones were mojibake.
st.subheader("📌 Hasil Clustering")
st.dataframe(df[["Provinsi", "Cluster"]])

# =========================
# PER-CLUSTER SUMMARY
# =========================
# Mean of each (unscaled) feature within every cluster, which is what makes
# the cluster labels interpretable.
st.subheader("📊 Rata-rata Parameter per Cluster")
cluster_summary = df.groupby("Cluster")[features].mean()
st.dataframe(cluster_summary)

# =========================
# SIMPLE VISUALISATION
# =========================
# 2-D projection of the clusters on two of the four features. The emoji and
# the "≥" sign are real Unicode characters; the previous literals were
# mojibake.
st.subheader("📈 Visualisasi Cluster (2D)")

fig, ax = plt.subplots()
scatter = ax.scatter(
    df["Guru_Kepsek_S1_Keatas"],
    df["Putus Sekolah"],
    c=df["Cluster"],
)

ax.set_xlabel("Guru & Kepala Sekolah ≥ S1")
ax.set_ylabel("Putus Sekolah")
ax.set_title("Clustering Kelayakan Pendidikan")

# Map each colour back to its cluster label; without a legend the colours
# cannot be interpreted.
ax.legend(*scatter.legend_elements(), title="Cluster")

st.pyplot(fig)

# Streamlit reruns the whole script on every widget change. Figures created
# through pyplot stay registered until closed, so release this one once it
# has been rendered to avoid accumulating open figures.
plt.close(fig)

# =========================
# INTERPRETATION
# =========================
# Static guidance on how to read the clusters. The emoji is the real Unicode
# character; the previous literal was mojibake.
st.subheader("🧠 Interpretasi Cluster")
st.write(
    "Cluster terbentuk berdasarkan kemiripan pola dari jumlah siswa, "
    "tingkat putus sekolah, kualitas guru, dan kondisi ruang kelas. "
    "Cluster dengan putus sekolah tinggi dan kualitas guru rendah "
    "cenderung menunjukkan kelayakan pendidikan yang lebih rendah."
)


ModuleNotFoundError: No module named 'streamlit'