In [None]:
import pandas as pd
import numpy as np
from scipy.stats import pearsonr

In [None]:
# 1. LOAD DATA
# Read the cleaned teacher and student tables and give each score column a
# source-specific name so the two cannot be confused once they are combined.
df_guru = pd.read_csv("files/data_guru_bersih.csv").rename(
    columns={"Mean_LD": "Mean_LD_Guru"}
)
df_siswa = pd.read_csv("files/data_siswa_bersih.csv").rename(
    columns={"Mean_CT": "Mean_CT_Siswa"}
)

# 2. CARI SEKOLAH YANG SAMA
def key_school(name):
    """Build a loose matching key from a school name.

    The name is converted to a string, lowercased, split on whitespace, and
    only its first three words are kept, joined by single spaces.

    Parameters
    ----------
    name : object
        Raw school name; anything `pd.isna` treats as missing is accepted.

    Returns
    -------
    str
        The key, or an empty string when `name` is missing.
    """
    if pd.isna(name):
        return ""
    # split() with no argument already discards leading/trailing whitespace
    words = str(name).lower().split()
    return " ".join(words[:3])

# Attach the matching key to every teacher row and every student row.
df_guru["sekolah_key"] = df_guru["Asal Instansi"].apply(key_school)
df_siswa["sekolah_key"] = df_siswa["SekolahNama"].apply(key_school)

# Schools present in both files. key_school maps a missing name to "", so the
# empty key is removed: otherwise nameless rows from both files would be
# pooled together and analysed as if they were one shared school.
sekolah_sama = sorted(
    (set(df_guru["sekolah_key"]) & set(df_siswa["sekolah_key"])) - {""}
)

print("Sekolah yang ada di kedua file:")
sekolah_sama

In [None]:
# 3. LOOP OVER EVERY SHARED SCHOOL
# For each school key present in both files: average the teacher score
# (Mean_LD_Guru) and the student score (Mean_CT_Siswa) per level, align the two
# tables on the level label, and run a Pearson correlation on the level means.
# One summary dict per school is appended to `hasil`.
hasil = []

# |r| cut-offs and labels for the strength part of the interpretation
# (a five-band convention; adjust here if the study uses different bands).
batas_kekuatan = [0.2, 0.4, 0.6, 0.8]
label_kekuatan = ["Sangat lemah", "Lemah", "Sedang", "Kuat", "Sangat kuat"]

for target in sekolah_sama:
    print("\n==============================")
    print(f"SEKOLAH: {target}")
    print("==============================")

    # Rows belonging to this school
    guru_filt = df_guru[df_guru["sekolah_key"] == target]
    siswa_filt = df_siswa[df_siswa["sekolah_key"] == target]

    print("Jumlah guru  :", len(guru_filt))
    print("Jumlah siswa :", len(siswa_filt))

    if len(guru_filt) == 0 or len(siswa_filt) == 0:
        print(">>> SKIP (data kurang)")
        hasil.append({
            "Sekolah": target,
            "r": None,
            "p": None,
            "Interpretasi": "Tidak bisa dihitung (data kurang)"
        })
        continue

    # 4. MEAN PER LEVEL
    # Group on the normalised label (trimmed, lowercased) so that variants such
    # as "Tinggi" / "tinggi " fall into one group instead of producing
    # duplicate merge keys later on.
    guru_per_level = (
        guru_filt.groupby(guru_filt["Level_LD"].str.strip().str.lower())["Mean_LD_Guru"]
        .agg(["count", "mean"])
        .rename(columns={"count": "jumlah_guru", "mean": "mean_LD"})
        .reset_index()
    )

    siswa_per_level = (
        siswa_filt.groupby(siswa_filt["Level_CT"].str.strip().str.lower())["Mean_CT_Siswa"]
        .agg(["count", "mean"])
        .rename(columns={"count": "jumlah_siswa", "mean": "mean_CT"})
        .reset_index()
    )

    print("\n=== Rata-rata LD Guru per Level ===")
    print(guru_per_level)

    print("\n=== Rata-rata CT Siswa per Level ===")
    print(siswa_per_level)

    # 5. MERGE ON LEVEL (labels are already normalised by the groupby above)
    guru_per_level["Level"] = guru_per_level["Level_LD"]
    siswa_per_level["Level"] = siswa_per_level["Level_CT"]

    df_merge = pd.merge(
        guru_per_level[["Level", "mean_LD"]],
        siswa_per_level[["Level", "mean_CT"]],
        on="Level",
        how="inner"
    )

    print("\n=== DATA UNTUK KORELASI PER LEVEL ===")
    print(df_merge)

    # 6. CORRELATION (needs at least three matched levels)
    if len(df_merge) >= 3:
        r, p = pearsonr(df_merge["mean_LD"], df_merge["mean_CT"])

        print("\n=== HASIL KORELASI PER LEVEL ===")
        print("r :", f"{r:.3f}")
        print("p :", f"{p:.3f}")

        if pd.isna(r) or pd.isna(p):
            # Happens when one side is constant or contains a missing mean;
            # there is no coefficient to interpret.
            hasil.append({
                "Sekolah": target,
                "r": None,
                "p": None,
                "Interpretasi": "Tidak bisa dihitung (r tidak terdefinisi)"
            })
        else:
            # Strength comes from |r|: count how many cut-offs it reaches.
            kekuatan = label_kekuatan[sum(abs(r) >= batas for batas in batas_kekuatan)]
            if p < 0.05:
                interpret = f"{kekuatan} dan signifikan"
            else:
                interpret = f"{kekuatan} tapi tidak signifikan"

            # Store the unrounded values: rounding p before storage could move
            # it across the 0.05 threshold and contradict the label above.
            hasil.append({
                "Sekolah": target,
                "r": float(r),
                "p": float(p),
                "Interpretasi": interpret
            })

    else:
        print("\n=== HASIL KORELASI PER LEVEL ===")
        print("r : None")
        print("p : None")

        hasil.append({
            "Sekolah": target,
            "r": None,
            "p": None,
            "Interpretasi": "Tidak bisa dihitung (level kurang)"
        })

In [None]:
# 4. SUMMARY TABLE
# One row per school. Rounding is applied only to the printed copy so the
# table reads at three decimals whatever precision is stored in `hasil`;
# `df_output` itself keeps the stored values for later filtering.
df_output = pd.DataFrame(hasil)
print("\n=== TABEL RINGKAS KORELASI PER SEKOLAH ===")
print(df_output.round(3).to_string(index=False))


In [None]:
# -----------------------------
# DROP NON-SIGNIFICANT SCHOOLS
# -----------------------------
# Coerce p to numeric before comparing: when no school produced a p-value the
# column holds only None (object dtype) and `< 0.05` would raise TypeError.
# An empty summary table has no "p" column at all, so guard that case too.
if "p" in df_output.columns:
    p_numeric = pd.to_numeric(df_output["p"], errors="coerce")
    df_signif = df_output[p_numeric < 0.05]
else:
    df_signif = df_output.iloc[0:0]

print("\n=== SEKOLAH SIGNIFIKAN (Dipakai Lanjut) ===")
print(df_signif)

if df_signif.empty:
    print("\nTidak ada sekolah signifikan → korelasi per level TIDAK BISA dihitung.")
else:
    sekolah_valid = df_signif["Sekolah"].tolist()
    df_guru_valid = df_guru[df_guru["sekolah_key"].isin(sekolah_valid)]
    df_siswa_valid = df_siswa[df_siswa["sekolah_key"].isin(sekolah_valid)]

    # -----------------------------
    # C. PER-LEVEL CORRELATION after dropping non-significant schools
    # -----------------------------
    # Group on the normalised label (trimmed, lowercased) so case/whitespace
    # variants of one level are pooled rather than yielding duplicate keys.
    guru_level_all = (
        df_guru_valid.groupby(df_guru_valid["Level_LD"].str.strip().str.lower())["Mean_LD_Guru"]
        .mean()
        .reset_index(name="mean_LD")
    )
    siswa_level_all = (
        df_siswa_valid.groupby(df_siswa_valid["Level_CT"].str.strip().str.lower())["Mean_CT_Siswa"]
        .mean()
        .reset_index(name="mean_CT")
    )

    guru_level_all["Level"] = guru_level_all["Level_LD"]
    siswa_level_all["Level"] = siswa_level_all["Level_CT"]

    df_merge_level = pd.merge(
        guru_level_all[["Level", "mean_LD"]],
        siswa_level_all[["Level", "mean_CT"]],
        on="Level",
        how="inner"
    )

    print("\n=== DATA LEVEL SETELAH SEKOLAH NON-SIGNIF DIHILANGKAN ===")
    print(df_merge_level)

    # Final correlation (needs at least three matched levels)
    if len(df_merge_level) >= 3:
        r_final, p_final = pearsonr(df_merge_level["mean_LD"], df_merge_level["mean_CT"])
        print("\n=== KORELASI FINAL PER LEVEL ===")
        print("r =", r_final)
        print("p =", p_final)
    else:
        print("\n=== KORELASI FINAL PER LEVEL ===")
        print("Tidak bisa dihitung (level kurang).")


In [None]:
# USE ONLY THE "SEDANG" AND "TINGGI" LEVELS
# Target schools (given as sekolah_key values) and the levels kept for this run
target_sekolah = ["sd kristen banjarsari", "sd kristen manahan", "smp kristen 1", "smk kristen 1"]
level_allowed = ["sedang", "tinggi"]

# NOTE(review): with two allowed levels the merged table has at most two rows.
# Per the scipy documentation a Pearson correlation on two points is always
# +1 or -1 with p = 1, so these results carry no inferential weight — confirm
# this is intended before reporting them.

# |r| cut-offs and labels for the strength part of the interpretation
# (a five-band convention; adjust here if the study uses different bands).
batas_kekuatan = [0.2, 0.4, 0.6, 0.8]
label_kekuatan = ["Sangat lemah", "Lemah", "Sedang", "Kuat", "Sangat kuat"]

hasil = []

for sekolah in target_sekolah:
    print("\n==============================")
    print(f"SEKOLAH: {sekolah}")
    print("==============================")

    # Teacher and student rows for this school
    guru_filt = df_guru[df_guru["sekolah_key"] == sekolah]
    siswa_filt = df_siswa[df_siswa["sekolah_key"] == sekolah]

    # Keep only the allowed levels, comparing on the normalised label
    guru_filt = guru_filt[guru_filt["Level_LD"].str.strip().str.lower().isin(level_allowed)]
    siswa_filt = siswa_filt[siswa_filt["Level_CT"].str.strip().str.lower().isin(level_allowed)]

    # Mean per level. Group on the same normalised label the filter used, so a
    # row that passed the filter as "Sedang " still matches "sedang" in the
    # merge below instead of being dropped by the inner join.
    guru_per_level = (
        guru_filt.groupby(guru_filt["Level_LD"].str.strip().str.lower())["Mean_LD_Guru"]
        .mean()
        .reset_index()
    )
    siswa_per_level = (
        siswa_filt.groupby(siswa_filt["Level_CT"].str.strip().str.lower())["Mean_CT_Siswa"]
        .mean()
        .reset_index()
    )

    # Align teacher and student means on the level label
    df_merge = pd.merge(
        guru_per_level.rename(columns={"Level_LD": "Level", "Mean_LD_Guru": "mean_LD"}),
        siswa_per_level.rename(columns={"Level_CT": "Level", "Mean_CT_Siswa": "mean_CT"}),
        on="Level",
        how="inner"
    )

    print("\n=== Data untuk Korelasi (level sedang & tinggi) ===")
    print(df_merge)

    # Pearson correlation (at least two matched levels)
    r_val, p_val, interpret = None, None, "Tidak bisa dihitung (level kurang)"
    if len(df_merge) >= 2:
        r, p = pearsonr(df_merge["mean_LD"], df_merge["mean_CT"])
        if pd.isna(r) or pd.isna(p):
            # Constant or missing means: there is no coefficient to interpret
            interpret = "Tidak bisa dihitung (r tidak terdefinisi)"
        else:
            # Strength comes from |r|: count how many cut-offs it reaches
            kekuatan = label_kekuatan[sum(abs(r) >= batas for batas in batas_kekuatan)]
            if p < 0.05:
                interpret = f"{kekuatan} dan signifikan"
            else:
                interpret = f"{kekuatan} tapi tidak signifikan"
            r_val, p_val = round(r, 3), round(p, 3)

    hasil.append({
        "Sekolah": sekolah,
        "r": r_val,
        "p": p_val,
        "Interpretasi": interpret
    })

    print("\n=== Hasil Korelasi ===")
    print(f"r = {r_val}, p = {p_val} -> {interpret}")