In [3]:
import numpy as np
import pandas as pd
from sklearn.metrics import (
    silhouette_score, 
    davies_bouldin_score, 
    calinski_harabasz_score
)
from sklearn.preprocessing import StandardScaler
from sklearn.utils import resample
from scipy.spatial.distance import cdist
from scipy import stats
import warnings
warnings.filterwarnings('ignore')

In [2]:
# ============================================================================
# LOAD DATA
# ============================================================================
OUTPUT_DIR = '/home/kali/AI/'

df_features = pd.read_csv(OUTPUT_DIR + 'fcm_features_raw.csv', index_col=0)
cluster_labels = np.load(OUTPUT_DIR + 'fcm_cluster_labels_c2.npy')
membership_matrix = np.load(OUTPUT_DIR + 'fcm_membership_c2.npy')
cluster_centers = np.load(OUTPUT_DIR + 'fcm_cluster_centers_c2.npy')

In [4]:
scaler = StandardScaler()
X_normalized = scaler.fit_transform(df_features.values)

provinces = df_features.index.tolist()

print(f"\n✓ Loaded: {len(provinces)} provinces, {X_normalized.shape[1]} features")
print(f"✓ Cluster distribution: C0={np.sum(cluster_labels==0)}, C1={np.sum(cluster_labels==1)}")



✓ Loaded: 34 provinces, 50 features
✓ Cluster distribution: C0=12, C1=22


In [5]:
print("\n" + "="*80)
print("EVALUATION 1: INTERNAL VALIDATION METRICS")
print("="*80)

# 1.1 Silhouette Score (-1 to 1, higher better)
sil_score = silhouette_score(X_normalized, cluster_labels)
print(f"\n✓ Silhouette Score: {sil_score:.4f}")
print(f"  Interpretation: ", end='')
if sil_score > 0.5:
    print("Excellent separation")
elif sil_score > 0.25:
    print("Good separation (reasonable structure)")
elif sil_score > 0:
    print("Weak structure (overlapping clusters)")
else:
    print("Poor clustering (misclassified points)")

# 1.2 Davies-Bouldin Index (0 to ∞, lower better)
dbi_score = davies_bouldin_score(X_normalized, cluster_labels)
print(f"\n✓ Davies-Bouldin Index: {dbi_score:.4f}")
print(f"  Interpretation: ", end='')
if dbi_score < 1.0:
    print("Excellent (well-separated clusters)")
elif dbi_score < 2.0:
    print("Good (acceptable separation)")
else:
    print("Poor (high overlap between clusters)")

# 1.3 Calinski-Harabasz Score (variance ratio, higher better)
ch_score = calinski_harabasz_score(X_normalized, cluster_labels)
print(f"\n✓ Calinski-Harabasz Score: {ch_score:.4f}")
print(f"  Interpretation: Higher = better defined clusters")

# 1.4 Partition Coefficient (FCM-specific, 0 to 1, higher better)
u = membership_matrix
fpc = np.sum(u ** 2) / X_normalized.shape[0]
print(f"\n✓ Fuzzy Partition Coefficient (FPC): {fpc:.4f}")
print(f"  Interpretation: ", end='')
if fpc > 0.7:
    print("High crisp (low fuzziness)")
elif fpc > 0.5:
    print("Moderate fuzziness (acceptable)")
else:
    print("High fuzziness (overlapping clusters)")

# 1.5 Partition Entropy (FCM-specific, 0 to log(c), lower better)
pe = -np.sum(u * np.log(u + 1e-10)) / X_normalized.shape[0]
print(f"\n✓ Partition Entropy (PE): {pe:.4f}")
print(f"  Max entropy (c=2): {np.log(2):.4f}")
print(f"  Interpretation: ", end='')
if pe < np.log(2) * 0.5:
    print("Low entropy (crisp assignments)")
elif pe < np.log(2) * 0.7:
    print("Moderate entropy (acceptable)")
else:
    print("High entropy (fuzzy assignments)")




EVALUATION 1: INTERNAL VALIDATION METRICS

✓ Silhouette Score: 0.2445
  Interpretation: Weak structure (overlapping clusters)

✓ Davies-Bouldin Index: 1.6812
  Interpretation: Good (acceptable separation)

✓ Calinski-Harabasz Score: 10.6152
  Interpretation: Higher = better defined clusters

✓ Fuzzy Partition Coefficient (FPC): 0.5608
  Interpretation: Moderate fuzziness (acceptable)

✓ Partition Entropy (PE): 0.6297
  Max entropy (c=2): 0.6931
  Interpretation: High entropy (fuzzy assignments)


In [6]:
# EVALUATION 2: CLUSTER COMPACTNESS & SEPARATION
# ============================================================================
print("\n" + "="*80)
print("EVALUATION 2: CLUSTER COMPACTNESS & SEPARATION")
print("="*80)

# 2.1 Within-cluster sum of squares (WCSS)
wcss_total = 0
for c_id in [0, 1]:
    cluster_points = X_normalized[cluster_labels == c_id]
    center = cluster_centers[c_id]
    wcss = np.sum((cluster_points - center) ** 2)
    wcss_total += wcss
    print(f"\n✓ Cluster {c_id} WCSS: {wcss:.2f}")
    print(f"  Average distance to center: {np.sqrt(wcss / len(cluster_points)):.4f}")

print(f"\n✓ Total WCSS: {wcss_total:.2f}")

# 2.2 Between-cluster separation (distance between centers)
center_distance = np.linalg.norm(cluster_centers[0] - cluster_centers[1])
print(f"\n✓ Distance between cluster centers: {center_distance:.4f}")
print(f"  Interpretation: Larger = better separation")

# 2.3 Dunn Index (min inter-cluster / max intra-cluster, higher better)
# Simplified version
def dunn_index(X, labels, centers):
    # Min inter-cluster distance
    inter_dist = np.linalg.norm(centers[0] - centers[1])
    
    # Max intra-cluster distance
    max_intra = 0
    for c_id in [0, 1]:
        cluster_points = X[labels == c_id]
        if len(cluster_points) > 1:
            dists = cdist(cluster_points, cluster_points)
            max_intra = max(max_intra, dists.max())
    
    return inter_dist / max_intra if max_intra > 0 else 0

dunn = dunn_index(X_normalized, cluster_labels, cluster_centers)
print(f"\n✓ Dunn Index: {dunn:.4f}")
print(f"  Interpretation: ", end='')
if dunn > 0.1:
    print("Good separation (inter >> intra)")
else:
    print("Moderate separation")




EVALUATION 2: CLUSTER COMPACTNESS & SEPARATION

✓ Cluster 0 WCSS: 775.55
  Average distance to center: 8.0392

✓ Cluster 1 WCSS: 583.26
  Average distance to center: 5.1490

✓ Total WCSS: 1358.81

✓ Distance between cluster centers: 4.3700
  Interpretation: Larger = better separation

✓ Dunn Index: 0.2530
  Interpretation: Good separation (inter >> intra)


In [7]:
# ============================================================================
# EVALUATION 3: MEMBERSHIP QUALITY ANALYSIS
# ============================================================================
print("\n" + "="*80)
print("EVALUATION 3: FUZZY MEMBERSHIP QUALITY")
print("="*80)

# 3.1 Crisp vs Fuzzy assignment ratio
max_memberships = membership_matrix.max(axis=0)
crisp_mask = max_memberships > 0.8
crisp_count = crisp_mask.sum()
fuzzy_count = (~crisp_mask).sum()

print(f"\n✓ Assignment quality:")
print(f"  Crisp (membership > 0.8): {crisp_count}/{len(provinces)} ({crisp_count/len(provinces)*100:.1f}%)")
print(f"  Fuzzy (membership ≤ 0.8): {fuzzy_count}/{len(provinces)} ({fuzzy_count/len(provinces)*100:.1f}%)")

# 3.2 Membership statistics per cluster
print(f"\n✓ Membership statistics:")
for c_id in [0, 1]:
    mem = membership_matrix[c_id, :]
    assigned_count = (cluster_labels == c_id).sum()
    
    print(f"\n  Cluster {c_id} (n={assigned_count}):")
    print(f"    Mean membership: {mem[cluster_labels == c_id].mean():.4f}")
    print(f"    Std membership:  {mem[cluster_labels == c_id].std():.4f}")
    print(f"    Min membership:  {mem[cluster_labels == c_id].min():.4f}")
    print(f"    Max membership:  {mem[cluster_labels == c_id].max():.4f}")

# 3.3 Ambiguous cases (closest to 0.5-0.5)
ambiguity_scores = np.abs(membership_matrix[0, :] - 0.5)
most_ambiguous_idx = np.argsort(ambiguity_scores)[:5]

print(f"\n✓ Top 5 most ambiguous provinces (closest to 50-50):")
for rank, idx in enumerate(most_ambiguous_idx, 1):
    prov = provinces[idx]
    mem_c0 = membership_matrix[0, idx]
    mem_c1 = membership_matrix[1, idx]
    assigned = cluster_labels[idx]
    print(f"  {rank}. {prov:25s}: C0={mem_c0:.3f}, C1={mem_c1:.3f} → Assigned to C{assigned}")


EVALUATION 3: FUZZY MEMBERSHIP QUALITY

✓ Assignment quality:
  Crisp (membership > 0.8): 2/34 (5.9%)
  Fuzzy (membership ≤ 0.8): 32/34 (94.1%)

✓ Membership statistics:

  Cluster 0 (n=12):
    Mean membership: 0.6089
    Std membership:  0.0462
    Min membership:  0.5251
    Max membership:  0.6910

  Cluster 1 (n=22):
    Mean membership: 0.6720
    Std membership:  0.0988
    Min membership:  0.5062
    Max membership:  0.8281

✓ Top 5 most ambiguous provinces (closest to 50-50):
  1. DKI Jakarta              : C0=0.494, C1=0.506 → Assigned to C1
  2. Sulawesi Tengah          : C0=0.493, C1=0.507 → Assigned to C1
  3. Riau                     : C0=0.483, C1=0.517 → Assigned to C1
  4. Sulawesi Utara           : C0=0.478, C1=0.522 → Assigned to C1
  5. Kalimantan Selatan       : C0=0.525, C1=0.475 → Assigned to C0


In [8]:
print("\n" + "="*80)
print("EVALUATION 4: CLUSTER STABILITY (BOOTSTRAP)")
print("="*80)

print("\nRunning bootstrap validation (n=50 iterations)...")

import skfuzzy as fuzz
from sklearn.metrics import adjusted_rand_score

stability_scores = []
n_bootstrap = 50

for i in range(n_bootstrap):
    # Resample with replacement
    indices = resample(np.arange(len(X_normalized)), random_state=i)
    X_boot = X_normalized[indices]
    
    try:
        # Run FCM on bootstrapped data
        cntr_boot, u_boot, _, _, _, _, _ = fuzz.cluster.cmeans(
            X_boot.T,
            c=2,
            m=2.0,
            error=0.005,
            maxiter=150,
            init=None,
            seed=42+i
        )
        
        # Get cluster labels
        labels_boot = np.argmax(u_boot, axis=0)
        
        # Compare with original (for overlapping samples)
        ari = adjusted_rand_score(cluster_labels[indices], labels_boot)
        stability_scores.append(ari)
        
    except:
        continue
    
    if (i+1) % 10 == 0:
        print(f"  Progress: {i+1}/50 iterations completed")

ari_mean = np.mean(stability_scores)
ari_std = np.std(stability_scores)

print(f"\n✓ Stability Assessment:")
print(f"  Mean ARI: {ari_mean:.4f} ± {ari_std:.4f}")
print(f"  Interpretation: ", end='')
if ari_mean > 0.8:
    print("Highly stable (robust clusters)")
elif ari_mean > 0.6:
    print("Moderately stable (acceptable)")
else:
    print("Low stability (sensitive to sampling)")



EVALUATION 4: CLUSTER STABILITY (BOOTSTRAP)

Running bootstrap validation (n=50 iterations)...
  Progress: 10/50 iterations completed
  Progress: 20/50 iterations completed
  Progress: 30/50 iterations completed
  Progress: 40/50 iterations completed
  Progress: 50/50 iterations completed

✓ Stability Assessment:
  Mean ARI: 0.6647 ± 0.2314
  Interpretation: Moderately stable (acceptable)


In [9]:
# ============================================================================
# EVALUATION 5: CONVERGENCE ANALYSIS
# ============================================================================
print("\n" + "="*80)
print("EVALUATION 5: CONVERGENCE ANALYSIS")
print("="*80)

# Run FCM multiple times with different initializations
convergence_iters = []
convergence_fpc = []

print("\nTesting convergence with 10 different random initializations...")

for seed in range(10):
    try:
        cntr_test, u_test, _, _, _, p_test, fpc_test = fuzz.cluster.cmeans(
            X_normalized.T,
            c=2,
            m=2.0,
            error=0.005,
            maxiter=150,
            init=None,
            seed=seed
        )
        
        convergence_iters.append(p_test)
        convergence_fpc.append(fpc_test)
        
    except:
        continue

print(f"\n✓ Convergence statistics (n={len(convergence_iters)} runs):")
print(f"  Mean iterations: {np.mean(convergence_iters):.1f} ± {np.std(convergence_iters):.1f}")
print(f"  Min iterations:  {np.min(convergence_iters)}")
print(f"  Max iterations:  {np.max(convergence_iters)}")
print(f"\n✓ FPC consistency:")
print(f"  Mean FPC: {np.mean(convergence_fpc):.4f} ± {np.std(convergence_fpc):.4f}")
print(f"  Interpretation: ", end='')
if np.std(convergence_fpc) < 0.01:
    print("Highly consistent (stable solution)")
else:
    print("Some variability (sensitive to initialization)")



EVALUATION 5: CONVERGENCE ANALYSIS

Testing convergence with 10 different random initializations...

✓ Convergence statistics (n=10 runs):
  Mean iterations: 21.5 ± 6.5
  Min iterations:  10
  Max iterations:  33

✓ FPC consistency:
  Mean FPC: 0.5607 ± 0.0001
  Interpretation: Highly consistent (stable solution)


In [10]:
print("\n" + "="*80)
print("EVALUATION 6: FEATURE IMPORTANCE FOR CLUSTERING")
print("="*80)

# Feature groups
feature_groups = {
    'Mean': [c for c in df_features.columns if c.startswith('Mean_')],
    'CV': [c for c in df_features.columns if c.startswith('CV_')],
    'Trend': [c for c in df_features.columns if c.startswith('Trend_')],
    'Autocorr': [c for c in df_features.columns if c.startswith('Autocorr_')],
    'Skewness': [c for c in df_features.columns if c.startswith('Skewness_')]
}

print("\nFeature type contribution to cluster separation (T-test):")
print("-" * 70)

feature_importance = []
for ftype, cols in feature_groups.items():
    # Average features per province
    c0_data = df_features[cluster_labels == 0][cols].mean(axis=1).values
    c1_data = df_features[cluster_labels == 1][cols].mean(axis=1).values
    
    # T-test
    t_stat, p_val = stats.ttest_ind(c0_data, c1_data)
    
    # Effect size (Cohen's d)
    pooled_std = np.sqrt((np.var(c0_data) + np.var(c1_data)) / 2)
    cohens_d = (c0_data.mean() - c1_data.mean()) / pooled_std if pooled_std > 0 else 0
    
    significance = "***" if p_val < 0.001 else "**" if p_val < 0.01 else "*" if p_val < 0.05 else "ns"
    
    feature_importance.append({
        'Feature_Type': ftype,
        't_statistic': t_stat,
        'p_value': p_val,
        'cohens_d': cohens_d,
        'significance': significance
    })
    
    print(f"{ftype:12s}: t={t_stat:7.3f}, p={p_val:.4f} {significance}, Cohen's d={cohens_d:6.3f}")

df_importance = pd.DataFrame(feature_importance).sort_values('cohens_d', key=abs, ascending=False)

print("\nFeature importance ranking (by effect size |Cohen's d|):")
for i, row in df_importance.iterrows():
    print(f"  {row['Feature_Type']:12s}: |d|={abs(row['cohens_d']):.3f}")




EVALUATION 6: FEATURE IMPORTANCE FOR CLUSTERING

Feature type contribution to cluster separation (T-test):
----------------------------------------------------------------------
Mean        : t=  6.730, p=0.0000 ***, Cohen's d= 2.336
CV          : t= -9.255, p=0.0000 ***, Cohen's d=-3.366
Trend       : t= -2.372, p=0.0239 *, Cohen's d=-0.869
Autocorr    : t= -4.342, p=0.0001 ***, Cohen's d=-1.386
Skewness    : t= -0.470, p=0.6413 ns, Cohen's d=-0.168

Feature importance ranking (by effect size |Cohen's d|):
  CV          : |d|=3.366
  Mean        : |d|=2.336
  Autocorr    : |d|=1.386
  Trend       : |d|=0.869
  Skewness    : |d|=0.168


In [13]:
print("\n" + "="*80)
print("EVALUATION 7: SAVING EVALUATION REPORT")
print("="*80)

eval_summary = {
    'Metric': [
        'Silhouette Score',
        'Davies-Bouldin Index',
        'Calinski-Harabasz Score',
        'Fuzzy Partition Coefficient',
        'Partition Entropy',
        'WCSS Total',
        'Center Distance',
        'Dunn Index',
        'Crisp Assignments (%)',
        'Fuzzy Assignments (%)',
        'Bootstrap Stability (ARI)',
        'Convergence Iterations (mean)',
        'FPC Consistency (std)'
    ],
    'Value': [
        f'{sil_score:.4f}',
        f'{dbi_score:.4f}',
        f'{ch_score:.2f}',
        f'{fpc:.4f}',
        f'{pe:.4f}',
        f'{wcss_total:.2f}',
        f'{center_distance:.4f}',
        f'{dunn:.4f}',
        f'{crisp_count/len(provinces)*100:.1f}%',
        f'{fuzzy_count/len(provinces)*100:.1f}%',
        f'{ari_mean:.4f} ± {ari_std:.4f}',
        f'{np.mean(convergence_iters):.1f} ± {np.std(convergence_iters):.1f}',
        f'{np.std(convergence_fpc):.4f}'
    ],
    'Interpretation': [
        'Good' if sil_score > 0.25 else 'Weak',
        'Good' if dbi_score < 2.0 else 'Poor',
        'Higher is better',
        'Moderate' if fpc > 0.5 else 'High fuzziness',
        'Low' if pe < np.log(2)*0.5 else 'Moderate',
        'Sum of within-cluster variance',
        'Inter-cluster separation',
        'Good' if dunn > 0.1 else 'Moderate',
        'High crisp ratio' if crisp_count/len(provinces) > 0.7 else 'Moderate',
        'Ambiguous assignments',
        'Highly stable' if ari_mean > 0.8 else 'Moderate',
        'Fast convergence' if np.mean(convergence_iters) < 50 else 'Slow',
        'Consistent' if np.std(convergence_fpc) < 0.01 else 'Variable'
    ]
}

df_eval_summary = pd.DataFrame(eval_summary)
df_eval_summary.to_csv(OUTPUT_DIR + 'model_evaluation_summary.csv', index=False)
print(f"\n✓ Saved: model_evaluation_summary.csv")

# Save feature importance
df_importance.to_csv(OUTPUT_DIR + 'feature_importance.csv', index=False)
print(f"✓ Saved: feature_importance.csv")


EVALUATION 7: SAVING EVALUATION REPORT

✓ Saved: model_evaluation_summary.csv
✓ Saved: feature_importance.csv
