# In [1]:
# ---------------------------------------
# PREDIKSI KANKER PAYUDARA (GANAS / JINAK)
# ---------------------------------------

# 1. Import Library
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import joblib

# 2. Load the dataset
# Change the path below if your CSV file lives in a different folder.
data = pd.read_csv(
    filepath_or_buffer='data/breast_cancer_data.csv',  # dataset from UCI
)
print("✅ Data berhasil dimuat")

# 3. Quick exploration
# Note: the second number is the column count of the raw frame, so it still
# includes non-feature columns such as 'diagnosis'.
n_rows, n_cols = data.shape
print(f"Jumlah sampel: {n_rows}, Jumlah fitur: {n_cols}")

# Number of rows per diagnosis label.
diagnosis_counts = data['diagnosis'].value_counts()
print(diagnosis_counts)

# Bar chart of the diagnosis distribution; countplot returns the Axes it
# drew on, so the title is set directly on that object.
count_ax = sns.countplot(data=data, x='diagnosis')
count_ax.set_title("Distribusi Diagnosis (M: Malignant, B: Benign)")
plt.show()

# 4. Preprocessing
# Drop the 'id' and 'Unnamed: 32' columns when present; errors='ignore'
# skips whichever of them the CSV does not contain.
data = data.drop(['id', 'Unnamed: 32'], axis=1, errors='ignore')

# Encode the target: malignant ('M') -> 1, benign ('B') -> 0.
# Series.map turns every value missing from the mapping into NaN, which would
# otherwise surface much later as an opaque error during training, so fail
# here with the offending values instead.
encoded_diagnosis = data['diagnosis'].map({'M': 1, 'B': 0})
if encoded_diagnosis.isna().any():
    unexpected = data.loc[encoded_diagnosis.isna(), 'diagnosis'].unique()
    raise ValueError(
        f"Unexpected values in 'diagnosis' column: {list(unexpected)!r} "
        "(expected only 'M' or 'B')"
    )
data['diagnosis'] = encoded_diagnosis.astype(int)

X = data.drop('diagnosis', axis=1)
y = data['diagnosis']

# Split into train and test sets BEFORE scaling. Fitting the scaler on the
# full dataset would leak the test set's mean/variance into the training
# features. stratify=y keeps the malignant/benign ratio the same in both folds.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Standardize features: learn mean/std from the training fold only, then
# apply that same transformation to the test fold.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Retained only because code outside this section may still reference
# X_scaled; it mixes train and test rows, so do not use it for evaluation.
X_scaled = scaler.transform(X)

# 5. Train the model
# Build a 100-tree random forest with a fixed seed and fit it on the training
# split in one chained call (fit() returns the estimator itself).
model = RandomForestClassifier(random_state=42, n_estimators=100).fit(
    X_train, y_train
)
print("✅ Model selesai dilatih")

# 6. Evaluate the model
# Predict labels for the held-out test split.
y_pred = model.predict(X_test)

# Overall accuracy, printed as a percentage with two decimals.
acc = accuracy_score(y_test, y_pred)
print(f"\n🎯 Akurasi: {acc * 100:.2f}%\n")

# Per-class precision / recall / F1 summary.
report = classification_report(y_test, y_pred)
print("📋 Classification Report:\n", report)

# Confusion matrix drawn as an annotated heatmap. The tick labels follow the
# 0 -> Benign, 1 -> Malignant encoding of the target.
class_names = ['Benign', 'Malignant']
cm = confusion_matrix(y_test, y_pred)
cm_ax = sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
cm_ax.set_xlabel('Predicted')
cm_ax.set_ylabel('True')
cm_ax.set_title('Confusion Matrix')
plt.show()

# 7. Save the model (optional)
# Persist the trained classifier to disk with joblib.
joblib.dump(model, 'model_rf.pkl')
print("💾 Model disimpan sebagai 'model_rf.pkl'")

# The classifier was trained on standardized features, so predicting on new
# raw measurements requires the exact same fitted scaler. Persist it next to
# the model; without it the saved model cannot be applied consistently.
joblib.dump(scaler, 'scaler_rf.pkl')
print("💾 Scaler disimpan sebagai 'scaler_rf.pkl'")
