In [8]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [None]:
# Load the plant-health dataset; the path is resolved relative to the
# notebook's current working directory.
data = pd.read_csv(filepath_or_buffer="dataset/plant_health_data.csv")

FileNotFoundError: [Errno 2] No such file or directory: 'plant_health_data.csv'

In [None]:
# Show the first five rows beneath a section banner.
print("=== 5 Data Teratas ===", data.head(), sep="\n")

In [None]:
# Show the last five rows beneath a section banner.
print("=== 5 Data Terakhir ===", data.tail(), sep="\n")

In [None]:
# Print the column/dtype summary of the dataset.
# DataFrame.info() writes its report straight to stdout and returns None, so
# it is called bare: wrapping it in print() would append a stray "None" line.
print("\n=== Informasi Dataset ===")
data.info()

In [None]:
# Summary statistics for the columns covered by DataFrame.describe().
print("\n=== Statistik Deskriptif ===", data.describe(), sep="\n")

In [None]:
# Encode the target column (Plant_Health_Status) as integer class codes.
# The column is overwritten in place, so re-running this cell without
# reloading `data` would refit the encoder on the already-encoded integers.
label_encoder = LabelEncoder().fit(data['Plant_Health_Status'])
data['Plant_Health_Status'] = label_encoder.transform(data['Plant_Health_Status'])

In [None]:
# Build a {original label: integer code} lookup for display.
# LabelEncoder encodes each class as its position in `classes_`, so
# enumerating `classes_` reproduces the mapping with plain Python ints
# (which also print cleanly, without NumPy scalar wrappers).
label_mapping = {label: code for code, label in enumerate(label_encoder.classes_)}
# The arrow below was previously stored as mis-decoded bytes and printed as garbage.
print("Mapping Label → Angka:")
print(label_mapping)

In [None]:
# Number of rows per (encoded) health-status class.
print("\n=== Jumlah Data per Kelas ===", data['Plant_Health_Status'].value_counts(), sep="\n")

In [None]:
# Pairwise correlation between the numeric columns, drawn as an annotated heatmap.
plt.figure(figsize=(12, 10))
corr_matrix = data.corr(numeric_only=True)
sns.heatmap(corr_matrix, cmap='coolwarm', annot=True)
plt.title('Korelasi Antar Fitur')
plt.show()

In [None]:
# Features to analyse in the combined box plot.
selected_features = [
    'Soil_Moisture', 'Ambient_Temperature', 'Soil_Temperature',
    'Humidity', 'Light_Intensity', 'Soil_pH',
    'Nitrogen_Level', 'Phosphorus_Level', 'Potassium_Level',
    'Chlorophyll_Content', 'Electrochemical_Signal',
]

# Reshape to long format: one row per (sample, feature) pair, keeping the
# health status alongside so it can drive the hue.
data_melted = pd.melt(
    data,
    id_vars='Plant_Health_Status',
    value_vars=selected_features,
    var_name='Fitur',
    value_name='Nilai',
)

# One box per feature and health-status class, all on a shared value axis.
plt.figure(figsize=(12, 7))
sns.boxplot(data=data_melted, x='Fitur', y='Nilai', hue='Plant_Health_Status', palette='Set3')

plt.title('Distribusi Nilai Fitur Terpilih Berdasarkan Plant Health Status', fontsize=14)
plt.xlabel("Fitur")
plt.ylabel("Nilai")
plt.xticks(rotation=30)
plt.legend(title="Plant Health Status")
plt.tight_layout()
plt.show()

In [None]:
# Count missing values per column (isna is the canonical name for isnull).
print("\n=== Mengecek Missing Values ===", data.isna().sum(), sep="\n")

In [None]:
from sklearn.svm import SVC

# Feature columns fed to the model.
selected_features = [
    'Soil_Moisture', 'Ambient_Temperature', 'Soil_Temperature',
    'Humidity', 'Light_Intensity', 'Soil_pH',
    'Nitrogen_Level', 'Phosphorus_Level', 'Potassium_Level',
    'Chlorophyll_Content', 'Electrochemical_Signal',
]

# Feature matrix and (already integer-encoded) target.
X = data[selected_features]
y = data['Plant_Health_Status']

# 80/20 train/test split; stratifying on y keeps the class proportions
# the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Standardise features: statistics are learned from the training partition
# only and then applied to both partitions.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Fit an RBF-kernel SVM; probability=True enables predict_proba later on.
model_svm = SVC(kernel='rbf', C=1, gamma='scale', probability=True, random_state=42)
model_svm.fit(X_train, y_train)

# Predict the held-out partition.
y_pred_svm = model_svm.predict(X_test)

# Report accuracy and the per-class precision/recall/F1 table.
print("\n=== Support Vector Machine (SVM) ===")
print("Akurasi:", accuracy_score(y_test, y_pred_svm))
print("\nClassification Report:\n", classification_report(y_test, y_pred_svm), sep="\n")

In [None]:
# Confusion matrix of the test-set predictions, drawn as an annotated heatmap.
# Rows are the actual classes and columns the predicted ones; both axes are
# labelled with the model's class codes.
cm = confusion_matrix(y_test, y_pred_svm)

plt.figure(figsize=(8, 6))
sns.heatmap(
    cm,
    cmap="Blues",
    annot=True,
    fmt="d",
    xticklabels=model_svm.classes_,
    yticklabels=model_svm.classes_,
)
plt.xlabel("Prediksi")
plt.ylabel("Aktual")
plt.title("Confusion Matrix - SVM")
plt.show()

In [None]:
# Grouped bar chart: number of actual vs. predicted test samples per class.
unique_classes = model_svm.classes_

# Tally how many test samples carry / were assigned each class code.
actual_counts = []
pred_counts = []
for c in unique_classes:
    actual_counts.append(sum(y_test == c))
    pred_counts.append(sum(y_pred_svm == c))

# Bar geometry: one slot per class, two bars side by side within each slot.
width = 0.35
x = np.arange(len(unique_classes))

plt.figure(figsize=(8,5))
plt.bar(x - width/2, actual_counts, width, label='Aktual')
plt.bar(x + width/2, pred_counts, width, label='Prediksi')

plt.title("Perbandingan Jumlah Prediksi vs Aktual (SVM)")
plt.ylabel("Jumlah Sampel")
plt.xticks(x, unique_classes, rotation=45)
plt.legend()
plt.tight_layout()
plt.show()

In [None]:
# Turn the classification report into a table with one row per class.
# The last three rows of the transposed report are dropped so that only the
# per-class entries remain (the original comment calls them "avg/total").
report = classification_report(y_test, y_pred_svm, output_dict=True)
df_report = pd.DataFrame(report).T.iloc[:-3]

plt.figure(figsize=(15, 4))

# One panel per metric: (report column, axis/title label).
metric_panels = [
    ('precision', "Precision"),
    ('recall', "Recall"),
    ('f1-score', "F1-Score"),
]
for panel_no, (metric_column, metric_label) in enumerate(metric_panels, start=1):
    plt.subplot(1, 3, panel_no)
    sns.barplot(x=df_report.index, y=df_report[metric_column])
    plt.xticks(rotation=45)
    plt.ylabel(metric_label)
    plt.title(f"{metric_label} per Kelas - SVM")

plt.tight_layout()
plt.show()

In [None]:
# Plant Health Status classification helper (SVM)
def predict_plant_health(input_dict):
    """Classify a single plant sample with the trained SVM.

    Relies on the notebook-level objects ``selected_features``, ``scaler``,
    ``model_svm`` and ``label_encoder``.

    Parameters
    ----------
    input_dict : dict
        Maps every name in ``selected_features`` to that feature's reading.
        Keys that are not in ``selected_features`` are ignored.

    Returns
    -------
    dict
        ``{"Plant_Health_Status": label, "Probability": "xx.xx%"}`` where
        ``label`` is the original class label (or the raw predicted code when
        the encoder has no label for it) and the percentage is the model's
        estimated probability for the predicted class.

    Raises
    ------
    KeyError
        If one or more features from ``selected_features`` are missing.
    """
    # Fail early with a message that names exactly what is missing.
    missing = [name for name in selected_features if name not in input_dict]
    if missing:
        raise KeyError(f"Missing feature(s) in input: {missing}")

    # Single-row frame with columns in the same order used for training.
    df_input = pd.DataFrame([input_dict])[selected_features]

    # Apply the scaling that was fitted on the training data.
    df_scaled = scaler.transform(df_input)

    # Predicted class code and the per-class probability vector.
    pred = model_svm.predict(df_scaled)[0]
    proba = model_svm.predict_proba(df_scaled)[0]

    # predict_proba columns follow model_svm.classes_, so the column is looked
    # up by position in classes_ rather than by using the class code itself as
    # an index (the two only coincide when the codes are exactly 0..n-1).
    # NOTE(review): scikit-learn documents that SVC.predict_proba can disagree
    # with SVC.predict, so this value is not guaranteed to be the row maximum.
    class_index = list(model_svm.classes_).index(pred)
    pred_probability = proba[class_index]

    # Integer code -> original label; fall back to the raw code if unknown.
    code_to_label = dict(enumerate(label_encoder.classes_))
    final_label = code_to_label.get(pred, pred)

    return {
        "Plant_Health_Status": final_label,
        "Probability": f"{pred_probability * 100:.2f}%"
    }


# Example input: one reading for every feature the model was trained on.
test_case = dict(
    Soil_Moisture=45,
    Ambient_Temperature=28,
    Soil_Temperature=25,
    Humidity=70,
    Light_Intensity=300,
    Soil_pH=6.8,
    Nitrogen_Level=40,
    Phosphorus_Level=18,
    Potassium_Level=20,
    Chlorophyll_Content=38,
    Electrochemical_Signal=0.85,
)

# Run the example through the classifier and show the resulting dict.
print(predict_plant_health(test_case))

In [None]:
import joblib

# NOTE(review): `nama_file` is assigned but not used by the dump calls below,
# which write to fixed file names instead — confirm which name is intended.
nama_file = 'Klasifikasi_Kesehatan_Tanaman.pkl'

# Persist the fitted classifier and the fitted scaler as separate files in
# the current working directory.
joblib.dump(value=model_svm, filename="model.pkl")
joblib.dump(value=scaler, filename="scaler.pkl")

print("Model dan scaler berhasil disimpan")