In [1]:
# Import library
import os
import numpy as np
import pandas as pd
import cv2
from PIL import Image
from skimage.feature import local_binary_pattern, graycomatrix, graycoprops
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_curve, auc
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import seaborn as sns
from google.colab import drive
import zipfile

In [2]:
# Fix the global NumPy RNG seed so repeated runs are reproducible
SEED = 42
np.random.seed(SEED)

In [3]:
# Mount Google Drive at /content/drive (Colab only); the dataset zip is read from there.
# NOTE(review): force_remount=True presumably remounts even if a mount already exists — confirm in the Colab docs.
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [4]:
dataset_path = '/content/drive/MyDrive/dataset/data_udang.zip'
extract_to = '/content/dataset'

# Treat the dataset as extracted only when the target folder actually holds
# something: an empty folder left behind by an earlier failed run must not
# cause extraction to be skipped.
if os.path.isdir(extract_to) and os.listdir(extract_to):
    print("Folder dataset sudah ada")
else:
    # Open the archive before creating the folder, so a missing or unreadable
    # zip raises without leaving an empty target folder behind.
    with zipfile.ZipFile(dataset_path, 'r') as zip_ref:
        os.makedirs(extract_to, exist_ok=True)
        zip_ref.extractall(extract_to)
    print("Dataset berhasil diekstrak")

Dataset berhasil diekstrak


In [5]:
# Create Target Folder
# NOTE(review): the recorded output of this cell reports "caution: filename not matched"
# for the pattern below, i.e. nothing was extracted into raw_images_only. No other cell
# visible in this notebook reads from that folder — confirm whether this cell is still needed.
!mkdir -p /content/dataset/raw_images_only

# Extract only the "Raw Images" folder
!unzip -o "/content/drive/MyDrive/dataset/data_udang.zip" \
"ShrimpDiseaseImageBD An Image Dataset for Computer Vision-Based Detection of Shrimp Diseases in Bangladesh/Root/Raw Images/Raw Images/*" \
-d /content/dataset/raw_images_only

Archive:  /content/drive/MyDrive/dataset/data_udang.zip
caution: filename not matched:  ShrimpDiseaseImageBD An Image Dataset for Computer Vision-Based Detection of Shrimp Diseases in Bangladesh/Root/Raw Images/Raw Images/*


In [6]:
# Expected location of the extracted dataset
base_path = '/content/dataset/data_udang'

# Verify the path: when it is missing, list the directories found under
# /content/dataset (up to 4 levels deep) and stop with an error so the path
# can be adjusted to the real folder structure.
if not os.path.exists(base_path):
    print("\nStruktur folder yang ditemukan:")
    !find /content/dataset -maxdepth 4 -type d
    raise ValueError("Path tidak valid. Sesuaikan dengan struktur di atas")

In [7]:
# Dataset location and binary labelling scheme
base_path = '/content/dataset/data_udang'

# Every diseased category collapses into one positive class: 0 = healthy, 1 = diseased
label_map = {
    '1. Healthy': 0,
    '2. BG': 1,
    '3. WSSV': 1,
    '4. WSSV_BG': 1,
}

# Class sub-folders to read, in the same order as the label map entries
folders = list(label_map)

In [8]:
def load_images(image_size=(128, 128)):
    """Load every image from the class folders as fixed-size RGB arrays.

    Reads the module-level ``base_path``, ``folders`` and ``label_map``.
    Files that cannot be opened or decoded are reported and skipped.

    Args:
        image_size: ``(width, height)`` every image is resized to.
            Defaults to ``(128, 128)``.

    Returns:
        A tuple ``(images, labels, filenames)``: a NumPy array of the loaded
        images, a NumPy array with the label of each image, and the list of
        file names in the same order.

    Raises:
        KeyError: if a folder listed in ``folders`` has no entry in ``label_map``.
    """
    images = []
    labels = []
    filenames = []
    for folder in folders:
        folder_path = os.path.join(base_path, folder)
        # The label is the same for every file in the folder; look it up once
        label = label_map[folder]
        # os.listdir returns entries in arbitrary, filesystem-dependent order.
        # Sorting fixes the sample order, so positional operations performed
        # later on these arrays refer to the same images on every run.
        for img_file in sorted(os.listdir(folder_path)):
            img_path = os.path.join(folder_path, img_file)
            try:
                # The context manager closes the underlying file handle
                # as soon as the pixels have been copied out.
                with Image.open(img_path) as img:
                    pixels = np.array(img.convert('RGB').resize(image_size))
            except Exception as e:
                # Best effort: report the unreadable file and carry on
                print(f"Error loading {img_path}: {str(e)}")
                continue
            images.append(pixels)
            labels.append(label)
            filenames.append(img_file)
    return np.array(images), np.array(labels), filenames

X, y, filenames = load_images()

# Report dataset size and class balance (label 0 = healthy, label 1 = diseased)
n_healthy = sum(y == 0)
n_diseased = sum(y == 1)
print(f"Total gambar: {len(X)}")
print(f"Distribusi kelas: Sehat={n_healthy}, Sakit={n_diseased}")

Total gambar: 1149
Distribusi kelas: Sehat=403, Sakit=746


## Ekstraksi Fitur (LBP+GLCM)

In [None]:
def extract_features(images):
    """Build one texture feature vector per image from LBP and GLCM statistics.

    Each RGB image is converted to grayscale, then described by:
      * a normalised histogram of uniform LBP codes (radius 3, 24 sampling
        points, 26 bins), and
      * four GLCM properties (contrast, correlation, energy, homogeneity)
        computed at distance 1 and angle 0.

    Args:
        images: iterable of RGB image arrays.

    Returns:
        NumPy array with the LBP histograms and the GLCM properties stacked
        side by side, one row per image.
    """
    lbp_radius = 3
    lbp_points = 8 * lbp_radius
    lbp_method = 'uniform'
    bin_edges = np.arange(0, lbp_points + 3)
    glcm_prop_names = ('contrast', 'correlation', 'energy', 'homogeneity')

    lbp_rows = []
    glcm_rows = []
    for rgb in images:
        gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)

        # LBP: histogram of the codes, scaled to sum to (almost exactly) one;
        # the small epsilon guards against division by zero
        codes = local_binary_pattern(gray, lbp_points, lbp_radius, lbp_method)
        counts = np.histogram(codes, bins=bin_edges, range=(0, lbp_points + 2))[0]
        counts = counts.astype("float")
        lbp_rows.append(counts / (counts.sum() + 1e-6))

        # GLCM: one scalar per property for the single (distance, angle) pair
        cooc = graycomatrix(gray, distances=[1], angles=[0], levels=256, symmetric=True, normed=True)
        glcm_rows.append([
            value
            for prop_name in glcm_prop_names
            for value in graycoprops(cooc, prop_name).ravel()
        ])

    return np.hstack([np.array(lbp_rows), np.array(glcm_rows)])

# Run feature extraction over the full image set and report the matrix shape
print("Ekstraksi fitur LBP+GLCM...")
features = extract_features(X)
print(f"Shape fitur: {features.shape}")

Ekstraksi fitur LBP+GLCM...


In [None]:
# Preprocessing
# Fit the scaler and the PCA on the training rows only, so that no statistics
# of the held-out test images leak into the transformation. The row split is
# reproduced here with the same test_size / stratify / random_state as the
# train_test_split call in the SVM section; those three settings must be kept
# in sync with that call so both produce the same train/test membership.
fit_idx, _ = train_test_split(
    np.arange(len(y)), test_size=0.2, stratify=y, random_state=42)

scaler = StandardScaler()
scaler.fit(features[fit_idx])
features_scaled = scaler.transform(features)

# At most 50 components, bounded by the feature count and by the number of
# rows the PCA is fitted on
n_components = min(50, features.shape[1]-1, len(fit_idx))
pca = PCA(n_components=n_components, random_state=42)
pca.fit(features_scaled[fit_idx])
features_pca = pca.transform(features_scaled)
print(f"\nMenggunakan {n_components} komponen PCA")

## SVM

In [None]:
# Hold out 20% of the samples, stratified by label. File names and row indices
# are split alongside the features so each prediction can be traced back to its image.
(X_train, X_test,
 y_train, y_test,
 filenames_train, filenames_test,
 train_indices, test_indices) = train_test_split(
    features_pca, y, filenames, range(len(X)),
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [None]:
# Hyperparameter search space for the SVM.
# gamma is the RBF kernel coefficient and has no effect on the linear kernel,
# so each kernel gets its own sub-grid. This avoids fitting the same linear
# model once per gamma value (20 candidates instead of 32).
param_grid_svm = [
    {
        'kernel': ['rbf'],
        'C': [0.1, 1, 10, 100],
        'gamma': [0.0001, 0.001, 0.01, 0.1],
    },
    {
        'kernel': ['linear'],
        'C': [0.1, 1, 10, 100],
    },
]

In [None]:
# Set up the SVM grid search: 5-fold cross-validation scored by accuracy
svm_model = SVC(probability=True, random_state=42)
grid_svm = GridSearchCV(
    estimator=svm_model,
    param_grid=param_grid_svm,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
    verbose=1,
)

In [None]:
# Fit the SVM grid search on the training split and keep its best estimator
print("\n=== Training SVM ===")
best_svm = grid_svm.fit(X_train, y_train).best_estimator_

In [None]:
# Predict class labels for the held-out test split with the best SVM
y_pred_svm = best_svm.predict(X_test)

In [None]:
# Test-set accuracy of the tuned SVM
acc_svm = accuracy_score(y_true=y_test, y_pred=y_pred_svm)
print("\n=== Hasil Evaluasi ===")
print(f"Akurasi SVM: {acc_svm:.4f}")

In [None]:
# Per-class precision / recall / F1 for the SVM on the test split
print("\nClassification Report (SVM):")
svm_report = classification_report(
    y_test,
    y_pred_svm,
    target_names=['Sehat', 'Sakit'],
)
print(svm_report)

In [None]:
# Confusion Matrix SVM
# confusion_matrix(y_true, y_pred) puts the true classes on the rows and the
# predicted classes on the columns; the axes are labelled accordingly so the
# figure can be read on its own.
cm_svm = confusion_matrix(y_test, y_pred_svm)

fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(cm_svm, annot=True, fmt='d', cmap='Oranges',
            xticklabels=['Sehat', 'Sakit'],
            yticklabels=['Sehat', 'Sakit'],
            ax=ax)
ax.set_xlabel('Prediksi')
ax.set_ylabel('Aktual')
ax.set_title('Confusion Matrix SVM', fontsize=14)

fig.tight_layout()
plt.show()

## KNN


In [None]:
# Hyperparameter search space for KNN
param_grid_knn = dict(
    n_neighbors=[3, 5, 7, 9, 11],
    weights=['uniform', 'distance'],
    metric=['euclidean', 'manhattan'],
)

In [None]:
# Set up the KNN grid search: 5-fold cross-validation scored by accuracy
knn = KNeighborsClassifier()
grid_knn = GridSearchCV(
    estimator=knn,
    param_grid=param_grid_knn,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
    verbose=1,
)

In [None]:
# Fit the KNN grid search on the training split and keep its best estimator
print("\n=== Training KNN ===")
best_knn = grid_knn.fit(X_train, y_train).best_estimator_

In [None]:
# Predict class labels for the held-out test split with the best KNN
y_pred_knn = best_knn.predict(X_test)

In [None]:
# Test-set accuracy of the tuned KNN
acc_knn = accuracy_score(y_true=y_test, y_pred=y_pred_knn)
print("\n=== Hasil Evaluasi ===")
print(f"Akurasi KNN: {acc_knn:.4f}")

In [None]:
# Per-class precision / recall / F1 for the KNN on the test split
print("\nClassification Report (KNN):")
knn_report = classification_report(
    y_test,
    y_pred_knn,
    target_names=['Sehat', 'Sakit'],
)
print(knn_report)

In [None]:
# Confusion Matrix
# confusion_matrix(y_true, y_pred) puts the true classes on the rows and the
# predicted classes on the columns; the axes are labelled accordingly so the
# figure can be read on its own.
cm_knn = confusion_matrix(y_test, y_pred_knn)

fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(cm_knn, annot=True, fmt='d', cmap='Blues',
            xticklabels=['Healthy', 'Diseased'],
            yticklabels=['Healthy', 'Diseased'],
            ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('KNN Confusion Matrix')

fig.tight_layout()
plt.show()

## ROC Curve

In [None]:
# Predicted probability of class 1 (second column of predict_proba) on the test split
y_proba_knn, y_proba_svm = (
    model.predict_proba(X_test)[:, 1]
    for model in (best_knn, best_svm)
)

In [None]:
# ROC curves and AUC for both models, computed from the class-1 probabilities
fpr_knn, tpr_knn, _ = roc_curve(y_test, y_proba_knn)
roc_auc_knn = auc(fpr_knn, tpr_knn)

fpr_svm, tpr_svm, _ = roc_curve(y_test, y_proba_svm)
roc_auc_svm = auc(fpr_svm, tpr_svm)

fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(fpr_knn, tpr_knn, label=f'KNN (AUC = {roc_auc_knn:.2f})', color='blue')
ax.plot(fpr_svm, tpr_svm, label=f'SVM (AUC = {roc_auc_svm:.2f})', color='red')
# Diagonal reference line
ax.plot([0, 1], [0, 1], 'k--')
ax.set_xlabel('False Positive Rate', fontsize=12)
ax.set_ylabel('True Positive Rate', fontsize=12)
ax.set_title('ROC Curve: KNN vs SVM', fontsize=14)
ax.legend(fontsize=12)
ax.grid(True, alpha=0.3)
plt.show()

In [None]:
# Side-by-side comparison of the two test accuracies
model_names = ['KNN', 'SVM']
accuracies = [acc_knn, acc_svm]

fig, ax = plt.subplots(figsize=(6, 5))
ax.bar(model_names, accuracies, color=['blue', 'red'])
ax.set_ylim(0, 1)
ax.set_ylabel('Accuracy', fontsize=12)
ax.set_title('Perbandingan Akurasi KNN vs SVM', fontsize=14)
# Write each value just above its bar
for position, score in enumerate(accuracies):
    ax.text(position, score + 0.02, f"{score:.4f}", ha='center', fontsize=12)
plt.show()

## Visualisasi

In [None]:
from matplotlib.patches import Rectangle

def visualize_svm_predictions(images, test_indices, y_true, y_pred, n_samples=5):
    """Show example test images the SVM classified correctly and incorrectly.

    Args:
        images: array of all images; the test images are selected from it
            with ``test_indices``.
        test_indices: positions of the test samples inside ``images``.
        y_true: true labels of the test samples (0 = 'Sehat', otherwise 'Sakit').
        y_pred: SVM predictions for the test samples.
        n_samples: number of subplot columns, and the maximum number of
            examples shown per figure.

    Draws one figure of correct predictions (green border) and one of wrong
    predictions (red border); prints a message instead of the second figure
    when there are no wrong predictions. Returns None.
    """
    actual = np.array(y_true).flatten()
    predicted = np.array(y_pred).flatten()
    held_out = images[test_indices]

    hit_positions = np.where(predicted == actual)[0]
    miss_positions = np.where(predicted != actual)[0]

    def describe(value):
        # Map a numeric label to its display name
        return 'Sehat' if value == 0 else 'Sakit'

    def draw_row(positions, heading, border_color, text_style):
        # One figure with up to n_samples bordered images; text_style carries
        # the extra text kwargs (the colour) shared by the heading and captions
        plt.figure(figsize=(20, 5))
        plt.suptitle(heading, fontsize=18, y=1.05, fontweight='bold', **text_style)

        for slot, pos in zip(range(n_samples), positions):
            ax = plt.subplot(1, n_samples, slot + 1)
            img = held_out[pos]

            # Grayscale images are expanded to three identical channels
            if img.ndim == 2:
                img = np.stack((img,)*3, axis=-1)

            ax.imshow(img)
            # Thick coloured frame around the image
            ax.add_patch(Rectangle((0,0), img.shape[1], img.shape[0],
                         linewidth=12, edgecolor=border_color, facecolor='none'))

            plt.title(f"AKTUAL: {describe(actual[pos])}\nPREDIKSI: {describe(predicted[pos])}",
                     fontsize=12, pad=12, **text_style)
            plt.axis('off')

        plt.tight_layout()
        plt.show()

    # --- Correct SVM predictions ---
    if len(hit_positions) > 0:
        draw_row(hit_positions, 'CONTOH PREDIKSI BENAR (SVM)', '#2ecc71', {})

    # --- Wrong SVM predictions ---
    if len(miss_positions) > 0:
        draw_row(miss_positions, 'CONTOH PREDIKSI SALAH (SVM)', '#e74c3c', {'color': 'red'})
    else:
        print("\nSVM: Tidak ada prediksi salah pada data test!")

# Print a banner, then render the SVM prediction examples
banner_rule = "=" * 70
print(f"\n{banner_rule}")
print("VISUALISASI HASIL PREDIKSI SVM".center(70))
print(banner_rule)
visualize_svm_predictions(X, test_indices, y_test, y_pred_svm)

In [None]:
from matplotlib.patches import Rectangle

def visualize_knn_predictions(images, test_indices, y_true, y_pred, n_samples=5):
    """Show example test images the KNN classified correctly and incorrectly.

    Args:
        images: array of all images; the test images are selected from it
            with ``test_indices``.
        test_indices: positions of the test samples inside ``images``.
        y_true: true labels of the test samples (0 = 'Sehat', otherwise 'Sakit').
        y_pred: KNN predictions for the test samples.
        n_samples: number of subplot columns, and the maximum number of
            examples shown per figure.

    Draws one figure of correct predictions (blue border) and one of wrong
    predictions (orange border); prints a message instead of the second
    figure when there are no wrong predictions. Returns None.
    """
    actual = np.array(y_true).flatten()
    predicted = np.array(y_pred).flatten()
    held_out = images[test_indices]

    hit_positions = np.where(predicted == actual)[0]
    miss_positions = np.where(predicted != actual)[0]

    def describe(value):
        # Map a numeric label to its display name
        return 'Sehat' if value == 0 else 'Sakit'

    def draw_row(positions, heading, border_color, text_style):
        # One figure with up to n_samples bordered images; text_style carries
        # the extra text kwargs (the colour) shared by the heading and captions
        plt.figure(figsize=(20, 5))
        plt.suptitle(heading, fontsize=18, y=1.05, fontweight='bold', **text_style)

        for slot, pos in zip(range(n_samples), positions):
            ax = plt.subplot(1, n_samples, slot + 1)
            img = held_out[pos]

            # Grayscale images are expanded to three identical channels
            if img.ndim == 2:
                img = np.stack((img,)*3, axis=-1)

            ax.imshow(img)
            # Thick coloured frame around the image
            ax.add_patch(Rectangle((0,0), img.shape[1], img.shape[0],
                         linewidth=12, edgecolor=border_color, facecolor='none'))

            plt.title(f"AKTUAL: {describe(actual[pos])}\nPREDIKSI: {describe(predicted[pos])}",
                     fontsize=12, pad=12, **text_style)
            plt.axis('off')

        plt.tight_layout()
        plt.show()

    # --- Correct KNN predictions ---
    if len(hit_positions) > 0:
        draw_row(hit_positions, 'CONTOH PREDIKSI BENAR (KNN)', '#3498db', {})

    # --- Wrong KNN predictions ---
    if len(miss_positions) > 0:
        draw_row(miss_positions, 'CONTOH PREDIKSI SALAH (KNN)', '#f39c12', {'color': 'red'})
    else:
        print("\nKNN: Tidak ada prediksi salah pada data test!")

# Print a banner, then render the KNN prediction examples
banner_rule = "=" * 70
print(f"\n{banner_rule}")
print("VISUALISASI HASIL PREDIKSI KNN".center(70))
print(banner_rule)
visualize_knn_predictions(X, test_indices, y_test, y_pred_knn)

## Hasil Deteksi

In [None]:
# Combine the KNN and SVM predictions for the test images into one table
results_df = pd.DataFrame({
    'Filename': filenames_test,
    'True_Label': y_test,
    'KNN_Prediction': y_pred_knn,
    'SVM_Prediction': y_pred_svm,
    'KNN_Probability_Sakit': y_proba_knn,
    'SVM_Probability_Sakit': y_proba_svm
})

# Add a readable class name next to every numeric label column
label_to_class = {0: 'Sehat', 1: 'Sakit'}
readable_columns = (
    ('True_Label', 'True_Class'),
    ('KNN_Prediction', 'KNN_Pred_Class'),
    ('SVM_Prediction', 'SVM_Pred_Class'),
)
for numeric_col, class_col in readable_columns:
    results_df[class_col] = results_df[numeric_col].map(label_to_class)

# Save to CSV
output_csv = 'hasil_prediksi_udang.csv'
results_df.to_csv(output_csv, index=False)
print(f"Hasil prediksi disimpan di '{output_csv}'")

In [None]:
# Preview the first five rows of the results table
preview = results_df.head(5)
print("\nContoh hasil prediksi:")
print(preview)