<a href="https://colab.research.google.com/github/zaviruuu/Naga--ML-Based-Snake-Identifier-for-Sri-Lanka-/blob/snake_identification_model/Snake_Identification_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## NĀGA - SNAKE IDENTIFICATION MODEL

In [None]:
# Mount Google Drive at /content/drive so the dataset and output folders
# referenced by the path constants in this notebook are reachable.
from google.colab import drive
drive.mount("/content/drive")

In [None]:
# Folder holding the raw images, one sub-folder per class (read by the split step).
SRC_DIR = "/content/drive/MyDrive/DSGP_Group_32/NĀGA/Snake Identification Model/Dataset"
# Folder the train/val/test split is copied into (train/, val/ and test/ sub-folders).
OUT_DIR = "/content/drive/MyDrive/DSGP_Group_32/NĀGA/Snake Identification Model/Output"

In [None]:
# Fractions of each class assigned to the train / validation / test splits.
TRAIN_RATIO, VAL_RATIO, TEST_RATIO = 0.70, 0.15, 0.15

# Image pipeline and training hyper-parameters.
IMG_SIZE, BATCH_SIZE = (224, 224), 16  # resize target for the generators, images per batch
EPOCHS, SEED = 15, 42                  # training epochs, seed for shuffling

In [None]:
## EDA
# Class distribution: how many image files each class folder contains.
from pathlib import Path
import matplotlib.pyplot as plt

DATA_DIR = "/content/drive/MyDrive/DSGP_Group_32/NĀGA/Snake Identification Model/Dataset"

# Every sub-folder of DATA_DIR is treated as one class (alphabetical order).
classes = sorted(entry.name for entry in Path(DATA_DIR).iterdir() if entry.is_dir())

# Count the files with a recognised image extension inside each class folder.
counts = [
    sum(
        1
        for item in Path(DATA_DIR, name).glob("*")
        if item.suffix.lower() in [".jpg", ".jpeg", ".png"]
    )
    for name in classes
]

plt.figure()
plt.bar(classes, counts)
plt.title("Class Distribution (Images per Class)")
plt.xlabel("Class")
plt.ylabel("Count")
plt.xticks(rotation=45, ha="right")
plt.tight_layout()
plt.show()

In [None]:
#Sample images
import random
import matplotlib.pyplot as plt
from PIL import Image

def show_samples(data_dir, classes, n_per_class=3):
    """Plot a grid of randomly chosen example images, one row per class.

    Args:
        data_dir: Directory that contains one sub-folder per class.
        classes: Names of the class sub-folders to display, in row order.
        n_per_class: Maximum number of images shown for each class
            (the number of grid columns).

    A class holding fewer than ``n_per_class`` images leaves the rest of its
    row empty rather than pushing later classes into that row. Nothing is
    drawn when ``classes`` is empty or ``n_per_class`` is not positive.
    """
    n_rows = len(classes)
    if n_rows == 0 or n_per_class <= 0:
        # An empty grid has nothing to show (and would need a zero-sized figure).
        return

    allowed_exts = {".jpg", ".jpeg", ".png"}
    plt.figure(figsize=(n_per_class * 3, n_rows * 3))
    for row, c in enumerate(classes):
        imgs = [p for p in Path(data_dir, c).glob("*") if p.suffix.lower() in allowed_exts]
        pick = random.sample(imgs, min(n_per_class, len(imgs)))
        for col, p in enumerate(pick):
            # Compute the slot from (row, col) so a short row never shifts
            # the following classes out of their own row.
            plt.subplot(n_rows, n_per_class, row * n_per_class + col + 1)
            with Image.open(p) as im:
                plt.imshow(im.convert("RGB"))
            plt.axis("off")
            plt.title(c)
    plt.tight_layout()
    plt.show()

# Preview up to three randomly chosen images for every class folder found in DATA_DIR.
show_samples(DATA_DIR, classes, n_per_class=3)

In [None]:
# Brightness distribution
import numpy as np
from PIL import Image

# Collect every image path across all class folders, class by class.
all_imgs = [
    img_path
    for class_name in classes
    for img_path in Path(DATA_DIR, class_name).glob("*")
    if img_path.suffix.lower() in [".jpg", ".jpeg", ".png"]
]

# Work on a random subset (at most 400 images) to keep this cell quick.
sample = random.sample(all_imgs, min(400, len(all_imgs)))

# Mean grey-level of each sampled image after conversion to 8-bit greyscale.
brightness = [
    np.array(Image.open(img_path).convert("L")).mean()
    for img_path in sample
]

plt.figure()
plt.hist(brightness, bins=30)
plt.xlabel("Mean brightness (0–255)")
plt.ylabel("Count")
plt.title("Brightness Distribution (Sample)")
plt.tight_layout()
plt.show()

In [None]:
## Split train/val/test
# Copies every image of every class from SRC_DIR into
# OUT_DIR/{train,val,test}/<class>/ using the configured ratios.
import os
import random
import shutil
from pathlib import Path

random.seed(SEED)

# Detect classes (folder names). Sorted so the seeded shuffles below consume
# the random stream in the same order on every run, whatever order the
# filesystem happens to list the folders in.
classes = sorted(d.name for d in Path(SRC_DIR).iterdir() if d.is_dir())
print("Classes found:", classes)

# Create output folder structure
for split in ["train", "val", "test"]:
    for c in classes:
        Path(f"{OUT_DIR}/{split}/{c}").mkdir(parents=True, exist_ok=True)

# Same extensions the EDA cells count; restricting this to ".jpg" alone would
# silently leave .jpeg/.png images out of all three splits.
IMG_EXTS = {".jpg", ".jpeg", ".png"}

# Copy files into splits
for c in classes:
    # Sort before shuffling: glob() order is filesystem-dependent, and an
    # unstable starting order would make the seeded shuffle irreproducible.
    files = sorted(f for f in Path(f"{SRC_DIR}/{c}").glob("*") if f.suffix.lower() in IMG_EXTS)
    random.shuffle(files)

    n = len(files)
    n_train = int(n * TRAIN_RATIO)
    n_val   = int(n * VAL_RATIO)

    # int() floors both counts, so any remainder ends up in the test split.
    train_files = files[:n_train]
    val_files   = files[n_train:n_train + n_val]
    test_files  = files[n_train + n_val:]

    for split, split_files in (("train", train_files), ("val", val_files), ("test", test_files)):
        for f in split_files:
            shutil.copy2(f, f"{OUT_DIR}/{split}/{c}/{f.name}")

    print(f"{c}: total={n} | train={len(train_files)} | val={len(val_files)} | test={len(test_files)}")

print("\nSplit complete ->", OUT_DIR)

In [None]:
# Load data (with simple preprocessing)
import tensorflow as tf
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# The classifier in this notebook is built on an ImageNet-pretrained
# MobileNetV2, which expects pixels scaled to [-1, 1] by
# mobilenet_v2.preprocess_input. A plain rescale=1./255 yields [0, 1] and
# does not match what the pretrained weights were trained on.

# Train: MobileNetV2 preprocessing + light augmentation
train_gen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=10,
    zoom_range=0.1,
    horizontal_flip=True)

# Val/Test: preprocessing only (no augmentation)
val_gen = ImageDataGenerator(preprocessing_function=preprocess_input)
test_gen = ImageDataGenerator(preprocessing_function=preprocess_input)

In [None]:
# Options shared by the three directory iterators.
flow_kwargs = dict(
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="categorical",
)

# Train and validation iterators use a fixed seed; the test iterator keeps
# file order (shuffle=False).
train_data = train_gen.flow_from_directory(f"{OUT_DIR}/train", seed=SEED, **flow_kwargs)
val_data = val_gen.flow_from_directory(f"{OUT_DIR}/val", seed=SEED, **flow_kwargs)
test_data = test_gen.flow_from_directory(f"{OUT_DIR}/test", shuffle=False, **flow_kwargs)

# Report how many images and classes each iterator found.
print("\nDataset summary")
print(f"Train set      : {train_data.samples} images | {train_data.num_classes} classes")
print(f"Validation set : {val_data.samples} images | {val_data.num_classes} classes")
print(f"Test set       : {test_data.samples} images | {test_data.num_classes} classes")

print("\nLabel mapping (class indices):", train_data.class_indices)

In [None]:
## Class weights (helps imbalance)
import numpy as np
from sklearn.utils.class_weight import compute_class_weight

# Integer class index of every training image, as assigned by the iterator.
y_train = train_data.classes

# compute_class_weight returns one weight per entry of `classes`, in that
# order. Pair each weight with its actual class index instead of a running
# counter: if some class has no training images, np.unique() skips its index
# and enumerate() would attach the weights to the wrong classes.
present_classes = np.unique(y_train)
weights = compute_class_weight(class_weight="balanced", classes=present_classes, y=y_train)
class_weight = {int(label): float(w) for label, w in zip(present_classes, weights)}
print("Class weights:", class_weight)

In [None]:
## Build model (MobileNetV2 Transfer Learning)
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras import layers, models

num_classes = train_data.num_classes

# Frozen ImageNet-pretrained feature extractor. The input shape is derived
# from IMG_SIZE so it always matches the target_size the data iterators
# resize images to.
base_model = MobileNetV2(
    input_shape=(*IMG_SIZE, 3),
    include_top=False,
    weights="imagenet"
)
base_model.trainable = False

# Classification head: global average pooling followed by a softmax layer
# with one unit per class.
model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dense(num_classes, activation="softmax")
])

model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"]
)

model.summary()

In [None]:
## Train
# Fit the model on the training iterator for EPOCHS epochs, reporting
# validation metrics from val_data. class_weight supplies the per-class
# loss weights computed earlier to counter class imbalance.
# The returned History object is used later to plot the learning curves.
history = model.fit(
    train_data,
    validation_data=val_data,
    epochs=EPOCHS,
    class_weight=class_weight
)

In [None]:
# Evaluate on test set
# model.evaluate returns the loss followed by the compiled metrics (accuracy here).
test_loss, test_acc = model.evaluate(test_data)
print(f"\nTesting Accuracy: {test_acc:.4f}")

In [None]:
## Confusion matrix
from sklearn.metrics import confusion_matrix, classification_report

# Ground-truth class indices, in the file order of the unshuffled test iterator.
y_true = test_data.classes

# Per-class probabilities for every test image; the predicted class is the
# index with the highest probability.
pred_probs = model.predict(test_data)
y_pred = pred_probs.argmax(axis=1)

cm = confusion_matrix(y_true, y_pred)
print("\nConfusion Matrix:\n", cm)

In [None]:
# Confusion matrix (counts)
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

# Class names ordered by their integer index. This must be built BEFORE the
# plot below uses it for tick labels; defining it at the end of the cell
# raises NameError on a fresh run.
labels_sorted = sorted(test_data.class_indices, key=test_data.class_indices.get)

cm = confusion_matrix(y_true, y_pred)

plt.figure()
plt.imshow(cm)
plt.title("Confusion Matrix (Counts)")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.xticks(ticks=np.arange(len(labels_sorted)), labels=labels_sorted, rotation=45, ha="right")
plt.yticks(ticks=np.arange(len(labels_sorted)), labels=labels_sorted)

# Write the raw count into every cell (x = predicted column, y = actual row).
for i in range(cm.shape[0]):
    for j in range(cm.shape[1]):
        plt.text(j, i, cm[i, j], ha="center", va="center")

plt.tight_layout()
plt.show()

In [None]:
# Confusion matrix (normalized)
# Divide every row by its total so each cell is the share of that actual
# class; rows without samples give NaN, which nan_to_num turns into 0.
row_totals = cm.sum(axis=1, keepdims=True)
cm_norm = np.nan_to_num(cm.astype("float") / row_totals)

tick_positions = np.arange(len(labels_sorted))

plt.figure()
plt.imshow(cm_norm, vmin=0, vmax=1)
plt.title("Confusion Matrix (Normalized)")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.xticks(ticks=tick_positions, labels=labels_sorted, rotation=45, ha="right")
plt.yticks(ticks=tick_positions, labels=labels_sorted)

# Annotate each cell with its percentage (x = predicted column, y = actual row).
for i, j in np.ndindex(*cm_norm.shape):
    plt.text(j, i, f"{cm_norm[i, j]*100:.0f}%", ha="center", va="center")

plt.tight_layout()
plt.show()

In [None]:
## Accuracy/Loss curves
import matplotlib.pyplot as plt

# One figure per metric:
# (train key, validation key, train label, validation label, title, y-axis label)
curve_specs = [
    ("accuracy", "val_accuracy", "Train Accuracy", "Validation Accuracy",
     "Training vs Validation Accuracy", "Accuracy"),
    ("loss", "val_loss", "Train Loss", "Validation Loss",
     "Training vs Validation Loss", "Loss"),
]

for train_key, val_key, train_label, val_label, title, y_label in curve_specs:
    plt.figure()
    plt.plot(history.history[train_key], label=train_label)
    plt.plot(history.history[val_key], label=val_label)
    plt.title(title)
    plt.xlabel("Epoch")
    plt.ylabel(y_label)
    plt.legend()
    plt.tight_layout()
    plt.show()

In [None]:
## Classification report
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 as a text table, labelled with class names.
print("\nClassification Report:\n")
report_text = classification_report(y_true, y_pred, target_names=labels_sorted)
print(report_text)

In [None]:
# Per-class F1 score
# output_dict=True returns the report as a nested dict keyed by class name.
report = classification_report(
    y_true,
    y_pred,
    target_names=labels_sorted,
    output_dict=True,
)
f1_scores = [report[class_name]["f1-score"] for class_name in labels_sorted]

plt.figure()
plt.bar(labels_sorted, f1_scores)
plt.ylim(0, 1)
plt.xticks(rotation=45, ha="right")
plt.xlabel("Class")
plt.ylabel("F1")
plt.title("Per-Class F1 Score")
plt.tight_layout()
plt.show()

In [None]:
# ROC-AUC curves (one-vs-rest, one curve per class)
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import label_binarize
from sklearn.metrics import roc_curve, auc

K = len(labels_sorted)

# One indicator column per class: column i is 1 where the true class is i.
y_true_bin = label_binarize(y_true, classes=list(range(K)))
if K == 2:
    # For two classes label_binarize returns a single column (the indicator
    # of class 1). Expand it to one column per class so that indexing
    # y_true_bin[:, i] below works for both classes.
    y_true_bin = np.hstack([1 - y_true_bin, y_true_bin])

plt.figure()
for i in range(K):
    # Class i against all others, scored by the predicted probability of class i.
    fpr, tpr, _ = roc_curve(y_true_bin[:, i], pred_probs[:, i])
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, label=f"{labels_sorted[i]} (AUC={roc_auc:.2f})")

plt.title("ROC Curves (One-vs-Rest)")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.legend(fontsize=8)
plt.tight_layout()
plt.show()