<a href="https://colab.research.google.com/github/zaviruuu/Naga--ML-Based-Snake-Identifier-for-Sri-Lanka-/blob/input_validation_model/Input_Validation_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Input Validation Model


In [None]:
import os, shutil, random
from pathlib import Path
import numpy as np
import tensorflow as tf
from google.colab import drive

# Mount Google Drive at /content/drive; the dataset and output paths used by
# this notebook live under that mount point.
drive.mount('/content/drive')
# Print the TensorFlow version in use for this run.
print("TensorFlow:", tf.__version__)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
TensorFlow: 2.19.0


In [None]:
# Root of the raw dataset and the folder that receives the train/val/test split.
INPUT_DIR  = "/content/drive/MyDrive/DSGP_Group_32/NĀGA/Input Validation Model/Dataset"
OUTPUT_DIR = "/content/drive/MyDrive/DSGP_Group_32/NĀGA/Input Validation Model/Output"

IMG_SIZE = (224, 224)  # passed as `image_size` to the dataset loader
BATCH = 32
SEED = 42

# Sanity-check the source layout. The class folders are capitalised
# ("Valid" / "Invalid"), so the check must use the same casing: probing
# the lowercase names reports False even when the data is present.
print("INPUT exists :", os.path.exists(INPUT_DIR))
print("valid exists :", os.path.exists(f"{INPUT_DIR}/Valid"))
print("invalid exists:", os.path.exists(f"{INPUT_DIR}/Invalid"))


INPUT exists : True
valid exists : False
invalid exists: False


In [None]:
from collections import Counter

def scan_exts(folder):
    """Count the lowercased file extensions of every file under ``folder``.

    The tree is walked recursively. Files without an extension are counted
    under the empty string.

    Returns:
        collections.Counter mapping extension (including the dot) to count.
    """
    return Counter(
        os.path.splitext(filename)[1].lower()
        for _dirpath, _dirnames, filenames in os.walk(folder)
        for filename in filenames
    )

# Tally the file extensions found in each source class folder.
for label, class_dir in (
    ("Valid exts:", f"{INPUT_DIR}/Valid"),
    ("Invalid exts:", f"{INPUT_DIR}/Invalid"),
):
    print(label, scan_exts(class_dir))


Valid exts: Counter({'.jpg': 299})
Invalid exts: Counter({'.jpg': 300})


In [None]:
from PIL import Image

# Seed Python's RNG so the shuffle used for splitting is driven by SEED.
random.seed(SEED)

# Output class folder name (lowercase) -> source folder name under INPUT_DIR.
CLASS_MAP = dict(valid="Valid", invalid="Invalid")

def make_clean_dir(path):
    """Ensure ``path`` is an existing, empty directory.

    Anything already at ``path`` is removed recursively first, then the
    directory (and any missing parents) is created.
    """
    target = Path(path)
    already_there = target.exists()
    if already_there:
        # Destructive: drops the whole existing tree.
        shutil.rmtree(target)
    target.mkdir(parents=True, exist_ok=True)

def list_all_images(folder):
    """Recursively collect image files under ``folder``.

    A path qualifies when it is a regular file whose suffix, compared
    case-insensitively, is one of the recognised image extensions.

    Returns:
        list of pathlib.Path objects, in the order ``rglob`` yields them.
    """
    image_suffixes = frozenset(
        (".jpg", ".jpeg", ".png", ".webp", ".bmp", ".gif", ".tif", ".tiff")
    )
    matches = []
    for candidate in Path(folder).rglob("*"):
        if candidate.is_file() and candidate.suffix.lower() in image_suffixes:
            matches.append(candidate)
    return matches

def save_as_jpg(src, dst):
    """Re-encode the image at ``src`` as an RGB JPEG written to ``dst``.

    Args:
        src: Path of the source image, in any format Pillow can open.
        dst: Destination path of the JPEG file (quality 95).

    Raises:
        Any exception Pillow raises when ``src`` cannot be opened or decoded,
        or when ``dst`` cannot be written.
    """
    # Use the image as a context manager so the source file handle is always
    # released, including for multi-frame formats and when conversion fails.
    with Image.open(src) as img:
        img.convert("RGB").save(dst, format="JPEG", quality=95)

def _unique_jpg_path(dst_dir, stem):
    """Return a ``<stem>.jpg`` path in ``dst_dir`` that does not exist yet, appending ``_1``, ``_2``, ... on collision."""
    candidate = dst_dir / f"{stem}.jpg"
    counter = 1
    while candidate.exists():
        candidate = dst_dir / f"{stem}_{counter}.jpg"
        counter += 1
    return candidate


def split_convert_and_save(train_ratio=0.7, val_ratio=0.15, test_ratio=0.15):
    """Split each class into train/val/test and write the images as JPEGs.

    For every entry of CLASS_MAP the images under ``INPUT_DIR/<source folder>``
    are shuffled, partitioned, converted with ``save_as_jpg`` and written to
    ``OUTPUT_DIR/<split>/<class>``. OUTPUT_DIR is wiped and recreated first.

    Args:
        train_ratio: Fraction of each class assigned to the training split.
        val_ratio: Fraction of each class assigned to the validation split.
        test_ratio: Only used to check that the three ratios sum to 1; the
            test split receives whatever remains after train and val.

    Raises:
        AssertionError: If the ratios do not sum to 1.
        FileNotFoundError: If a source class folder is missing.
        ValueError: If a source class folder contains no images.
    """
    assert abs(train_ratio + val_ratio + test_ratio - 1.0) < 1e-9

    # Validate and collect every class BEFORE touching OUTPUT_DIR, so a
    # missing or empty source folder cannot leave a wiped output behind.
    files_by_class = {}
    for cls, real_folder in CLASS_MAP.items():
        src_cls = Path(INPUT_DIR) / real_folder
        if not src_cls.exists():
            raise FileNotFoundError(f"Missing folder: {src_cls}")

        # Directory traversal order depends on the filesystem; sort so the
        # seeded shuffle below produces the same split on every run.
        files = sorted(list_all_images(src_cls))
        if len(files) == 0:
            raise ValueError(f"No images found in: {src_cls}")
        files_by_class[cls] = files

    make_clean_dir(OUTPUT_DIR)
    for sp in ["train", "val", "test"]:
        for cls in CLASS_MAP:
            (Path(OUTPUT_DIR) / sp / cls).mkdir(parents=True, exist_ok=True)

    for cls, files in files_by_class.items():
        random.shuffle(files)
        n = len(files)
        n_train = int(n * train_ratio)
        n_val = int(n * val_ratio)

        splits = {
            "train": files[:n_train],
            "val":   files[n_train:n_train + n_val],
            "test":  files[n_train + n_val:],
        }

        skipped = 0
        for sp, flist in splits.items():
            dst_dir = Path(OUTPUT_DIR) / sp / cls
            for f in flist:
                # Two sources can share a stem (different extension or
                # sub-folder); a counter suffix never overwrites an earlier
                # file and does not consume shared RNG state.
                out = _unique_jpg_path(dst_dir, f.stem)
                try:
                    save_as_jpg(f, out)
                except Exception as e:
                    # Best effort: one unreadable image must not abort the run.
                    skipped += 1
                    print("Skipping unreadable:", f, "|", e)

        print(f" {cls.upper()} -> total:{n} train:{len(splits['train'])} val:{len(splits['val'])} test:{len(splits['test'])}")
        if skipped:
            # The totals above count assigned files; report those not written.
            print(f" {cls.upper()} -> skipped unreadable: {skipped}")

# Build the train/val/test split on disk with the default 70/15/15 ratios,
# then report where it was written.
split_convert_and_save()
print(" Output saved to:", OUTPUT_DIR)


 VALID -> total:299 train:209 val:44 test:46
 INVALID -> total:300 train:210 val:45 test:45
 Output saved to: /content/drive/MyDrive/DSGP_Group_32/NĀGA/Input Validation Model/Output


In [None]:
def count_files(p):
    """Return the number of files found anywhere under directory ``p``."""
    return sum(len(filenames) for _dirpath, _dirnames, filenames in os.walk(p))

# Print how many files ended up in every split/class folder of the output.
for split_name in ("train", "val", "test"):
    for class_name in ("valid", "invalid"):
        split_dir = f"{OUTPUT_DIR}/{split_name}/{class_name}"
        print(split_name, class_name, "files =", count_files(split_dir))


train valid files = 209
train invalid files = 210
val valid files = 44
val invalid files = 45
test valid files = 46
test invalid files = 45


### Load datasets

In [None]:
AUTOTUNE = tf.data.AUTOTUNE

# Loader options shared by all three splits.
# NOTE(review): with inferred labels Keras orders class folders
# alphanumerically, so presumably invalid -> 0 and valid -> 1; confirm via
# `train_ds.class_names`.
_loader_opts = dict(label_mode="binary", image_size=IMG_SIZE, batch_size=BATCH)

# Only the training split is shuffled, using SEED.
train_ds = tf.keras.utils.image_dataset_from_directory(
    f"{OUTPUT_DIR}/train", shuffle=True, seed=SEED, **_loader_opts
)

# Validation and test keep directory order.
val_ds = tf.keras.utils.image_dataset_from_directory(
    f"{OUTPUT_DIR}/val", shuffle=False, **_loader_opts
)

test_ds = tf.keras.utils.image_dataset_from_directory(
    f"{OUTPUT_DIR}/test", shuffle=False, **_loader_opts
)


Found 419 files belonging to 2 classes.
Found 89 files belonging to 2 classes.
Found 91 files belonging to 2 classes.


### Preprocess

In [None]:
# Random geometric augmentation; only the training pipeline applies it.
data_aug = tf.keras.Sequential([
    tf.keras.layers.RandomFlip("horizontal"),
    tf.keras.layers.RandomRotation(0.05),
    tf.keras.layers.RandomZoom(0.1),
])

def prep_eval(x, y):
    """Cast the image batch to float32 and divide by 255; labels pass through."""
    return tf.cast(x, tf.float32) / 255.0, y

def prep_train(x, y):
    """Apply the same 1/255 scaling as ``prep_eval``, then augment in training mode."""
    scaled = tf.cast(x, tf.float32) / 255.0
    return data_aug(scaled, training=True), y

def _map_and_prefetch(ds, fn):
    """Map ``fn`` over ``ds`` in parallel and prefetch the result."""
    return ds.map(fn, num_parallel_calls=AUTOTUNE).prefetch(AUTOTUNE)

# Each name is rebound to its mapped dataset, so re-running this cell without
# re-running the loading cell would apply the 1/255 scaling a second time.
train_ds = _map_and_prefetch(train_ds, prep_train)
val_ds   = _map_and_prefetch(val_ds, prep_eval)
test_ds  = _map_and_prefetch(test_ds, prep_eval)


### Train model

In [None]:
# MobileNetV2 backbone with ImageNet weights and no classification head.
# It is frozen, so only the new head below is trained.
base = tf.keras.applications.MobileNetV2(
    input_shape=IMG_SIZE + (3,),  # keep in sync with the loader's image size
    include_top=False,
    weights="imagenet"
)
base.trainable = False

model = tf.keras.Sequential([
    tf.keras.Input(shape=IMG_SIZE + (3,)),
    # The input pipeline yields pixels in [0, 1], while the ImageNet
    # MobileNetV2 weights were trained on inputs in [-1, 1]. Mapping the
    # range inside the model keeps the model's input contract ([0, 1]
    # images) unchanged for any code that feeds it.
    tf.keras.layers.Rescaling(scale=2.0, offset=-1.0),
    base,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dropout(0.3),
    # Single sigmoid unit to match the binary labels from the loader.
    tf.keras.layers.Dense(1, activation="sigmoid")
])

model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-3),
    loss="binary_crossentropy",
    metrics=["accuracy", tf.keras.metrics.AUC(name="auc")]
)

# Both callbacks track validation loss (the Keras default, made explicit).
callbacks = [
    tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=4, restore_best_weights=True),
    tf.keras.callbacks.ModelCheckpoint("best_validity_model.keras", monitor="val_loss", save_best_only=True)
]

# Keep the History object so the training curves can be inspected afterwards.
history = model.fit(train_ds, validation_data=val_ds, epochs=15, callbacks=callbacks)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Epoch 1/15
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 1s/step - accuracy: 0.5544 - auc: 0.5979 - loss: 0.7189 - val_accuracy: 0.6067 - val_auc: 0.7030 - val_loss: 0.6304
Epoch 2/15
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 1s/step - accuracy: 0.6632 - auc: 0.6939 - loss: 0.6400 - val_accuracy: 0.7191 - val_auc: 0.7854 - val_loss: 0.5659
Epoch 3/15
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 1s/step - accuracy: 0.7679 - auc: 0.8471 - loss: 0.4981 - val_accuracy: 0.7303 - val_auc: 0.7997 - val_loss: 0.5398
Epoch 4/15
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 1s/step - accuracy: 0.7336 - auc: 0.8182 - loss: 0.5259 - val_accuracy: 0.7303 - val_auc: 0.8111 - val_

<keras.src.callbacks.history.History at 0x7a06cec30a70>