<a href="https://colab.research.google.com/github/zaviruuu/Naga--ML-Based-Snake-Identifier-for-Sri-Lanka-/blob/input_validation_model/Input_Validation_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Input Validation Model


In [1]:
import os, shutil, random
from pathlib import Path
import numpy as np
import tensorflow as tf
from google.colab import drive

drive.mount('/content/drive')
print("TensorFlow:", tf.__version__)


Mounted at /content/drive
TensorFlow: 2.19.0


In [2]:
INPUT_DIR  = "/content/drive/MyDrive/DSGP_Group_32/NĀGA/Input Validation Model/Dataset"
OUTPUT_DIR = "/content/drive/MyDrive/DSGP_Group_32/NĀGA/Input Validation Model/Output"

IMG_SIZE = (224, 224)
BATCH = 32
SEED = 42

print("INPUT exists :", os.path.exists(INPUT_DIR))
print("valid exists :", os.path.exists(f"{INPUT_DIR}/valid"))
print("invalid exists:", os.path.exists(f"{INPUT_DIR}/invalid"))


INPUT exists : True
valid exists : False
invalid exists: False


In [3]:
from collections import Counter

def scan_exts(folder):
    exts = []
    for root, _, files in os.walk(folder):
        for f in files:
            exts.append(os.path.splitext(f)[1].lower())
    return Counter(exts)

print("Valid exts:", scan_exts(f"{INPUT_DIR}/Valid"))
print("Invalid exts:", scan_exts(f"{INPUT_DIR}/Invalid"))


Valid exts: Counter({'.jpg': 299})
Invalid exts: Counter({'.jpg': 300})


In [4]:
from PIL import Image

random.seed(SEED)

CLASS_MAP = {
    "valid": "Valid",
    "invalid": "Invalid"
}

def make_clean_dir(path):
    path = Path(path)
    if path.exists():
        shutil.rmtree(path)
    path.mkdir(parents=True, exist_ok=True)

def list_all_images(folder):
    exts = {".jpg",".jpeg",".png",".webp",".bmp",".gif",".tif",".tiff"}
    return [p for p in Path(folder).rglob("*") if p.is_file() and p.suffix.lower() in exts]

def save_as_jpg(src, dst):
    img = Image.open(src).convert("RGB")
    img.save(dst, format="JPEG", quality=95)

def split_convert_and_save(train_ratio=0.7, val_ratio=0.15, test_ratio=0.15):
    assert abs(train_ratio + val_ratio + test_ratio - 1.0) < 1e-9

    make_clean_dir(OUTPUT_DIR)
    for sp in ["train","val","test"]:
        for cls in ["valid","invalid"]:
            (Path(OUTPUT_DIR)/sp/cls).mkdir(parents=True, exist_ok=True)

    for cls, real_folder in CLASS_MAP.items():
        src_cls = Path(INPUT_DIR) / real_folder
        if not src_cls.exists():
            raise FileNotFoundError(f"Missing folder: {src_cls}")

        files = list_all_images(src_cls)
        if len(files) == 0:
            raise ValueError(f"No images found in: {src_cls}")

        random.shuffle(files)
        n = len(files)
        n_train = int(n * train_ratio)
        n_val   = int(n * val_ratio)

        splits = {
            "train": files[:n_train],
            "val":   files[n_train:n_train+n_val],
            "test":  files[n_train+n_val:]
        }

        for sp, flist in splits.items():
            dst_dir = Path(OUTPUT_DIR) / sp / cls
            for f in flist:
                out = dst_dir / f"{f.stem}.jpg"
                if out.exists():
                    out = dst_dir / f"{f.stem}_{random.randint(1000,9999)}.jpg"
                try:
                    save_as_jpg(f, out)
                except Exception as e:
                    print("Skipping unreadable:", f, "|", e)

        print(f" {cls.upper()} -> total:{n} train:{len(splits['train'])} val:{len(splits['val'])} test:{len(splits['test'])}")

split_convert_and_save()
print(" Output saved to:", OUTPUT_DIR)


 VALID -> total:299 train:209 val:44 test:46
 INVALID -> total:300 train:210 val:45 test:45
 Output saved to: /content/drive/MyDrive/DSGP_Group_32/NĀGA/Input Validation Model/Output


In [5]:
def count_files(p):
    c = 0
    for _, _, files in os.walk(p):
        c += len(files)
    return c

for sp in ["train","val","test"]:
    for cls in ["valid","invalid"]:
        p = f"{OUTPUT_DIR}/{sp}/{cls}"
        print(sp, cls, "files =", count_files(p))


train valid files = 209
train invalid files = 210
val valid files = 44
val invalid files = 45
test valid files = 46
test invalid files = 45
