In [None]:
from google.colab import files
import zipfile, os
from pathlib import Path
import cv2
import numpy as np

uploaded = files.upload()  # Upload NewDataset.zip

# files.upload() returns a dict keyed by the name each file was saved under.
# Colab renames on collision (e.g. "NewDataset (1).zip" on a re-run), so use
# that key instead of a hard-coded path, which would silently extract a stale
# archive left over from an earlier run.
zip_names = [name for name in uploaded if name.lower().endswith('.zip')]
if not zip_names:
    raise FileNotFoundError("No .zip archive was uploaded")
zip_path = os.path.abspath(zip_names[0])
extract_dir = "/content/LeafDisease"

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_dir)

print("Extracted to:", extract_dir)

def read_image(path):
    """Load an image from disk as a BGR array.

    The bytes are read with ``np.fromfile`` and decoded with ``cv2.imdecode``
    rather than opened by name through OpenCV.

    Args:
        path: File location (``str`` or ``Path``).

    Returns:
        The decoded image from ``cv2.imdecode(..., cv2.IMREAD_COLOR)``, or
        ``None`` when the file cannot be read, is empty, or cannot be decoded.
    """
    try:
        data = np.fromfile(str(path), dtype=np.uint8)
    except OSError:
        # Missing or unreadable file: report it as "no image" so callers that
        # check for None can skip it instead of aborting the batch.
        return None
    if data.size == 0:
        # cv2.imdecode raises on an empty buffer instead of returning None.
        return None
    return cv2.imdecode(data, cv2.IMREAD_COLOR)

def write_image(path, img):
    """Encode ``img`` and write it to ``path``, creating parent folders.

    The encoding follows the file extension of ``path`` so that the bytes on
    disk match the file name (a ``.png`` target receives PNG data, not JPEG).
    If OpenCV cannot encode to that extension, JPEG is used as before.

    Args:
        path: Destination file (``str`` or ``Path``).
        img: Image array accepted by ``cv2.imencode``.

    Raises:
        OSError: If the image could not be encoded at all.
    """
    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)

    ext = path.suffix.lower() or '.jpg'
    try:
        ok, buf = cv2.imencode(ext, img)
    except cv2.error:
        # No encoder registered for this extension in the current build.
        ok, buf = False, None
    if not ok and ext != '.jpg':
        ok, buf = cv2.imencode('.jpg', img)
    if not ok:
        # Previously a failed encode was ignored and an empty file written.
        raise OSError(f"Could not encode image for {path}")
    buf.tofile(str(path))

def largest_component_mask(mask):
    """Keep only the largest external contour of a binary mask, filled and smoothed.

    Args:
        mask: Binary mask image as produced by the mask helpers in this file
            (uint8, non-zero marks foreground).

    Returns:
        A mask of the same shape and dtype holding the filled largest contour
        (value 255) after a 7x7 elliptical close and a 5x5 elliptical open, or
        ``None`` when the input has no contours or nothing survives the
        clean-up.
    """
    contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return None
    c = max(contours, key=cv2.contourArea)
    out = np.zeros_like(mask)
    cv2.drawContours(out, [c], -1, 255, thickness=cv2.FILLED)
    out = cv2.morphologyEx(out, cv2.MORPH_CLOSE, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7)))
    out = cv2.morphologyEx(out, cv2.MORPH_OPEN, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5)))
    # A tiny speck can be erased entirely by the opening step. Report that as
    # "no component" so callers fall back to another segmentation method
    # instead of receiving an all-zero mask.
    if not out.any():
        return None
    return out

def hsv_union_mask(img):
    """Build a binary mask from the union of green, yellow and brown HSV bands.

    Args:
        img: BGR image.

    Returns:
        uint8 mask: pixels falling in any of the three HSV bands and having
        saturation above 30, cleaned with a 5x5 close and a 3x3 open.
    """
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

    # Looser thresholds to preserve diseased areas
    # (lower, upper) HSV bounds for green, yellow and brown, in that order.
    colour_bands = (
        ([20, 30, 30], [90, 255, 255]),
        ([10, 30, 30], [45, 255, 255]),
        ([0, 20, 10], [25, 255, 200]),
    )

    combined = None
    for lower, upper in colour_bands:
        band = cv2.inRange(
            hsv,
            np.array(lower, dtype=np.uint8),
            np.array(upper, dtype=np.uint8),
        )
        combined = band if combined is None else cv2.bitwise_or(combined, band)

    # Drop washed-out pixels regardless of which band matched them.
    _, saturated = cv2.threshold(hsv[:, :, 1], 30, 255, cv2.THRESH_BINARY)
    combined = cv2.bitwise_and(combined, saturated)

    # Morphological operations (less aggressive)
    close_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    open_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    combined = cv2.morphologyEx(combined, cv2.MORPH_CLOSE, close_kernel)
    return cv2.morphologyEx(combined, cv2.MORPH_OPEN, open_kernel)

def grabcut_mask(img):
    """Segment the foreground with GrabCut initialised from a centred rectangle.

    Args:
        img: BGR image.

    Returns:
        uint8 mask with 255 where GrabCut labelled a pixel as definite or
        probable foreground, followed by a 5x5 elliptical open.
    """
    height, width = img.shape[:2]
    # Rectangle is (x, y, w, h): inset 5% from the top-left, spanning 90% of each side.
    roi = (int(width * 0.05), int(height * 0.05), int(width * 0.9), int(height * 0.9))

    labels = np.zeros((height, width), np.uint8)
    bg_model = np.zeros((1, 65), np.float64)
    fg_model = np.zeros((1, 65), np.float64)
    # grabCut fills `labels` in place.
    cv2.grabCut(img, labels, roi, bg_model, fg_model, 5, cv2.GC_INIT_WITH_RECT)

    is_foreground = (labels == cv2.GC_FGD) | (labels == cv2.GC_PR_FGD)
    foreground = np.where(is_foreground, 255, 0).astype(np.uint8)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    return cv2.morphologyEx(foreground, cv2.MORPH_OPEN, kernel)

def kmeans_mask(img, center_radius=10):
    """Segment the image into two clusters on the LAB a/b channels.

    The cluster that dominates a square window around the image centre is
    taken to be the foreground.

    Args:
        img: BGR image.
        center_radius: Half-size in pixels of the central voting window
            (default 10, i.e. a 20x20 window). The window is clipped to the
            image, so small images are handled correctly.

    Returns:
        uint8 mask with 255 for the chosen cluster, after a 5x5 open and a
        7x7 close. Ties in the vote go to cluster 0.
    """
    lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
    h, w = lab.shape[:2]
    ab = lab[:, :, 1:3].reshape(-1, 2).astype(np.float32)
    criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 20, 1.0)
    _, labels, _ = cv2.kmeans(ab, 2, None, criteria, 3, cv2.KMEANS_PP_CENTERS)
    labels = labels.reshape(h, w)

    # Use central region to pick cluster. Clamp the start indices at 0: a
    # negative start would wrap around and sample the wrong part of the image
    # when a side is shorter than 2 * center_radius.
    ch, cw = h // 2, w // 2
    window = labels[max(ch - center_radius, 0):ch + center_radius,
                    max(cw - center_radius, 0):cw + center_radius]
    l0 = np.count_nonzero(window == 0)
    l1 = np.count_nonzero(window == 1)
    target = 0 if l0 >= l1 else 1

    out = np.where(labels == target, 255, 0).astype(np.uint8)
    out = cv2.morphologyEx(out, cv2.MORPH_OPEN, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5)))
    out = cv2.morphologyEx(out, cv2.MORPH_CLOSE, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7)))
    return out

def refine_mask(img, base):
    """Return the largest-component mask from the first method that yields one.

    Candidates are tried in order: the supplied ``base`` mask, then GrabCut,
    then k-means. Later candidates are computed only if the earlier ones
    produced no component.

    Args:
        img: BGR image used by the fallback segmenters.
        base: Initial binary mask.

    Returns:
        The cleaned mask from ``largest_component_mask``, or ``None`` if every
        candidate came back empty.
    """
    # Lazy producers so the expensive fallbacks run only when needed.
    candidates = (
        lambda: base,
        lambda: grabcut_mask(img),  # fallback
        lambda: kmeans_mask(img),
    )
    for produce in candidates:
        cleaned = largest_component_mask(produce())
        if cleaned is not None:
            return cleaned
    return None

def apply_mask(img, mask):
    """Composite the sharp image over a blurred copy of itself.

    Args:
        img: BGR image.
        mask: Single-channel mask; only pixels equal to 255 keep the original.

    Returns:
        An array shaped like ``img`` in which masked pixels come from ``img``
        and all other pixels come from a 15x15 Gaussian blur of ``img``.
    """
    # keep background blurred
    blurred = cv2.GaussianBlur(img, (15, 15), 0)
    # Add a trailing axis so the 2-D mask broadcasts across colour channels.
    keep = (mask == 255)[:, :, np.newaxis]
    return np.where(keep, img, blurred)

def process_one(in_path, out_path, save_mask=False):
    """Segment one image and write the result.

    Args:
        in_path: Source image path.
        out_path: Destination ``Path`` for the segmented image.
        save_mask: When true, also write the mask next to the output as
            ``<stem>_mask.jpg``.

    Returns:
        ``False`` if the source image could not be read, otherwise ``True``.
    """
    image = read_image(in_path)
    if image is None:
        return False

    # Start from the HSV colour mask; refine_mask applies its own fallbacks.
    leaf_mask = refine_mask(image, hsv_union_mask(image))
    if leaf_mask is None:
        # Last resort: use the raw GrabCut mask without component filtering.
        leaf_mask = grabcut_mask(image)

    write_image(out_path, apply_mask(image, leaf_mask))
    if save_mask:
        write_image(out_path.parent / (out_path.stem + "_mask.jpg"), leaf_mask)
    return True

def gather_images(input_dir):
    """Yield every image found under the known split folders of a dataset.

    The expected layout is ``<input_dir>/<split>/<class>/**/<image>``. Split
    folders are visited in the fixed order listed below, class folders and
    the files inside them in sorted order, so the sequence is reproducible.

    Args:
        input_dir: Dataset root (``str`` or ``Path``).

    Yields:
        ``(split, class_name, path)`` tuples for each file whose extension
        (case-insensitive) is a recognised image type. Files sitting directly
        in a split folder, outside any class folder, are ignored.
    """
    input_dir = Path(input_dir)
    exts = {'.jpg','.jpeg','.png','.bmp','.tif','.tiff','.webp'}
    for split in ['Training','Validation','Test','testing','validation','train','test']:
        sp = input_dir / split
        # is_dir() rather than exists(): a stray file named like a split
        # would otherwise make iterdir() raise.
        if not sp.is_dir():
            continue
        for cls in sorted(d for d in sp.iterdir() if d.is_dir()):
            # rglob order is filesystem-dependent; sort for determinism.
            for p in sorted(cls.rglob('*')):
                if p.is_file() and p.suffix.lower() in exts:
                    yield split, cls.name, p

import sys, argparse

def main():
    """Segment every image of a dataset and mirror it under ``<output>/Segmentation``.

    Command-line arguments (read from ``sys.argv`` by argparse):
        --input      Dataset root containing the split folders.
        --output     Folder under which ``Segmentation/`` is created.
        --save-mask  Also write each mask next to its segmented image.

    Output files keep their path relative to the class folder, so images with
    the same file name in different sub-folders of one class do not overwrite
    each other.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', type=str, required=True)
    parser.add_argument('--output', type=str, required=True)
    parser.add_argument('--save-mask', action='store_true')
    args = parser.parse_args()

    in_root = Path(args.input)
    out_root = Path(args.output)
    seg_root = out_root / 'Segmentation'

    items = list(gather_images(in_root))
    print(f"Total images to process: {len(items)}")

    count, ok = 0, 0
    for split, cls, p in items:
        # Images are discovered recursively, so using only p.name would let
        # files from different sub-folders collide. Keep the sub-path instead.
        try:
            rel = p.relative_to(in_root / split / cls)
        except ValueError:
            # Path is not under the expected class folder; fall back to the bare name.
            rel = Path(p.name)
        out_file = seg_root / split / cls / rel
        done = process_one(p, out_file, save_mask=args.save_mask)
        ok += int(done)
        count += 1
        if count % 200 == 0:
            print(f"Progress: {count}/{len(items)} processed, saved {ok}")
    print(f"Finished! Processed {count}, saved {ok}. Output at {seg_root}")

# Run in Colab directly: the notebook kernel has its own argv, so replace it
# with the arguments main() expects before calling it.
if __name__ == '__main__':
    colab_args = [
        '--input', '/content/LeafDisease/NewDataset',
        '--output', '/content/SegmentationNew',
    ]
    sys.argv = ['colab_kernel_launcher.py', *colab_args]
    main()

Saving NewDataset.zip to NewDataset.zip
Extracted to: /content/LeafDisease
Total images to process: 1893
Progress: 200/1893 processed, saved 200
Progress: 400/1893 processed, saved 400
Progress: 600/1893 processed, saved 600
Progress: 800/1893 processed, saved 800
Progress: 1000/1893 processed, saved 1000
Progress: 1200/1893 processed, saved 1200
Progress: 1400/1893 processed, saved 1400
Progress: 1600/1893 processed, saved 1600
Progress: 1800/1893 processed, saved 1800
Finished! Processed 1893, saved 1893. Output at /content/SegmentationNew/Segmentation


In [None]:
import shutil
# Zip the contents of the Segmentation folder, then send the archive to the browser.
shutil.make_archive(
    base_name="/content/SegmentationNew",
    format='zip',
    root_dir="/content/SegmentationNew/Segmentation",
)
files.download("/content/SegmentationNew.zip")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>