In [1]:
# Run once if needed (%pip installs into the environment of the running kernel):
# %pip install rembg opencv-python-headless numpy pillow scikit-learn matplotlib tqdm pandas

import os
import glob
from pathlib import Path
from tqdm import tqdm
import numpy as np
import cv2
from rembg import remove
from PIL import Image
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import pandas as pd


In [2]:

# ---------- Set folder input / output ----------
INPUT_DIR = "img/"    # <-- replace with the folder that holds your images
OUTPUT_DIR = "hasil"  # every result is written below this folder

# One sub-folder per pipeline stage.
NO_BG_DIR = os.path.join(OUTPUT_DIR, "no_bg")
SEG_DIR = os.path.join(OUTPUT_DIR, "kmeans_seg")
HIGHLIGHT_DIR = os.path.join(OUTPUT_DIR, "highlighted")

# Create the whole output tree up front; exist_ok keeps cell re-runs harmless.
for _out_dir in (OUTPUT_DIR, NO_BG_DIR, SEG_DIR, HIGHLIGHT_DIR):
    os.makedirs(_out_dir, exist_ok=True)
del _out_dir

# ---------- Utility: load image (PIL -> RGBA) --------------
def load_image_pil(path):
    """Load the image stored at ``path`` and return it as an RGBA PIL image.

    Args:
        path: Path of the image file; passed straight to ``Image.open``.

    Returns:
        A new ``PIL.Image.Image`` in mode ``"RGBA"``.
    """
    # The ``with`` block closes the source file as soon as ``convert`` has
    # produced the RGBA result, instead of leaving the handle to the garbage
    # collector (which matters when looping over a whole folder).
    with Image.open(path) as src:
        return src.convert("RGBA")


In [3]:

# ---------- Remove background using rembg (PIL in/out) ----------
def remove_background_pil(pil_img):
    """Strip the background from a PIL image with rembg.

    The image is PNG-encoded in memory, handed to rembg's ``remove`` as bytes,
    and the bytes that come back are decoded again.

    Args:
        pil_img: PIL image to process (the notebook passes RGBA images).

    Returns:
        A ``PIL.Image.Image`` in mode ``"RGBA"`` decoded from rembg's output.
    """
    # Local import: the notebook's import cell does not pull in ``io``.
    from io import BytesIO

    # Serialise to an in-memory PNG so rembg receives encoded image bytes.
    encoded = BytesIO()
    pil_img.save(encoded, format="PNG")

    result_bytes = remove(encoded.getvalue())
    return Image.open(BytesIO(result_bytes)).convert("RGBA")


In [4]:

# ---------- Convert PIL RGBA to OpenCV BGR with alpha mask ----------
def pil_to_cv2_rgba(pil_img):
    """Split an image into an OpenCV-style BGR array and a separate alpha mask.

    Args:
        pil_img: A PIL image, or an array-like laid out as H x W (gray),
            H x W x 3 (RGB) or H x W x 4 (RGBA).

    Returns:
        Tuple ``(bgr, alpha)``:
          * ``bgr``   - H x W x 3 array, channels in B, G, R order.
          * ``alpha`` - H x W array; the image's alpha channel when it has
            one, otherwise a uint8 array filled with 255 (fully opaque).

    Raises:
        ValueError: if the pixel data has an unsupported layout.
    """
    # PIL modes other than RGB/RGBA (L, LA, P, 1, CMYK, ...) do not map onto a
    # 3- or 4-channel array, so let PIL normalise them first. Plain arrays have
    # no ``mode`` attribute and skip this step.
    mode = getattr(pil_img, "mode", None)
    if mode is not None and mode not in ("RGB", "RGBA"):
        pil_img = pil_img.convert("RGBA")

    arr = np.asarray(pil_img)

    # Single-channel data: replicate the gray plane into three colour channels.
    if arr.ndim == 3 and arr.shape[2] == 1:
        arr = arr[:, :, 0]
    if arr.ndim == 2:
        arr = np.repeat(arr[:, :, np.newaxis], 3, axis=2)

    if arr.ndim != 3 or arr.shape[2] not in (3, 4):
        raise ValueError(f"Unsupported image array shape: {arr.shape}")

    if arr.shape[2] == 4:
        alpha = arr[:, :, 3].copy()
    else:
        alpha = np.full(arr.shape[:2], 255, dtype=np.uint8)

    # Reversing the first three channels is the RGB -> BGR swap; ``copy`` gives
    # a contiguous, writable array that OpenCV drawing functions accept.
    bgr = arr[:, :, 2::-1].copy()
    return bgr, alpha


In [5]:

# ---------- K-Means segmentation on pixels inside mask ----------
def kmeans_segmentation_bgr(bgr, alpha_mask, n_clusters=3, subsample=1):
    """Cluster the foreground pixels of an image by colour with K-Means.

    Args:
        bgr: H x W x 3 OpenCV BGR image.
        alpha_mask: 2-D mask of the same height/width; pixels with a value
            > 0 are treated as foreground.
        n_clusters: Number of colour clusters to fit.
        subsample: When > 1 and there are more than 10000 foreground pixels,
            only every ``subsample``-th foreground pixel is used to fit the
            model (all foreground pixels are still labelled afterwards).

    Returns:
        Tuple ``(labels_img, centers)``:
          * ``labels_img`` - H x W int32 array holding the cluster id of each
            foreground pixel and -1 for every background pixel.
          * ``centers``    - uint8 array of cluster colours in BGR, one row
            per cluster. It has ``n_clusters`` rows, or fewer when there are
            fewer fitting samples than ``n_clusters``.
    """
    h, w = alpha_mask.shape
    # Background stays -1 on every path, including the empty-foreground one,
    # so callers can always select foreground with ``labels_img >= 0``.
    labels_img = np.full((h, w), -1, dtype=np.int32)

    fg_idx = np.where(alpha_mask > 0)
    pixels = bgr[fg_idx].astype(np.float32)  # N x 3, KMeans expects floats
    if pixels.shape[0] == 0:
        # Nothing to cluster: same dtype/shape as the normal result.
        return labels_img, np.zeros((n_clusters, 3), dtype=np.uint8)

    # Subsample to speed up fitting on large foregrounds.
    if subsample > 1 and pixels.shape[0] > 10000:
        sample = pixels[::subsample]
    else:
        sample = pixels

    # KMeans refuses to fit more clusters than it has samples.
    k = min(n_clusters, sample.shape[0])
    km = KMeans(n_clusters=k, random_state=0, n_init=10)
    km.fit(sample)

    # Round (rather than truncate) so the stored colour is the nearest 8-bit
    # value to the fitted centre.
    centers = np.clip(np.rint(km.cluster_centers_), 0, 255).astype(np.uint8)

    # Assign every foreground pixel, not just the fitted sample.
    labels_img[fg_idx] = km.predict(pixels)
    return labels_img, centers


In [6]:

# ---------- Create segmented visualization (each cluster colored by its center) ----------
def visualize_clusters(labels_img, centers, alpha_mask):
    """Render a label image as an RGBA picture, one flat colour per cluster.

    Args:
        labels_img: H x W array of cluster ids; negative entries are left black.
        centers: Per-cluster BGR colours, indexed by cluster id.
        alpha_mask: H x W mask; pixels with a value > 0 become opaque, all
            other pixels fully transparent.

    Returns:
        H x W x 4 RGBA array.
    """
    height, width = labels_img.shape
    canvas = np.zeros((height, width, 3), dtype=np.uint8)

    # Paint every labelled pixel with the colour of its cluster centre.
    labelled = np.where(labels_img >= 0)
    cluster_ids = labels_img[labelled]
    canvas[labelled] = centers[cluster_ids]

    # Switch to RGBA for saving, then take the opacity from the alpha mask.
    seg_rgba = cv2.cvtColor(canvas, cv2.COLOR_BGR2RGBA)
    opacity = np.where(alpha_mask > 0, 255, 0)
    seg_rgba[:, :, 3] = opacity.astype(np.uint8)
    return seg_rgba

# ---------- Heuristic: decide which clusters are 'sick' based on color -->
# This is a simple heuristic: clusters with lower green channel or high brownish tone flagged.
def cluster_health_score(centers):
    """Score each cluster colour by how strongly green dominates it.

    Args:
        centers: n x 3 array of cluster colours in BGR order.

    Returns:
        float32 array with one score per cluster, computed as
        ``G - 0.5 * (R + B)``. Higher means greener, i.e. more likely healthy.
    """
    # Work in float so the arithmetic below cannot wrap around in uint8.
    as_float = centers.astype(np.float32)

    # BGR layout: red is the last column, blue the third from the end.
    red = as_float[:, -1]
    green = as_float[:, -2]
    blue = as_float[:, -3]

    return green - 0.5 * (red + blue)

# ---------- Highlight sick areas on original bgr image --------------
def highlight_sick_areas(bgr, labels_img, alpha_mask, centers, healthy_threshold=None, min_area=50):
    """Outline the regions that belong to low-scoring ("sick") clusters.

    Args:
        bgr: H x W x 3 OpenCV BGR image to draw on (a copy is modified).
        labels_img: H x W array of cluster ids per pixel.
        alpha_mask: H x W mask; pixels with a value > 0 are opaque in the result.
        centers: Per-cluster BGR colours, scored with ``cluster_health_score``.
        healthy_threshold: Clusters scoring strictly below this value count as
            sick. Defaults to the median of the cluster scores.
        min_area: Contours whose area is below this value are not drawn.

    Returns:
        Tuple ``(rgba, sick_mask, sick_clusters, scores)``:
          * ``rgba``          - H x W x 4 RGBA image with red outlines and
            enclosing circles around the sick regions.
          * ``sick_mask``     - uint8 mask, 255 on every pixel of a sick
            cluster (including specks too small to be outlined).
          * ``sick_clusters`` - list of sick cluster ids.
          * ``scores``        - health score per cluster (higher = healthier).
    """
    scores = cluster_health_score(centers)
    if healthy_threshold is None:
        # Default heuristic: anything below the median score is 'sick'.
        healthy_threshold = np.median(scores)
    sick_clusters = np.where(scores < healthy_threshold)[0].tolist()

    # Binary mask of all pixels assigned to a sick cluster.
    sick_mask = np.where(np.isin(labels_img, sick_clusters), 255, 0).astype(np.uint8)

    # findContours returns (contours, hierarchy) in OpenCV 4 but
    # (image, contours, hierarchy) in OpenCV 3; [-2] is the contours in both.
    contours = cv2.findContours(sick_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    highlighted = bgr.copy()
    red = (0, 0, 255)  # BGR
    for cnt in contours:
        if cv2.contourArea(cnt) < min_area:  # skip very small specks
            continue
        cv2.drawContours(highlighted, [cnt], -1, red, 2)
        # Thin enclosing circle makes small regions easier to spot.
        (cx, cy), radius = cv2.minEnclosingCircle(cnt)
        cv2.circle(highlighted, (int(cx), int(cy)), int(radius), red, 1)

    # RGBA output with the background made transparent.
    rgba = cv2.cvtColor(highlighted, cv2.COLOR_BGR2RGBA)
    rgba[:, :, 3] = np.where(alpha_mask > 0, 255, 0).astype(np.uint8)
    return rgba, sick_mask, sick_clusters, scores

# ---------- Main loop over images ----------
def process_folder(input_dir, output_dir, n_clusters=3, subsample=10):
    """Run the full pipeline on every image in ``input_dir``.

    For each image: remove the background, segment the foreground colours with
    K-Means, highlight the low-scoring clusters, and save the three resulting
    PNGs under ``output_dir`` (sub-folders ``no_bg``, ``kmeans_seg`` and
    ``highlighted``). A ``summary.csv`` with one row per image is written to
    ``output_dir`` at the end.

    Args:
        input_dir: Folder that is searched (non-recursively) for
            png/jpg/jpeg/tif/tiff/bmp files, matched case-insensitively.
        output_dir: Root folder for all results; created if missing.
        n_clusters: Number of K-Means clusters per image.
        subsample: Subsampling step forwarded to ``kmeans_segmentation_bgr``.

    Returns:
        None. A failure on one image is printed and recorded as an ``error``
        row in the summary; the remaining images are still processed.
    """
    extensions = (".png", ".jpg", ".jpeg", ".tif", ".tiff", ".bmp")
    ext_rank = {ext: rank for rank, ext in enumerate(extensions)}

    # Match on the lower-cased suffix so "LEAF.JPG" is found on case-sensitive
    # filesystems too; escape the folder so "[" / "*" in its name are literal.
    pattern = os.path.join(glob.escape(os.fspath(input_dir)), "*")
    img_paths = [
        p for p in glob.glob(pattern)
        if os.path.isfile(p) and Path(p).suffix.lower() in ext_rank
    ]
    # Keep the historical ordering: grouped by extension, then by path.
    img_paths.sort(key=lambda p: (ext_rank[Path(p).suffix.lower()], p))
    if not img_paths:
        print("Tidak ada file gambar di", input_dir)
        return

    # Derive the stage folders from the output_dir argument so the function
    # honours whatever folder the caller passes in.
    no_bg_dir = os.path.join(output_dir, "no_bg")
    seg_dir = os.path.join(output_dir, "kmeans_seg")
    highlight_dir = os.path.join(output_dir, "highlighted")
    for folder in (output_dir, no_bg_dir, seg_dir, highlight_dir):
        os.makedirs(folder, exist_ok=True)

    records = []
    for p in tqdm(img_paths, desc="Processing images"):
        name = Path(p).stem
        try:
            pil = load_image_pil(p)

            # 1) background removal
            no_bg = remove_background_pil(pil)
            no_bg_path = os.path.join(no_bg_dir, f"{name}_no_bg.png")
            no_bg.save(no_bg_path)

            # 2) to OpenCV layout + colour clustering of the foreground
            bgr, alpha = pil_to_cv2_rgba(no_bg)
            labels_img, centers = kmeans_segmentation_bgr(
                bgr, alpha, n_clusters=n_clusters, subsample=subsample
            )

            # 3) flat-colour segmentation picture
            seg_rgba = visualize_clusters(labels_img, centers, alpha)
            seg_path = os.path.join(seg_dir, f"{name}_seg.png")
            Image.fromarray(seg_rgba).save(seg_path)

            # 4) outline the sick regions on the background-free image
            highlighted_rgba, sick_mask, sick_clusters, scores = highlight_sick_areas(
                bgr, labels_img, alpha, centers
            )
            highlight_path = os.path.join(highlight_dir, f"{name}_highlight.png")
            Image.fromarray(highlighted_rgba).save(highlight_path)

            # 5) per-image statistics
            total_pixels = np.count_nonzero(alpha > 0)
            sick_pixels = np.count_nonzero(sick_mask > 0)
            percent_sick = (sick_pixels / total_pixels * 100) if total_pixels > 0 else 0.0

            center_list = [list(map(int, c.tolist())) for c in centers]  # BGR ints
            records.append({
                "filename": os.path.basename(p),
                "no_bg": os.path.relpath(no_bg_path, output_dir),
                "segmentation": os.path.relpath(seg_path, output_dir),
                "highlight": os.path.relpath(highlight_path, output_dir),
                "total_pixels_fg": int(total_pixels),
                "sick_pixels": int(sick_pixels),
                "percent_sick": float(round(percent_sick, 3)),
                "cluster_centers_bgr": str(center_list),
                "cluster_scores": str([float(round(s, 3)) for s in scores.tolist()]),
                "sick_clusters": str(sick_clusters),
            })

        except Exception as e:
            # Deliberate best-effort: one broken image must not stop the batch,
            # but the failure is both printed and kept in the summary.
            print(f"Error processing {p}: {e}")
            records.append({"filename": os.path.basename(p), "error": str(e)})
            continue

    # Save the summary CSV.
    df = pd.DataFrame(records)
    csv_path = os.path.join(output_dir, "summary.csv")
    df.to_csv(csv_path, index=False)
    print("Selesai. Hasil disimpan di:", output_dir)
    print("Ringkasan:", csv_path)

# Run the pipeline on the configured folders (executes as soon as this cell runs).
process_folder(INPUT_DIR, OUTPUT_DIR)


Processing images: 100%|██████████| 20/20 [00:55<00:00,  2.78s/it]

Selesai. Hasil disimpan di: hasil
Ringkasan: hasil\summary.csv



