## STEP 1 — Inisialisasi path & bikin struktur minimal

In [None]:
from pathlib import Path

# Project roots; the later steps build all their paths from these constants.
BASE = Path(".")
DATA = BASE / "data"
TRAIN, VAL = DATA / "train", DATA / "val"
SEG = BASE / "data_seg"
MODELS = BASE / "models"
LOGS = BASE / "logs"

# Minimal directory skeleton: RGB/depth per split, YOLO segmentation folders,
# plus output folders for models, logs, web and api.
required_dirs = [
    *(split_root / kind for split_root in (TRAIN, VAL) for kind in ("rgb", "depth")),
    *(SEG / part / kind for part in ("train", "val") for kind in ("images", "labels")),
    MODELS,
    LOGS,
    BASE / "web",
    BASE / "api",
]
for directory in required_dirs:
    directory.mkdir(parents=True, exist_ok=True)

# Create a header-only labels.csv template for each split, never overwriting
# a file that already exists.
for split_name in ("train", "val"):
    labels_csv = DATA / split_name / "labels.csv"
    if labels_csv.exists():
        continue
    labels_csv.write_text("cow_id,image_name_rgb,image_name_depth,bcs_label\n")

print("OK: struktur siap.")

## STEP 2 — (Opsional) Import & ekstraksi Dryad .bag → frame RGB + depth 16-bit
Rekaman Dryad berasal dari kamera RealSense D435i (berkas .bag bergaya ROS). Ekstrak 1 frame setiap ~0,5–1,0 detik untuk mengurangi temporal leakage (sesuai saran README Dryad). Simpan depth apa adanya sebagai uint16 dan lakukan normalisasi baru pada tahap preprocessing, supaya nilai depth asli tidak rusak. Sumber: https://datadryad.org/dataset/doi%3A10.5061/dryad.tqjq2bw4s

In [None]:
# Jalankan hanya jika kamu punya .bag dari Dryad di ./raw_bag/
import os, cv2, numpy as np
from pathlib import Path

# pyrealsense2 is optional. Record whether the import worked so that
# extract_from_bag can check the flag before touching the `rs` module.
try:
    import pyrealsense2 as rs
except Exception as e:
    HAS_RS = False
    print("pyrealsense2 tidak tersedia:", e)
else:
    HAS_RS = True

RAW_BAG = BASE / "raw_bag"  # put the .bag recordings here
OUT_SPLIT = "train"         # or 'val'
SAVE_EVERY_N_FRAMES = 15    # ~0.5 s at 30 FPS

def extract_from_bag(bag_path: Path, out_split="train", every_n=15):
    """Save every ``every_n``-th aligned colour/depth frame pair of a .bag file.

    Frames are written as ``<bag stem>_<frame index>.png`` into
    ``DATA/<out_split>/rgb`` and ``DATA/<out_split>/depth``. Depth frames are
    aligned to the colour stream and written without conversion so the PNG
    keeps the 16-bit values delivered by the ``z16`` stream.

    Args:
        bag_path: RealSense recording to play back.
        out_split: Sub-folder of ``DATA`` that receives the frames.
        every_n: Keep one frameset out of every ``every_n`` complete ones.

    Returns:
        Number of frame pairs that were written successfully.

    Raises:
        RuntimeError: If pyrealsense2 could not be imported.
        ValueError: If ``every_n`` is smaller than 1.
    """
    # Explicit checks instead of `assert`, which disappears under `python -O`.
    if not HAS_RS:
        raise RuntimeError("pyrealsense2 belum terpasang")
    if every_n < 1:
        raise ValueError(f"every_n harus >= 1, dapat {every_n}")

    rgb_dir = DATA / out_split / "rgb"
    dpt_dir = DATA / out_split / "depth"
    rgb_dir.mkdir(parents=True, exist_ok=True)
    dpt_dir.mkdir(parents=True, exist_ok=True)

    pipeline = rs.pipeline()
    config = rs.config()
    config.enable_device_from_file(str(bag_path), repeat_playback=False)
    config.enable_stream(rs.stream.color, 640, 480, rs.format.bgr8, 30)
    config.enable_stream(rs.stream.depth, 640, 480, rs.format.z16, 30)

    # NOTE(review): playback presumably runs in real time by default, so slow
    # disk writes may drop frames; consider disabling real-time playback on
    # the playback device — confirm against the librealsense documentation.
    pipeline.start(config)
    align = rs.align(rs.stream.color)

    frame_idx = 0
    saved = 0
    try:
        while True:
            try:
                frames = pipeline.wait_for_frames()
            except RuntimeError:
                # With repeat_playback=False the wait is expected to time out
                # once the recording is exhausted; treat that as end of file.
                break
            if not frames:
                break
            frames = align.process(frames)
            color = frames.get_color_frame()
            depth = frames.get_depth_frame()
            if not color or not depth:
                continue

            # frame_idx only counts framesets that carry both streams.
            if frame_idx % every_n == 0:
                c_img = np.asanyarray(color.get_data())
                d_img = np.asanyarray(depth.get_data())  # uint16

                base = f"{bag_path.stem}_{frame_idx:06d}"
                ok_rgb = cv2.imwrite(str(rgb_dir / f"{base}.png"), c_img)
                ok_dpt = cv2.imwrite(str(dpt_dir / f"{base}.png"), d_img)  # stays uint16
                if ok_rgb and ok_dpt:
                    saved += 1
                else:
                    print(f"[extract_from_bag] gagal menulis frame {base}")
            frame_idx += 1
    except Exception as exc:
        # Stay best-effort (keep what was already saved) but report the cause
        # instead of hiding it.
        print(f"[extract_from_bag] {bag_path.name}: berhenti setelah "
              f"{saved} frame tersimpan: {exc!r}")
    finally:
        try:
            pipeline.stop()
        except Exception:
            # Cleanup only; a failing stop() must not mask the result.
            pass
    return saved

# Extract frames from every .bag in RAW_BAG; skip quietly when the folder is
# missing because this step is optional.
if not RAW_BAG.exists():
    print("Lewati: folder raw_bag tidak ada.")
else:
    total = sum(
        extract_from_bag(bag_file, OUT_SPLIT, SAVE_EVERY_N_FRAMES)
        for bag_file in RAW_BAG.glob("*.bag")
    )
    print(f"Saved frames: {total}")

## STEP 3 — (Opsional) Import UNICT BCS DB ke struktur data/
Setelah unduh, taruh citra ke data/train/rgb dan data/val/rgb. Isi labels.csv (bisa dari file meta/penilaian mingguan). Dataset ini 207 citra top-view + anotasi titik anatomi & label 2 assessor. https://iplab.dmi.unict.it/BCS/index.html

In [None]:
# Contoh merge csv meta UNICT ke labels.csv (pseudo, sesuaikan kolom aslinya)
import pandas as pd
from pathlib import Path

# Assumes a unict_meta.csv with the columns: filename,bcs_label,cow_id
unict_meta = Path("unict_meta.csv")
if unict_meta.exists():
    meta = pd.read_csv(unict_meta)
    # Simple 80/20 train/val split.
    # NOTE(review): rows are sampled per image, so images of the same cow_id
    # can end up in both splits — consider splitting by cow_id instead.
    mtrain = meta.sample(frac=0.8, random_state=42)
    mval = meta.drop(mtrain.index)

    def write_labels(df, split):
        """Write ``DATA/<split>/labels.csv`` from the UNICT metadata rows in ``df``.

        ``cow_id`` falls back to "UNK" when the metadata has no such column;
        ``image_name_depth`` is left empty because UNICT images are 2D only.
        """
        label_columns = ["cow_id", "image_name_rgb", "image_name_depth", "bcs_label"]
        rows = [
            {
                "cow_id": r.get("cow_id", "UNK"),
                "image_name_rgb": r["filename"],
                "image_name_depth": "",  # fill in if a companion depth image exists
                "bcs_label": r["bcs_label"],
            }
            for _, r in df.iterrows()
        ]
        # Passing the columns explicitly keeps the header row even when the
        # split is empty (e.g. a very small metadata file).
        pd.DataFrame(rows, columns=label_columns).to_csv(
            DATA / split / "labels.csv", index=False
        )

    write_labels(mtrain, "train")
    write_labels(mval, "val")
    print("labels.csv dari UNICT ditulis.")
else:
    print("Lewati: unict_meta.csv tidak ditemukan (isi manual labels.csv).")

## STEP 4 — Siapkan dataset segmentasi (YOLOv8-Seg)
Ekspor dari Roboflow (format YOLOv8-Seg) atau pakai anotasi sendiri, lalu letakkan hasilnya ke data_seg/train/{images,labels} dan data_seg/val/{images,labels}. Banyak dataset instance-segmentation sapi tersedia di Roboflow Universe, misalnya: https://universe.roboflow.com/capstone-8gixe/cow-body-parts

In [None]:
import yaml, json
from pathlib import Path

# Dataset description consumed by YOLOv8-Seg training: absolute dataset root,
# image folders relative to that root, and the single class name.
yaml_path = Path("yolo_seg_data.yaml")
seg_yaml = dict(
    path=str(SEG.resolve()),
    train="train/images",
    val="val/images",
    names=["cow"],
)
yaml_path.write_text(yaml.dump(seg_yaml))
# Echo the file back so the generated config is visible in the cell output.
print(yaml_path.read_text())

## STEP 5 — Train YOLOv8-Seg (fine-tune)

In [None]:
import torch  # required for the CUDA availability check below
from ultralytics import YOLO

# Start from a small base checkpoint so the model stays edge-friendly.
seg_model = YOLO("yolov8s-seg.pt")
seg_model.train(
    data="yolo_seg_data.yaml",
    epochs=50,
    imgsz=640,
    batch=8,
    patience=10,
    device=0 if torch.cuda.is_available() else "cpu",
)

# Copy the fine-tuned weights into models/.
# seg_model.ckpt_path names the checkpoint the model was loaded from (the
# pretrained file), so the trainer's own best/last paths are used instead.
# The hard-coded default run directory is kept as a last resort.
trainer = getattr(seg_model, "trainer", None)
candidates = []
if trainer is not None:
    candidates += [Path(trainer.best), Path(trainer.last)]
candidates.append(Path("runs/segment/train/weights/best.pt"))

best = next((c for c in candidates if c.exists()), None)
if best is None:
    raise FileNotFoundError(
        "bobot hasil training tidak ditemukan: "
        + ", ".join(str(c) for c in candidates)
    )
Path(MODELS / "seg_yolov8s.pt").write_bytes(best.read_bytes())
print("Saved:", MODELS/"seg_yolov8s.pt")

## STEP 6 — Infer mask untuk seluruh RGB (train/val) → simpan PNG mask

In [None]:
import numpy as np, cv2
from ultralytics import YOLO
from pathlib import Path
from tqdm import tqdm

# Load the segmentation weights from models/seg_yolov8s.pt (the file written
# at the end of STEP 5).
seg_model = YOLO(str(MODELS/"seg_yolov8s.pt"))

def infer_and_save_masks(img_dir: Path, out_dir: Path, imgsz=640, conf=0.25):
    """Segment every image in ``img_dir`` and save one binary mask per image.

    Uses the module-level ``seg_model``. All detected instances are merged
    into a single 0/255 mask that is written to ``out_dir/<stem>_mask.png``.
    Masks are produced at the resolution of the source image so they can be
    applied pixel-for-pixel to it.

    Args:
        img_dir: Folder scanned for ``*.png`` and ``*.jpg`` images.
        out_dir: Folder that receives the masks (created if missing).
        imgsz: Inference size passed to the model.
        conf: Confidence threshold passed to the model.
    """
    out_dir.mkdir(parents=True, exist_ok=True)
    image_paths = sorted(img_dir.glob("*.png")) + sorted(img_dir.glob("*.jpg"))
    for imgp in tqdm(image_paths):
        # retina_masks=True asks for masks at the original image size rather
        # than at the letterboxed inference size.
        res = seg_model.predict(source=str(imgp), imgsz=imgsz, conf=conf,
                                retina_masks=True, verbose=False)
        if not res:
            continue
        r = res[0]
        img_h, img_w = (int(v) for v in r.orig_shape[:2])
        if r.masks is None:
            # Fallback: full-image mask so the pipeline keeps running.
            # Sized from the result, so no second image read is needed.
            mask = np.full((img_h, img_w), 255, np.uint8)
        else:
            # Merge all instances into one mask.
            inst = r.masks.data.cpu().numpy() > 0.5  # [N,H,W]
            mask = inst.any(axis=0).astype(np.uint8) * 255
            if mask.shape != (img_h, img_w):
                # Defensive: force the source resolution if the model still
                # returned a different size.
                mask = cv2.resize(mask, (img_w, img_h),
                                  interpolation=cv2.INTER_NEAREST)
        cv2.imwrite(str(out_dir / f"{imgp.stem}_mask.png"), mask)

# Produce masks for both splits: data/<split>/rgb -> data/<split>/masks.
for split in ("train", "val"):
    split_root = DATA / split
    infer_and_save_masks(split_root / "rgb", split_root / "masks")
print("Masks saved.")

## STEP 7 — Ekstraksi fitur morfometrik dari depth 16-bit + mask

In [None]:
import numpy as np, pandas as pd, cv2
from pathlib import Path

def load_depth_uint16(p: Path):
    """Read the depth image at ``p`` unchanged and require dtype uint16.

    Raises:
        FileNotFoundError: If OpenCV could not read the file.
        AssertionError: If the image is not uint16.
    """
    depth = cv2.imread(str(p), cv2.IMREAD_UNCHANGED)
    if depth is not None:
        assert depth.dtype == np.uint16, f"depth harus uint16, dapat {depth.dtype}"
        return depth
    raise FileNotFoundError(p)

def normalize01_uint16(d):
    """Return ``d`` as float32 divided by its own maximum.

    An image whose maximum is 0 is returned as float32 without scaling, which
    avoids a division by zero.
    """
    d = d.astype(np.float32)
    peak = d.max()
    return d / peak if peak > 0 else d


def compute_features(depth_u16, mask_u8):
    """Compute the five morphometric features of the masked depth region.

    Args:
        depth_u16: Depth image; normalised per image by ``normalize01_uint16``.
        mask_u8: Mask of the same height/width; any value > 0 is foreground.

    Returns:
        dict with ``mean_depth`` and ``depth_variance`` (over foreground
        pixels with depth > 0), ``area_px`` (foreground pixel count),
        ``hw_ratio`` (bounding-box height / width) and ``body_volume_est``
        (sum of normalised foreground depth / 1e6, a proxy scale).
        All values are zero for an empty mask. When the mask is non-empty but
        holds no positive depth, the depth statistics are 0.0 instead of NaN.

    Raises:
        ValueError: If a non-empty mask does not match the depth image shape.
    """
    fg = np.asarray(mask_u8) > 0
    area = int(fg.sum())
    if area == 0:
        return dict(mean_depth=0., depth_variance=0., area_px=0, hw_ratio=0., body_volume_est=0.)
    if fg.shape != depth_u16.shape:
        raise ValueError(
            f"ukuran mask {fg.shape} tidak sama dengan depth {depth_u16.shape}"
        )

    roi = normalize01_uint16(depth_u16) * fg
    # Zero depth is excluded from the statistics (missing measurement or
    # background).
    valid = roi[roi > 0]

    ys, xs = np.nonzero(fg)
    box_h = int(ys.max() - ys.min() + 1)
    box_w = int(xs.max() - xs.min() + 1)

    has_depth = valid.size > 0
    return dict(
        # Guard against mean()/var() of an empty selection, which yields NaN.
        mean_depth=float(valid.mean()) if has_depth else 0.,
        depth_variance=float(valid.var()) if has_depth else 0.,
        area_px=area,
        hw_ratio=float(box_h / box_w) if box_w > 0 else 0.,
        body_volume_est=float(roi.sum()) / 1e6  # proxy scale
    )

def build_feature_csv(split="train"):
    """Build ``DATA/<split>/features.csv`` from labels, masks and depth images.

    A row of ``labels.csv`` is used only when it has an RGB name, a depth
    name, a BCS label, an existing ``<rgb stem>_mask.png`` and an existing
    depth file. Every other row is skipped, so images without depth (for
    example UNICT) contribute no features.

    Args:
        split: Sub-folder of ``DATA`` to process.

    Returns:
        The feature table that was written. It always has the full set of
        columns, even when no row qualified.
    """
    out_columns = [
        "mean_depth", "depth_variance", "area_px", "hw_ratio", "body_volume_est",
        "cow_id", "image_name_rgb", "image_name_depth", "bcs_label",
    ]
    df = pd.read_csv(DATA / split / "labels.csv")
    rows = []
    for _, r in df.iterrows():
        rgb_name = r["image_name_rgb"]
        depth_name = r["image_name_depth"]
        # pandas reads empty cells as NaN (a truthy float), so a plain
        # truthiness test lets them through and `Path / nan` then raises
        # TypeError. Require real, non-empty strings instead.
        if not (isinstance(rgb_name, str) and rgb_name):
            continue
        if not (isinstance(depth_name, str) and depth_name):
            # No depth image for this row: skip it rather than emit all-zero
            # features, to keep the feature signal clean.
            continue
        if pd.isna(r["bcs_label"]):
            continue

        mask_path = DATA / split / "masks" / f"{Path(rgb_name).stem}_mask.png"
        depth_path = DATA / split / "depth" / depth_name
        if not mask_path.exists() or not depth_path.exists():
            continue

        depth_u16 = load_depth_uint16(depth_path)
        mask = cv2.imread(str(mask_path), cv2.IMREAD_GRAYSCALE)
        if mask is None:
            # The mask file exists but OpenCV could not decode it.
            continue
        feats = compute_features(depth_u16, mask)
        feats.update({
            "cow_id": r["cow_id"],
            "image_name_rgb": rgb_name,
            "image_name_depth": depth_name,
            "bcs_label": float(r["bcs_label"])
        })
        rows.append(feats)
    # Explicit columns keep the header in features.csv when rows is empty.
    fe = pd.DataFrame(rows, columns=out_columns)
    fe.to_csv(DATA / split / "features.csv", index=False)
    return fe

# Build the feature tables for both splits (train first, then val) and show
# the first rows of each as the cell output.
train_fe, val_fe = (build_feature_csv(name) for name in ("train", "val"))
train_fe.head(), val_fe.head()

## STEP 8 — Training model BCS (Regresi dan opsi Ordinal)
Studi JDS 2024 memakai pendekatan ordinal untuk BCS 1–5 (langkah 0,25) saat menganalisis agreement; di sini disediakan 2 opsi:

(A) MLPRegressor (kontinu)

(B) Ordinal logistic (mord) — kalau kamu ingin metrik klasifikasi yang sejalan dengan skala ordinal. 

https://pubmed.ncbi.nlm.nih.gov/37977440/

In [None]:
import numpy as np, pandas as pd, joblib
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.metrics import mean_absolute_error, r2_score, cohen_kappa_score
from sklearn.neural_network import MLPRegressor

def as_Xy(fe):
    """Split a feature table into the model input matrix and the BCS target.

    Returns:
        ``(X, y)`` where ``X`` holds the five feature columns in a fixed
        order and ``y`` holds ``bcs_label``.
    """
    feature_cols = [
        'mean_depth',
        'depth_variance',
        'area_px',
        'hw_ratio',
        'body_volume_est',
    ]
    return fe[feature_cols].values, fe['bcs_label'].values

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

train_fe = pd.read_csv(DATA/"train"/"features.csv")
X, y = as_Xy(train_fe)

# -----------------------------
# (A) Continuous regressor (MLP)
# -----------------------------
# NOTE(review): the hold-out split and the CV folds are drawn per image, so
# frames of the same cow can sit on both sides — consider a grouped split on
# cow_id if the scores look optimistic.
X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=42)

# The features live on very different scales (area_px is a pixel count, the
# depth statistics are at most 1) and MLPs are sensitive to that. The scaler
# is part of the pipeline, so it is fitted on training folds only and is
# saved together with the network; callers keep using reg.predict(X) on raw
# features.
reg = make_pipeline(
    StandardScaler(),
    MLPRegressor(hidden_layer_sizes=(128, 64), activation='relu',
                 random_state=42, max_iter=600),
)
reg.fit(X_tr, y_tr)
pred = reg.predict(X_te)
print("Regressor  MAE=", mean_absolute_error(y_te, pred), " R2=", r2_score(y_te, pred))

# 5-fold CV (MAE); cross_val_score refits a clone of the whole pipeline per fold.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
cv_mae = -cross_val_score(reg, X, y, cv=kf, scoring="neg_mean_absolute_error")
print("5-fold MAE:", cv_mae.mean(), "+/-", cv_mae.std())

# Save the model fitted on the 80% training part.
joblib.dump(reg, MODELS/"bcs_regressor.pkl")
print("Saved:", MODELS/"bcs_regressor.pkl")

# -----------------------------
# (B) Ordinal (optional)
# -----------------------------
USE_ORDINAL = False
try:
    import mord
    USE_ORDINAL = True
except ImportError:
    # Only a missing package should disable this option; a bare `except`
    # would also swallow KeyboardInterrupt and unrelated errors.
    print("mord belum terpasang; lewati ordinal.")

if USE_ORDINAL:
    # Map scores 1..5 (step 0.25) to ordinal classes 0..16, clamping values
    # that fall outside the scale.
    def to_ord_class(v):
        idx = int(round((v - 1.0) / 0.25))
        return max(0, min(16, idx))
    y_ord = np.array([to_ord_class(v) for v in y])

    # NOTE(review): the features are used unscaled here; standardising them
    # may help this regularised model — verify before relying on the scores.
    X_tr, X_te, y_tr, y_te = train_test_split(X, y_ord, test_size=0.2, random_state=42)
    ord_model = mord.LogisticAT(alpha=1.0)
    ord_model.fit(X_tr, y_tr)
    y_pred_ord = ord_model.predict(X_te)

    # Agreement metrics are reported on the original 1..5 scale (step 0.25).
    def from_ord_class(k): return 1.0 + 0.25*int(k)
    y_true_c = np.array([from_ord_class(k) for k in y_te])
    y_pred_c = np.array([from_ord_class(k) for k in y_pred_ord])

    def pct_agree(y_true, y_pred, tol):
        """Fraction of predictions within ``tol`` of the true score."""
        return np.mean(np.abs(y_true - y_pred) <= tol)

    print("Ordinal — exact:", pct_agree(y_true_c, y_pred_c, 0.00))
    print("Ordinal — ±0.25:", pct_agree(y_true_c, y_pred_c, 0.25))
    print("Ordinal — ±0.50:", pct_agree(y_true_c, y_pred_c, 0.50))
    # Quadratic weighted kappa, computed on the class indices 0..16.
    print("Ordinal — κw:", cohen_kappa_score(y_te, y_pred_ord, weights='quadratic'))

## STEP 9 — Evaluasi pada set validasi (pakai data/val/features.csv)

In [None]:
import pandas as pd, numpy as np
from sklearn.metrics import mean_absolute_error, r2_score, cohen_kappa_score
import joblib

val_fe = pd.read_csv(DATA/"val"/"features.csv")
Xv = val_fe[['mean_depth','depth_variance','area_px','hw_ratio','body_volume_est']].values
yv = val_fe['bcs_label'].values

reg = joblib.load(MODELS/"bcs_regressor.pkl")
pv = reg.predict(Xv)

def quantize_bcs(v, step=0.25, lo=1.0, hi=5.0):
    """Snap scores to the nearest ``step`` on the ``lo``..``hi`` scale.

    Ties round half up, and values outside the scale are clipped to it.
    """
    v = np.asarray(v, dtype=float)
    return np.clip(lo + step * np.floor((v - lo) / step + 0.5), lo, hi)

def pct_agree(y_true, y_pred, tol):
    """Fraction of predictions within ``tol`` of the true score."""
    return np.mean(np.abs(y_true - y_pred) <= tol)

# MAE / R2 are reported on the raw continuous predictions.
print("[VAL] MAE=", mean_absolute_error(yv, pv), " R2=", r2_score(yv, pv))

# Agreement is measured on the 0.25-point scoring grid. A continuous
# prediction practically never equals a label exactly, so comparing raw
# outputs would make the "exact" rate meaningless.
# Assumes the labels in features.csv are already multiples of 0.25.
pv_q = quantize_bcs(pv)
print("[VAL] exact:", pct_agree(yv, pv_q, 0.00))
print("[VAL] ±0.25:", pct_agree(yv, pv_q, 0.25))
print("[VAL] ±0.50:", pct_agree(yv, pv_q, 0.50))

# Integer classes (1..5) for kappa (optional). Round half up and clip:
# Python's round() is half-to-even (2.5 -> 2 but 3.5 -> 4), which would map
# half-point scores to classes inconsistently.
def to_class05(v): return int(min(5, max(1, np.floor(v + 0.5))))
yvc = np.array([to_class05(v) for v in yv])
pvc = np.array([to_class05(v) for v in pv])
print("[VAL] κw (kuadratik) =", cohen_kappa_score(yvc, pvc, weights='quadratic'))

## STEP 10 — Inferensi satu gambar RGB-D

In [None]:
import cv2, numpy as np, joblib
from ultralytics import YOLO
from pathlib import Path

# Load the BCS regressor saved in STEP 8 and the segmentation weights saved
# in STEP 5; predict_single below reads both as module-level globals.
reg = joblib.load(MODELS/"bcs_regressor.pkl")
seg_model = YOLO(str(MODELS/"seg_yolov8s.pt"))

def predict_single(rgb_path: Path, depth_path: Path):
    """Predict the BCS for one RGB image and its depth image.

    Segments the cow in the RGB image with the module-level ``seg_model``,
    derives the same five features as STEP 7 from the masked, per-image
    normalised depth, and feeds them to the module-level ``reg``.

    Args:
        rgb_path: RGB image to segment.
        depth_path: Depth image with the same resolution as the RGB image.

    Returns:
        ``dict(bcs_pred=<float>)`` on success, or
        ``dict(bcs_pred=None, note=<reason>)`` when the mask is empty or
        contains no positive depth value.

    Raises:
        FileNotFoundError: If the depth image cannot be read.
        ValueError: If mask and depth resolutions differ.
    """
    # Mask. retina_masks=True asks for masks at the original image size so
    # they line up with the depth image pixel for pixel.
    r = seg_model.predict(source=str(rgb_path), imgsz=640, conf=0.25,
                          retina_masks=True, verbose=False)[0]
    if r.masks is None:
        # Fallback: full-image mask, sized from the result so an unreadable
        # RGB file cannot crash a second imread here.
        img_h, img_w = (int(v) for v in r.orig_shape[:2])
        mask = np.full((img_h, img_w), 255, np.uint8)
    else:
        inst = r.masks.data.cpu().numpy() > 0.5
        mask = inst.any(axis=0).astype(np.uint8) * 255

    # Depth, normalised per image by its own maximum.
    d = cv2.imread(str(depth_path), cv2.IMREAD_UNCHANGED)
    if d is None:
        raise FileNotFoundError(depth_path)
    d = d.astype(np.float32)
    peak = d.max()
    if peak > 0:
        d = d / peak

    fg = mask > 0
    ys, xs = np.where(fg)
    if len(ys) == 0:
        return dict(bcs_pred=None, note="mask kosong")
    if fg.shape != d.shape:
        raise ValueError(f"ukuran mask {fg.shape} tidak sama dengan depth {d.shape}")

    # Features (keep in sync with STEP 7).
    roi = d * fg
    valid = roi[roi > 0]
    if valid.size == 0:
        # mean()/var() of an empty selection would be NaN and make
        # reg.predict fail; report the cause instead.
        return dict(bcs_pred=None, note="depth tidak valid di dalam mask")
    box_h, box_w = ys.max() - ys.min() + 1, xs.max() - xs.min() + 1
    X = np.array([[
        valid.mean(),
        valid.var(),
        fg.sum(),
        box_h / box_w if box_w > 0 else 0.,
        roi.sum() / 1e6
    ]], dtype=np.float32)
    pred = float(reg.predict(X)[0])
    return dict(bcs_pred=pred)

# contoh:
# predict_single(Path("data/val/rgb/example.png"), Path("data/val/depth/example.png"))