In [63]:
import os
import cv2
import numpy as np
from glob import glob

### # Image Augmentation
#### - Add more variations to training images
#### - Techniques: flip, rotate, crop, noise
#### - Libraries: OpenCV, NumPy

In [64]:
# Where the prepared source images live, and where the originals plus
# their augmented variants will be written.
input_dir, output_dir = "./data/prepared-2", "./data/prepared-train"

# Create the destination folder up front; a no-op when it already exists.
os.makedirs(output_dir, exist_ok=True)

In [65]:
# Collect every .jpg directly inside the input folder and report how many were found.
jpg_pattern = os.path.join(input_dir, "*.jpg")
image_paths = glob(jpg_pattern)
print(f"Total images: {len(image_paths)}")

Total images: 75


In [66]:
def augment_image(img, noise_std=75.0, rng=None):
    """Build a fixed set of augmented variants of one image.

    Parameters
    ----------
    img : numpy.ndarray
        Image array whose first two axes are (height, width). The noise
        step assumes 8-bit pixel values in [0, 255] (what ``cv2.imread``
        returns by default).
    noise_std : float, optional
        Standard deviation of the zero-mean Gaussian noise, in pixel
        intensity units. Defaults to 75, the value originally hard-coded.
    rng : object with a ``normal(loc, scale, size)`` method, optional
        Random source for the noise (e.g. ``np.random.default_rng(seed)``).
        When omitted, NumPy's global ``np.random`` state is used.

    Returns
    -------
    list[tuple[str, numpy.ndarray]]
        ``(name, image)`` pairs in the order ``flip_h``, ``flip_v``,
        ``rot15``, ``crop``, ``noise``. ``crop`` is omitted when the image
        is too small to trim the border from.
    """
    augmented = []
    h, w = img.shape[:2]

    # Flip: code 1 mirrors left-right, code 0 mirrors top-bottom.
    augmented.append(("flip_h", cv2.flip(img, 1)))
    augmented.append(("flip_v", cv2.flip(img, 0)))

    # Rotate 15 degrees about the image centre, keeping the original canvas size.
    M = cv2.getRotationMatrix2D((w // 2, h // 2), 15, 1.0)
    augmented.append(("rot15", cv2.warpAffine(img, M, (w, h))))

    # Crop: trim a fixed border (not random) and scale back to the original size.
    # Skipped for tiny images, where the slice would be empty and resize would fail.
    margin = 5
    if h > 2 * margin and w > 2 * margin:
        cropped = img[margin:h - margin, margin:w - margin]
        augmented.append(("crop", cv2.resize(cropped, (w, h))))

    # Noise: add in floating point, then round and clip back to 8 bits.
    # Casting the raw Gaussian samples straight to uint8 mangles the negative
    # half of the distribution, so the noise could only ever brighten pixels.
    sampler = np.random if rng is None else rng
    noise = sampler.normal(0, noise_std, img.shape)
    noisy = np.clip(np.rint(img.astype(np.float32) + noise), 0, 255).astype(np.uint8)
    augmented.append(("noise", noisy))

    return augmented

# -----------------------------
# Process Images
# -----------------------------
# Sorted so the files are handled in a reproducible order (glob order is arbitrary).
image_paths = sorted(glob(os.path.join(input_dir, "*.jpg")))

# All outputs go into one flat folder; create it once instead of on every iteration.
out_class_dir = output_dir
os.makedirs(out_class_dir, exist_ok=True)

skipped = []

for path in image_paths:
    fname = os.path.splitext(os.path.basename(path))[0]

    # Load the image. cv2.imread returns None (it does not raise) when the
    # file is missing, corrupt or unsupported, so check before using it.
    img = cv2.imread(path)
    if img is None:
        skipped.append(path)
        continue

    # save original
    cv2.imwrite(os.path.join(out_class_dir, f"{fname}_orig.jpg"), img)

    # save augmentations
    for aug_name, aug_img in augment_image(img):
        out_path = os.path.join(out_class_dir, f"{fname}_{aug_name}.jpg")
        cv2.imwrite(out_path, aug_img)

if skipped:
    print(f"Skipped {len(skipped)} unreadable image(s):", *skipped, sep="\n  ")

print("✅ เสร็จสิ้น: Augmented images saved to", output_dir)

✅ เสร็จสิ้น: Augmented images saved to ./data/prepared-train


### # Prepare Data for Training

In [67]:
# From here on, images are read from the augmented set; volume_dir is the
# folder the per-image label files are looked up in further down.
input_dir, volume_dir = "./data/prepared-train", "data/volumes"

# Sanity check: how many .jpg files are available for training.
jpg_pattern = os.path.join(input_dir, "*.jpg")
image_paths = glob(jpg_pattern)
print(f"Total images: {len(image_paths)}")

Total images: 450


In [68]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import Ridge

In [69]:
# images, labels = [], []
#
# for img_path in glob(os.path.join(input_dir, "*.jpg")):
#     prefix = os.path.splitext(os.path.basename(img_path))[0].split("_")[0]  # เช่น i001
#
#     vol_path = os.path.join(volume_dir, f"{prefix}_mangosteen_grid.txt")
#     if not os.path.exists(vol_path):
#         continue
#
#     # โหลดรูป
#     img = cv2.imread(img_path)
#     img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
#     img = cv2.resize(img, (224, 224))
#     img = img / 255.0  # normalize
#     images.append(img)
#
#     # โหลด volume
#     with open(vol_path) as f:
#         vol = float(f.read().strip())
#     labels.append(vol)
#
# X = np.array(images, dtype="float32")
# y = np.array(labels, dtype="float32")
#
# print("Dataset:", X.shape, y.shape)
#
# # --------------------------
# # 2. Train-Test Split
# # --------------------------
# X_train, X_test, y_train, y_test = train_test_split(
#     X, y, test_size=0.2, random_state=42
# )
#
# # --------------------------
# # 3. Build CNN Regression Model
# # --------------------------
# base_model = tf.keras.applications.MobileNetV2(
#     input_shape=(224,224,3), include_top=False, weights="imagenet"
# )
# base_model.trainable = False  # freeze transfer learning
#
# model = tf.keras.Sequential([
#     base_model,
#     tf.keras.layers.GlobalAveragePooling2D(),
#     tf.keras.layers.Dense(128, activation="relu"),
#     tf.keras.layers.Dense(64, activation="relu"),
#     tf.keras.layers.Dense(1, activation="linear")   # regression output
# ])
#
# model.compile(optimizer="adam", loss="mse", metrics=["mae"])
# model.summary()
#
# # --------------------------
# # 4. Train
# # --------------------------
# history = model.fit(
#     X_train, y_train,
#     validation_split=0.2,
#     epochs=20,
#     batch_size=8,
#     verbose=1
# )
#
# # --------------------------
# # 5. Evaluate
# # --------------------------
# y_pred = model.predict(X_test).flatten()
#
# mae = mean_absolute_error(y_test, y_pred)
# r2  = r2_score(y_test, y_pred)
#
# print("\n=== Evaluation ===")
# print("Mean Absolute Error (MAE):", mae)
# print("R² Score:", r2)

### # Traditional ML Models
#### - Random Forest, Gradient Boosting, Ridge Regression

In [None]:
# --------------------------
# 1. Load images and labels
# --------------------------
X, y, groups = [], [], []

# Sorted so row order (and therefore the split below) is reproducible;
# glob itself returns files in arbitrary order.
for img_path in sorted(glob(os.path.join(input_dir, "*.jpg"))):
    # Source-image id, e.g. "i001" from "i001_flip_h.jpg".
    prefix = os.path.splitext(os.path.basename(img_path))[0].split("_")[0]
    vol_path = os.path.join(volume_dir, f"{prefix}_mangosteen_grid.txt")

    # Images without a label file cannot be used for supervised training.
    if not os.path.exists(vol_path):
        continue

    # Load the image -> resize and flatten into a feature vector.
    img = cv2.imread(img_path)
    if img is None:
        continue
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (64, 64))  # downscale to keep the feature count manageable
    feat = img.flatten() / 255.0

    # Load the label: the file is expected to contain a single number.
    with open(vol_path) as f:
        vol = float(f.read().strip())

    X.append(feat)
    y.append(vol)
    groups.append(prefix)

X = np.array(X, dtype=np.float32)
y = np.array(y, dtype=np.float32)
groups = np.array(groups)

print("Dataset:", X.shape, y.shape)

if len(X) == 0:
    raise RuntimeError(
        f"No usable samples: found no readable .jpg in {input_dir!r} "
        f"with a matching label file in {volume_dir!r}"
    )

# --------------------------
# 2. Train-Test Split
# --------------------------
# Split by source image, not by row. Every source image appears several times
# (original + augmented variants) with the same label; a row-wise split puts
# near-duplicates of the test images into the training set and inflates the
# reported scores.
unique_groups = np.unique(groups)
train_groups, test_groups = train_test_split(
    unique_groups, test_size=0.2, random_state=42
)
test_mask = np.isin(groups, test_groups)

X_train, X_test = X[~test_mask], X[test_mask]
y_train, y_test = y[~test_mask], y[test_mask]

print(f"Train: {len(X_train)} samples / {len(train_groups)} source images")
print(f"Test : {len(X_test)} samples / {len(test_groups)} source images")

# --------------------------
# 3. Define models
# --------------------------
models = {
    "Random Forest": RandomForestRegressor(n_estimators=100, random_state=42),
    "Gradient Boosting": GradientBoostingRegressor(n_estimators=100, random_state=42),
    "Ridge Regression": Ridge(alpha=1.0, random_state=42)
}

# --------------------------
# 4. Train & Evaluate
# --------------------------
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    mae = mean_absolute_error(y_test, y_pred)
    r2  = r2_score(y_test, y_pred)

    print(f"\n=== {name} ===")
    print(f"MAE: {mae:.4f}")
    print(f"R² : {r2:.4f}")

Dataset: (450, 12288) (450,)
