In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from skimage.feature import graycomatrix, graycoprops, local_binary_pattern
from skimage import img_as_ubyte
from tqdm import tqdm

In [None]:
# 1. Mount Google Drive at /content/drive (Colab only); the input and output
#    paths configured in this notebook live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# 2. Input and output paths
# input_path: dataset root; images are looked up in one sub-folder per class name.
# output_csv: destination CSV file for the extracted feature table.
input_path = "/content/drive/MyDrive/Preprocessing_Dataset_Skripsi"
output_csv = "/content/drive/MyDrive/Hasil_Ekstraksi_Telur.csv"

In [None]:
# 3) Classes (literally: "quality", "less quality", "no quality").
# Each name is also used as the sub-folder name under input_path.
classes = ['Berkualitas', 'Kurang Berkualitas', 'Tidak Berkualitas']

def get_storage_age(filename):
    """Map a filename's leading week prefix to a storage-age label.

    Filenames starting with '0_' .. '5_' yield '<n> minggu'; any other
    filename yields 'Tidak diketahui'.
    """
    for week in range(6):
        if filename.startswith(f'{week}_'):
            return f'{week} minggu'
    return 'Tidak diketahui'

def determine_quality_class(filename):
    """Derive the quality label from the filename's leading week prefix.

    Prefixes '0_', '1_' and '2_' map to 'Berkualitas', '3_' maps to
    'Kurang Berkualitas', and every other filename (including ones with no
    recognised prefix) maps to 'Tidak Berkualitas'.
    """
    prefix_table = (
        (('0_', '1_', '2_'), 'Berkualitas'),
        (('3_',), 'Kurang Berkualitas'),
    )
    for prefixes, label in prefix_table:
        if filename.startswith(prefixes):
            return label
    return 'Tidak Berkualitas'

In [None]:
# ---------- GLCM ----------
def extract_glcm_features(image):
    """Compute GLCM texture features plus first-order intensity statistics.

    The co-occurrence matrices are built for distances 1, 2, 3 and angles
    0, 45, 90, 135 degrees (12 matrices, symmetric and normalised). Every
    GLCM-derived feature is the mean over those 12 matrices.

    Args:
        image: BGR colour image (as returned by ``cv2.imread``) or an
            already-grayscale 2-D array.

    Returns:
        dict with keys ``kontras_glcm``, ``korelasi_glcm``, ``energi_glcm``,
        ``homogenitas_glcm``, ``entropi_glcm`` (from the GLCM) and
        ``variance_glcm``, ``skewness_glcm``, ``std_dev_glcm``, ``mean_glcm``.
        Despite the ``_glcm`` suffix, the last four are computed from the
        grayscale pixel intensities, not from the GLCM.
    """
    # Accept grayscale input as-is; only colour images need conversion.
    if image.ndim == 2:
        gray = image
    else:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = img_as_ubyte(gray)  # make sure the image is 8-bit

    glcm = graycomatrix(
        gray,
        distances=[1, 2, 3],
        angles=[0, np.pi/4, np.pi/2, 3*np.pi/4],
        symmetric=True, normed=True
    )

    contrast    = float(graycoprops(glcm, 'contrast').mean())
    correlation = float(graycoprops(glcm, 'correlation').mean())
    energy      = float(graycoprops(glcm, 'energy').mean())
    homogeneity = float(graycoprops(glcm, 'homogeneity').mean())

    # GLCM entropy: each (distance, angle) matrix is its own probability
    # distribution, so compute one entropy per matrix and average them, the
    # same way the graycoprops features above are averaged. Summing over the
    # whole 4-D stack would add up 12 separate entropies.
    P = glcm.astype(np.float64)
    # Masked log: empty cells contribute exactly 0 and non-empty cells are
    # not biased by an epsilon.
    log_p = np.log2(P, out=np.zeros_like(P), where=P > 0)
    ent_glcm = float(-(P * log_p).sum(axis=(0, 1)).mean())

    # First-order statistics of the image intensities (not of the GLCM).
    mean_glcm = float(np.mean(gray))
    std_glcm  = float(np.std(gray))
    var_glcm  = float(np.var(gray))

    # Intensity skewness; the small constant guards against division by zero
    # on a perfectly flat image.
    skew_glcm = float(np.mean(((gray - mean_glcm) / (std_glcm + 1e-12)) ** 3))

    return dict(
        kontras_glcm=contrast,
        korelasi_glcm=correlation,
        energi_glcm=energy,
        homogenitas_glcm=homogeneity,
        entropi_glcm=ent_glcm,
        variance_glcm=var_glcm,
        skewness_glcm=skew_glcm,
        std_dev_glcm=std_glcm,
        mean_glcm=mean_glcm
    )

In [None]:
# ---------- LBP ----------
def extract_lbp_features(image, P=8, R=1, method='uniform'):
    """Compute summary statistics of the Local Binary Pattern code histogram.

    The LBP code image is reduced to a normalised histogram ``p`` over the
    possible codes, and the features are moments and heuristics of that
    distribution.

    Args:
        image: BGR colour image (as returned by ``cv2.imread``) or an
            already-grayscale 2-D array.
        P: number of circularly symmetric neighbour points.
        R: radius of the circle.
        method: LBP variant passed to ``local_binary_pattern``. Supported
            here: ``'uniform'``, ``'nri_uniform'``, ``'default'``, ``'ror'``.

    Returns:
        dict with keys ``mean_lbp``, ``std_dev_lbp``, ``energi_lbp``,
        ``entropi_lbp``, ``variance_lbp``, ``skewness_lbp``, ``kontras_lbp``,
        ``korelasi_lbp``, ``homogenitas_lbp``.

    Raises:
        ValueError: if ``method`` does not produce discrete codes that can be
            histogrammed (i.e. ``'var'``).
    """
    # Accept grayscale input as-is; only colour images need conversion.
    if image.ndim == 2:
        gray = image
    else:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Unknown method names are rejected by local_binary_pattern itself.
    lbp = local_binary_pattern(gray, P, R, method=method)

    # The number of distinct codes depends on the LBP variant; a fixed P + 2
    # is only right for 'uniform' and would drop codes for the others.
    method_key = method.lower()
    if method_key == 'uniform':
        n_bins = P + 2              # 0..P uniform patterns + 1 non-uniform bin
    elif method_key == 'nri_uniform':
        n_bins = P * (P - 1) + 3
    elif method_key in ('default', 'ror'):
        n_bins = 2 ** P
    else:
        raise ValueError(
            f"LBP method {method!r} does not yield discrete codes; "
            "use 'uniform', 'nri_uniform', 'default' or 'ror'"
        )
    bins = np.arange(0, n_bins + 1)  # edges: 0..n_bins

    hist, _ = np.histogram(lbp.ravel(), bins=bins)
    hist = hist.astype(np.float64)
    hist_sum = hist.sum() + 1e-12
    p = hist / hist_sum  # probability distribution over codes

    # ---- LBP features derived from the distribution p ----
    # Mean LBP = expected code value (not the mean of p).
    bin_indices = np.arange(n_bins, dtype=np.float64)
    mean_lbp = float(np.sum(bin_indices * p))

    # LBP entropy (information-theoretic definition).
    eps = 1e-12
    entropy_lbp = float(-np.sum(p * np.log2(p + eps)))

    # Energy (ASM) = sum of p^2.
    energy_lbp = float(np.sum(p**2))

    # Variance of the code value around its mean.
    var_lbp = float(np.sum(((bin_indices - mean_lbp)**2) * p))

    # Standard deviation.
    std_lbp = float(np.sqrt(var_lbp + 1e-18))

    # Skewness.
    skew_lbp = float(np.sum(((bin_indices - mean_lbp)**3) * p) / ((std_lbp + 1e-12)**3))

    # Histogram-based "contrast / correlation / homogeneity": derived
    # definitions, not standard ones.
    # Contrast: variance of the code distribution.
    kontras_lbp = var_lbp

    # Correlation (heuristic): Pearson correlation between bin index and
    # probability; defined as 0 when either series is constant.
    if np.std(bin_indices) < 1e-12 or np.std(p) < 1e-12:
        korelasi_lbp = 0.0
    else:
        korelasi_lbp = float(np.corrcoef(bin_indices, p)[0, 1])

    # Homogeneity (heuristic, IDM-like weighting on the code index).
    homogenitas_lbp = float(np.sum(p / (1.0 + (bin_indices - mean_lbp)**2)))

    return dict(
        mean_lbp=mean_lbp,
        std_dev_lbp=std_lbp,
        energi_lbp=energy_lbp,
        entropi_lbp=entropy_lbp,
        variance_lbp=var_lbp,
        skewness_lbp=skew_lbp,
        kontras_lbp=kontras_lbp,
        korelasi_lbp=korelasi_lbp,
        homogenitas_lbp=homogenitas_lbp
    )

In [None]:
# ---------- Process all images ----------
def extract_and_save_features(input_path, output_csv):
    """Extract GLCM and LBP features for every image and write them to a CSV.

    For each name in the module-level ``classes`` list, images (.jpg, .png,
    .jpeg) in ``input_path/<class>`` are read in sorted filename order,
    passed through ``extract_glcm_features`` and ``extract_lbp_features``,
    and collected into one row per image.

    Args:
        input_path: dataset root containing one sub-folder per class.
        output_csv: path of the CSV file to write (without the index column).

    Returns:
        None. Missing class folders and unreadable images are skipped and
        reported on stdout after the CSV has been written.
    """
    cols = [
        # GLCM
        'kontras_glcm','korelasi_glcm','energi_glcm','homogenitas_glcm','entropi_glcm',
        'variance_glcm','skewness_glcm','std_dev_glcm','mean_glcm',
        # LBP
        'mean_lbp','std_dev_lbp','energi_lbp','entropi_lbp','variance_lbp','skewness_lbp',
        'kontras_lbp','korelasi_lbp','homogenitas_lbp',
        # labels
        'usia_simpan','kelas_kualitas','filename'
    ]
    image_exts = ('.jpg', '.png', '.jpeg')
    out = []
    missing_dirs = []
    unreadable = []

    for kls in classes:
        input_dir = os.path.join(input_path, kls)
        if not os.path.isdir(input_dir):
            missing_dirs.append(input_dir)
            continue

        # os.listdir returns entries in arbitrary order; sort so the CSV row
        # order is reproducible between runs.
        filenames = sorted(
            f for f in os.listdir(input_dir) if f.lower().endswith(image_exts)
        )

        for filename in tqdm(filenames, desc=f"Proses {kls}"):
            path = os.path.join(input_dir, filename)
            img  = cv2.imread(path)
            if img is None:  # cv2.imread signals a failed read with None
                unreadable.append(path)
                continue

            f_glcm = extract_glcm_features(img)
            f_lbp  = extract_lbp_features(img, P=8, R=1, method='uniform')

            # NOTE(review): both labels come from the filename prefix, not
            # from the folder `kls` the file was found in - confirm the two
            # always agree for this dataset.
            row = {
                **f_glcm,
                **f_lbp,
                'usia_simpan': get_storage_age(filename),
                'kelas_kualitas': determine_quality_class(filename),
                'filename': filename
            }
            out.append(row)

    df = pd.DataFrame(out, columns=cols)
    df.to_csv(output_csv, index=False)

    # Surface anything that was skipped instead of dropping it silently.
    for skipped_dir in missing_dirs:
        print("Peringatan: folder tidak ditemukan:", skipped_dir)
    for skipped_file in unreadable:
        print("Peringatan: gambar gagal dibaca:", skipped_file)

    print("Selesai:", output_csv)

In [None]:
# Run the extraction over the whole dataset and write the feature CSV.
extract_and_save_features(input_path, output_csv)

Proses Berkualitas: 100%|██████████| 810/810 [01:55<00:00,  7.03it/s]
Proses Kurang Berkualitas: 100%|██████████| 810/810 [01:55<00:00,  7.02it/s]
Proses Tidak Berkualitas: 100%|██████████| 810/810 [01:54<00:00,  7.05it/s]


Selesai: /content/drive/MyDrive/Hasil_Ekstraksi_Telur.csv
