In [None]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)


Mounted at /content/drive


In [None]:
import os
import cv2
import numpy as np
import pandas as pd
from skimage import io, img_as_float
from skimage.filters import sobel
from scipy.stats import skew, kurtosis, entropy


# Load image in grayscale
def load_gray(img_path, size=(512, 512)):
    img = io.imread(img_path, as_gray=True)
    img = img_as_float(img)
    return cv2.resize(img, size, interpolation=cv2.INTER_AREA)


# Extract numerical features from the image
def extract_features(img, file_path, scanner_id="unknown"):
    h, w = img.shape
    pixels = img.flatten()
    edges = sobel(img)

    # Calculate pixel density (ratio of bright pixels)
    pixel_density = np.sum(pixels > 0.5) / pixels.size

    features = {
        "file_name": os.path.basename(file_path),
        "class_label": scanner_id,
        "width": w,
        "height": h,
        "aspect_ratio": w / h,
        "file_size_kb": os.path.getsize(file_path) / 1024,
        "mean_intensity": np.mean(pixels),
        "std_intensity": np.std(pixels),
        "skewness": skew(pixels),
        "kurtosis": kurtosis(pixels),
        "entropy": entropy(np.histogram(pixels, bins=256, range=(0, 1))[0] + 1e-6),
        "edge_density": np.mean(edges > 0.1),
        "pixel_density": pixel_density
    }
    return features


# Process all images in a folder
def process_all_images(folder_path, scanner_id="unknown", size=(512, 512)):
    df = pd.DataFrame()
    supported_ext = ('.jpg', '.jpeg', '.png', '.tif', '.tiff', '.bmp')

    for root, _, files in os.walk(folder_path):
        for file in files:
            if not file.lower().endswith(supported_ext):
                continue

            file_path = os.path.join(root, file)
            try:
                img = load_gray(file_path, size)
                feats = extract_features(img, file_path, scanner_id)
                df = pd.concat([df, pd.DataFrame([feats])], ignore_index=True)
                print(f"[{scanner_id}] Processed: {file}")
            except Exception as e:
                print(f"Error processing {file}: {e}")
    return df


# Main Execution
if __name__ == "__main__":
    base_path = input("Enter full path (e.g., /content/drive/MyDrive/Official):\n> ").strip()

    if not os.path.exists(base_path):
        print("Folder not found.")
        raise SystemExit

    all_metadata = pd.DataFrame()
    print(f"\nScanning base folder: {base_path}")

    # Process each scanner folder
    for scanner_folder in os.listdir(base_path):
        folder_path = os.path.join(base_path, scanner_folder)
        if os.path.isdir(folder_path):
            print(f"\nProcessing folder: {scanner_folder}")
            df = process_all_images(folder_path, scanner_id=scanner_folder)
            all_metadata = pd.concat([all_metadata, df], ignore_index=True)

    # Save and show dataset summary
    if not all_metadata.empty:
        output_file = "/content/image_feature_dataset.csv"
        all_metadata.to_csv(output_file, index=False)
        print(f"\nDataset created successfully and saved to: {output_file}")

        print("\n--- Sample of Extracted Features ---")
        print(all_metadata.head())

        print("\n--- Statistical Summary ---")
        print(all_metadata.describe())
    else:
        print("No images processed. Please check the folder structure.")


Enter full path (e.g., /content/drive/MyDrive/Official):
> /content/drive/MyDrive/Official

Scanning base folder: /content/drive/MyDrive/Official

Processing folder: Canon120-1
[Canon120-1] Processed: s1_1.tif
[Canon120-1] Processed: s1_2.tif
[Canon120-1] Processed: s1_3.tif
[Canon120-1] Processed: s1_4.tif
[Canon120-1] Processed: s1_5.tif
[Canon120-1] Processed: s1_6.tif
[Canon120-1] Processed: s1_7.tif
[Canon120-1] Processed: s1_8.tif
[Canon120-1] Processed: s1_9.tif
[Canon120-1] Processed: s1_10.tif
[Canon120-1] Processed: s1_11.tif
[Canon120-1] Processed: s1_12.tif
[Canon120-1] Processed: s1_13.tif
[Canon120-1] Processed: s1_14.tif
[Canon120-1] Processed: s1_15.tif
[Canon120-1] Processed: s1_16.tif
[Canon120-1] Processed: s1_17.tif
[Canon120-1] Processed: s1_19.tif
[Canon120-1] Processed: s1_18.tif
[Canon120-1] Processed: s1_20.tif
[Canon120-1] Processed: s1_21.tif
[Canon120-1] Processed: s1_22.tif
[Canon120-1] Processed: s1_23.tif
[Canon120-1] Processed: s1_24.tif
[Canon120-1] Pro

In [14]:
from google.colab import files
files.download(output_file)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
from google.colab import files
files.download("/content/scanner_image_features_sampled.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>