In [1]:
import os
from pathlib import Path
import json
import numpy as np
import pandas as pd
import cv2
from PIL import Image, ImageOps
from tqdm import tqdm

####
# Konfiguration
####

BASE_DIR = Path(r"C:\Users\valen\OneDrive\Dokumente\01_Studium\9\CulturalAnalytics\FilmBilder\2024_InsideOut2_Pixar")

# Bild Endung
IMG_EXTS = {".jpg", ".jpeg", ".png"}

# Optional: zusätzlich Histogramme berechnen (Hue: 12 Bins, Sat/Val: 4)
COMPUTE_HIST = False


# Hilfsfunktionen
def load_image_bgr(path: Path):

    # Mit PIL öffnen, EXIF-Orientierung korrigieren, dann nach numpy
    with Image.open(path) as im:
        im = ImageOps.exif_transpose(im).convert("RGB")
        arr = np.array(im)  # RGB
    # nach BGR für OpenCV Processing
    bgr = cv2.cvtColor(arr, cv2.COLOR_RGB2BGR)
    return bgr



In [2]:
def rgb_to_hsv_components(img_bgr: np.ndarray):
    
    # Konvertiert zu HSV und gibt H (0..360), S (0..1), V (0..1) Arrays zurück.
    
    img_hsv = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2HSV).astype(np.float32)
    H = img_hsv[:, :, 0] * 2.0            # OpenCV: H in [0,180] -> Grad [0,360)
    S = img_hsv[:, :, 1] / 255.0
    V = img_hsv[:, :, 2] / 255.0
    return H, S, V


In [3]:
def circular_mean_deg(h_deg: np.ndarray) -> float:
    
    # Zirkulärer Mittelwert in Grad [0,360).
    
    h_rad = np.deg2rad(h_deg)
    s = np.sin(h_rad).mean()
    c = np.cos(h_rad).mean()
    ang = np.arctan2(s, c)
    if ang < 0:
        ang += 2 * np.pi
    return float(np.rad2deg(ang))

In [4]:
def colorfulness_hasler(img_bgr: np.ndarray) -> float:
    
    # Hasler & Süsstrunk Colorfulness.
   
    img = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB).astype(np.float32)
    R, G, B = img[:, :, 0], img[:, :, 1], img[:, :, 2]
    rg = np.abs(R - G)
    yb = np.abs(0.5 * (R + G) - B)
    std_rg, std_yb = np.std(rg), np.std(yb)
    mean_rg, mean_yb = np.mean(rg), np.mean(yb)
    return float(np.sqrt(std_rg**2 + std_yb**2) + 0.3 * np.sqrt(mean_rg**2 + mean_yb**2))



In [5]:
def compute_histograms(H: np.ndarray, S: np.ndarray, V: np.ndarray):
    hue_bins = np.linspace(0, 360, 13)  # 12 Bins
    sat_bins = np.linspace(0, 1, 5)     # 4 Bins
    val_bins = np.linspace(0, 1, 5)     # 4 Bins

    h_hist, _ = np.histogram(H.flatten(), bins=hue_bins)
    s_hist, _ = np.histogram(S.flatten(), bins=sat_bins)
    v_hist, _ = np.histogram(V.flatten(), bins=val_bins)

    # Normieren auf Summe 1
    h_hist = (h_hist / max(h_hist.sum(), 1)).tolist()
    s_hist = (s_hist / max(s_hist.sum(), 1)).tolist()
    v_hist = (v_hist / max(v_hist.sum(), 1)).tolist()
    return h_hist, s_hist, v_hist

In [6]:
def iter_image_files(base_dir: Path):
    # Liefert (film_label, pfad) für jede Bilddatei.
    # Wenn es Unterordner gibt: film_label = Unterordnername
    # Sonst: film_label = base_dir.name

    # Prüfen, ob Unterordner mit Bildern existieren
    subfolders = [p for p in base_dir.iterdir() if p.is_dir()]
    has_subfolders_with_images = any(
        any((f.suffix.lower() in IMG_EXTS) for f in p.rglob("*")) for p in subfolders
    )

    if has_subfolders_with_images:
        for folder in sorted(subfolders):
            files = [f for f in folder.rglob("*") if f.suffix.lower() in IMG_EXTS]
            for f in sorted(files):
                yield folder.name, f
    else:
        files = [f for f in base_dir.glob("*") if f.suffix.lower() in IMG_EXTS]
        for f in sorted(files):
            yield base_dir.name, f

In [7]:
# Hauptlogik
def main():
    rows = []

    for film_label, img_path in tqdm(list(iter_image_files(BASE_DIR)), desc="Bilder verarbeiten"):
        try:
            img_bgr = load_image_bgr(img_path)
        except Exception as e:
            # Bild überspringen, wenn defekt
            rows.append({
                "film": film_label,
                "image_path": str(img_path),
                "error": f"load_failed: {e}"
            })
            continue

        try:
            H, S, V = rgb_to_hsv_components(img_bgr)
            mean_hue_deg = circular_mean_deg(H)
            mean_sat = float(np.mean(S))
            mean_val = float(np.mean(V))
            colorfulness = colorfulness_hasler(img_bgr)

            rec = {
                "film": film_label,
                "image_path": str(img_path),
                "mean_hue_deg": mean_hue_deg,
                "mean_sat": mean_sat,
                "mean_val": mean_val,
                "colorfulness_hs2003": colorfulness,
                "error": ""
            }

            if COMPUTE_HIST:
                h_hist, s_hist, v_hist = compute_histograms(H, S, V)
                rec["h_hist_12"] = json.dumps(h_hist)
                rec["s_hist_4"]  = json.dumps(s_hist)
                rec["v_hist_4"]  = json.dumps(v_hist)

            rows.append(rec)

        except Exception as e:
            rows.append({
                "film": film_label,
                "image_path": str(img_path),
                "error": f"metric_failed: {e}"
            })
            continue

    if not rows:
        print("Keine Bilddaten gefunden.")
        return

    df = pd.DataFrame(rows)
    out_frames_csv = BASE_DIR / "frames_metrics.csv"
    df.to_csv(out_frames_csv, index=False)
    print(f"[OK] Einzelmetriken gespeichert: {out_frames_csv}")

    # Aggregation pro Film
    ok = df[df["error"] == ""].copy()
    if ok.empty:
        print("Keine erfolgreichen Messungen für Aggregation.")
        return

    agg = ok.groupby("film", as_index=False).agg(
        mean_hue_deg=("mean_hue_deg", "mean"),
        mean_sat=("mean_sat", "mean"),
        mean_val=("mean_val", "mean"),
        colorfulness_hs2003=("colorfulness_hs2003", "mean"),
        n_images=("image_path", "count")
    )
    out_agg_csv = BASE_DIR / "film_aggregates.csv"
    agg.to_csv(out_agg_csv, index=False)
    print(f"[OK] Aggregation gespeichert: {out_agg_csv}")

    METRICS = ["mean_hue_deg", "mean_sat", "mean_val", "colorfulness_hs2003"]



    def film_root_from_path(p: str, base_dir: Path) -> str:    
        pth = Path(p)
        try:
            rel = pth.relative_to(base_dir)
            # Bild liegt direkt im Filmordner
            if len(rel.parts) == 1:
                return base_dir.name
            # Bild liegt in einem Unterordner
            return rel.parts[0]
        except Exception:
            # Fallback, wenn relative_to nicht klappt
            if len(pth.parents) >= 2:
                return pth.parent.parent.name
            return pth.parent.name if pth.parent.name else base_dir.name

    
    if not df.empty:
        ok = df[df["error"] == ""].copy()
        if not ok.empty:
            ok["film_root"] = ok["image_path"].apply(lambda s: film_root_from_path(s, BASE_DIR))

            for film_root, g in ok.groupby("film_root"):
                rows = []
                n_images = int(g["image_path"].nunique())

                for m in METRICS:
                    s = g[m].dropna().astype(float)
                    if s.empty:
                        stats = dict(min=np.nan, q1=np.nan, median=np.nan, q3=np.nan,
                                     max=np.nan, std=np.nan, n_images=n_images)
                    else:
                        stats = dict(
                            min=float(np.min(s)),
                            q1=float(np.percentile(s, 25)),
                            median=float(np.median(s)),
                            q3=float(np.percentile(s, 75)),
                            max=float(np.max(s)),
                            std=float(np.std(s, ddof=1) if s.size > 1 else 0.0),
                            n_images=n_images
                        )
                    rows.append({"metric": m, **stats})

                df_box = pd.DataFrame(rows, columns=[
                    "metric","min","q1","median","q3","max","std","n_images"
                ])

                # Zielpfad: im Filmordner ablegen
                film_dir = BASE_DIR / film_root
                film_dir.mkdir(parents=True, exist_ok=True)

                out_name = f"film_boxplot_{film_root}.csv"
                out_path = film_dir / out_name
                df_box.to_csv(out_path, index=False)
                print(f"[OK] Boxplot-Stats gespeichert: {out_path}")


if __name__ == "__main__":
    main()

Bilder verarbeiten: 100%|██████████████████████████████████████████████████████████████| 46/46 [00:09<00:00,  4.99it/s]

[OK] Einzelmetriken gespeichert: C:\Users\valen\OneDrive\Dokumente\01_Studium\9\CulturalAnalytics\FilmBilder\2024_InsideOut2_Pixar\frames_metrics.csv
[OK] Aggregation gespeichert: C:\Users\valen\OneDrive\Dokumente\01_Studium\9\CulturalAnalytics\FilmBilder\2024_InsideOut2_Pixar\film_aggregates.csv
[OK] Boxplot-Stats gespeichert: C:\Users\valen\OneDrive\Dokumente\01_Studium\9\CulturalAnalytics\FilmBilder\2024_InsideOut2_Pixar\2024_InsideOut2_Pixar\film_boxplot_2024_InsideOut2_Pixar.csv



