In [34]:
import os
import cv2
import numpy as np
import csv

<hr>

# Helper functions

In [22]:
def has_two_bones(mask, min_area_femur=5000, min_area_tibia=2500):
    """
    Decide whether a mask shows both a femur and a tibia.

    The external contours of ``mask`` are extracted and ranked by area; the
    largest one is treated as the femur and the runner-up as the tibia.

    mask: image handed to cv2.findContours (the caller in this file passes a
          thresholded grayscale slice)
    min_area_femur: area the largest contour must exceed
    min_area_tibia: area the second-largest contour must exceed
    Returns: True only when at least two contours exist and both area
             thresholds are strictly exceeded; False otherwise.
    """
    contours, _hierarchy = cv2.findContours(
        mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )

    # Rank contour areas from largest to smallest
    ranked_areas = [cv2.contourArea(contour) for contour in contours]
    ranked_areas.sort(reverse=True)

    # A single blob (or none at all) cannot represent two bones
    if len(ranked_areas) >= 2:
        largest, runner_up = ranked_areas[0], ranked_areas[1]
        return (largest > min_area_femur) and (runner_up > min_area_tibia)

    return False

In [25]:
def find_valid_slice_range_for_case(case_files, preds_dir):
    """
    Determine which slices of a single case pass the two-bone check.

    case_files: list of (slice_num, filename) pairs belonging to one case
    preds_dir: path to the folder that holds the prediction images
    Returns: (first_valid_slice, last_valid_slice, valid_slices) with
             valid_slices collected in ascending slice order, or
             (None, None, []) when no slice qualifies.
    """
    hits = []

    # Visit the slices in ascending slice-number order
    for slice_idx, image_name in sorted(case_files, key=lambda entry: entry[0]):
        gray = cv2.imread(os.path.join(preds_dir, image_name), cv2.IMREAD_GRAYSCALE)

        # cv2.imread returns None for files it cannot read; ignore those
        if gray is not None:
            # Threshold -> binary mask (pixels above 127 become 255, the rest 0)
            binary = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)[1]
            if has_two_bones(binary):
                hits.append(slice_idx)

    if not hits:
        return None, None, []

    return min(hits), max(hits), hits

<hr>

# Handle folders, files, and file names

In [26]:
def parse_filename(filename):
    """
    Split a prediction filename into its case id and slice number.

    Expected filename format: <case_id>_<slice>_pred.jpg
    Example: 9002116_027_pred.jpg

    Any leading directory is ignored, so "preds/9002116_027_pred.jpg" gives
    the same result as "9002116_027_pred.jpg".

    Returns:
        case_id (str), slice_num (int)
    Raises:
        ValueError: if the name has no "<case_id>_<slice>" prefix or the
            slice field is not an integer.
    """
    # Drop the directory part first: an underscore in a folder name would
    # otherwise be mistaken for the case_id/slice separator.
    base = os.path.basename(filename)

    # Remove extension
    name = os.path.splitext(base)[0]           # "9002116_027_pred"
    parts = name.split('_')                    # ["9002116", "027", "pred"]

    if len(parts) < 2:
        raise ValueError(
            f"Filename does not match '<case_id>_<slice>_...': {filename!r}"
        )

    case_id = parts[0]
    slice_num = int(parts[1])                  # "027" -> 27 (ValueError if not numeric)

    return case_id, slice_num

In [44]:
def process_all_cases(preds_dir="preds", output_csv="valid_slice_ranges.csv"):
    """
    Find the valid slice range of every case in a folder and save it as CSV.

    Image files in ``preds_dir`` are grouped by the case id parsed from their
    names, each group is passed to find_valid_slice_range_for_case, and one
    row per case is written to ``output_csv``. Progress is printed as it goes.

    preds_dir: folder containing the prediction images
    output_csv: path of the CSV file to write; its parent folder is created
                if it does not exist yet
    Returns: None. The CSV has the columns case_id, start_slice, end_slice
             and num_valid_slices; start_slice/end_slice are empty for a case
             without any valid slice.
    """
    image_exts = (".jpg", ".png", ".jpeg", ".bmp", ".tif", ".tiff")

    # 1) Group files by case_id
    cases = {}   # case_id -> list of (slice_num, filename)

    # Sorted so the "Skipping file" messages come out in a stable order
    for fname in sorted(os.listdir(preds_dir)):
        if not fname.lower().endswith(image_exts):
            continue

        try:
            case_id, slice_num = parse_filename(fname)
        except (ValueError, IndexError):
            # Names that do not follow <case_id>_<slice>_... are not ours
            print(f"Skipping file (cannot parse): {fname}")
            continue

        cases.setdefault(case_id, []).append((slice_num, fname))

    print(f"Found {len(cases)} cases in folder '{preds_dir}'")

    # 2) For each case, find valid slice range
    results = []  # list of dicts, one CSV row each

    for case_id, file_list in sorted(cases.items()):
        start_slice, end_slice, valid_slices = find_valid_slice_range_for_case(file_list, preds_dir)

        if start_slice is None:
            print(f"[{case_id}] No valid slices found.")
            # Empty cells in the CSV mark "no range"
            start_slice = end_slice = ""
        else:
            print(f"[{case_id}] valid slices: {start_slice} to {end_slice} "
                  f"({len(valid_slices)} slices)")

        results.append({
            "case_id": case_id,
            "start_slice": start_slice,
            "end_slice": end_slice,
            "num_valid_slices": len(valid_slices)
        })

    # 3) Save results to CSV
    # Create the target folder first so a missing directory cannot throw
    # away the work done above.
    out_dir = os.path.dirname(output_csv)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with open(output_csv, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=["case_id", "start_slice", "end_slice", "num_valid_slices"])
        writer.writeheader()
        writer.writerows(results)

    print(f"\nSaved results to: {output_csv}")

<hr>

# Run and write the results to a CSV file

In [48]:
if __name__ == "__main__":
    # Both paths are relative to the notebook's working directory.
    process_all_cases(
        preds_dir="../preds",
        output_csv="../final_problem3_kien_steven/valid_slice_ranges.csv",
    )

Found 198 cases in folder '../preds'
[9002116] valid slices: 33 to 140 (106 slices)
[9005075] valid slices: 30 to 136 (107 slices)
[9005132] valid slices: 30 to 139 (107 slices)
[9026934] valid slices: 38 to 129 (92 slices)
[9030718] valid slices: 24 to 114 (91 slices)
[9031141] valid slices: 16 to 133 (118 slices)
[9033275] valid slices: 29 to 119 (91 slices)
[9037952] valid slices: 18 to 138 (110 slices)
[9043945] valid slices: 35 to 139 (105 slices)
[9047800] valid slices: 26 to 130 (105 slices)
[9048789] valid slices: 25 to 120 (96 slices)
[9049007] valid slices: 34 to 136 (103 slices)
[9058960] valid slices: 35 to 121 (87 slices)
[9063955] valid slices: 41 to 134 (94 slices)
[9064631] valid slices: 39 to 131 (93 slices)
[9065272] valid slices: 26 to 130 (102 slices)
[9072646] valid slices: 32 to 160 (112 slices)
[9074878] valid slices: 37 to 123 (86 slices)
[9081858] valid slices: 38 to 135 (98 slices)
[9086868] valid slices: 45 to 123 (79 slices)
[9088414] valid slices: 35 to 138