In [2]:
import pandas as pd
import json
import os

In [3]:
# Location of the sampling sheet, relative to the notebook's working directory.
csv_file = "./Sister Cities Sampling.csv"

# Group_ID and city_order are read as strings rather than inferred numeric types.
id_column_types = {"Group_ID": str, "city_order": str}
df = pd.read_csv(csv_file, dtype=id_column_types, encoding="utf-8-sig")

# Keep only the rows whose Result column is exactly '1 good'.
is_good = df["Result"].eq("1 good")
df_filtered = df.loc[is_good]

# Build the nested lookup groups[family][city_order][cluster] -> list of image paths.
groups = {}

for _, row in df_filtered.iterrows():
    # Normalise the text fields that end up in dictionary keys and file names.
    city = row['City'].strip()
    country = row['State_Province'].strip()  # value of the State_Province column
    cluster = str(row['Cluster']).strip()
    family = row['Group_ID'].strip()
    city_order = row['city_order'].strip()

    # Relative path of this image under ./stim.
    # The two parts are joined with an explicit "/" instead of os.path.join:
    # os.path.join uses the host OS separator (a backslash on Windows), which
    # would make the generated path strings differ from one machine to another.
    folder_name = f"./stim/{city}, {country}_8_1_8"
    image_name = f"{city}___{country}_{cluster}_{row['Image']}.jpg"
    file_path = f"{folder_name}/{image_name}"

    # setdefault creates each missing level (family -> city_order -> cluster)
    # the first time it is seen, then the path is appended to the cluster's list.
    groups.setdefault(family, {}).setdefault(city_order, {}).setdefault(cluster, []).append(file_path)

# Reduce `groups` to a balanced subset:
#   * a cluster qualifies when it holds at least 5 image paths;
#   * a city (city_order) is kept only when it has at least 6 qualifying clusters;
#   * for a kept city, the 6 lowest-numbered qualifying clusters are used, each
#     trimmed to its first 5 paths in sorted order.
# Families, city orders and clusters are all visited in ascending numeric order.
# A family whose cities are all rejected still appears, mapped to an empty dict.
filtered_groups = {}

for family_id in sorted(groups, key=int):
    cities_in_family = groups[family_id]
    kept_cities = {}

    for order_id in sorted(cities_in_family, key=int):
        clusters_in_city = cities_in_family[order_id]

        # Clusters with enough images, in their original insertion order.
        eligible = [
            cluster_id
            for cluster_id, paths in clusters_in_city.items()
            if len(paths) >= 5
        ]
        if len(eligible) < 6:
            continue  # not enough usable clusters for this city

        eligible.sort(key=int)
        kept_cities[order_id] = {
            cluster_id: sorted(clusters_in_city[cluster_id])[:5]
            for cluster_id in eligible[:6]
        }

    filtered_groups[family_id] = kept_cities

# Destination of the generated script, relative to the working directory.
js_file_path = "groups.js"

# Serialise the selection as pretty-printed JSON (non-ASCII characters are kept
# as-is) and wrap it in a `const groups = ...;` JavaScript declaration.
groups_literal = json.dumps(filtered_groups, indent=4, ensure_ascii=False)
js_code = "const groups = " + groups_literal + ";"

# Write the declaration out as UTF-8 text.
with open(js_file_path, mode="w", encoding="utf-8") as js_out:
    js_out.write(js_code)

print(f"JavaScript file has been created: {js_file_path} (Only clusters with ≥5 images, selecting 6 such clusters)")

JavaScript file has been created: groups.js (Only clusters with ≥5 images, selecting 6 such clusters)
