In [4]:
import os
import requests
import yaml
import pandas as pd
from serpapi import GoogleSearch
import time

# --- Path Configuration ---
CONFIG_PATH = "/Users/tawate/Documents/Architecture_Image_Modeling/conf/apikeys.yaml"
CSV_PATH = "/Users/tawate/Documents/Architecture_Image_Modeling/data/house_style_list.csv"

def load_config(config_file):
    """Loads settings from the YAML file."""
    with open(config_file, 'r') as file:
        return yaml.safe_load(file)

def download_images_from_csv():
    # 1. Load configuration and API Key
    config = load_config(CONFIG_PATH)
    api_key = config['api_keys']['serpapi']
    base_download_path = config['paths']['download_base']
    
    # 2. Load the CSV file
    # Ensure you have 'pandas' installed: pip install pandas
    try:
        df = pd.read_csv(CSV_PATH)
    except Exception as e:
        print(f"Error reading CSV: {e}")
        return

    # 3. Identify the columns (assuming 'style' for folder names and 'description' for search)
    # Adjust these column names if your CSV uses different titles
    style_col = 'style' 
    query_col = 'description'

    if style_col not in df.columns or query_col not in df.columns:
        print(f"Error: CSV must have '{style_col}' and '{query_col}' columns.")
        print(f"Found columns: {df.columns.tolist()}")
        return

    # 4. Loop through each row in the CSV
    for index, row in df.iterrows():
        style_name = str(row[style_col]).strip().replace(" ", "_").lower()
        search_query = row[query_col]
        
        # Create a specific directory for this style
        style_dir = os.path.join(base_download_path, style_name)
        if not os.path.exists(style_dir):
            os.makedirs(style_dir)
            print(f"\n--- Processing Style: {style_name.upper()} ---")

        # Setup SerpApi Search
        params = {
            "engine": "google_images",
            "q": search_query,
            "api_key": api_key,
            "num": 50  # Number of images per description
        }

        try:
            search = GoogleSearch(params)
            results = search.get_dict()
            images = results.get("images_results", [])
            
            print(f"Found {len(images)} images for: {search_query}")

            for i, img in enumerate(images):
                img_url = img.get("original")
                try:
                    # Download the image
                    response = requests.get(img_url, timeout=10, headers={'User-Agent': 'Mozilla/5.0'})
                    response.raise_for_status()

                    # Create filename (e.g., gothic_revival_001.jpg)
                    filename = f"{style_name}_{i+1:03}.jpg"
                    filepath = os.path.join(style_dir, filename)

                    with open(filepath, 'wb') as f:
                        f.write(response.content)
                    print(f"   [+] Saved {filename}")
                    
                    # Polite delay to avoid IP blocks even with SerpApi
                    time.sleep(0.5)

                except Exception as e:
                    print(f"   [-] Failed image {i+1}: {e}")

        except Exception as e:
            print(f"API Search Error for '{search_query}': {e}")

    print("\nDataset creation complete.")

if __name__ == "__main__":
    download_images_from_csv()


--- Processing Style: CAPE_COD ---
Found 100 images for: Cape Cod house architecture style
   [+] Saved cape_cod_001.jpg
   [+] Saved cape_cod_002.jpg
   [+] Saved cape_cod_003.jpg
   [+] Saved cape_cod_004.jpg
   [-] Failed image 5: 403 Client Error: Forbidden for url: https://hgtvhome.sndimg.com/content/dam/images/hgtv/fullset/2011/2/2/0/istock-4039691_colonial-cape-cod-house_s3x4.jpg.rend.hgtvcom.1280.1280.85.suffix/1400962512746.webp
   [+] Saved cape_cod_006.jpg
   [+] Saved cape_cod_007.jpg
   [+] Saved cape_cod_008.jpg
   [+] Saved cape_cod_009.jpg
   [+] Saved cape_cod_010.jpg
   [+] Saved cape_cod_011.jpg
   [+] Saved cape_cod_012.jpg
   [+] Saved cape_cod_013.jpg
   [+] Saved cape_cod_014.jpg
   [+] Saved cape_cod_015.jpg
   [+] Saved cape_cod_016.jpg
   [+] Saved cape_cod_017.jpg
   [+] Saved cape_cod_018.jpg
   [+] Saved cape_cod_019.jpg
   [+] Saved cape_cod_020.jpg
   [+] Saved cape_cod_021.jpg
   [+] Saved cape_cod_022.jpg
   [+] Saved cape_cod_023.jpg
   [+] Saved cape