In [None]:
import os
import requests
import time
from duckduckgo_search import DDGS

# --- Configuration ---
SEARCH_QUERY = "Gothic Revival architecture house"
DOWNLOAD_DIRECTORY = "/Users/tawate/Documents/Architecture_Image_Modeling/architectural_style_images/gothic_revival_images"
MAX_IMAGES = 5  # Increased slightly for testing

def download_images():
    print(f"Searching for '{SEARCH_QUERY}' on DuckDuckGo...")

    # Set up headers to look like a real browser
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36"
    }

    if not os.path.exists(DOWNLOAD_DIRECTORY):
        os.makedirs(DOWNLOAD_DIRECTORY)
        print(f"Created directory: {DOWNLOAD_DIRECTORY}")

    with DDGS() as ddgs:
        # Fetching results
        try:
            search_results = ddgs.images(
                keywords=SEARCH_QUERY,
                region="us-en",
                safesearch="off",
                max_results=MAX_IMAGES
            )
        except Exception as e:
            print(f"Failed to fetch search results: {e}")
            return

        for i, result in enumerate(search_results):
            image_url = result.get("image")
            if not image_url:
                continue

            try:
                print(f"[{i+1}/{MAX_IMAGES}] Downloading: {image_url[:60]}...")
                
                # Pass headers to the request to avoid 403 errors from image hosts
                response = requests.get(image_url, headers=headers, timeout=15)
                response.raise_for_status()

                # Determine extension
                ext = os.path.splitext(image_url.split('?')[0])[1]
                if ext.lower() not in ['.jpg', '.jpeg', '.png', '.webp']:
                    ext = ".jpg"
                
                filename = f"gothic_revival_{i+1}{ext}"
                filepath = os.path.join(DOWNLOAD_DIRECTORY, filename)

                with open(filepath, 'wb') as f:
                    f.write(response.content)
                
                print(f"   -> Saved successfully.")
                
                # Small sleep to prevent aggressive rate limiting
                time.sleep(1)

            except requests.exceptions.RequestException as e:
                print(f"   -> Skip: {e}")
            except Exception as e:
                print(f"   -> Unexpected error: {e}")

    print("\nDownload process finished.")

if __name__ == "__main__":
    download_images()

Searching for 'Gothic Revival architecture house' on DuckDuckGo...


  with DDGS() as ddgs:


RatelimitException: https://duckduckgo.com/i.js?o=json&q=Gothic+Revival+architecture+house&l=us-en&vqd=4-216508373693060604467202148159804881958&p=-1&f=%2C%2C%2C%2C%2C 403 Ratelimit

In [2]:
import os
import re
import requests
from duckduckgo_search import DDGS
import time

# --- Configuration ---

# List of house styles transcribed from the image
HOUSE_STYLES = [
    "Saltbox", "Georgian", "Federal", "Greek Revival", "Gothic Revival",
    "Italianate", "Second Empire", "Queen Anne", "Shingle",
    "Richardsonian Romanesque", "Folk Victorian", "Colonial Revival",
    "Cape Cod", "Neo Classical", "Tudor Revival", "Spanish Colonial Revival",
    "French Revival", "Pueblo Revival", "Craftsman", "Modernistic",
    "International", "Ranch/Prairie"
]

# The base directory where all style folders will be created
BASE_DOWNLOAD_DIRECTORY = "architectural_style_images"

# The maximum number of images to download for each style
MAX_IMAGES_PER_STYLE = 50

def sanitize_foldername(name):
    """
    Cleans a string to be used as a valid folder name.
    Replaces spaces and slashes with underscores and removes invalid characters.
    """
    name = name.replace('/', '_').replace(' ', '_')
    name = re.sub(r'[^\w-]', '', name)
    return name

def download_images_for_style(style, base_dir, max_images):
    """
    Searches for images of a given architectural style and downloads them
    into a dedicated sub-folder.
    """
    print(f"\n{'='*20}\nProcessing style: {style}\n{'='*20}")

    style_folder_name = sanitize_foldername(style)
    download_path = os.path.join(base_dir, style_folder_name)
    os.makedirs(download_path, exist_ok=True)
    print(f"Saving images to: {download_path}")

    search_query = f"{style} house architecture"
    print(f"Searching for: '{search_query}'...")

    # --- FIX IS HERE ---
    # Initialize the counter BEFORE the try block to ensure it always exists.
    download_count = 0

    try:
        with DDGS() as ddgs:
            search_results = ddgs.images(
                keywords=search_query,
                region="us-en",
                safesearch="off",
                max_results=max_images
            )
            
            for i, result in enumerate(search_results):
                # The loop will only run if search_results is not empty
                image_url = result.get("image")
                if not image_url:
                    print(f"Skipping result {i+1}: No image URL found.")
                    continue

                try:
                    print(f"[{i+1}/{max_images}] Downloading: {image_url[:70]}...")
                    response = requests.get(image_url, timeout=15)
                    response.raise_for_status()

                    file_ext = os.path.splitext(image_url)[1]
                    if not file_ext or len(file_ext) > 5:
                        content_type = response.headers.get('content-type')
                        if content_type and 'jpeg' in content_type: file_ext = '.jpg'
                        elif content_type and 'png' in content_type: file_ext = '.png'
                        else: file_ext = '.jpg'

                    filename = f"{style_folder_name}_{download_count + 1}{file_ext}"
                    filepath = os.path.join(download_path, filename)

                    with open(filepath, 'wb') as f:
                        f.write(response.content)
                    
                    print(f"   -> Saved as {filepath}")
                    download_count += 1

                except requests.exceptions.RequestException as e:
                    print(f"   -> Error downloading image {i+1}: {e}")
                except Exception as e:
                    print(f"   -> An unexpected error occurred for image {i+1}: {e}")

    except Exception as e:
        print(f"A critical error occurred while searching for {style}: {e}")
    
    # This print statement is now safe, as download_count is guaranteed to exist.
    print(f"\nFinished processing '{style}'. Downloaded {download_count} images.")


def main():
    """
    Main function to loop through all house styles and initiate downloads.
    """
    os.makedirs(BASE_DOWNLOAD_DIRECTORY, exist_ok=True)
    
    total_styles = len(HOUSE_STYLES)
    for index, style in enumerate(HOUSE_STYLES):
        print(f"\n--- Starting style {index + 1} of {total_styles} ---")
        download_images_for_style(
            style=style,
            base_dir=BASE_DOWNLOAD_DIRECTORY,
            max_images=MAX_IMAGES_PER_STYLE
        )
        time.sleep(2)
        
    print("\n\nAll architectural styles have been processed. Scraping complete.")


if __name__ == "__main__":
    main()


--- Starting style 1 of 22 ---

Processing style: Saltbox
Saving images to: architectural_style_images/Saltbox
Searching for: 'Saltbox house architecture'...


  with DDGS() as ddgs:


[1/50] Downloading: https://homeshiftteam.com/images/blog/post/132/saltbox-house.webp...
   -> Saved as architectural_style_images/Saltbox/Saltbox_1.webp
[2/50] Downloading: https://www.bhg.com/thmb/PXaxV1xZuCecC1SiUv-zJ42cuaQ=/1866x0/filters:n...
   -> Saved as architectural_style_images/Saltbox/Saltbox_2.jpg
[3/50] Downloading: https://na.rdcpix.com/663211063/8ecb8b5c10a19ccbe1daba7bc38ec77cw-c305...
   -> Saved as architectural_style_images/Saltbox/Saltbox_3.jpg
[4/50] Downloading: https://www.bhg.com/thmb/C8iGOZGBZU9paMTgOa8ClRXb1No=/4000x0/filters:n...
   -> Saved as architectural_style_images/Saltbox/Saltbox_4.jpg
[5/50] Downloading: https://hips.hearstapps.com/hmg-prod/images/saltbox-469219085-16558447...
   -> Saved as architectural_style_images/Saltbox/Saltbox_5.jpg
[6/50] Downloading: https://www.housedigest.com/img/gallery/what-are-saltbox-houses-and-ho...
   -> Saved as architectural_style_images/Saltbox/Saltbox_6.jpg
[7/50] Downloading: https://architecturedetective.com/sa

  with DDGS() as ddgs:


A critical error occurred while searching for Georgian: https://duckduckgo.com/i.js?o=json&q=Georgian+house+architecture&l=us-en&vqd=4-304389962064770194470815779864219775229&p=-1&f=%2C%2C%2C%2C%2C 403 Ratelimit

Finished processing 'Georgian'. Downloaded 0 images.

--- Starting style 3 of 22 ---

Processing style: Federal
Saving images to: architectural_style_images/Federal
Searching for: 'Federal house architecture'...


  with DDGS() as ddgs:


A critical error occurred while searching for Federal: https://duckduckgo.com/i.js?o=json&q=Federal+house+architecture&l=us-en&vqd=4-93457504906688190527481030148465371632&p=-1&f=%2C%2C%2C%2C%2C 403 Ratelimit

Finished processing 'Federal'. Downloaded 0 images.

--- Starting style 4 of 22 ---

Processing style: Greek Revival
Saving images to: architectural_style_images/Greek_Revival
Searching for: 'Greek Revival house architecture'...


  with DDGS() as ddgs:


A critical error occurred while searching for Greek Revival: https://duckduckgo.com/i.js?o=json&q=Greek+Revival+house+architecture&l=us-en&vqd=4-190363163744577850478479441385917464472&p=-1&f=%2C%2C%2C%2C%2C 403 Ratelimit

Finished processing 'Greek Revival'. Downloaded 0 images.

--- Starting style 5 of 22 ---

Processing style: Gothic Revival
Saving images to: architectural_style_images/Gothic_Revival
Searching for: 'Gothic Revival house architecture'...
A critical error occurred while searching for Gothic Revival: https://duckduckgo.com/?q=Gothic+Revival+house+architecture 202 Ratelimit

Finished processing 'Gothic Revival'. Downloaded 0 images.


  with DDGS() as ddgs:



--- Starting style 6 of 22 ---

Processing style: Italianate
Saving images to: architectural_style_images/Italianate
Searching for: 'Italianate house architecture'...


  with DDGS() as ddgs:


A critical error occurred while searching for Italianate: https://duckduckgo.com/i.js?o=json&q=Italianate+house+architecture&l=us-en&vqd=4-157038553487235681213199220510291954059&p=-1&f=%2C%2C%2C%2C%2C 403 Ratelimit

Finished processing 'Italianate'. Downloaded 0 images.

--- Starting style 7 of 22 ---

Processing style: Second Empire
Saving images to: architectural_style_images/Second_Empire
Searching for: 'Second Empire house architecture'...


  with DDGS() as ddgs:


A critical error occurred while searching for Second Empire: https://duckduckgo.com/i.js?o=json&q=Second+Empire+house+architecture&l=us-en&vqd=4-266371346502047382016944604646908277021&p=-1&f=%2C%2C%2C%2C%2C 403 Ratelimit

Finished processing 'Second Empire'. Downloaded 0 images.

--- Starting style 8 of 22 ---

Processing style: Queen Anne
Saving images to: architectural_style_images/Queen_Anne
Searching for: 'Queen Anne house architecture'...


  with DDGS() as ddgs:


A critical error occurred while searching for Queen Anne: https://duckduckgo.com/i.js?o=json&q=Queen+Anne+house+architecture&l=us-en&vqd=4-83683080302256303859014012100262364426&p=-1&f=%2C%2C%2C%2C%2C 403 Ratelimit

Finished processing 'Queen Anne'. Downloaded 0 images.

--- Starting style 9 of 22 ---

Processing style: Shingle
Saving images to: architectural_style_images/Shingle
Searching for: 'Shingle house architecture'...


  with DDGS() as ddgs:


A critical error occurred while searching for Shingle: https://duckduckgo.com/i.js?o=json&q=Shingle+house+architecture&l=us-en&vqd=4-260310361431781233124750003141800943425&p=-1&f=%2C%2C%2C%2C%2C 403 Ratelimit

Finished processing 'Shingle'. Downloaded 0 images.

--- Starting style 10 of 22 ---

Processing style: Richardsonian Romanesque
Saving images to: architectural_style_images/Richardsonian_Romanesque
Searching for: 'Richardsonian Romanesque house architecture'...


  with DDGS() as ddgs:


A critical error occurred while searching for Richardsonian Romanesque: https://duckduckgo.com/i.js?o=json&q=Richardsonian+Romanesque+house+architecture&l=us-en&vqd=4-281335754837709984329135252051286296620&p=-1&f=%2C%2C%2C%2C%2C 403 Ratelimit

Finished processing 'Richardsonian Romanesque'. Downloaded 0 images.

--- Starting style 11 of 22 ---

Processing style: Folk Victorian
Saving images to: architectural_style_images/Folk_Victorian
Searching for: 'Folk Victorian house architecture'...


  with DDGS() as ddgs:


A critical error occurred while searching for Folk Victorian: https://duckduckgo.com/i.js?o=json&q=Folk+Victorian+house+architecture&l=us-en&vqd=4-266872992602840534497169991397173208187&p=-1&f=%2C%2C%2C%2C%2C 403 Ratelimit

Finished processing 'Folk Victorian'. Downloaded 0 images.

--- Starting style 12 of 22 ---

Processing style: Colonial Revival
Saving images to: architectural_style_images/Colonial_Revival
Searching for: 'Colonial Revival house architecture'...


  with DDGS() as ddgs:


A critical error occurred while searching for Colonial Revival: https://duckduckgo.com/i.js?o=json&q=Colonial+Revival+house+architecture&l=us-en&vqd=4-25880591412776444783305276651825784446&p=-1&f=%2C%2C%2C%2C%2C 403 Ratelimit

Finished processing 'Colonial Revival'. Downloaded 0 images.

--- Starting style 13 of 22 ---

Processing style: Cape Cod
Saving images to: architectural_style_images/Cape_Cod
Searching for: 'Cape Cod house architecture'...


  with DDGS() as ddgs:


A critical error occurred while searching for Cape Cod: https://duckduckgo.com/i.js?o=json&q=Cape+Cod+house+architecture&l=us-en&vqd=4-11457865954865431178182049896089733869&p=-1&f=%2C%2C%2C%2C%2C 403 Ratelimit

Finished processing 'Cape Cod'. Downloaded 0 images.

--- Starting style 14 of 22 ---

Processing style: Neo Classical
Saving images to: architectural_style_images/Neo_Classical
Searching for: 'Neo Classical house architecture'...


  with DDGS() as ddgs:


A critical error occurred while searching for Neo Classical: https://duckduckgo.com/i.js?o=json&q=Neo+Classical+house+architecture&l=us-en&vqd=4-187125662261490585799714818019230660681&p=-1&f=%2C%2C%2C%2C%2C 403 Ratelimit

Finished processing 'Neo Classical'. Downloaded 0 images.

--- Starting style 15 of 22 ---

Processing style: Tudor Revival
Saving images to: architectural_style_images/Tudor_Revival
Searching for: 'Tudor Revival house architecture'...


  with DDGS() as ddgs:


[1/50] Downloading: https://hgtvhome.sndimg.com/content/dam/images/hgtv/fullset/2013/2/19/...
   -> Error downloading image 1: 403 Client Error: Forbidden for url: https://hgtvhome.sndimg.com/content/dam/images/hgtv/fullset/2013/2/19/4/DesignLens_large-tudor-home_s4x3.jpg.rend.hgtvcom.1280.960.suffix/1400976380192.jpeg
[2/50] Downloading: https://www.williampitt.com/wp-content/uploads/2015/06/TudorRevival-.j...
   -> Saved as architectural_style_images/Tudor_Revival/Tudor_Revival_1.jpg
[3/50] Downloading: https://www.merrimackdesign.com/wp-content/uploads/2015/01/tudor-exter...
   -> Saved as architectural_style_images/Tudor_Revival/Tudor_Revival_2.jpg
[4/50] Downloading: https://static.vecteezy.com/system/resources/previews/055/358/669/non_...
   -> Saved as architectural_style_images/Tudor_Revival/Tudor_Revival_3.jpg
[5/50] Downloading: https://c8.alamy.com/comp/EA9E3P/tudor-revival-house-forest-hills-gard...
   -> Saved as architectural_style_images/Tudor_Revival/Tudor_Revival_4.jpg

  with DDGS() as ddgs:


A critical error occurred while searching for Spanish Colonial Revival: https://duckduckgo.com/i.js?o=json&q=Spanish+Colonial+Revival+house+architecture&l=us-en&vqd=4-84810098566640844605042951911560677522&p=-1&f=%2C%2C%2C%2C%2C 403 Ratelimit

Finished processing 'Spanish Colonial Revival'. Downloaded 0 images.

--- Starting style 17 of 22 ---

Processing style: French Revival
Saving images to: architectural_style_images/French_Revival
Searching for: 'French Revival house architecture'...


  with DDGS() as ddgs:


A critical error occurred while searching for French Revival: https://duckduckgo.com/i.js?o=json&q=French+Revival+house+architecture&l=us-en&vqd=4-152389003715754349954480114325624604254&p=-1&f=%2C%2C%2C%2C%2C 403 Ratelimit

Finished processing 'French Revival'. Downloaded 0 images.

--- Starting style 18 of 22 ---

Processing style: Pueblo Revival
Saving images to: architectural_style_images/Pueblo_Revival
Searching for: 'Pueblo Revival house architecture'...


  with DDGS() as ddgs:


A critical error occurred while searching for Pueblo Revival: https://duckduckgo.com/i.js?o=json&q=Pueblo+Revival+house+architecture&l=us-en&vqd=4-268539723489187491370810461134828625951&p=-1&f=%2C%2C%2C%2C%2C 403 Ratelimit

Finished processing 'Pueblo Revival'. Downloaded 0 images.

--- Starting style 19 of 22 ---

Processing style: Craftsman
Saving images to: architectural_style_images/Craftsman
Searching for: 'Craftsman house architecture'...


  with DDGS() as ddgs:


A critical error occurred while searching for Craftsman: https://duckduckgo.com/i.js?o=json&q=Craftsman+house+architecture&l=us-en&vqd=4-269926924292759565864684503255467288295&p=-1&f=%2C%2C%2C%2C%2C 403 Ratelimit

Finished processing 'Craftsman'. Downloaded 0 images.

--- Starting style 20 of 22 ---

Processing style: Modernistic
Saving images to: architectural_style_images/Modernistic
Searching for: 'Modernistic house architecture'...


  with DDGS() as ddgs:


A critical error occurred while searching for Modernistic: https://duckduckgo.com/i.js?o=json&q=Modernistic+house+architecture&l=us-en&vqd=4-112063178688884075362217269845884002194&p=-1&f=%2C%2C%2C%2C%2C 403 Ratelimit

Finished processing 'Modernistic'. Downloaded 0 images.

--- Starting style 21 of 22 ---

Processing style: International
Saving images to: architectural_style_images/International
Searching for: 'International house architecture'...


  with DDGS() as ddgs:


A critical error occurred while searching for International: https://duckduckgo.com/i.js?o=json&q=International+house+architecture&l=us-en&vqd=4-152373790848580718876169605032949022617&p=-1&f=%2C%2C%2C%2C%2C 403 Ratelimit

Finished processing 'International'. Downloaded 0 images.

--- Starting style 22 of 22 ---

Processing style: Ranch/Prairie
Saving images to: architectural_style_images/Ranch_Prairie
Searching for: 'Ranch/Prairie house architecture'...


  with DDGS() as ddgs:


A critical error occurred while searching for Ranch/Prairie: https://duckduckgo.com/i.js?o=json&q=Ranch%2FPrairie+house+architecture&l=us-en&vqd=4-330849883121940918052340105567799920813&p=-1&f=%2C%2C%2C%2C%2C 403 Ratelimit

Finished processing 'Ranch/Prairie'. Downloaded 0 images.


All architectural styles have been processed. Scraping complete.
