In [1]:
! pip install icrawler --quiet

In [2]:
from icrawler.builtin import GoogleImageCrawler
import os

In [3]:
# Species to crawl, in processing order. Each entry is used verbatim as the
# search keyword and, with spaces turned into underscores, as the folder name.
bird_list = [
    "American Crow", "Black billed Cuckoo", "Brant Cormorant",
    "Brewer Blackbird", "Bronzed Cowbird", "Brown Creeper",
    "Eastern Towhee", "Fish Crow",
    "Gray Catbird", "Gray crowned Rosy Finch", "Groove billed Ani",
    "Indigo Bunting", "Lazuli Bunting", "Painted Bunting",
    "Red winged Blackbird", "Rusty Blackbird",
    "Yellow billed Cuckoo", "Yellow breasted Chat", "Yellow headed Blackbird",
]

# Base directory for saving images
base_dir = "/kaggle/working/bird_images"
os.makedirs(base_dir, exist_ok=True)

# Upper bound on the number of images requested per species; the crawl may
# deliver fewer than this.
MAX_IMAGES_PER_BIRD = 150

# Browser-like User-Agent sent by the crawler session to avoid detection
USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"

# Dictionary to store the count of downloaded images per species
download_counts = {}


def _renumber_images(directory, prefix):
    """Rename every regular file in `directory` to `<prefix>_<n><ext>`.

    Files are numbered from 1 in sorted-name order so the result is
    deterministic. Renaming happens in two passes (original -> temporary name
    -> final name) because `os.rename` silently replaces an existing
    destination on POSIX: when the cell is re-run, files that already carry a
    final name would otherwise be overwritten by other files.

    Args:
        directory: Folder whose files are renamed in place.
        prefix: Leading part of every new file name.

    Returns:
        The number of files that were renamed.
    """
    file_names = sorted(
        name for name in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, name))
    )

    # Random token keeps the temporary names clear of leftovers from an
    # earlier, interrupted run.
    token = os.urandom(4).hex()

    staged = []
    for idx, name in enumerate(file_names, start=1):
        # splitext yields "" for extension-less files instead of the whole name
        extension = os.path.splitext(name)[1]
        temp_name = f".tmp_{token}_{idx}{extension}"
        os.rename(
            os.path.join(directory, name),
            os.path.join(directory, temp_name)
        )
        staged.append((temp_name, f"{prefix}_{idx}{extension}"))

    # Every original name is gone now, so the final names cannot collide.
    for temp_name, final_name in staged:
        os.rename(
            os.path.join(directory, temp_name),
            os.path.join(directory, final_name)
        )

    return len(staged)


# Loop through each bird species
for bird in bird_list:
    folder_name = bird.replace(" ", "_")
    save_dir = os.path.join(base_dir, folder_name)
    os.makedirs(save_dir, exist_ok=True)

    # Initialize the Google Image Crawler
    google_crawler = GoogleImageCrawler(
        storage={'root_dir': save_dir},
        downloader_threads=2,  # Reduce thread count to minimize detection
        parser_threads=1,
        feeder_threads=1
    )

    # Set a custom User-Agent to avoid detection
    google_crawler.session.headers.update({"User-Agent": USER_AGENT})

    print(f"\n Downloading images for: {bird} ...")

    # NOTE: exceptions raised inside icrawler's worker threads (for example
    # the parser-thread TypeError visible in this cell's output) are not
    # propagated to this thread, so the except clause below only sees errors
    # raised by crawl() itself or by the renaming step.
    try:
        google_crawler.crawl(
            keyword=bird,
            max_num=MAX_IMAGES_PER_BIRD,  # Number of images to download for each bird
            file_idx_offset=1,
            overwrite=False
        )

        # Rename downloaded files and store how many images are on disk
        download_counts[bird] = _renumber_images(save_dir, folder_name)
        print(f"Finished downloading {download_counts[bird]} images for: {bird}")

    except Exception as e:
        print(f"❌ Error occurred while downloading images for '{bird}': {e}")
        download_counts[bird] = 0  # Mark as failed

# Final report: one line per species (in processing order), then the output root
summary_lines = [
    f"🦜 {species}: {total} images downloaded."
    for species, total in download_counts.items()
]

print("\n Download Summary:")
if summary_lines:
    print("\n".join(summary_lines))

print(f"\n All images have been saved at: {base_dir}")


 Downloading images for: American Crow ...


Exception in thread parser-001:
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.10/dist-packages/icrawler/parser.py", line 94, in worker_exec
    for task in self.parse(response, **kwargs):
TypeError: 'NoneType' object is not iterable


Finished downloading 76 images for: American Crow

 Downloading images for: Black billed Cuckoo ...


Exception in thread parser-001:
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.10/dist-packages/icrawler/parser.py", line 94, in worker_exec
    for task in self.parse(response, **kwargs):
TypeError: 'NoneType' object is not iterable


Finished downloading 1 images for: Black billed Cuckoo

 Downloading images for: Brant Cormorant ...


Exception in thread parser-001:
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.10/dist-packages/icrawler/parser.py", line 94, in worker_exec
    for task in self.parse(response, **kwargs):
TypeError: 'NoneType' object is not iterable


Finished downloading 84 images for: Brant Cormorant

 Downloading images for: Brewer Blackbird ...


Exception in thread parser-001:
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.10/dist-packages/icrawler/parser.py", line 94, in worker_exec
    for task in self.parse(response, **kwargs):
TypeError: 'NoneType' object is not iterable


Finished downloading 68 images for: Brewer Blackbird

 Downloading images for: Bronzed Cowbird ...


Exception in thread parser-001:
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.10/dist-packages/icrawler/parser.py", line 94, in worker_exec
    for task in self.parse(response, **kwargs):
TypeError: 'NoneType' object is not iterable


Finished downloading 82 images for: Bronzed Cowbird

 Downloading images for: Brown Creeper ...


Exception in thread parser-001:
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.10/dist-packages/icrawler/parser.py", line 94, in worker_exec
    for task in self.parse(response, **kwargs):
TypeError: 'NoneType' object is not iterable


Finished downloading 78 images for: Brown Creeper

 Downloading images for: Eastern Towhee ...


Exception in thread parser-001:
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.10/dist-packages/icrawler/parser.py", line 94, in worker_exec
    for task in self.parse(response, **kwargs):
TypeError: 'NoneType' object is not iterable


Finished downloading 74 images for: Eastern Towhee

 Downloading images for: Fish Crow ...


Exception in thread parser-001:
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.10/dist-packages/icrawler/parser.py", line 94, in worker_exec
    for task in self.parse(response, **kwargs):
TypeError: 'NoneType' object is not iterable


Finished downloading 70 images for: Fish Crow

 Downloading images for: Gray Catbird ...


Exception in thread parser-001:
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.10/dist-packages/icrawler/parser.py", line 94, in worker_exec
    for task in self.parse(response, **kwargs):
TypeError: 'NoneType' object is not iterable


Finished downloading 80 images for: Gray Catbird

 Downloading images for: Gray crowned Rosy Finch ...


Exception in thread parser-001:
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.10/dist-packages/icrawler/parser.py", line 94, in worker_exec
    for task in self.parse(response, **kwargs):
TypeError: 'NoneType' object is not iterable


Finished downloading 73 images for: Gray crowned Rosy Finch

 Downloading images for: Groove billed Ani ...


Exception in thread parser-001:
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.10/dist-packages/icrawler/parser.py", line 94, in worker_exec
    for task in self.parse(response, **kwargs):
TypeError: 'NoneType' object is not iterable


Finished downloading 72 images for: Groove billed Ani

 Downloading images for: Indigo Bunting ...


Exception in thread parser-001:
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.10/dist-packages/icrawler/parser.py", line 94, in worker_exec
    for task in self.parse(response, **kwargs):
TypeError: 'NoneType' object is not iterable


Finished downloading 76 images for: Indigo Bunting

 Downloading images for: Lazuli Bunting ...


Exception in thread parser-001:
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.10/dist-packages/icrawler/parser.py", line 94, in worker_exec
    for task in self.parse(response, **kwargs):
TypeError: 'NoneType' object is not iterable


Finished downloading 68 images for: Lazuli Bunting

 Downloading images for: Painted Bunting ...


Exception in thread parser-001:
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.10/dist-packages/icrawler/parser.py", line 94, in worker_exec
    for task in self.parse(response, **kwargs):
TypeError: 'NoneType' object is not iterable


Finished downloading 80 images for: Painted Bunting

 Downloading images for: Red winged Blackbird ...


Exception in thread parser-001:
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.10/dist-packages/icrawler/parser.py", line 94, in worker_exec
    for task in self.parse(response, **kwargs):
TypeError: 'NoneType' object is not iterable


Finished downloading 78 images for: Red winged Blackbird

 Downloading images for: Rusty Blackbird ...


Exception in thread parser-001:
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.10/dist-packages/icrawler/parser.py", line 94, in worker_exec
    for task in self.parse(response, **kwargs):
TypeError: 'NoneType' object is not iterable


Finished downloading 78 images for: Rusty Blackbird

 Downloading images for: Yellow billed Cuckoo ...


Exception in thread parser-001:
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.10/dist-packages/icrawler/parser.py", line 94, in worker_exec
    for task in self.parse(response, **kwargs):
TypeError: 'NoneType' object is not iterable


Finished downloading 72 images for: Yellow billed Cuckoo

 Downloading images for: Yellow breasted Chat ...


Exception in thread parser-001:
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.10/dist-packages/icrawler/parser.py", line 94, in worker_exec
    for task in self.parse(response, **kwargs):
TypeError: 'NoneType' object is not iterable


Finished downloading 65 images for: Yellow breasted Chat

 Downloading images for: Yellow headed Blackbird ...


Exception in thread parser-001:
Traceback (most recent call last):
  File "/usr/lib/python3.10/threading.py", line 1016, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.10/threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.10/dist-packages/icrawler/parser.py", line 94, in worker_exec
    for task in self.parse(response, **kwargs):
TypeError: 'NoneType' object is not iterable


Finished downloading 0 images for: Yellow headed Blackbird

 Download Summary:
🦜 American Crow: 76 images downloaded.
🦜 Black billed Cuckoo: 1 images downloaded.
🦜 Brant Cormorant: 84 images downloaded.
🦜 Brewer Blackbird: 68 images downloaded.
🦜 Bronzed Cowbird: 82 images downloaded.
🦜 Brown Creeper: 78 images downloaded.
🦜 Eastern Towhee: 74 images downloaded.
🦜 Fish Crow: 70 images downloaded.
🦜 Gray Catbird: 80 images downloaded.
🦜 Gray crowned Rosy Finch: 73 images downloaded.
🦜 Groove billed Ani: 72 images downloaded.
🦜 Indigo Bunting: 76 images downloaded.
🦜 Lazuli Bunting: 68 images downloaded.
🦜 Painted Bunting: 80 images downloaded.
🦜 Red winged Blackbird: 78 images downloaded.
🦜 Rusty Blackbird: 78 images downloaded.
🦜 Yellow billed Cuckoo: 72 images downloaded.
🦜 Yellow breasted Chat: 65 images downloaded.
🦜 Yellow headed Blackbird: 0 images downloaded.

 All images have been saved at: /kaggle/working/bird_images
