# Download Training/Test Images

In [2]:
import os
import requests
from urllib3.exceptions import NewConnectionError
import uuid
from bs4 import BeautifulSoup

In [12]:
# Leaf categories to collect; the download cell uses each name to build
# the search query, the output folder name and the file-name prefix.
classes = [
    'Coniferous',
    'Deciduous',
]

# Number of result pages requested per class (pages 1 through page_length).
page_length = 20

In [4]:
def get_image_links(query, nth_page, timeout=10):
    """Retrieve the image links from the response of the ecosia request.

    Sends one GET request to the Ecosia image search and collects the
    ``href`` of every ``a.image-result`` anchor found in the returned HTML.

    Args:
        query: Search text. ``requests`` URL-encodes it, so pass it unencoded.
        nth_page: Value sent as the ``p`` (page) query parameter.
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        A list of link strings (anchors without an ``href`` are skipped).
        An empty list is returned, after printing a message, when the request
        fails or the server answers with an unsuccessful HTTP status.
    """
    # Add headers to prevent IP block
    headers = {
        'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
        'content-type': 'text/html; charset=utf-8',
    }
    params = {
        'q': query,
        'imageType': 'photo',
        'color': 'colorOnly',
        'p': nth_page
    }

    try:
        response = requests.get(
            'https://www.ecosia.org/images',
            headers=headers,
            params=params,
            timeout=timeout,
        )
    except requests.RequestException as err:
        # Best effort: one unreachable page should not abort the whole scrape.
        print(f'Unable to retrieve results for "{query}" (page {nth_page}): {err}')
        return []

    if not response.ok:
        # An error/block page contains no usable results; report it instead of
        # silently parsing it into an empty list.
        print(f'Unable to retrieve results for "{query}" (page {nth_page}): HTTP {response.status_code}')
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    hrefs = (anchor.get('href') for anchor in soup.select('a.image-result'))
    # Drop anchors that carry no href so callers never receive None as a link.
    return [href for href in hrefs if href]

In [5]:
def download_images(img_links, folder_name, name_prefix, timeout=10):
    """Download images from the links and save them to a folder with name prefix.

    Each link is fetched independently; a failure is reported with a printed
    message and the remaining links are still processed.

    Args:
        img_links: Iterable of image URLs.
        folder_name: Existing folder (relative to the working directory) that
            receives the files.
        name_prefix: Text placed at the start of every file name.
        timeout: Seconds to wait for each server before giving up, so a single
            unresponsive host cannot stall the whole run.

    Returns:
        None. Files are written as ``./<folder_name>/<name_prefix>_<uuid4 hex>.jpg``;
        the ``.jpg`` extension is used regardless of the extension in the URL.
    """
    for link in img_links:
        try:
            img = requests.get(link, timeout=timeout)
            # Reject 4xx/5xx answers so HTML error pages are not stored as images.
            img.raise_for_status()
            # Image name = name_prefix + _uuid4 + .jpg
            path = f'./{folder_name}/{name_prefix}_{uuid.uuid4().hex}.jpg'
            with open(path, 'wb') as handle:
                handle.write(img.content)
        except Exception:
            # Deliberately broad (best effort), but not a bare ``except``:
            # KeyboardInterrupt/SystemExit must still be able to stop the loop.
            print(f'Unable to download image from the link: {link}')

In [13]:
# For every class: make sure its output folder exists, then walk the result
# pages of the image search and download whatever links each page yields.
for class_ in classes:
    # Folder name = class name + _leaf
    folder_name = f'{class_}_leaf'
    # exist_ok keeps the cell re-runnable when the folder is already present.
    os.makedirs(folder_name, exist_ok=True)

    # The query is passed through requests' `params`, which URL-encodes it.
    # Use a plain space between the words: a literal '+' would be sent as
    # %2B and searched for verbatim instead of acting as a separator.
    query = f'{class_} leaf'

    for page in range(1, page_length + 1):
        img_links = get_image_links(query=query, nth_page=page)
        download_images(img_links, folder_name, name_prefix=class_)

Unable to download image from the link: http://articles.extension.org/sites/default/files/w/2/2f/Conifer.jpg
Unable to download image from the link: https://www.state.sc.us/forest/tid11.jpg
Unable to download image from the link: http://cache4.asset-cache.net/gc/103313446-thujopsis-dolabrata-scale-like-conifer-gettyimages.jpg?v=1&c=IWSAsset&k=2&d=Mvazb6BSvnF4ieMYho4EDmF%2b50QWUFLvYOT2EgTKIFksjJ3vkchJWWGZz20C2Ji120AApOzcNjxj9NxPB1ry4A%3d%3d
Unable to download image from the link: http://cache4.asset-cache.net/gc/103313446-thujopsis-dolabrata-scale-like-conifer-gettyimages.jpg?v=1&c=IWSAsset&k=2&d=Mvazb6BSvnF4ieMYho4EDmF%2b50QWUFLvYOT2EgTKIFksjJ3vkchJWWGZz20C2Ji120AApOzcNjxj9NxPB1ry4A%3d%3d
Unable to download image from the link: http://www.pklifescience.com/staticfiles/articles/images/PKLS3925.png
