In [1]:
import selenium
import os
from selenium import webdriver
import time
import requests
import io
from PIL import Image
import hashlib

In [2]:
DRIVER_PATH = "/Applications/chromedriver" # path to the chromedriver executable
#os.environ["webdriver.chrome.driver"] = chromedriver
wd = webdriver.Chrome(executable_path=DRIVER_PATH)

In [3]:
def search_and_download(search_term:str,driver_path:str,target_path='./images',number_images=150):
    target_folder = os.path.join(target_path,'_'.join(search_term.lower().split(' ')))

    if not os.path.exists(target_folder):
        os.makedirs(target_folder)

    with webdriver.Chrome(executable_path=driver_path) as wd:
        res = fetch_image_urls(search_term, number_images, wd=wd, sleep_between_interactions=0.5)
        
    for elem in res:
        persist_image(target_folder,elem)

In [4]:
def fetch_image_urls(query:str, max_links_to_fetch:int, wd:webdriver, sleep_between_interactions:int=1):
    def scroll_to_end(wd):
        wd.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(sleep_between_interactions)    
    
    # build the google query
    search_url = "https://www.google.com/search?safe=off&site=&tbm=isch&source=hp&q={q}&oq={q}&gs_l=img"

    # load the page
    wd.get(search_url.format(q=query))

    image_urls = set()
    image_count = 0
    results_start = 0
    while image_count < max_links_to_fetch:
        scroll_to_end(wd)

        # get all image thumbnail results
        thumbnail_results = wd.find_elements_by_css_selector("img.Q4LuWd")
        number_results = len(thumbnail_results)
        
        print(f"Found: {number_results} search results. Extracting links from {results_start}:{number_results}")
        
        for img in thumbnail_results[results_start:number_results]:
            # try to click every thumbnail such that we can get the real image behind it
            try:
                img.click()
                time.sleep(sleep_between_interactions)
            except Exception:
                continue

            # extract image urls    
            actual_images = wd.find_elements_by_css_selector('img.n3VNCb')
            for actual_image in actual_images:
                if actual_image.get_attribute('src') and 'http' in actual_image.get_attribute('src'):
                    image_urls.add(actual_image.get_attribute('src'))

            image_count = len(image_urls)

            if len(image_urls) >= max_links_to_fetch:
                print(f"Found: {len(image_urls)} image links, done!")
                break
        else:
            print("Found:", len(image_urls), "image links, looking for more ...")
            time.sleep(30)
            return
            load_more_button = wd.find_element_by_css_selector(".mye4qd")
            if load_more_button:
                wd.execute_script("document.querySelector('.mye4qd').click();")

        # move the result startpoint further down
        results_start = len(thumbnail_results)

    return image_urls

In [5]:
def persist_image(folder_path:str,url:str):
    try:
        image_content = requests.get(url).content

    except Exception as e:
        print(f"ERROR - Could not download {url} - {e}")

    try:
        image_file = io.BytesIO(image_content)
        image = Image.open(image_file).convert('RGB')
        file_path = os.path.join(folder_path,hashlib.sha1(image_content).hexdigest()[:10] + '.jpg')
        with open(file_path, 'wb') as f:
            image.save(f, "JPEG", quality=85)
        print(f"SUCCESS - saved {url} - as {file_path}")
    except Exception as e:
        print(f"ERROR - Could not save {url} - {e}")

In [None]:
search_term = 'inflatable alligator'
search_and_download(search_term = search_term, driver_path=DRIVER_PATH)

In [None]:
search_term = 'swimming inner tube'
search_and_download(search_term = search_term, driver_path=DRIVER_PATH)

In [None]:
search_term = 'pool float'
search_and_download(search_term = search_term, driver_path=DRIVER_PATH)

In [None]:
search_term = 'flamingo inflatable'
search_and_download(search_term = search_term, driver_path=DRIVER_PATH)

In [None]:
search_term = 'pool flamingo'
search_and_download(search_term = search_term, driver_path=DRIVER_PATH)

In [6]:
search_term = 'people in swim tube'
search_and_download(search_term = search_term, driver_path=DRIVER_PATH)

Found: 100 search results. Extracting links from 0:100
Found: 151 image links, done!
SUCCESS - saved https://i1.wp.com/intrepidtubie.com/wp-content/uploads/2019/08/IMG953611-1.jpg?fit=600%2C800&ssl=1 - as ./images/people_in_swim_tube/4ce8875b98.jpg
SUCCESS - saved https://upload.wikimedia.org/wikipedia/commons/thumb/9/92/Swimming-pool.jpg/220px-Swimming-pool.jpg - as ./images/people_in_swim_tube/504d77ce57.jpg
SUCCESS - saved https://encrypted-tbn0.gstatic.com/images?q=tbn%3AANd9GcTnJkWepx0ttaiyCNFWPFlyBAADvJjbwbKf1Mb7NC3NVnMg3MGi - as ./images/people_in_swim_tube/5c664dde4e.jpg
SUCCESS - saved https://i2.wp.com/bestinflatablesports.com/wp-content/uploads/2018/02/shutterstock_512094808-1-e1518035039529.jpg?fit=1000%2C545&ssl=1 - as ./images/people_in_swim_tube/8b1f53fc5f.jpg
SUCCESS - saved https://encrypted-tbn0.gstatic.com/images?q=tbn%3AANd9GcQlcCOTJm3fFlMG8zBYKTTBoC6yoiv8TlPHHq2x-Yz1btsGS-3F - as ./images/people_in_swim_tube/400b9326ff.jpg
SUCCESS - saved https://encrypted-tbn0.gst

SUCCESS - saved https://www.mic-key.com/wp-content/uploads/2016/09/pool@2x.jpg - as ./images/people_in_swim_tube/ed70dfc897.jpg
SUCCESS - saved https://encrypted-tbn0.gstatic.com/images?q=tbn%3AANd9GcSNXYDGsnCkzEeestwQR91AE6OO2TUainadWVhlv_ewl5qpb5_r - as ./images/people_in_swim_tube/0e4fd8fee3.jpg
SUCCESS - saved https://thumbs.dreamstime.com/z/penticton-british-columbia-canada-september-people-swim-float-inner-tube-penticton-river-channel-popular-summer-164198231.jpg - as ./images/people_in_swim_tube/b238f1b6f3.jpg
SUCCESS - saved https://encrypted-tbn0.gstatic.com/images?q=tbn%3AANd9GcTyFauEMyax47ZsKnBYtvwVY2PP8jMTjIyUJF6L_TiFP5-7M_-Y - as ./images/people_in_swim_tube/3c5e3f96d6.jpg
SUCCESS - saved https://images-na.ssl-images-amazon.com/images/I/71IxV%2BDlk1L._AC_SX425_.jpg - as ./images/people_in_swim_tube/f1afd0c7f8.jpg
SUCCESS - saved https://i.pinimg.com/originals/17/6d/fd/176dfdd834c672a7762bd570a24aab63.jpg - as ./images/people_in_swim_tube/fe9ac415b8.jpg
SUCCESS - saved http

SUCCESS - saved https://encrypted-tbn0.gstatic.com/images?q=tbn%3AANd9GcQ4SJj3yzPetWG0iDXY9RcwyDBzCtp3vg8Ip10dEEz91fm3jFVa - as ./images/people_in_swim_tube/7996930dff.jpg
SUCCESS - saved https://qph.fs.quoracdn.net/main-qimg-288114767518f755679a080c70a04e02 - as ./images/people_in_swim_tube/8109d909af.jpg
SUCCESS - saved https://fscomps.fotosearch.com/compc/SPS/SPS007/people-swim-pool-couple-swimming-stock-photography__1417r-135.jpg - as ./images/people_in_swim_tube/bcc3841fcb.jpg
SUCCESS - saved https://ae01.alicdn.com/kf/HTB1TFsaIVXXXXcbXpXXq6xXFXXX5/222222597/HTB1TFsaIVXXXXcbXpXXq6xXFXXX5.jpg - as ./images/people_in_swim_tube/633ca658e8.jpg
SUCCESS - saved https://encrypted-tbn0.gstatic.com/images?q=tbn%3AANd9GcRMEIbKOb1MJ9oGNcmVESYRwZqVPg0hfrXGtCBvdgeS2JocVz-F - as ./images/people_in_swim_tube/d65494db4f.jpg
SUCCESS - saved https://encrypted-tbn0.gstatic.com/images?q=tbn%3AANd9GcRFpWHHt3kItLOrSn3JGU4dQAWM0a77htWKP02Nkk4TU2mBmj2t - as ./images/people_in_swim_tube/075d132232.jpg
SUC

SUCCESS - saved https://images.prod.meredith.com/product/c1b4bf4c2c0bb639c1682facf0d4d7f8/1536556495080/l/swimschool-deluxe-tot-trainer-fabric-swim-training-aid-safety-strap-inflatable-tube-2-4-years-yellow - as ./images/people_in_swim_tube/e1bbc52cd8.jpg
SUCCESS - saved https://encrypted-tbn0.gstatic.com/images?q=tbn%3AANd9GcQWE_VRqAcDsXHKRwy34ChmV-Hy9iGAts8sptY4pXqzaM-7t36s - as ./images/people_in_swim_tube/c87389d685.jpg
SUCCESS - saved https://encrypted-tbn0.gstatic.com/images?q=tbn%3AANd9GcRjyr7YIpphaiT0axdpelFlCodBX9gDdw4-WJDGo0NfWcav-fx5 - as ./images/people_in_swim_tube/851a89127c.jpg
SUCCESS - saved https://encrypted-tbn0.gstatic.com/images?q=tbn%3AANd9GcRCwLxDYML1y4p-8NFkNrpfjdVmSNNUlzjGgBCtPiqwoSokFqhl - as ./images/people_in_swim_tube/c3eec66299.jpg
SUCCESS - saved https://encrypted-tbn0.gstatic.com/images?q=tbn%3AANd9GcTbsylH-fz61AC5SzSdPuPcXIyUy7kmumAE1K7X_Ac0cwvtAIIB - as ./images/people_in_swim_tube/c641dcb916.jpg
SUCCESS - saved https://encrypted-tbn0.gstatic.com/image

In [7]:
search_term = 'swimming inner tube'
search_and_download(search_term = search_term, driver_path=DRIVER_PATH)

Found: 100 search results. Extracting links from 0:100
Found: 150 image links, done!
SUCCESS - saved https://sc02.alicdn.com/kf/HTB111TgkP3z9KJjy0Fmq6xiwXXaP/200198882/HTB111TgkP3z9KJjy0Fmq6xiwXXaP.jpg - as ./images/swimming_inner_tube/6b83fa4159.jpg
SUCCESS - saved https://encrypted-tbn0.gstatic.com/images?q=tbn%3AANd9GcTPAaNZRCvxkL1A57_x7evCbJ7M7coUAcoBlPAA5-ZOD2HovbXp - as ./images/swimming_inner_tube/63cff4da18.jpg
SUCCESS - saved https://m.media-amazon.com/images/I/71LDwPhTwyL._SR500,500_.jpg - as ./images/swimming_inner_tube/7d0b3cfa64.jpg
SUCCESS - saved https://ak1.ostkcdn.com/images/products/is/images/direct/6b22eb69da765230f2e8c07f5213f19149824dc4/52%22-Chill-Red-and-White-Inflatable-Swimming-Pool-Inner-Tube-Float.jpg - as ./images/swimming_inner_tube/3a2108ed6a.jpg
SUCCESS - saved https://encrypted-tbn0.gstatic.com/images?q=tbn%3AANd9GcRoHiASErtGXE7aoJJsBPYfEo1FXVErZ4T_FJwpw-wtdmMj1BsX - as ./images/swimming_inner_tube/453a556d34.jpg
SUCCESS - saved https://encrypted-tbn0.gs

SUCCESS - saved https://marketplace.canva.com/MABoiGevmvM/1/screen_2x/canva-sexual-young-woman-floating-in-inner-tube-in-swimming-pool-MABoiGevmvM.jpg - as ./images/swimming_inner_tube/dcb0714b3e.jpg
SUCCESS - saved https://encrypted-tbn0.gstatic.com/images?q=tbn%3AANd9GcQvwmFJNvMsPF_zXRVbze5AN2mp4oZzILxzDrDF09-BTtdpji4y - as ./images/swimming_inner_tube/f99585218a.jpg
SUCCESS - saved https://encrypted-tbn0.gstatic.com/images?q=tbn%3AANd9GcRNH8_LZfitH93wS9wVDkVscRS4J_PuRT_zn3UnWBsxVvDn3jZp - as ./images/swimming_inner_tube/ecda88cd87.jpg
SUCCESS - saved https://encrypted-tbn0.gstatic.com/images?q=tbn%3AANd9GcRRQxlDPoUNvS8iSA9K5lp9987R_Rb4dc_a7QH8OdPLioEwlaVr - as ./images/swimming_inner_tube/1ea24d27e1.jpg
SUCCESS - saved https://encrypted-tbn0.gstatic.com/images?q=tbn%3AANd9GcSBwJlWYh9hrRQk3gd_rIxODetiwcMYkXiCnJ-w_Ay7NNi0MVgR - as ./images/swimming_inner_tube/126ef2d3bb.jpg
SUCCESS - saved https://encrypted-tbn0.gstatic.com/images?q=tbn%3AANd9GcQ3dfReJ9z6Kx4TyemDXSEKT8GAFYdlEwVQQWJuzX

SUCCESS - saved https://encrypted-tbn0.gstatic.com/images?q=tbn%3AANd9GcSU_nGUcbYZ3b9cY4MbnyodvrU6BAG4VCvjhWmwKFF8NHYfYART - as ./images/swimming_inner_tube/1b5f2dee41.jpg
SUCCESS - saved https://fscomps.fotosearch.com/compc/SPS/SPS007/people-swim-pool-couple-swimming-stock-photography__1417r-135.jpg - as ./images/swimming_inner_tube/bcc3841fcb.jpg
SUCCESS - saved https://i5.walmartimages.com/asr/2314f00f-f834-4153-a870-b6f46eb5c7e5_1.f41b1031b62aceeac2ea1aa1466e8996.jpeg?odnWidth=450&odnHeight=450&odnBg=ffffff - as ./images/swimming_inner_tube/a74abf8e31.jpg
SUCCESS - saved https://target.scene7.com/is/image/Target/GUEST_4c056d02-874f-41a5-b1f8-8adb506a1671?wid=488&hei=488&fmt=pjpeg - as ./images/swimming_inner_tube/6d3fd874ec.jpg
SUCCESS - saved https://encrypted-tbn0.gstatic.com/images?q=tbn%3AANd9GcRvOwafzaIne_sIHvmaSe-F1755uYWKProS2zYy-mzw8RmwBfMe - as ./images/swimming_inner_tube/5f8e449f7f.jpg
SUCCESS - saved https://ak1.ostkcdn.com/images/products/17958570/49-Jumbo-Chocolate-Fr

SUCCESS - saved https://encrypted-tbn0.gstatic.com/images?q=tbn%3AANd9GcQnXrwen0hRxN83t47uLP2ADGncQRubXawBtRY7q-YCHGLusmQ4 - as ./images/swimming_inner_tube/acbf1ad40b.jpg
SUCCESS - saved https://encrypted-tbn0.gstatic.com/images?q=tbn%3AANd9GcRf81FojLuG-UM-Qg7YvisyVthMo099OJ1X6_umVUGAlKthWkqO - as ./images/swimming_inner_tube/5dfe0249fa.jpg
SUCCESS - saved https://atthemanse.files.wordpress.com/2014/08/swimming-in-an-inner-tube.jpg - as ./images/swimming_inner_tube/1f5db5d503.jpg
SUCCESS - saved https://images.assetsdelivery.com/compings_v2/epicstockmedia/epicstockmedia1412/epicstockmedia141200110.jpg - as ./images/swimming_inner_tube/fc775c0bf8.jpg
SUCCESS - saved https://comps.canstockphoto.com/young-kid-having-fun-in-the-swimming-stock-photo_csp23689144.jpg - as ./images/swimming_inner_tube/30131915d8.jpg
