In [19]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import os

def download_image(url, folder_path, filename):
    response = requests.get(url)
    if response.status_code == 200:
        with open(os.path.join(folder_path, filename), 'wb') as file:
            file.write(response.content)

def crawl_website(url, folder_path):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')

    divs = soup.find_all('div', class_='profile-images')

    for div in divs:
        for img in div.find_all('img'):
            img_url = urljoin(url, img.get('src'))
            img_name = img_url.split('/')[-1]
            download_image(img_url, folder_path, img_name)

    # header_section = soup.find('section', class_='pokedex-pokemon-header')
    # if next_link:
    #     next_page_url = urljoin(url, next_link.get('href'))
    #     parse_page(next_page_url, folder_path)
    

website_url = 'https://pokemondb.net/pokedex/national'  # Replace with your target website
save_folder = 'downloaded_images'  # Replace with your desired folder
os.makedirs(save_folder, exist_ok=True)

# crawl_website(website_url, save_folder)


In [20]:
response = requests.get(website_url)
soup = BeautifulSoup(response.text, 'html.parser')
soup

<!DOCTYPE html>

<html lang="en">
<head>
<meta charset="utf-8"/>
<title>List of Pokémon (sprites gallery) | Pokémon Database</title>
<link href="https://img.pokemondb.net" rel="preconnect"/>
<link href="https://s.pokemondb.net" rel="preconnect"/>
<link as="font" crossorigin="" href="/static/fonts/fira-sans-v17-latin-400.woff2" rel="preload" type="font/woff2"/>
<link as="font" crossorigin="" href="/static/fonts/fira-sans-v17-latin-400i.woff2" rel="preload" type="font/woff2"/>
<link as="font" crossorigin="" href="/static/fonts/fira-sans-v17-latin-600.woff2" rel="preload" type="font/woff2"/>
<link href="/static/css/pokemondb-ebd6c5d43a.css" rel="stylesheet"/>
<meta content="width=device-width, initial-scale=1" name="viewport"/>
<meta content="A simple list of all 1010 Pokémon by National Dex number, with images." name="description" property="og:description"/>
<link href="https://pokemondb.net/pokedex/national" rel="canonical"/>
<meta content="https://pokemondb.net/pokedex/national" proper

In [21]:
infocards = soup.find_all('div', class_='infocard')
print(len(infocards))
infocards[:5]

1024


[<div class="infocard"><span class="infocard-lg-img"><a href="/pokedex/bulbasaur"><picture><source srcset="https://img.pokemondb.net/sprites/home/normal/2x/avif/bulbasaur.avif" type="image/avif"/><img alt="Bulbasaur" class="img-fixed img-sprite" loading="lazy" src="https://img.pokemondb.net/sprites/home/normal/2x/bulbasaur.jpg"/></picture></a></span><span class="infocard-lg-data text-muted"><small>#0001</small><br/> <a class="ent-name" href="/pokedex/bulbasaur">Bulbasaur</a><br/> <small><a class="itype grass" href="/type/grass">Grass</a> · <a class="itype poison" href="/type/poison">Poison</a></small></span></div>,
 <div class="infocard"><span class="infocard-lg-img"><a href="/pokedex/ivysaur"><picture><source srcset="https://img.pokemondb.net/sprites/home/normal/2x/avif/ivysaur.avif" type="image/avif"/><img alt="Ivysaur" class="img-fixed img-sprite" loading="lazy" src="https://img.pokemondb.net/sprites/home/normal/2x/ivysaur.jpg"/></picture></a></span><span class="infocard-lg-data tex

In [22]:
links = []
images = []
for infocard in infocards:
    cur_links = infocard.find_all('a')
    links += cur_links
    img = infocard.find('img')
    images.append(img)
print(len(links))
links[:5]

3597


[<a href="/pokedex/bulbasaur"><picture><source srcset="https://img.pokemondb.net/sprites/home/normal/2x/avif/bulbasaur.avif" type="image/avif"/><img alt="Bulbasaur" class="img-fixed img-sprite" loading="lazy" src="https://img.pokemondb.net/sprites/home/normal/2x/bulbasaur.jpg"/></picture></a>,
 <a class="ent-name" href="/pokedex/bulbasaur">Bulbasaur</a>,
 <a class="itype grass" href="/type/grass">Grass</a>,
 <a class="itype poison" href="/type/poison">Poison</a>,
 <a href="/pokedex/ivysaur"><picture><source srcset="https://img.pokemondb.net/sprites/home/normal/2x/avif/ivysaur.avif" type="image/avif"/><img alt="Ivysaur" class="img-fixed img-sprite" loading="lazy" src="https://img.pokemondb.net/sprites/home/normal/2x/ivysaur.jpg"/></picture></a>]

In [28]:
for image in images:
    img_url = urljoin(website_url, image.get('src'))
    img_name = img_url.split('/')[-1]
    download_image(img_url, save_folder, img_name)

In [24]:
print(len(images))
images[:5]

1024


[<img alt="Bulbasaur" class="img-fixed img-sprite" loading="lazy" src="https://img.pokemondb.net/sprites/home/normal/2x/bulbasaur.jpg"/>,
 <img alt="Ivysaur" class="img-fixed img-sprite" loading="lazy" src="https://img.pokemondb.net/sprites/home/normal/2x/ivysaur.jpg"/>,
 <img alt="Venusaur" class="img-fixed img-sprite" loading="lazy" src="https://img.pokemondb.net/sprites/home/normal/2x/venusaur.jpg"/>,
 <img alt="Charmander" class="img-fixed img-sprite" loading="lazy" src="https://img.pokemondb.net/sprites/home/normal/2x/charmander.jpg"/>,
 <img alt="Charmeleon" class="img-fixed img-sprite" loading="lazy" src="https://img.pokemondb.net/sprites/home/normal/2x/charmeleon.jpg"/>]

In [15]:
dedup_links = set()
for link in links:
    cur_href = link.get('href')
    if cur_href is not None and 'pokedex' in link.get('href'):
        dedup_links.add(cur_href)
print(len(dedup_links))
dedup_links

1024


{'/pokedex/murkrow',
 '/pokedex/nosepass',
 '/pokedex/noibat',
 '/pokedex/vibrava',
 '/pokedex/orthworm',
 '/pokedex/burmy',
 '/pokedex/dartrix',
 '/pokedex/bruxish',
 '/pokedex/steenee',
 '/pokedex/maschiff',
 '/pokedex/glimmora',
 '/pokedex/wailmer',
 '/pokedex/kingambit',
 '/pokedex/poltchageist',
 '/pokedex/chien-pao',
 '/pokedex/blissey',
 '/pokedex/mankey',
 '/pokedex/iron-valiant',
 '/pokedex/wishiwashi',
 '/pokedex/shuckle',
 '/pokedex/honedge',
 '/pokedex/clawitzer',
 '/pokedex/bulbasaur',
 '/pokedex/eelektross',
 '/pokedex/camerupt',
 '/pokedex/vaporeon',
 '/pokedex/staraptor',
 '/pokedex/castform',
 '/pokedex/klefki',
 '/pokedex/sinistcha',
 '/pokedex/marill',
 '/pokedex/azurill',
 '/pokedex/iron-hands',
 '/pokedex/sableye',
 '/pokedex/roselia',
 '/pokedex/politoed',
 '/pokedex/pupitar',
 '/pokedex/diggersby',
 '/pokedex/ogerpon',
 '/pokedex/unown',
 '/pokedex/iron-treads',
 '/pokedex/skarmory',
 '/pokedex/taillow',
 '/pokedex/corviknight',
 '/pokedex/milotic',
 '/pokedex/oi

In [16]:
pokemon_links = list(dedup_links)
pokemon_links

['/pokedex/murkrow',
 '/pokedex/nosepass',
 '/pokedex/noibat',
 '/pokedex/vibrava',
 '/pokedex/orthworm',
 '/pokedex/burmy',
 '/pokedex/dartrix',
 '/pokedex/bruxish',
 '/pokedex/steenee',
 '/pokedex/maschiff',
 '/pokedex/glimmora',
 '/pokedex/wailmer',
 '/pokedex/kingambit',
 '/pokedex/poltchageist',
 '/pokedex/chien-pao',
 '/pokedex/blissey',
 '/pokedex/mankey',
 '/pokedex/iron-valiant',
 '/pokedex/wishiwashi',
 '/pokedex/shuckle',
 '/pokedex/honedge',
 '/pokedex/clawitzer',
 '/pokedex/bulbasaur',
 '/pokedex/eelektross',
 '/pokedex/camerupt',
 '/pokedex/vaporeon',
 '/pokedex/staraptor',
 '/pokedex/castform',
 '/pokedex/klefki',
 '/pokedex/sinistcha',
 '/pokedex/marill',
 '/pokedex/azurill',
 '/pokedex/iron-hands',
 '/pokedex/sableye',
 '/pokedex/roselia',
 '/pokedex/politoed',
 '/pokedex/pupitar',
 '/pokedex/diggersby',
 '/pokedex/ogerpon',
 '/pokedex/unown',
 '/pokedex/iron-treads',
 '/pokedex/skarmory',
 '/pokedex/taillow',
 '/pokedex/corviknight',
 '/pokedex/milotic',
 '/pokedex/oi

In [17]:
cur_web = urljoin(website_url, pokemon_links[0])
cur_web

'https://pokemondb.net/pokedex/murkrow'

In [18]:
response = requests.get(cur_web)
soup = BeautifulSoup(response.text, 'html.parser')
soup

<!DOCTYPE html>

<html lang="en">
<head>
<meta charset="utf-8"/>
<title>Murkrow Pokédex: stats, moves, evolution &amp; locations | Pokémon Database</title>
<link href="https://img.pokemondb.net" rel="preconnect"/>
<link href="https://s.pokemondb.net" rel="preconnect"/>
<link as="font" crossorigin="" href="/static/fonts/fira-sans-v17-latin-400.woff2" rel="preload" type="font/woff2"/>
<link as="font" crossorigin="" href="/static/fonts/fira-sans-v17-latin-400i.woff2" rel="preload" type="font/woff2"/>
<link as="font" crossorigin="" href="/static/fonts/fira-sans-v17-latin-600.woff2" rel="preload" type="font/woff2"/>
<link href="/static/css/pokemondb-ebd6c5d43a.css" rel="stylesheet"/>
<link href="/static/css/type-chart-76998cbd3d.css" rel="stylesheet"/>
<link href="/static/css/evolution-6ccf58cfbe.css" rel="stylesheet"/>
<style>.cell-barchart{width:100%;min-width:150px}.barchart-bar{height:.75rem;border-radius:4px;background-color:#a3a3a3;border:1px solid #737373;border-color:rgba(0,0,0,.15)

In [None]:
soup.find_all(attrs={"data-title": f"{} official artwork"})

In [None]:
def crawl_website(url, folder_path):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')

    divs = soup.find_all('div', class_='profile-images')
    urljoin(url, img.get('src'))

crawl_website(crawl_website, save_folder)