Note: The number of API calls is rate-limited, so the responses need to be saved locally.
The full data set is too big to save as a single file.
Looking into removing the parts of the JSON that we don't need.
https://pokeapi.co/docs/v2.html

## To reduce the number of API calls, we save the resource URLs locally.
### Fixed limit: 100 API requests per IP address per minute

In [2]:
import concurrent.futures
import csv
import functools
import json
import os
import time
from pprint import pprint
from typing import Dict, List, Optional

import requests
from IPython.display import clear_output

In [127]:
def request_and_save_urls(json_type: str) -> None:
    """Download the URL index of a PokeAPI resource type and save it as CSV.

    The list endpoint ``https://pokeapi.co/api/v2/{json_type}/`` is paginated.
    Starting from the first page, this follows each page's ``"next"`` link
    until it is empty and writes one CSV row per entry of ``"results"`` to
    ``data/{json_type}/{json_type}-urls.csv``.

    Args:
        json_type: Resource name used in both the endpoint URL and the output
            path, e.g. ``"pokemon"`` or ``"pokemon-species"``.

    Raises:
        requests.HTTPError: If any page request returns an error status.
    """
    url = f"https://pokeapi.co/api/v2/{json_type}/"

    # Create the target folder up front so open() cannot fail on a fresh checkout.
    os.makedirs(f"data/{json_type}", exist_ok=True)

    # newline="" lets the csv writer control line endings itself.
    with open(f"data/{json_type}/{json_type}-urls.csv", "w", newline="") as file:
        pokemon_url_writer = csv.writer(file, delimiter=",", lineterminator="\n")

        # The last page has an empty "next" link, which ends the loop.
        while url:
            response = requests.get(url)
            # Stop on an HTTP error instead of failing later on a missing "results" key.
            response.raise_for_status()
            json_data = response.json()

            # One row per listed resource, holding that entry's values in response order.
            for pokemon in json_data["results"]:
                pokemon_url_writer.writerow(list(pokemon.values()))

            url = json_data["next"]

In [126]:
# Build the URL index CSV for each resource type, in this order.
for resource_type in ("pokemon", "pokemon-species"):
    request_and_save_urls(resource_type)

### Load the csv file and request each Pokemon

In [133]:
# Load the rows saved by request_and_save_urls("pokemon").
# That function writes to data/pokemon/pokemon-urls.csv, so read from the same place.
# The `with` block closes the file handle once the rows are in memory.
with open("data/pokemon/pokemon-urls.csv", "r", newline="") as urls_file:
    pokemon_urls = list(csv.reader(urls_file, delimiter=","))
pprint(pokemon_urls)

[['bulbasaur', 'https://pokeapi.co/api/v2/pokemon/1/'],
 ['ivysaur', 'https://pokeapi.co/api/v2/pokemon/2/'],
 ['venusaur', 'https://pokeapi.co/api/v2/pokemon/3/'],
 ['charmander', 'https://pokeapi.co/api/v2/pokemon/4/'],
 ['charmeleon', 'https://pokeapi.co/api/v2/pokemon/5/'],
 ['charizard', 'https://pokeapi.co/api/v2/pokemon/6/'],
 ['squirtle', 'https://pokeapi.co/api/v2/pokemon/7/'],
 ['wartortle', 'https://pokeapi.co/api/v2/pokemon/8/'],
 ['blastoise', 'https://pokeapi.co/api/v2/pokemon/9/'],
 ['caterpie', 'https://pokeapi.co/api/v2/pokemon/10/'],
 ['metapod', 'https://pokeapi.co/api/v2/pokemon/11/'],
 ['butterfree', 'https://pokeapi.co/api/v2/pokemon/12/'],
 ['weedle', 'https://pokeapi.co/api/v2/pokemon/13/'],
 ['kakuna', 'https://pokeapi.co/api/v2/pokemon/14/'],
 ['beedrill', 'https://pokeapi.co/api/v2/pokemon/15/'],
 ['pidgey', 'https://pokeapi.co/api/v2/pokemon/16/'],
 ['pidgeotto', 'https://pokeapi.co/api/v2/pokemon/17/'],
 ['pidgeot', 'https://pokeapi.co/api/v2/pokemon/18/'],

### Download and save Pokemon

In [3]:
def request_and_cache(json_type, exclude: List[str]) -> None:
    """Request every URL in a resource's URL index and cache the results as one JSON file.

    Reads ``data/{json_type}/{json_type}-urls.csv`` (rows of name, url),
    requests each url, drops the top-level keys named in ``exclude`` and writes
    the remaining data, keyed by name, to ``data/{json_type}/{json_type}.json``.

    Args:
        json_type: Resource name used to build the input and output paths.
        exclude: Top-level keys to drop from each response to reduce file size.

    Raises:
        requests.HTTPError: If a request returns an error status. Nothing is
            written to the JSON file in that case.
    """
    return_json = {}

    # Read the whole URL index first; the `with` block closes the file handle.
    with open(f"data/{json_type}/{json_type}-urls.csv", "r", newline="") as urls_file:
        pokemon_urls = list(csv.reader(urls_file, delimiter=","))

    for i, (pokemon_name, pokemon_url) in enumerate(pokemon_urls):
        # Replace the previous progress line instead of flooding the cell output.
        clear_output()
        print(f"{i}. Working on type: '{json_type}', pokemon: {pokemon_name}")

        response = requests.get(pokemon_url)
        # Stop on an HTTP error rather than caching an error payload.
        response.raise_for_status()
        pokemon_json = response.json()

        # To reduce the file size, drop the excluded top-level keys.
        return_json[pokemon_name] = {k: v for k, v in pokemon_json.items() if k not in exclude}

        # Pause between requests so we do not exceed the API limit.
        time.sleep(1)

    # Write everything as one JSON file; the `with` block flushes and closes it.
    with open(f"data/{json_type}/{json_type}.json", "w") as json_file:
        json.dump(return_json, json_file)

In [4]:
# Resource types to cache, each with the top-level keys to drop from its responses.
cache_jobs = [
    ("pokemon", ["game_indices", "moves", "flavor_text_entries"]),
    ("pokemon-species", ["genera",
                         "names",
                         "pokedex_numbers",
                         "varieties",
                         "flavor_text_entries"]),
]

# Download both resource types concurrently, one worker thread per type.
# NOTE: request_and_cache sleeps 1 s per request, so two workers together can
# approach 120 requests per minute; lower max_workers if the API limit is hit.
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
    futures = [
        executor.submit(request_and_cache, json_type, exclude=exclude)
        for json_type, exclude in cache_jobs
    ]
    # An exception raised in a worker is stored on its future; result()
    # re-raises it here so a failed download cannot pass silently.
    for future in concurrent.futures.as_completed(futures):
        future.result()

963. Working on type: 'pokemon', pokemon: necrozma-ultra


### Analyse Pokemon

In [116]:
def update_species_information(pokemon_dict: Dict, species_url: str) -> None:
    """Add species-level fields to ``pokemon_dict`` in place.

    Makes a single request to ``species_url`` and uses it to set the
    ``"evolve_from"``, ``"generation"`` and ``"pokedex_color"`` keys. It also
    registers this pokemon in its predecessor's ``"evolve_to"`` list.

    Args:
        pokemon_dict: Record to update; it must already contain ``"name"``.
        species_url: URL of the pokemon's species resource.

    Note:
        The predecessor is looked up in the module-level ``all_pokemons``
        mapping, which must be defined before this function is called.
    """
    # Get the species JSON once to increase speed and reduce the number of API calls.
    species_request_json = get_species_request(species_url)

    # Evolution chain: read the value back from pokemon_dict itself, so the
    # function does not depend on a caller's loop variable.
    pokemon_dict["evolve_from"] = get_evolves_from_species(species_request_json)
    update_evolution_chain(pokemon_dict["evolve_from"], pokemon_dict["name"], all_pokemons)

    # Generation the species was introduced in.
    pokemon_dict["generation"] = get_generation(species_request_json)

    # Pokedex color of the species.
    pokemon_dict["pokedex_color"] = get_pokedex_color(species_request_json)

def get_species_request(species_url: str) -> Dict:
    """Request ``species_url`` and return the decoded JSON body.

    Args:
        species_url: URL of a species resource.

    Returns:
        The response body parsed from JSON.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    response = requests.get(species_url)
    # Fail here on an HTTP error instead of handing an error payload to the callers.
    response.raise_for_status()
    return response.json()

def get_evolves_from_species(species_data: Dict) -> Optional[str]:
    """Return the name of the species this one evolves from, if any.

    Args:
        species_data: Species record containing an ``"evolves_from_species"`` key.

    Returns:
        ``species_data["evolves_from_species"]["name"]``, or ``None`` when the
        ``"evolves_from_species"`` value is empty.
    """
    # Original author's note: "be careful with eve here" (presumably Eevee; TODO confirm).
    evolves_from = species_data["evolves_from_species"]
    if not evolves_from:
        return None
    return evolves_from["name"]

# This is to handle Branched evolution. 
# More here: https://bulbapedia.bulbagarden.net/wiki/List_of_Pok%C3%A9mon_with_branched_evolutions
def update_evolution_chain(from_pokemon: str, to_pokemon: str, all_pokemons: dict) -> None:
    """Record that ``from_pokemon`` evolves into ``to_pokemon``.

    Appends ``to_pokemon`` to ``all_pokemons[from_pokemon]["evolve_to"]``. The
    value is a list because one pokemon can have several evolutions.

    Args:
        from_pokemon: Name of the predecessor; nothing happens when it is falsy.
        to_pokemon: Name of the pokemon that evolves from ``from_pokemon``.
        all_pokemons: Mapping of pokemon name to its record; updated in place.
            Works with a plain ``dict`` as well as a ``defaultdict``: a missing
            predecessor entry is created as an empty record.
    """
    if not from_pokemon:
        return
    # setdefault avoids a KeyError when the predecessor has no record yet.
    predecessor = all_pokemons.setdefault(from_pokemon, {})
    predecessor.setdefault("evolve_to", []).append(to_pokemon)

def get_generation(species_data: Dict) -> str:
    """Return the ``"name"`` of the ``"generation"`` entry of a species record."""
    generation = species_data["generation"]
    return generation["name"]

def get_pokedex_color(species_data: Dict) -> str:
    """Return the ``"name"`` of the ``"color"`` entry of a species record."""
    color = species_data["color"]
    return color["name"]
    

In [169]:
import collections

# Build one record per pokemon, keyed by name. A defaultdict is used so that
# looking up a name that has no record yet creates an empty one.
all_pokemons = collections.defaultdict(dict)
for pokemon_name, pokemon_url in pokemon_urls[:10]:
    print(f"Working on {pokemon_name}")
    # Pause before each pokemon so we stay under the API request limit.
    time.sleep(1)

    response = requests.get(pokemon_url)
    # Fail on an HTTP error status here rather than with a KeyError further down.
    response.raise_for_status()
    pokemon_json = response.json()

    # D is the record being filled in for this pokemon.
    D = all_pokemons[pokemon_name]

    # Copy height, weight and name straight from the response.
    for key in ("height", "weight", "name"):
        D[key] = pokemon_json[key]

    # Store every base stat under its own name
    # (hp, attack, defense, special-attack, special-defense, speed).
    for stats in pokemon_json["stats"]:
        D[stats["stat"]["name"]] = stats["base_stat"]

    # List of ability names. The key is spelled "abilites" (sic); it is left
    # unchanged because later code may read it under that name.
    D["abilites"] = [ability["ability"]["name"] for ability in pokemon_json["abilities"]]

    # URL of the default front sprite image.
    D["sprites"] = pokemon_json["sprites"]["front_default"]

    # Primary type, plus the secondary type when the pokemon has two.
    D["type1"] = pokemon_json["types"][0]["type"]["name"]
    if len(pokemon_json["types"]) == 2:
        D["type2"] = pokemon_json["types"][1]["type"]["name"]

    # Evolution chain, generation and pokedex_color all come from the species URL.
    update_species_information(D, pokemon_json["species"]["url"])


pprint(all_pokemons)

Working on bulbasaur
Working on ivysaur
Working on venusaur
Working on charmander
Working on charmeleon
Working on charizard
Working on squirtle
Working on wartortle
Working on blastoise
Working on caterpie
defaultdict(<class 'dict'>,
            {'blastoise': {'abilites': ['rain-dish', 'torrent'],
                           'attack': 83,
                           'defense': 100,
                           'evolve_from': 'wartortle',
                           'generation': 'generation-i',
                           'height': 16,
                           'hp': 79,
                           'name': 'blastoise',
                           'pokedex_color': 'blue',
                           'special-attack': 85,
                           'special-defense': 105,
                           'speed': 78,
                           'sprites': 'https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/9.png',
                           'type1': 'water',
                     

In [15]:
# Show only the top-level keys of one pokemon record (depth=1 collapses nested values).
# NOTE(review): `pokemon_information` is not assigned in any cell shown in this
# notebook; presumably it held one raw /pokemon/ response. Confirm and define
# it before this cell so the notebook survives Restart & Run All.
pprint(pokemon_information,depth=1)

{'abilities': [...],
 'base_experience': 64,
 'forms': [...],
 'game_indices': [...],
 'height': 7,
 'held_items': [],
 'id': 1,
 'is_default': True,
 'location_area_encounters': 'https://pokeapi.co/api/v2/pokemon/1/encounters',
 'moves': [...],
 'name': 'bulbasaur',
 'order': 1,
 'species': {...},
 'sprites': {...},
 'stats': [...],
 'types': [...],
 'weight': 69}
