- Note: The number of API calls is rate-limited, so the data needs to be saved locally.
- The file is too big and cannot be saved as a single file.
- Looking into removing the parts of the JSON file that are not needed.
    - Decided to use multithreading to download several at the same time.
    
- Need to explain gender rate (refer to the docs).

https://pokeapi.co/docs/v2.html

## In order to reduce the number of API calls, we decided to save the URLs locally.
### Fixed limit: 100 API requests per IP address per minute

In [2]:
import requests
import json
import collections
import time
import concurrent.futures
import csv
import functools
from pprint import pprint
from typing import List, Dict
from IPython.display import clear_output

Each Pokemon has several JSON resources with different information. The function below runs through every Pokemon of a given JSON type (the input) and saves the names and URLs as a CSV file. This is what it looks like at the end:

```
bulbasaur,https://pokeapi.co/api/v2/pokemon-species/1/
ivysaur,https://pokeapi.co/api/v2/pokemon-species/2/
venusaur,https://pokeapi.co/api/v2/pokemon-species/3/
charmander,https://pokeapi.co/api/v2/pokemon-species/4/
charmeleon,https://pokeapi.co/api/v2/pokemon-species/5/
charizard,https://pokeapi.co/api/v2/pokemon-species/6/
squirtle,https://pokeapi.co/api/v2/pokemon-species/7/
wartortle,https://pokeapi.co/api/v2/pokemon-species/8/
blastoise,https://pokeapi.co/api/v2/pokemon-species/9/
caterpie,https://pokeapi.co/api/v2/pokemon-species/10/
...
```


In [127]:
def request_and_save_urls(json_type: str):
    """Save the name/URL index of one PokeAPI resource type as a CSV file.

    Walks the paginated listing at ``https://pokeapi.co/api/v2/{json_type}/``
    and writes one ``name,url`` row per entry to
    ``data/{json_type}/{json_type}-urls.csv``. The target directory must
    already exist.

    Args:
        json_type: Resource type to list, e.g. ``"pokemon"`` or
            ``"pokemon-species"``.

    Raises:
        requests.HTTPError: If the API answers with an error status
            (for example when the rate limit is exceeded).
    """
    # The listing is paginated (20 entries per page by default).
    url = fr"https://pokeapi.co/api/v2/{json_type}/"
    # newline="" lets the csv writer control line endings on every platform.
    with open(f"data/{json_type}/{json_type}-urls.csv", "w", newline="") as file:
        pokemon_url_writer = csv.writer(file, delimiter=",", lineterminator="\n")

        # "next" is null on the last page, which ends the loop.
        while url:
            response = requests.get(url, timeout=30)
            # Fail loudly on HTTP errors instead of parsing an error body.
            response.raise_for_status()
            json_data = response.json()

            # Write the columns explicitly so the name,url order is guaranteed.
            for pokemon in json_data["results"]:
                pokemon_url_writer.writerow([pokemon["name"], pokemon["url"]])

            url = json_data["next"]

### We want the following json files

In [126]:
# Build the URL index for every resource type we want to cache locally.
for resource_type in ("pokemon", "pokemon-species"):
    request_and_save_urls(resource_type)

### Load the csv file and request each Pokemon
This cell fetches data from PokeAPI and saves it locally. We load the CSV file saved earlier and request the information for each Pokemon. We group all the information by type (pokemon-species, ...) and save each type as a JSON file. However, because of the **fixed limit** on how many API calls can be made per minute (100), there is a delay in the code, which makes the total run time 15-20 minutes.

We also have an exclusion list to drop data that is not needed from the response JSON.

In [3]:
def request_and_cache(json_type, exclude: List[str]) -> None:
    """Download every entry of one resource type and cache it as a single JSON file.

    Reads the ``name,url`` rows from ``data/{json_type}/{json_type}-urls.csv``,
    requests each URL, drops the top-level keys listed in ``exclude`` and
    writes the result, keyed by entry name, to
    ``data/{json_type}/{json_type}.json``.

    Args:
        json_type: Resource type to download, e.g. ``"pokemon"``.
        exclude: Top-level keys to remove from each response to keep the
            cached file small.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    # Read all rows up front; the context manager closes the CSV file.
    with open(f"data/{json_type}/{json_type}-urls.csv", "r", newline="") as url_file:
        pokemon_urls = list(csv.reader(url_file, delimiter=","))

    return_json = {}

    # Request each url and collect everything into one big dictionary
    for i, (pokemon_name, pokemon_url) in enumerate(pokemon_urls):
        clear_output()
        print(f"{i}. Working on type: '{json_type}', pokemon: {pokemon_name}")
        response = requests.get(pokemon_url, timeout=30)
        # Fail loudly on HTTP errors rather than caching an error payload.
        response.raise_for_status()
        pokemon_json = response.json()

        # To reduce the file size, we exclude some parts of the response
        return_json[pokemon_name] = {k: v for k, v in pokemon_json.items() if k not in exclude}

        # Throttle to one request per second to stay under the API limit.
        # NOTE(review): callers running this in several threads share one
        # per-IP limit - confirm the combined rate stays below it.
        time.sleep(1)

    # Closing the file via `with` guarantees the JSON is flushed to disk.
    with open(f"data/{json_type}/{json_type}.json", "w") as json_file:
        json.dump(return_json, json_file)

We use multithreading to make this process faster

In [4]:
# Each job is (json_type, top-level keys to drop from every response).
cache_jobs = [
    ("pokemon", ["game_indices",
                 "moves",
                 ]),
    ("pokemon-species", ["genera",
                         "names",
                         "pokedex_numbers",
                         "varieties",
                         "flavor_text_entries",
                         ]),
]

# Download both resource types concurrently, one worker thread per type.
futures = []
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
    for json_type, exclude in cache_jobs:
        futures.append(executor.submit(request_and_cache, json_type, exclude=exclude))

# Leaving the `with` block waits for both jobs. Calling result() re-raises any
# exception from a worker; an unconsumed executor.map() iterator would have
# dropped it silently.
for future in futures:
    future.result()

963. Working on type: 'pokemon', pokemon: necrozma-ultra


### Load Pokemon Json files and Analyse

#### Pokemon

In [59]:
# Merged per-pokemon records, keyed by pokemon name. A defaultdict is used so
# later cells can add fields for names that have no entry yet.
all_pokemons_info = collections.defaultdict(dict)

# Load the cached "pokemon" data; the context manager closes the file.
with open("data/pokemon/pokemon.json", "r") as pokemon_file:
    pokemon_json = json.load(pokemon_file)

for pokemon, info in pokemon_json.items():
    pokemon_info = all_pokemons_info[pokemon]

    # Get height, weight, name
    for key in ("height", "weight", "name"):
        pokemon_info[key] = info[key]

    # Get the base stats (hp, attack, defense, special-attack,
    # special-defense, speed), each stored under the stat's own name
    for stats in info["stats"]:
        pokemon_info[stats["stat"]["name"]] = stats["base_stat"]

    # Get list of abilities
    # NOTE(review): the key is spelled "abilites"; it is kept as-is because it
    # ends up in the saved JSON - rename only together with its consumers.
    pokemon_info["abilites"] = [ability["ability"]["name"] for ability in info["abilities"]]

    # Get Pokemon Image (URL of the default front sprite)
    pokemon_info["sprites"] = info["sprites"]["front_default"]

    # Get Pokemon Types
    pokemon_info["types"] = [type_["type"]["name"] for type_ in info["types"]]

    # Get Pokemon Species Name
    pokemon_info["species"] = info["species"]["name"]

#### Pokemon Species

In [60]:
# Load the cached "pokemon-species" data; the context manager closes the file.
with open("data/pokemon-species/pokemon-species.json", "r") as species_file:
    pokemon_species_json = json.load(species_file)

# Merge the species-level fields into the records in all_pokemons_info.
for pokemon, info in pokemon_species_json.items():
    pokemon_info = all_pokemons_info[pokemon]

    # Get basic info
    for key in ("base_happiness", "capture_rate", "gender_rate"):
        pokemon_info[key] = info[key]

    # Get list of egg groups
    pokemon_info["egg_groups"] = [group["name"] for group in info["egg_groups"]]

    # Get Evolution Chain: record the predecessor here and register this
    # pokemon in the predecessor's "evolve_to" list
    if info["evolves_from_species"]:
        pokemon_info["evolves_from"] = info["evolves_from_species"]["name"]
        prev_pokemon = all_pokemons_info[pokemon_info["evolves_from"]]
        prev_pokemon.setdefault("evolve_to", []).append(pokemon)

    # Get basic info. These values are dictionaries (or empty), so only the
    # "name" field is kept and missing values are skipped
    for key in ("color", "generation", "growth_rate", "habitat", "shape"):
        if info[key]:
            pokemon_info[key] = info[key]["name"]

In [61]:
# Spot-check a few merged records by pretty-printing [name, record] pairs.
sample_names = ("bulbasaur", "ivysaur", "venusaur", "eevee")
for sample_name in sample_names:
    sample_record = all_pokemons_info[sample_name]
    pprint([sample_name, sample_record])

['bulbasaur',
 {'abilites': ['chlorophyll', 'overgrow'],
  'attack': 49,
  'base_happiness': 70,
  'capture_rate': 45,
  'color': 'green',
  'defense': 49,
  'egg_groups': ['plant', 'monster'],
  'evolve_to': ['ivysaur'],
  'gender_rate': 1,
  'generation': 'generation-i',
  'growth_rate': 'medium-slow',
  'habitat': 'grassland',
  'height': 7,
  'hp': 45,
  'name': 'bulbasaur',
  'shape': 'quadruped',
  'special-attack': 65,
  'special-defense': 65,
  'species': 'bulbasaur',
  'speed': 45,
  'sprites': 'https://raw.githubusercontent.com/PokeAPI/sprites/master/sprites/pokemon/1.png',
  'types': ['poison', 'grass'],
  'weight': 69}]
['ivysaur',
 {'abilites': ['chlorophyll', 'overgrow'],
  'attack': 62,
  'base_happiness': 70,
  'capture_rate': 45,
  'color': 'green',
  'defense': 63,
  'egg_groups': ['plant', 'monster'],
  'evolve_to': ['venusaur'],
  'evolves_from': 'bulbasaur',
  'gender_rate': 1,
  'generation': 'generation-i',
  'growth_rate': 'medium-slow',
  'habitat': 'grassland'

In [62]:
# Persist the merged records. The context manager closes the file, so the
# JSON is fully flushed to disk.
with open("data/all_pokemons_info.json", "w") as output_file:
    json.dump(all_pokemons_info, output_file)