In [5]:
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

import pandas as pd
import requests

# Root of the PokeAPI v2 REST API; list_pokemon() builds its listing URL from it.
BASE = "https://pokeapi.co/api/v2"

def list_pokemon(limit=50, offset=0):
    """Return the detail URLs listed by one page of the ``/pokemon`` endpoint.

    Parameters
    ----------
    limit : int, default 50
        Page size sent as the ``limit`` query parameter.
    offset : int, default 0
        Starting position sent as the ``offset`` query parameter.

    Returns
    -------
    list
        The ``url`` value of every entry under the response's ``results``
        key; an empty list when that key is absent.

    Raises
    ------
    Whatever ``requests.get`` or ``raise_for_status`` raises (for example on
    a timeout or an HTTP error status).
    """
    response = requests.get(f"{BASE}/pokemon?limit={limit}&offset={offset}", timeout=20)
    response.raise_for_status()

    entries = response.json().get("results", [])
    detail_urls = []
    for entry in entries:
        detail_urls.append(entry["url"])
    return detail_urls

def fetch_json(url, retries=2, backoff=0.5):
    """Fetch ``url`` and return its decoded JSON body, retrying on failure.

    Parameters
    ----------
    url : str
        Address to request with a 20 second timeout.
    retries : int, default 2
        Number of extra attempts after the first one. Negative values are
        treated as 0, so at least one request is always made.
    backoff : float, default 0.5
        Base delay in seconds before the next attempt; it doubles after each
        failure (0.5s, 1s, 2s, ...). Pass 0 to retry immediately.

    Returns
    -------
    The parsed JSON payload of the first successful response.

    Raises
    ------
    Exception
        Whatever the final attempt raised (request failure, HTTP error status
        from ``raise_for_status``, or a JSON decoding error).
    """
    attempts = max(0, retries) + 1
    for attempt in range(attempts):
        try:
            response = requests.get(url, timeout=20)
            response.raise_for_status()
            return response.json()
        except Exception:
            # Out of attempts: surface the last error to the caller.
            if attempt == attempts - 1:
                raise
            # Pause before retrying so a struggling or rate-limiting server
            # is not hit again immediately by every worker thread.
            if backoff > 0:
                time.sleep(backoff * 2 ** attempt)

def fetch_pokemon_batch(urls, workers=10):
    """Fetch many URLs concurrently and return their JSON payloads in input order.

    Parameters
    ----------
    urls : iterable of str
        Addresses passed one by one to ``fetch_json``.
    workers : int, default 10
        ``max_workers`` for the thread pool.

    Returns
    -------
    list
        The truthy payloads, ordered like ``urls``. URLs whose fetch raised
        are reported with ``print`` and left out, as are empty payloads.
    """
    url_list = list(urls)
    # One slot per URL so the output order follows the input order rather
    # than whichever request happens to finish first.
    slots = [None] * len(url_list)
    with ThreadPoolExecutor(max_workers=workers) as ex:
        futures = {ex.submit(fetch_json, u): i for i, u in enumerate(url_list)}
        # Still iterate in completion order so failures are logged promptly.
        for fut in as_completed(futures):
            position = futures[fut]
            try:
                data = fut.result()
            except Exception as e:
                print(f"Failed to fetch {url_list[position]}: {e}")
            else:
                if data:
                    slots[position] = data
    return [item for item in slots if item is not None]

def get_50_pokemon_df(limit=50, workers=10):
    """Download the first ``limit`` Pokémon and return them as a flat DataFrame.

    Parameters
    ----------
    limit : int, default 50
        Forwarded to ``list_pokemon`` as the page size.
    workers : int, default 10
        Forwarded to ``fetch_pokemon_batch`` as the thread count.

    Returns
    -------
    pandas.DataFrame
        One row per fetched payload, with nested dicts flattened into
        ``parent_child`` columns; an empty frame when nothing was fetched.
    """
    detail_urls = list_pokemon(limit=limit)
    payloads = fetch_pokemon_batch(detail_urls, workers=workers)
    # Nested keys are joined with "_"; pick another sep for different column names.
    return pd.json_normalize(payloads, sep="_") if payloads else pd.DataFrame()

# Demo run: build the frame and show its size plus a sample of its columns
if __name__ == "__main__":
    df = get_50_pokemon_df(limit=50, workers=10)
    print(df.shape)
    # Only the first 30 column names are shown to keep the output short.
    first_columns = df.columns.tolist()[:30]
    print(first_columns)
    # df now holds one row per Pokémon with many flattened fields


(50, 145)
['abilities', 'base_experience', 'forms', 'game_indices', 'height', 'held_items', 'id', 'is_default', 'location_area_encounters', 'moves', 'name', 'order', 'past_abilities', 'past_types', 'stats', 'types', 'weight', 'cries_latest', 'cries_legacy', 'species_name', 'species_url', 'sprites_back_default', 'sprites_back_female', 'sprites_back_shiny', 'sprites_back_shiny_female', 'sprites_front_default', 'sprites_front_female', 'sprites_front_shiny', 'sprites_front_shiny_female', 'sprites_other_dream_world_front_default']


In [7]:
# Clean-up cell: derive HP, rename, keep a small set of columns, normalise types.

def _base_stat(stats, stat_name="hp"):
    """Return ``base_stat`` for ``stat_name`` from one Pokémon's ``stats`` list, or None.

    Expects the PokeAPI layout, where each entry looks like
    ``{"base_stat": 45, "stat": {"name": "hp", ...}}``; anything else yields None.
    """
    if not isinstance(stats, list):
        return None
    for entry in stats:
        if isinstance(entry, dict) and (entry.get("stat") or {}).get("name") == stat_name:
            return entry.get("base_stat")
    return None


# json_normalize leaves list-valued fields such as "stats" unflattened, so there is
# no "hp" column to rename; pull the value out of the nested list instead.
# The guard keeps the cell safe to re-run after "stats" has been dropped.
if "stats" in df.columns and not {"hp", "HP"} & set(df.columns):
    df = df.assign(HP=df["stats"].map(_base_stat))

# rename and reorder
df = df.rename(columns={
    "hp": "HP",
    "base_experience": "XP",
    "sprites_front_default": "Sprite"
})
cols = ["id", "name", "HP", "XP", "height", "weight", "Sprite"]
# .copy() so the assignments below write to an independent frame rather than
# to a selection of the wide one (avoids SettingWithCopyWarning).
df = df[[c for c in cols if c in df.columns]].copy()

# strip surrounding whitespace and title-case the names
df["name"] = df["name"].str.strip().str.title()

# convert types
if "HP" in df.columns:
    df["HP"] = pd.to_numeric(df["HP"], errors="coerce")
df["height"] = pd.to_numeric(df["height"], errors="coerce")
df["weight"] = pd.to_numeric(df["weight"], errors="coerce")
# "caught_at" is not among the kept columns, so this always adds an all-NaT column.
df["caught_at"] = pd.to_datetime(df.get("caught_at", pd.NaT))


In [11]:
# Export the cleaned table. index=False leaves out the positional row index,
# which would otherwise be written as an unnamed first column.
df.to_excel("19.50 Pokemons Data.xlsx", index=False)

In [12]:
pip install pandas matplotlib seaborn plotly


Note: you may need to restart the kernel to use updated packages.


In [16]:
import pandas as pd
import numpy as np

# Average XP, height and weight over the Pokémon in `df`.
# Work on a copy so the coercions below never touch `df` itself.
df_safe = df.copy()

# The cleaning cell renames "base_experience" to "XP"; accept either name so the
# same column is both coerced and averaged, whichever form `df` is in.
xp_col = "XP" if "XP" in df_safe.columns else "base_experience"

for col in (xp_col, "height", "weight"):
    if col in df_safe.columns:
        df_safe[col] = pd.to_numeric(df_safe[col], errors="coerce")
    else:
        # create column of NaNs if missing so the output is consistent
        df_safe[col] = np.nan

# compute means; Series.mean() skips NaNs by default
means = {
    "average_xp": df_safe[xp_col].mean(),
    "average_height": df_safe["height"].mean(),
    "average_weight": df_safe["weight"].mean()
}

# round for nicer display; an all-NaN column is reported as None
means_rounded = {k: (None if pd.isna(v) else round(v, 2)) for k, v in means.items()}

print("Averages:")
print(means_rounded)

# optional: put into a one-row DataFrame
summary_df = pd.DataFrame([means_rounded])
summary_df


Averages:
{'average_xp': np.float64(124.84), 'average_height': np.float64(9.44), 'average_weight': np.float64(234.14)}


Unnamed: 0,average_xp,average_height,average_weight
0,124.84,9.44,234.14
