In [1]:
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
import pandas as pd

BASE = "https://pokeapi.co/api/v2"

def list_pokemon(limit=50, offset=0):
    url = f"{BASE}/pokemon?limit={limit}&offset={offset}"
    r = requests.get(url, timeout=20)
    r.raise_for_status()
    return [it["url"] for it in r.json().get("results", [])]

def fetch_json(url, retries=2):
    for attempt in range(retries + 1):
        try:
            r = requests.get(url, timeout=20)
            r.raise_for_status()
            return r.json()
        except Exception:
            if attempt == retries:
                raise
    return None

def fetch_pokemon_batch(urls, workers=10):
    results = []
    with ThreadPoolExecutor(max_workers=workers) as ex:
        futures = {ex.submit(fetch_json, u): u for u in urls}
        for fut in as_completed(futures):
            url = futures[fut]
            try:
                data = fut.result()
                if data:
                    results.append(data)
            except Exception as e:
                print(f"Failed to fetch {url}: {e}")
    return results

def get_50_pokemon_df(limit=50, workers=10):
    urls = list_pokemon(limit=limit)
    raw = fetch_pokemon_batch(urls, workers=workers)
    if not raw:
        return pd.DataFrame()
    # Flatten JSON; adjust sep if you prefer different column names
    df = pd.json_normalize(raw, sep="_")
    return df

# Example usage
if __name__ == "__main__":
    df = get_50_pokemon_df(limit=50, workers=10)
    print(df.shape)
    print(df.columns.tolist()[:30])  # preview columns
    # now df contains one row per Pokémon with many flattened fields


(50, 145)
['abilities', 'base_experience', 'forms', 'game_indices', 'height', 'held_items', 'id', 'is_default', 'location_area_encounters', 'moves', 'name', 'order', 'past_abilities', 'past_types', 'stats', 'types', 'weight', 'cries_latest', 'cries_legacy', 'species_name', 'species_url', 'sprites_back_default', 'sprites_back_female', 'sprites_back_shiny', 'sprites_back_shiny_female', 'sprites_front_default', 'sprites_front_female', 'sprites_front_shiny', 'sprites_front_shiny_female', 'sprites_other_dream_world_front_default']


In [2]:
# rename and reorder
df = df.rename(columns={
    "hp": "HP",
    "base_experience": "XP",
    "sprites_front_default": "Sprite"
})
cols = ["id", "name", "HP", "XP", "height", "weight", "Sprite"]
df = df[[c for c in cols if c in df.columns]]

# trim long text
df["name"] = df["name"].str.strip().str.title()

# convert types
df["height"] = pd.to_numeric(df["height"], errors="coerce")
df["weight"] = pd.to_numeric(df["weight"], errors="coerce")
df["caught_at"] = pd.to_datetime(df.get("caught_at", pd.NaT))


In [3]:
df.to_excel("19.50 Pokemons Data.xlsx")

In [4]:
pip install pandas matplotlib seaborn plotly


Note: you may need to restart the kernel to use updated packages.


In [5]:
import pandas as pd
import numpy as np

# assume df is your DataFrame with Pokémon data
# common column names: "base_experience", "height", "weight"
# make a safe copy and coerce types to numeric
df_safe = df.copy()

for col in ("base_experience", "height", "weight"):
    if col in df_safe.columns:
        df_safe[col] = pd.to_numeric(df_safe[col], errors="coerce")
    else:
        # create column of NaNs if missing so the output is consistent
        df_safe[col] = np.nan

# compute means, ignoring NaNs
means = {
    "average_xp": df_safe["XP"].mean(),
    "average_height": df_safe["height"].mean(),
    "average_weight": df_safe["weight"].mean()
}

# round for nicer display
means_rounded = {k: (None if pd.isna(v) else round(v, 2)) for k, v in means.items()}

print("Averages:")
print(means_rounded)

# optional: put into a one-row DataFrame
summary_df = pd.DataFrame([means_rounded])
summary_df


Averages:
{'average_xp': np.float64(124.84), 'average_height': np.float64(9.44), 'average_weight': np.float64(234.14)}


Unnamed: 0,average_xp,average_height,average_weight
0,124.84,9.44,234.14


In [6]:
df = pd.read_excel('19.50 Pokemons Data.xlsx')
print(df)

    Unnamed: 0  id        name   XP  height  weight  \
0            0   1   Bulbasaur   64       7      69   
1            1   4  Charmander   62       6      85   
2            2   8   Wartortle  142      10     225   
3            3   7    Squirtle   63       5      90   
4            4   9   Blastoise  239      16     855   
5            5   5  Charmeleon  142      11     190   
6            6   6   Charizard  240      17     905   
7            7   2     Ivysaur  142      10     130   
8            8   3    Venusaur  236      20    1000   
9            9  10    Caterpie   39       3      29   
10          10  11     Metapod   72       7      99   
11          11  13      Weedle   39       3      32   
12          12  14      Kakuna   72       6     100   
13          13  12  Butterfree  178      11     320   
14          14  17   Pidgeotto  122      11     300   
15          15  16      Pidgey   50       3      18   
16          16  18     Pidgeot  216      15     395   
17        

In [7]:
# Using describe() to calculate summary statistics
res = df.describe()
print(res)

       Unnamed: 0        id          XP     height       weight  caught_at
count    50.00000  50.00000   50.000000  50.000000    50.000000        0.0
mean     24.50000  25.50000  124.840000   9.440000   234.140000        NaN
std      14.57738  14.57738   65.474519   5.855958   239.438119        NaN
min       0.00000   1.00000   39.000000   2.000000     8.000000        NaN
25%      12.25000  13.25000   60.250000   5.250000    71.250000        NaN
50%      24.50000  25.50000  128.000000   8.500000   127.500000        NaN
75%      36.75000  37.75000  172.500000  11.750000   300.000000        NaN
max      49.00000  50.00000  240.000000  35.000000  1000.000000        NaN


In [12]:

desired = ["XP", "height", "weight"]
df_numbers = df[desired]
# Calculating mean, median, and mode
mean_values = df_numbers.mean()
median_values = df_numbers.median()
mode_values = df_numbers.mode().iloc[0] 

print("Mean values:")
print(f"{mean_values}")
print("Median Values:")
print(f"{median_values}")
print("Mode Values:")
print("{mode_values}")

Mean values:
XP        124.84
height      9.44
weight    234.14
dtype: float64
Median Values:
XP        128.0
height      8.5
weight    127.5
dtype: float64
Mode Values:
{mode_values}


In [13]:


# Calculating variance and standard deviation
variance_val = df_numbers.var()
std_dev_val = df_numbers.std()

print(variance_val)
print(std_dev_val)

XP         4286.912653
height       34.292245
weight    57330.612653
dtype: float64
XP         65.474519
height      5.855958
weight    239.438119
dtype: float64


In [14]:


# Calculating skewness and kurtosis
skewness_val = df_numbers.skew()
kurtosis_val = df_numbers.kurt()

print(skewness_val)
print(kurtosis_val)

XP        0.312624
height    1.872028
weight    1.685002
dtype: float64
XP       -1.252515
height    6.202302
weight    2.422209
dtype: float64
