# Pokémon Simple Beginner Project

---

In [1]:
import pandas as pd

# Load the raw Pokémon dataset from the CSV file next to this notebook.
df = pd.read_csv(filepath_or_buffer='PokemonData.csv')

In [2]:
# Preview the first three rows to check the columns that were loaded.
df.head(n=3)

Unnamed: 0,Name,Type,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,url,icon
0,Bulbasaur,"['Grass', 'Poison']",318,45,49,49,65,65,45,https://pokemondb.net/pokedex/bulbasaur,https://img.pokemondb.net/sprites/sword-shield...
1,Ivysaur,"['Grass', 'Poison']",405,60,62,63,80,80,60,https://pokemondb.net/pokedex/ivysaur,https://img.pokemondb.net/sprites/sword-shield...
2,Venusaur,"['Grass', 'Poison']",525,80,82,83,100,100,80,https://pokemondb.net/pokedex/venusaur,https://img.pokemondb.net/sprites/sword-shield...


---

# 1. Remove unwanted columns

In [3]:
# The link columns are not used in this analysis, so drop them.
df = df.drop(['url', 'icon'], axis='columns')
df

Unnamed: 0,Name,Type,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed
0,Bulbasaur,"['Grass', 'Poison']",318,45,49,49,65,65,45
1,Ivysaur,"['Grass', 'Poison']",405,60,62,63,80,80,60
2,Venusaur,"['Grass', 'Poison']",525,80,82,83,100,100,80
3,Charmander,"['Fire', '']",309,39,52,43,60,50,65
4,Charmeleon,"['Fire', '']",405,58,64,58,80,65,80
...,...,...,...,...,...,...,...,...,...
893,Regieleki,"['Electric', '']",580,80,100,50,100,50,200
894,Regidrago,"['Dragon', '']",580,200,100,50,100,50,80
895,Glastrier,"['Ice', '']",580,100,145,130,65,110,30
896,Spectrier,"['Ghost', '']",580,100,65,60,145,80,130


---

# 2. Separate Type column into two

In [4]:
# Split the raw "Type" text on the comma into two new columns.
#   - 'Type 1' / 'Type 2' : names of the new columns receiving the two pieces
#   - pat=","             : the comma is the delimiter between the two entries
#   - expand=True         : return the pieces as separate columns rather than
#                           one list per row
# The pieces still carry brackets, quotes and spaces at this point.
df[['Type 1', 'Type 2']] = df['Type'].str.split(pat=",", expand=True)

df

Unnamed: 0,Name,Type,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Type 1,Type 2
0,Bulbasaur,"['Grass', 'Poison']",318,45,49,49,65,65,45,['Grass','Poison']
1,Ivysaur,"['Grass', 'Poison']",405,60,62,63,80,80,60,['Grass','Poison']
2,Venusaur,"['Grass', 'Poison']",525,80,82,83,100,100,80,['Grass','Poison']
3,Charmander,"['Fire', '']",309,39,52,43,60,50,65,['Fire','']
4,Charmeleon,"['Fire', '']",405,58,64,58,80,65,80,['Fire','']
...,...,...,...,...,...,...,...,...,...,...,...
893,Regieleki,"['Electric', '']",580,80,100,50,100,50,200,['Electric','']
894,Regidrago,"['Dragon', '']",580,200,100,50,100,50,80,['Dragon','']
895,Glastrier,"['Ice', '']",580,100,145,130,65,110,30,['Ice','']
896,Spectrier,"['Ghost', '']",580,100,65,60,145,80,130,['Ghost','']


---

# 3. Remove unwanted characters (brackets)

In [5]:
# str.strip() treats its argument as a set of characters (not a regex) and
# trims any of those characters from both ends of every string.
df['Type 1'] = df['Type 1'].str.strip(to_strip=r"[' '")   # trims [, ' and spaces
df['Type 2'] = df['Type 2'].str.strip(to_strip=r"' ']")   # trims ', spaces and ]

# The combined "Type" column has been replaced by the two clean columns.
df = df.drop(columns=['Type'])

df.head(n=10)

Unnamed: 0,Name,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Type 1,Type 2
0,Bulbasaur,318,45,49,49,65,65,45,Grass,Poison
1,Ivysaur,405,60,62,63,80,80,60,Grass,Poison
2,Venusaur,525,80,82,83,100,100,80,Grass,Poison
3,Charmander,309,39,52,43,60,50,65,Fire,
4,Charmeleon,405,58,64,58,80,65,80,Fire,
5,Charizard,534,78,84,78,109,85,100,Fire,Flying
6,Squirtle,314,44,48,65,50,64,43,Water,
7,Wartortle,405,59,63,80,65,80,58,Water,
8,Blastoise,530,79,83,100,85,105,78,Water,
9,Caterpie,195,45,30,35,20,20,45,Bug,


---

# 4. Move columns into specified positions

In [6]:
# Reorder columns: the two type columns go right after Name, Total goes last.
#
# df.pop('column_name') removes the column and returns it as a Series;
# df.insert(position_index, 'column_name', column_data) puts it back at the
# given position.
df.insert(1, "Type 1", df.pop("Type 1"))
df.insert(2, "Type 2", df.pop("Type 2"))

# Assigning a popped column back appends it as the last column, so no
# hard-coded position (which would depend on the number of columns) is needed.
df["Total"] = df.pop("Total")

df

Unnamed: 0,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Total
0,Bulbasaur,Grass,Poison,45,49,49,65,65,45,318
1,Ivysaur,Grass,Poison,60,62,63,80,80,60,405
2,Venusaur,Grass,Poison,80,82,83,100,100,80,525
3,Charmander,Fire,,39,52,43,60,50,65,309
4,Charmeleon,Fire,,58,64,58,80,65,80,405
...,...,...,...,...,...,...,...,...,...,...
893,Regieleki,Electric,,80,100,50,100,50,200,580
894,Regidrago,Dragon,,200,100,50,100,50,80,580
895,Glastrier,Ice,,100,145,130,65,110,30,580
896,Spectrier,Ghost,,100,65,60,145,80,130,580


---

# 5. Determine the summary statistics.

In [7]:
# .describe() computes the summary statistics (count, mean, std, min,
# quartiles, max) of the numeric columns; .round(decimals=2) shows them with
# two decimal places.
(
    df
    .describe()
    .round(decimals=2)
)

Unnamed: 0,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Total
count,898.0,898.0,898.0,898.0,898.0,898.0,898.0
mean,69.03,76.59,71.92,69.68,69.91,65.95,423.08
std,26.21,29.78,29.6,29.37,27.09,28.46,112.1
min,1.0,5.0,5.0,10.0,20.0,5.0,175.0
25%,50.0,55.0,50.0,46.25,50.0,45.0,320.0
50%,65.0,75.0,67.0,65.0,65.0,65.0,440.0
75%,80.0,95.0,90.0,90.0,85.0,85.0,500.0
max,255.0,181.0,230.0,173.0,230.0,200.0,720.0


---

# 6. Determine the elements present in Type 1

In [8]:
# Keep only the first row in which each Type 1 value appears.
# .reset_index() then renumbers the rows from 0 and keeps the original row
# label (where that type first occurs) in a column named "index".
df.loc[~df['Type 1'].duplicated(), ['Type 1']].reset_index()

Unnamed: 0,index,Type 1
0,0,Grass
1,3,Fire
2,6,Water
3,9,Bug
4,15,Normal
5,22,Poison
6,24,Electric
7,26,Ground
8,34,Fairy
9,55,Fighting


---

# 7. Determine the count of each element in Type 1

In [9]:
# Count how many rows fall under each Type 1 value, most frequent first.
df.value_counts(subset=['Type 1'])

Type 1  
Water       123
Normal      109
Grass        86
Bug          75
Fire         58
Psychic      58
Rock         50
Electric     49
Fighting     36
Dark         36
Ground       35
Poison       35
Ghost        31
Dragon       31
Steel        30
Ice          28
Fairy        21
Flying        7
dtype: int64

---

# 8. Separate dual-element from single-element Pokémon

In [10]:
# Rows without a second type hold an empty string in Type 2; map those empty
# strings to the missing-value marker (pd.NA) so .isna()/.notna() can be used.
df['Type 2'] = df['Type 2'].replace({'': pd.NA})

df

Unnamed: 0,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Total
0,Bulbasaur,Grass,Poison,45,49,49,65,65,45,318
1,Ivysaur,Grass,Poison,60,62,63,80,80,60,405
2,Venusaur,Grass,Poison,80,82,83,100,100,80,525
3,Charmander,Fire,,39,52,43,60,50,65,309
4,Charmeleon,Fire,,58,64,58,80,65,80,405
...,...,...,...,...,...,...,...,...,...,...
893,Regieleki,Electric,,80,100,50,100,50,200,580
894,Regidrago,Dragon,,200,100,50,100,50,80,580
895,Glastrier,Ice,,100,145,130,65,110,30,580
896,Spectrier,Ghost,,100,65,60,145,80,130,580


In [11]:
# Single-element rows: those whose Type 2 is missing.
s_elem = df.loc[lambda frame: frame['Type 2'].isna()]

s_elem

Unnamed: 0,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Total
3,Charmander,Fire,,39,52,43,60,50,65,309
4,Charmeleon,Fire,,58,64,58,80,65,80,405
6,Squirtle,Water,,44,48,65,50,64,43,314
7,Wartortle,Water,,59,63,80,65,80,58,405
8,Blastoise,Water,,79,83,100,85,105,78,530
...,...,...,...,...,...,...,...,...,...,...
890,Kubfu,Fighting,,60,90,60,53,50,72,385
893,Regieleki,Electric,,80,100,50,100,50,200,580
894,Regidrago,Dragon,,200,100,50,100,50,80,580
895,Glastrier,Ice,,100,145,130,65,110,30,580


In [12]:
# Dual-element rows: drop every row whose Type 2 is missing, keeping the rest.
d_elem = df.dropna(subset=['Type 2'])

d_elem

Unnamed: 0,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Total
0,Bulbasaur,Grass,Poison,45,49,49,65,65,45,318
1,Ivysaur,Grass,Poison,60,62,63,80,80,60,405
2,Venusaur,Grass,Poison,80,82,83,100,100,80,525
5,Charizard,Fire,Flying,78,84,78,109,85,100,534
11,Butterfree,Bug,Flying,60,45,50,90,80,70,395
...,...,...,...,...,...,...,...,...,...,...
888,Zamazenta,Fighting,Steel,92,130,145,80,145,128,720
889,Eternatus,Poison,Dragon,140,85,95,145,95,130,690
891,Urshifu,Fighting,Dark,100,130,100,63,60,97,550
892,Zarude,Dark,Grass,105,120,105,70,95,105,600


---

# 9. Sort names in alphabetical order

In [13]:
# Show the rows ordered A→Z by Name (ascending is the default order).
# The result is only displayed; df itself is left unsorted.
df.sort_values('Name')

Unnamed: 0,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Total
459,Abomasnow,Grass,Ice,90,92,75,92,85,60,494
62,Abra,Psychic,,25,20,15,105,55,90,310
358,Absol,Dark,,65,130,60,75,60,75,465
616,Accelgor,Bug,,80,70,40,100,60,145,495
680,Aegislash,Steel,Ghost,60,50,140,50,140,60,500
...,...,...,...,...,...,...,...,...,...,...
570,Zoroark,Dark,,60,105,60,120,60,105,510
569,Zorua,Dark,,40,65,40,80,40,65,330
40,Zubat,Poison,Flying,40,45,35,30,40,55,245
633,Zweilous,Dark,Dragon,72,85,70,65,70,58,420


---

# 10. Save cleaned dataset

---

# Extra

In [14]:
# Add up the Total column separately for every Type 1 value.
df['Total'].groupby(df['Type 1']).sum()


Type 1
Bug         28011
Dark        15708
Dragon      15227
Electric    21295
Fairy        8994
Fighting    15437
Fire        25733
Flying       2940
Ghost       13443
Grass       35081
Ground      14843
Ice         12103
Normal      43231
Poison      14436
Psychic     26087
Rock        21967
Steel       14083
Water       51308
Name: Total, dtype: int64

In [15]:
# Per-type sum of Total written with .agg(); equivalent to calling .sum() on
# the grouped column. The aggregation is named by the string 'sum' rather than
# the Python builtin `sum`: passing the builtin is deprecated in recent pandas
# and emits a FutureWarning.
df.groupby('Type 1')['Total'].agg('sum')

Type 1
Bug         28011
Dark        15708
Dragon      15227
Electric    21295
Fairy        8994
Fighting    15437
Fire        25733
Flying       2940
Ghost       13443
Grass       35081
Ground      14843
Ice         12103
Normal      43231
Poison      14436
Psychic     26087
Rock        21967
Steel       14083
Water       51308
Name: Total, dtype: int64