# Pokemon for Data Science

This notebook uses the Pokemon dataset to play with, and get a feel for, different libraries for exploratory analysis and machine learning (how can I apply ML to Pokemon? No idea yet).

Some libraries I am interested in:

pandas <br>
numpy <br>
matplotlib <br>
seaborn <br>
bokeh <br>
holoviz (https://github.com/holoviz-community/HoloViz_KDD2022) <br>
scikit-learn <br>
tensorflow <br>
turicreate <br>

#### Notice: I may switch between English and Brazilian Portuguese in my comments, as I don't really keep track of which language I'm thinking in. Also, this is meant to be just a toy project. Sorry for any inconvenience this may cause.

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# Load the Pokemon dataset from 'Pokemon.csv' (path relative to the working directory) into a DataFrame.
df = pd.read_csv('Pokemon.csv')

In [4]:
# Print a summary of the frame: index range, column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 801 entries, 0 to 800
Data columns (total 41 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   abilities          801 non-null    object 
 1   against_bug        801 non-null    float64
 2   against_dark       801 non-null    float64
 3   against_dragon     801 non-null    float64
 4   against_electric   801 non-null    float64
 5   against_fairy      801 non-null    float64
 6   against_fight      801 non-null    float64
 7   against_fire       801 non-null    float64
 8   against_flying     801 non-null    float64
 9   against_ghost      801 non-null    float64
 10  against_grass      801 non-null    float64
 11  against_ground     801 non-null    float64
 12  against_ice        801 non-null    float64
 13  against_normal     801 non-null    float64
 14  against_poison     801 non-null    float64
 15  against_psychic    801 non-null    float64
 16  against_rock       801 non

We can see here that we have MANY columns... Depending on the analysis, we may need to drop some of them in order to see things more clearly.

In [5]:
# Count the missing values per column once, then keep only the columns that have any.
# Caution: this only detects real nulls (NaN/None); there may still be other
# wrong or placeholder values in the data that this check does not catch.
null_counts = df.isnull().sum()
null_counts[null_counts > 0]

height_m            20
percentage_male     98
type2              384
weight_kg           20
dtype: int64

In [6]:
# Keep the column index around (later cells reuse `columns`) and show both
# the full list of names and how many columns there are.
columns = df.columns
column_names = columns.tolist()
print(f"{column_names} \n\n Total columns is  {len(columns)}")

['abilities', 'against_bug', 'against_dark', 'against_dragon', 'against_electric', 'against_fairy', 'against_fight', 'against_fire', 'against_flying', 'against_ghost', 'against_grass', 'against_ground', 'against_ice', 'against_normal', 'against_poison', 'against_psychic', 'against_rock', 'against_steel', 'against_water', 'attack', 'base_egg_steps', 'base_happiness', 'base_total', 'capture_rate', 'classfication', 'defense', 'experience_growth', 'height_m', 'hp', 'japanese_name', 'name', 'percentage_male', 'pokedex_number', 'sp_attack', 'sp_defense', 'speed', 'type1', 'type2', 'weight_kg', 'generation', 'is_legendary'] 

 Total columns is  41


In [7]:
# Print one column name per line, which is easier to scan than the one-line list.
for column_name in columns:
    print(column_name)

abilities
against_bug
against_dark
against_dragon
against_electric
against_fairy
against_fight
against_fire
against_flying
against_ghost
against_grass
against_ground
against_ice
against_normal
against_poison
against_psychic
against_rock
against_steel
against_water
attack
base_egg_steps
base_happiness
base_total
capture_rate
classfication
defense
experience_growth
height_m
hp
japanese_name
name
percentage_male
pokedex_number
sp_attack
sp_defense
speed
type1
type2
weight_kg
generation
is_legendary


In [8]:
# Preview the first rows of the raw dataset.
df.head()

Unnamed: 0,abilities,against_bug,against_dark,against_dragon,against_electric,against_fairy,against_fight,against_fire,against_flying,against_ghost,...,percentage_male,pokedex_number,sp_attack,sp_defense,speed,type1,type2,weight_kg,generation,is_legendary
0,"['Overgrow', 'Chlorophyll']",1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,88.1,1,65,65,45,grass,poison,6.9,1,0
1,"['Overgrow', 'Chlorophyll']",1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,88.1,2,80,80,60,grass,poison,13.0,1,0
2,"['Overgrow', 'Chlorophyll']",1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,88.1,3,122,120,80,grass,poison,100.0,1,0
3,"['Blaze', 'Solar Power']",0.5,1.0,1.0,1.0,0.5,1.0,0.5,1.0,1.0,...,88.1,4,60,50,65,fire,,8.5,1,0
4,"['Blaze', 'Solar Power']",0.5,1.0,1.0,1.0,0.5,1.0,0.5,1.0,1.0,...,88.1,5,80,65,80,fire,,19.0,1,0


In [9]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,against_bug,against_dark,against_dragon,against_electric,against_fairy,against_fight,against_fire,against_flying,against_ghost,against_grass,...,height_m,hp,percentage_male,pokedex_number,sp_attack,sp_defense,speed,weight_kg,generation,is_legendary
count,801.0,801.0,801.0,801.0,801.0,801.0,801.0,801.0,801.0,801.0,...,781.0,801.0,703.0,801.0,801.0,801.0,801.0,781.0,801.0,801.0
mean,0.996255,1.057116,0.968789,1.07397,1.068976,1.065543,1.135456,1.192884,0.985019,1.03402,...,1.163892,68.958801,55.155761,401.0,71.305868,70.911361,66.334582,61.378105,3.690387,0.087391
std,0.597248,0.438142,0.353058,0.654962,0.522167,0.717251,0.691853,0.604488,0.558256,0.788896,...,1.080326,26.576015,20.261623,231.373075,32.353826,27.942501,28.907662,109.354766,1.93042,0.282583
min,0.25,0.25,0.0,0.0,0.25,0.0,0.25,0.25,0.0,0.25,...,0.1,1.0,0.0,1.0,10.0,20.0,5.0,0.1,1.0,0.0
25%,0.5,1.0,1.0,0.5,1.0,0.5,0.5,1.0,1.0,0.5,...,0.6,50.0,50.0,201.0,45.0,50.0,45.0,9.0,2.0,0.0
50%,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,65.0,50.0,401.0,65.0,66.0,65.0,27.3,4.0,0.0
75%,1.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,1.0,...,1.5,80.0,50.0,601.0,91.0,90.0,85.0,64.8,5.0,0.0
max,4.0,4.0,2.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,...,14.5,255.0,100.0,801.0,194.0,230.0,180.0,999.9,7.0,1.0


In [10]:
# Show the 'name' column; bracket access matches how later cells select it.
df['name']

0       Bulbasaur
1         Ivysaur
2        Venusaur
3      Charmander
4      Charmeleon
          ...    
796    Celesteela
797       Kartana
798      Guzzlord
799      Necrozma
800      Magearna
Name: name, Length: 801, dtype: object

In [11]:
# get the positional index of the 'against_water' column to see up to which column I need to drop
df.columns.get_loc('against_water')

18

In [12]:
# Collect every 'against_*' column by its name prefix rather than by a
# hard-coded positional slice, so the selection does not depend on column order.
columns_to_drop = [col for col in df.columns if col.startswith('against_')]
print(columns_to_drop)

# Drop them into a new frame; `df` itself is left untouched.
df_no_against = df.drop(columns=columns_to_drop)

['against_bug', 'against_dark', 'against_dragon', 'against_electric', 'against_fairy', 'against_fight', 'against_fire', 'against_flying', 'against_ghost', 'against_grass', 'against_ground', 'against_ice', 'against_normal', 'against_poison', 'against_psychic', 'against_rock', 'against_steel', 'against_water']


In [13]:
# Preview the first rows of the frame without the 'against_*' columns.
df_no_against.head()

Unnamed: 0,abilities,attack,base_egg_steps,base_happiness,base_total,capture_rate,classfication,defense,experience_growth,height_m,...,percentage_male,pokedex_number,sp_attack,sp_defense,speed,type1,type2,weight_kg,generation,is_legendary
0,"['Overgrow', 'Chlorophyll']",49,5120,70,318,45,Seed Pokémon,49,1059860,0.7,...,88.1,1,65,65,45,grass,poison,6.9,1,0
1,"['Overgrow', 'Chlorophyll']",62,5120,70,405,45,Seed Pokémon,63,1059860,1.0,...,88.1,2,80,80,60,grass,poison,13.0,1,0
2,"['Overgrow', 'Chlorophyll']",100,5120,70,625,45,Seed Pokémon,123,1059860,2.0,...,88.1,3,122,120,80,grass,poison,100.0,1,0
3,"['Blaze', 'Solar Power']",52,5120,70,309,45,Lizard Pokémon,43,1059860,0.6,...,88.1,4,60,50,65,fire,,8.5,1,0
4,"['Blaze', 'Solar Power']",64,5120,70,405,45,Flame Pokémon,58,1059860,1.1,...,88.1,5,80,65,80,fire,,19.0,1,0


In [14]:
# Look at just the abilities next to each Pokemon's name, for the first 20 rows.
df_no_against.loc[:, ['abilities', 'name']].head(20)

Unnamed: 0,abilities,name
0,"['Overgrow', 'Chlorophyll']",Bulbasaur
1,"['Overgrow', 'Chlorophyll']",Ivysaur
2,"['Overgrow', 'Chlorophyll']",Venusaur
3,"['Blaze', 'Solar Power']",Charmander
4,"['Blaze', 'Solar Power']",Charmeleon
5,"['Blaze', 'Solar Power']",Charizard
6,"['Torrent', 'Rain Dish']",Squirtle
7,"['Torrent', 'Rain Dish']",Wartortle
8,"['Torrent', 'Rain Dish']",Blastoise
9,"['Shield Dust', 'Run Away']",Caterpie


In [21]:
# (rows, columns) of the reduced frame.
df_no_against.shape

(801, 23)

In [22]:
# Column labels that remain in the reduced frame.
df_no_against.columns

Index(['abilities', 'attack', 'base_egg_steps', 'base_happiness', 'base_total',
       'capture_rate', 'classfication', 'defense', 'experience_growth',
       'height_m', 'hp', 'japanese_name', 'name', 'percentage_male',
       'pokedex_number', 'sp_attack', 'sp_defense', 'speed', 'type1', 'type2',
       'weight_kg', 'generation', 'is_legendary'],
      dtype='object')

In [24]:
# Pick the row of one specific Pokemon I'm interested in, restricted to the
# columns from 'hp' through 'pokedex_number' (label slices in .loc include both ends).
is_dragonite = df_no_against['name'] == "Dragonite"
df_no_against.loc[is_dragonite, 'hp':'pokedex_number']

Unnamed: 0,hp,japanese_name,name,percentage_male,pokedex_number
148,91,Kairyuカイリュー,Dragonite,50.0,149


In [29]:
# Select multiple rows whose 'name' is one of a list of specific values.
# General pattern: df.loc[df['column_name'].isin(some_values)]
# NOTE: to use multiple conditions, combine parenthesized masks with `&`:
#   df.loc[(df['column_name'] >= A) & (df['column_name'] <= B)]
wanted_names = ["Charmander", "Squirtle", "Dragonair", "Mew"]
df_no_against.loc[df_no_against['name'].isin(wanted_names)]

Unnamed: 0,abilities,attack,base_egg_steps,base_happiness,base_total,capture_rate,classfication,defense,experience_growth,height_m,...,percentage_male,pokedex_number,sp_attack,sp_defense,speed,type1,type2,weight_kg,generation,is_legendary
3,"['Blaze', 'Solar Power']",52,5120,70,309,45,Lizard Pokémon,43,1059860,0.6,...,88.1,4,60,50,65,fire,,8.5,1,0
6,"['Torrent', 'Rain Dish']",48,5120,70,314,45,Tiny Turtle Pokémon,65,1059860,0.5,...,88.1,7,50,64,43,water,,9.0,1,0
147,"['Shed Skin', 'Marvel Scale']",84,10240,35,420,45,Dragon Pokémon,65,1250000,4.0,...,50.0,148,70,70,70,dragon,,16.5,1,0
150,['Synchronize'],100,30720,100,600,45,New Species Pokémon,100,1059860,0.4,...,,151,100,100,100,psychic,,4.0,1,1


In [50]:
# BREAKING DOWN THE PREVIOUS COMMAND df_no_against.loc[df_no_against['name'].isin(["Charmander", "Squirtle", "Dragonair", "Mew"])]
# Evaluate only the inner part: a boolean Series with one True/False entry per row.

df_no_against['name'].isin(["Charmander", "Squirtle", "Dragonair", "Mew"])

0      False
1      False
2      False
3       True
4      False
       ...  
796    False
797    False
798    False
799    False
800    False
Name: name, Length: 801, dtype: bool

`isin` pega o `df_no_against['name']` (que no caso é uma Series) e testa cada elemento para ver se ele pertence à nossa lista de nomes especificados. Caso sim, retorna True para esse elemento em uma nova Series; caso não, retorna False.

Ou seja, no final, teremos uma Series com False, False, False,...., True, False, etc.

Ao "alimentarmos" isso em df_no_against.loc[nosso returned Series aqui], o loc vai retornar apenas as rows de nosso interesse.

In [32]:
# Feed the boolean mask produced by isin back into .loc to keep only the matching rows.
df_no_against.loc[df_no_against['name'].isin(["Charmander", "Squirtle", "Dragonair", "Mew"])]

Unnamed: 0,abilities,attack,base_egg_steps,base_happiness,base_total,capture_rate,classfication,defense,experience_growth,height_m,...,percentage_male,pokedex_number,sp_attack,sp_defense,speed,type1,type2,weight_kg,generation,is_legendary
3,"['Blaze', 'Solar Power']",52,5120,70,309,45,Lizard Pokémon,43,1059860,0.6,...,88.1,4,60,50,65,fire,,8.5,1,0
6,"['Torrent', 'Rain Dish']",48,5120,70,314,45,Tiny Turtle Pokémon,65,1059860,0.5,...,88.1,7,50,64,43,water,,9.0,1,0
147,"['Shed Skin', 'Marvel Scale']",84,10240,35,420,45,Dragon Pokémon,65,1250000,4.0,...,50.0,148,70,70,70,dragon,,16.5,1,0
150,['Synchronize'],100,30720,100,600,45,New Species Pokémon,100,1059860,0.4,...,,151,100,100,100,psychic,,4.0,1,1


In [63]:
# Build a plain list of booleans to use as a row mask in .loc (and later to
# convert into a Series). `bool(i % 2)` is True for ODD positions, so the mask
# keeps rows 1, 3, 5, ... and drops rows 0, 2, 4, ...
# NOTE: the entries must be real booleans. With the ints 1 and 0, .loc would
# interpret them as index labels instead of as a mask.
# The mask is sized from the frame itself so its length always matches.
x = [bool(i % 2) for i in range(len(df_no_against))]
print(x)

[False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True, False, True

In [65]:
# Now we can pass this True/False list to .loc, which returns only the rows whose mask entry is True
df_no_against.loc[x]

Unnamed: 0,abilities,attack,base_egg_steps,base_happiness,base_total,capture_rate,classfication,defense,experience_growth,height_m,...,percentage_male,pokedex_number,sp_attack,sp_defense,speed,type1,type2,weight_kg,generation,is_legendary
1,"['Overgrow', 'Chlorophyll']",62,5120,70,405,45,Seed Pokémon,63,1059860,1.0,...,88.1,2,80,80,60,grass,poison,13.0,1,0
3,"['Blaze', 'Solar Power']",52,5120,70,309,45,Lizard Pokémon,43,1059860,0.6,...,88.1,4,60,50,65,fire,,8.5,1,0
5,"['Blaze', 'Solar Power']",104,5120,70,634,45,Flame Pokémon,78,1059860,1.7,...,88.1,6,159,115,100,fire,flying,90.5,1,0
7,"['Torrent', 'Rain Dish']",63,5120,70,405,45,Turtle Pokémon,80,1059860,1.0,...,88.1,8,65,80,58,water,,22.5,1,0
9,"['Shield Dust', 'Run Away']",30,3840,70,195,255,Worm Pokémon,35,1000000,0.3,...,50.0,10,20,20,45,bug,,2.9,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
791,['Shadow Shield'],113,30720,0,680,45,Moone Pokémon,89,1250000,4.0,...,,792,137,107,97,psychic,ghost,120.0,7,1
793,['Beast Boost'],139,30720,0,570,25,Swollen Pokémon,139,1250000,2.4,...,,794,53,53,79,bug,fighting,333.6,7,1
795,['Beast Boost'],89,30720,0,570,30,Glowing Pokémon,71,1250000,3.8,...,,796,173,71,83,electric,,100.0,7,1
797,['Beast Boost'],181,30720,0,570,255,Drawn Sword Pokémon,131,1250000,0.3,...,,798,59,31,109,grass,steel,0.1,7,1


In [66]:
# We could also convert x into a Series and pass that to .loc
mySeries = pd.Series(x) 
mySeries

0      False
1       True
2      False
3       True
4      False
       ...  
796    False
797     True
798    False
799     True
800    False
Length: 801, dtype: bool

In [68]:
# Use the boolean Series as the row mask, in place of the plain list used before.
df_no_against.loc[mySeries]

Unnamed: 0,abilities,attack,base_egg_steps,base_happiness,base_total,capture_rate,classfication,defense,experience_growth,height_m,...,percentage_male,pokedex_number,sp_attack,sp_defense,speed,type1,type2,weight_kg,generation,is_legendary
1,"['Overgrow', 'Chlorophyll']",62,5120,70,405,45,Seed Pokémon,63,1059860,1.0,...,88.1,2,80,80,60,grass,poison,13.0,1,0
3,"['Blaze', 'Solar Power']",52,5120,70,309,45,Lizard Pokémon,43,1059860,0.6,...,88.1,4,60,50,65,fire,,8.5,1,0
5,"['Blaze', 'Solar Power']",104,5120,70,634,45,Flame Pokémon,78,1059860,1.7,...,88.1,6,159,115,100,fire,flying,90.5,1,0
7,"['Torrent', 'Rain Dish']",63,5120,70,405,45,Turtle Pokémon,80,1059860,1.0,...,88.1,8,65,80,58,water,,22.5,1,0
9,"['Shield Dust', 'Run Away']",30,3840,70,195,255,Worm Pokémon,35,1000000,0.3,...,50.0,10,20,20,45,bug,,2.9,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
791,['Shadow Shield'],113,30720,0,680,45,Moone Pokémon,89,1250000,4.0,...,,792,137,107,97,psychic,ghost,120.0,7,1
793,['Beast Boost'],139,30720,0,570,25,Swollen Pokémon,139,1250000,2.4,...,,794,53,53,79,bug,fighting,333.6,7,1
795,['Beast Boost'],89,30720,0,570,30,Glowing Pokémon,71,1250000,3.8,...,,796,173,71,83,electric,,100.0,7,1
797,['Beast Boost'],181,30720,0,570,255,Drawn Sword Pokémon,131,1250000,0.3,...,,798,59,31,109,grass,steel,0.1,7,1
