In [1]:
import pandas as pd

import warnings
# Silence every warning category for the rest of the session. This keeps the
# rendered outputs clean, but it also hides deprecation notices from any library.
warnings.filterwarnings('ignore')

## About Dataset

This data set covers 721 Pokemon (800 rows once alternate forms such as Mega Evolutions are counted), including their number, name, first and second type, and basic stats: HP, Attack, Defense, Special Attack, Special Defense, and Speed.

- #: ID for each pokemon
- Name: Name of each pokemon
- Type 1: Each pokemon has a type, this determines weakness/resistance to attacks
- Type 2: Some pokemon are dual-type and have a second type; it is empty (NaN) for single-type pokemon
- Total: sum of all stats that come after this, a general guide to how strong a pokemon is
- HP: hit points, or health, defines how much damage a pokemon can withstand before fainting
- Attack: the base modifier for normal attacks (e.g. Scratch, Punch)
- Defense: the base damage resistance against normal attacks
- Sp. Atk: special attack, the base modifier for special attacks (e.g. fire blast, bubble beam)
- Sp. Def: special defense, the base damage resistance against special attacks
- Speed: determines which pokemon attacks first each round

In [2]:
# Load the Pokemon table; the first CSV column ("#") becomes the row index.
# Note that this index is not unique: alternate forms share their base number.
pokemon = pd.read_csv(filepath_or_buffer="Pokemon.csv", index_col=0)
pokemon.head(5)

Unnamed: 0_level_0,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,Charmander,Fire,,309,39,52,43,60,50,65,1,False


In [3]:
# Tidy data: reshape the first five rows from wide to long, so that every row
# holds a single (Name, stat, value) observation.

melted = pokemon.head().melt(id_vars='Name', value_vars=['Attack', 'Defense'])
melted

Unnamed: 0,Name,variable,value
0,Bulbasaur,Attack,49
1,Ivysaur,Attack,62
2,Venusaur,Attack,82
3,VenusaurMega Venusaur,Attack,100
4,Charmander,Attack,52
5,Bulbasaur,Defense,49
6,Ivysaur,Defense,63
7,Venusaur,Defense,83
8,VenusaurMega Venusaur,Defense,123
9,Charmander,Defense,43


In [4]:
# Pivoting data: the reverse of melting. Spread the long table back out so
# that each stat in 'variable' becomes its own column, indexed by Name.

pd.pivot(melted, index='Name', columns='variable', values='value')

variable,Attack,Defense
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Bulbasaur,49,49
Charmander,52,43
Ivysaur,62,63
Venusaur,82,83
VenusaurMega Venusaur,100,123


In [5]:
# Stack the first five and the last five rows into one frame.
data1, data2 = pokemon.head(), pokemon.tail()
# Row-wise concatenation (axis=0) is the default; ignore_index discards the
# original "#" labels and renumbers the rows from 0.
concat_data = pd.concat([data1, data2], ignore_index=True)
concat_data

Unnamed: 0,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,Charmander,Fire,,309,39,52,43,60,50,65,1,False
5,Diancie,Rock,Fairy,600,50,100,150,100,150,50,6,True
6,DiancieMega Diancie,Rock,Fairy,700,50,160,110,160,110,110,6,True
7,HoopaHoopa Confined,Psychic,Ghost,600,80,110,60,150,130,70,6,True
8,HoopaHoopa Unbound,Psychic,Dark,680,80,160,60,170,130,80,6,True
9,Volcanion,Fire,Water,600,80,110,120,130,90,70,6,True


In [6]:
# Data types: list the dtype pandas assigned to each column when the CSV was read.

pokemon.dtypes

Name          object
Type 1        object
Type 2        object
Total          int64
HP             int64
Attack         int64
Defense        int64
Sp. Atk        int64
Sp. Def        int64
Speed          int64
Generation     int64
Legendary       bool
dtype: object

In [7]:
# Let's convert data types: 'Type 1' becomes a categorical column, while
# 'Speed' and 'Attack' are cast from integer to float.

pokemon = pokemon.astype({'Type 1': 'category', 'Speed': 'float', 'Attack': 'float'})

In [8]:
# Re-check the dtypes to confirm the conversions from the previous cell took effect.
pokemon.dtypes

Name            object
Type 1        category
Type 2          object
Total            int64
HP               int64
Attack         float64
Defense          int64
Sp. Atk          int64
Sp. Def          int64
Speed          float64
Generation       int64
Legendary         bool
dtype: object

In [9]:
# Missing data: .info() prints the non-null count per column, so any column
# whose count is below the number of rows contains missing values.

pokemon.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 800 entries, 1 to 721
Data columns (total 12 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   Name        800 non-null    object  
 1   Type 1      800 non-null    category
 2   Type 2      414 non-null    object  
 3   Total       800 non-null    int64   
 4   HP          800 non-null    int64   
 5   Attack      800 non-null    float64 
 6   Defense     800 non-null    int64   
 7   Sp. Atk     800 non-null    int64   
 8   Sp. Def     800 non-null    int64   
 9   Speed       800 non-null    float64 
 10  Generation  800 non-null    int64   
 11  Legendary   800 non-null    bool    
dtypes: bool(1), category(1), float64(2), int64(6), object(2)
memory usage: 71.0+ KB


In [10]:
# Frequency of each secondary type; dropna=False keeps the NaN bucket so the
# number of missing values is shown alongside the real categories.
pokemon.loc[:, "Type 2"].value_counts(dropna=False)

NaN         386
Flying       97
Ground       35
Poison       34
Psychic      33
Fighting     26
Grass        25
Fairy        23
Steel        22
Dark         20
Dragon       18
Ice          14
Rock         14
Water        14
Ghost        14
Fire         12
Electric      6
Normal        4
Bug           3
Name: Type 2, dtype: int64

In [11]:
# Let's drop NaN values: keep only the rows that have a secondary type.

data = pokemon.dropna(axis=0, how='any', subset=['Type 2'])
# Sanity check: passes silently when no missing 'Type 2' value is left.
assert not data['Type 2'].isnull().any()

In [12]:
# Summarise the filtered frame to check the row count and per-column non-null counts.
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 414 entries, 1 to 721
Data columns (total 12 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   Name        414 non-null    object  
 1   Type 1      414 non-null    category
 2   Type 2      414 non-null    object  
 3   Total       414 non-null    int64   
 4   HP          414 non-null    int64   
 5   Attack      414 non-null    float64 
 6   Defense     414 non-null    int64   
 7   Sp. Atk     414 non-null    int64   
 8   Sp. Def     414 non-null    int64   
 9   Speed       414 non-null    float64 
 10  Generation  414 non-null    int64   
 11  Legendary   414 non-null    bool    
dtypes: bool(1), category(1), float64(2), int64(6), object(2)
memory usage: 37.1+ KB
