# Intro to Data Science with Pokemon
***

### Arrays (Python Lists)

In [1]:
# A small list of fruit names used to demonstrate indexing and slicing.
food = [
    "apple",
    "banana",
    "cherry",
    "grape",
]

In [2]:
# Display the whole list.
print(food)

['apple', 'banana', 'cherry', 'grape']


In [3]:
# Indexing starts at 0, so index 2 is the third item.
print(food[2])

cherry


In [4]:
# Take the first three items; the stop index (3) itself is not included.
sliced_food_list = food[:3]

In [5]:
# The slice holds the items at indices 0, 1 and 2.
print(sliced_food_list)

['apple', 'banana', 'cherry']


### Import Libraries
***

In [6]:
import pandas as pd
# NOTE(review): the conventional alias for numpy is `np`; `py` is kept here because the cell below references it.
import numpy as py
# NOTE(review): the conventional alias for seaborn is `sns`; `sb` is kept here because the cell below references it.
import seaborn as sb

In [7]:
# Report the installed library versions so the results can be reproduced.
# A space separates each label from its version number
# (plain concatenation printed e.g. "PANDAS version2.2.3").
print(f"PANDAS version {pd.__version__}")
print(f"numpy version {py.__version__}")
print(f"Seaborn version {sb.__version__}")

PANDAS version2.2.3
numpy version2.1.1
Seaborn version0.13.2


### Loading Data
***

In [8]:
# Read the Pokemon CSV into a DataFrame; the path is relative to the working directory.
df = pd.read_csv(filepath_or_buffer="pokemon.csv")

### Displaying Full Data Frames
***

In [9]:
# Optional: uncomment the next line to show every row of a DataFrame instead of a truncated preview.
#pd.set_option("display.max_rows", None)

In [10]:
# Preview the first five rows of the data set.
df.head()

Unnamed: 0,Per,Nat,Pokemon,HP,Atk,Def,SpA,SpD,Spe,Total,...,Catch,EXP,Evolve,Per.1,Nat.1,Joh,Hoe,Sin,Un,Pokemon.1
0,1.0,1.0,Bulbasaur,45,49,49,65,65,45,318,...,45,1059860,,1.0,1.0,226.0,,,,Bulbasaur
1,2.0,2.0,Ivysaur,60,62,63,80,80,60,405,...,45,1059860,Lv. 16,2.0,2.0,227.0,,,,Ivysaur
2,3.0,3.0,Venusaur,80,82,83,100,100,80,525,...,45,1059860,Lv. 32,3.0,3.0,228.0,,,,Venusaur
3,4.0,4.0,Charmander,39,52,43,60,50,65,309,...,45,1059860,,4.0,4.0,229.0,,,,Charmander
4,5.0,5.0,Charmeleon,58,64,58,80,65,80,405,...,45,1059860,Lv. 16,5.0,5.0,230.0,,,,Charmeleon


This is the Pokemon data set. Note that all Pokemon are present except those from the latest generation.

### Accessing Columns
***

In [11]:
# Bracket lookup returns the same column as attribute access (df.Pokemon).
pokemon_column = df["Pokemon"]

In [12]:
# Look up the entry whose index label is 0 (the first row here).
pokemon_column[0]

'Bulbasaur'

In [13]:
# Slice the first three entries; the end position (3) is excluded.
pokemon_column[0:3]

0    Bulbasaur
1      Ivysaur
2     Venusaur
Name: Pokemon, dtype: object

### Accessing Multiple Columns 
***

You can access multiple columns by passing a list of column names inside the indexing brackets; the result can be stored in a variable.

In [17]:
# Select three columns by name: every row, only the listed columns.
selected_columns = ["Pokemon", "HP", "Atk"]
pkm_atk_hp = df.loc[:, selected_columns]

In [18]:
# Preview the first ten rows of the three selected columns.
pkm_atk_hp.head(10)

Unnamed: 0,Pokemon,HP,Atk
0,Bulbasaur,45,49
1,Ivysaur,60,62
2,Venusaur,80,82
3,Charmander,39,52
4,Charmeleon,58,64
5,Charizard,78,84
6,Squirtle,44,48
7,Wartortle,59,63
8,Blastoise,79,83
9,Caterpie,45,30


### Accessing Rows 
***

You can access rows with `iloc[]`, which selects a row by its integer position (starting at 0).

In [19]:
# iloc[0] selects the first row by position; the result is a Series keyed by column name.
bulbasaur_row = df.iloc[0]

In [20]:
# Show every column value for this single row.
print(bulbasaur_row)

Per                       1.0
Nat                       1.0
Pokemon             Bulbasaur
HP                         45
Atk                        49
Def                        49
SpA                        65
SpD                        65
Spe                        45
Total                     318
Type I                  Grass
Type II                Poison
Tier                       NU
Ability I            Overgrow
Ability II                NaN
Hidden Ability    Chlorophyll
Mass                   6.9 kG
LK/GK                      20
EV Worth                1 SpA
EXPV                       64
Color                   Green
Hatch                    5120
Gender              M (87.5%)
Egg Group I           Monster
Egg Group II            Grass
Catch                      45
EXP                   1059860
Evolve                    NaN
Per.1                     1.0
Nat.1                     1.0
Joh                     226.0
Hoe                       NaN
Sin                       NaN
Un        

### Useful PANDAS Methods 
***

In [22]:
# Sort the Pokemon column alphabetically; each name keeps its original row label.
df["Pokemon"].sort_values()

464     Abomasnow
62           Abra
358         Absol
629      Accelgor
141    Aerodactyl
          ...    
262     Zigzagoon
583       Zoroark
582         Zorua
40          Zubat
646      Zweilous
Name: Pokemon, Length: 663, dtype: object

In [24]:
df.Pokemon[0:10].sort_values() # sorts the first 10 names (positions 0-9; the end of the slice is excluded) alphabetically

8     Blastoise
0     Bulbasaur
9      Caterpie
5     Charizard
3    Charmander
4    Charmeleon
1       Ivysaur
6      Squirtle
2      Venusaur
7     Wartortle
Name: Pokemon, dtype: object

In [25]:
# Average of the HP column across all rows.
df["HP"].mean()

np.float64(68.34087481146305)

In [26]:
# Summary statistics (count, mean, std, min, quartiles, max) for the SpA column.
df["SpA"].describe()

count    663.000000
mean      68.962293
std       29.246841
min       10.000000
25%       45.000000
50%       65.000000
75%       90.000000
max      180.000000
Name: SpA, dtype: float64

### Filtering Data
***

In [29]:
# Keep only the rows whose name contains the literal substring 'chu'.
# regex=False matches the text literally instead of interpreting it as a regular expression.
# na=False treats a missing name as "no match", so the boolean mask never contains NaN
# (a NaN in the mask would make the row selection raise).
# The match is case-sensitive, so a name that starts with "Chu" would not be selected.
chu_pokemon = df[df['Pokemon'].str.contains('chu', regex=False, na=False)]

In [28]:
# Leave the DataFrame as the cell's last expression so the notebook renders it as a table;
# print() falls back to plain text that wraps wide frames across several blocks.
chu_pokemon

       Per    Nat   Pokemon  HP  Atk  Def  SpA  SpD  Spe  Total  ... Catch  \
24    26.0   25.0   Pikachu  35   55   30   50   40   90    300  ...   190   
25    27.0   26.0    Raichu  60   90   55   90   80  100    475  ...    75   
171   25.0  172.0     Pichu  20   40   15   35   35   60    205  ...   190   
237  143.0  238.0  Smoochum  45   30   15   85   65   65    305  ...    45   

         EXP        Evolve  Per.1  Nat.1    Joh    Hoe    Sin  Un  Pokemon.1  
24   1000000    Friendship   26.0   25.0   22.0  156.0  104.0 NaN    Pikachu  
25   1000000  Thunderstone   27.0   26.0   23.0  157.0  105.0 NaN     Raichu  
171  1000000           NaN   25.0  172.0   21.0  155.0  103.0 NaN      Pichu  
237  1000000           NaN  143.0  238.0  152.0    NaN    NaN NaN   Smoochum  

[4 rows x 35 columns]


In [30]:
# Rows whose SpA value is exactly 70.
spa_is_70 = df["SpA"].eq(70)
spec_atk_70 = df.loc[spa_is_70]

In [31]:
# Leave the DataFrame as the cell's last expression so the notebook renders it as a table;
# print() falls back to plain text that wraps wide frames across several blocks.
spec_atk_70

       Per    Nat     Pokemon   HP  Atk  Def  SpA  SpD  Spe  Total  ... Catch  \
17    18.0   18.0     Pidgeot   83   80   75   70   70   91    469  ...    45   
57    63.0   58.0   Growlithe   55   70   45   70   50   60    350  ...   190   
61    67.0   62.0   Poliwrath   90   85   95   70   90   70    500  ...    45   
68    75.0   69.0  Bellsprout   50   75   35   70   30   40    300  ...   255   
86    95.0   87.0     Dewgong   90   70   80   70   95   70    475  ...    75   
115  132.0  116.0      Horsea   30   40   70   70   25   60    295  ...   225   
119  137.0  120.0      Staryu   30   45   55   70   55   85    340  ...   225   
147  179.0  148.0   Dragonair   61   84   65   70   70   70    420  ...    45   
168   46.0  169.0      Crobat   85   90   80   70   80  130    535  ...    90   
176  205.0  177.0        Natu   40   50   45   70   45   70    320  ...   190   
217  248.0  218.0      Slugma   40   40   40   70   40   20    250  ...   190   
239  148.0  240.0       Magb