# Loading Data into Pandas

In [2]:
import pandas as pd

In [53]:
# A DataFrame is the central pandas object: a labelled, two-dimensional table
# of rows and columns, much like a spreadsheet.
# No `header` argument is passed here, so pandas' default header handling
# takes the column names from the first line of the file.
df = pd.read_csv(filepath_or_buffer="pokemon_data.csv")

# Preview the first five rows (five is the default for .head()).
print(df.head())

   #                   Name Type 1  Type 2  HP  Attack  Defense  Sp. Atk  \
0  1              Bulbasaur  Grass  Poison  45      49       49       65   
1  2                Ivysaur  Grass  Poison  60      62       63       80   
2  3               Venusaur  Grass  Poison  80      82       83      100   
3  3  VenusaurMega Venusaur  Grass  Poison  80     100      123      122   
4  4             Charmander   Fire     NaN  39      52       43       60   

   Sp. Def  Speed  Generation  Legendary  
0       65     45           1      False  
1       80     60           1      False  
2      100     80           1      False  
3      120     80           1      False  
4       50     65           1      False  


In [ ]:
# Excel workbooks are loaded with read_excel.
# NOTE: header=None tells pandas the sheet has no header row, so the columns
# are labelled 0..n-1 and the first sheet row is kept as ordinary data.
df_xlsx = pd.read_excel(io="pokemon_data.xlsx", header=None)

# .head(3) limits the preview to the first three rows (.head() alone shows five).
print(df_xlsx.head(n=3))

In [50]:
# Tab-separated text files are read with read_csv as well; the separator has
# to be given explicitly ("\t" = tab) because read_csv assumes commas by default.
# NOTE: header=None means the file's header line is not used for column names:
# the columns are numbered 0..n-1 and the header text shows up as row 0.
df_txt = pd.read_csv("pokemon_data.txt", sep="\t", header=None)

# Printing the whole frame; pandas abbreviates the middle rows with "..".
print(df_txt)

      0                      1        2       3   4       5        6   \
0      #                   Name   Type 1  Type 2  HP  Attack  Defense   
1      1              Bulbasaur    Grass  Poison  45      49       49   
2      2                Ivysaur    Grass  Poison  60      62       63   
3      3               Venusaur    Grass  Poison  80      82       83   
4      3  VenusaurMega Venusaur    Grass  Poison  80     100      123   
..   ...                    ...      ...     ...  ..     ...      ...   
796  719                Diancie     Rock   Fairy  50     100      150   
797  719    DiancieMega Diancie     Rock   Fairy  50     160      110   
798  720    HoopaHoopa Confined  Psychic   Ghost  80     110       60   
799  720     HoopaHoopa Unbound  Psychic    Dark  80     160       60   
800  721              Volcanion     Fire   Water  80     110      120   

          7        8      9           10         11  
0    Sp. Atk  Sp. Def  Speed  Generation  Legendary  
1         65   

# Reading Data in Pandas

In [72]:
# Inspect the column labels of the frame loaded with the default header handling:
# the labels are the strings found on the first line of the file.
print(
    "I'm the file who contains the headers, and they are: \n",
    df.columns,
)

print("\n -------------------- \n")

# Load the same file again with header=None: the columns are then numbered
# 0..n-1, so the dtype of the column index changes from object to int64.
df_noHeader = pd.read_csv(filepath_or_buffer="pokemon_data.csv", header=None)
print(
    "I'm the one who was dfined with the 'header=None' statement, that's my output: \n",
    df_noHeader.columns,
)

I'm the file who contains the headers, and they are: 
 Index(['#', 'Name', 'Type 1', 'Type 2', 'HP', 'Attack', 'Defense', 'Sp. Atk',
       'Sp. Def', 'Speed', 'Generation', 'Legendary'],
      dtype='object')

 -------------------- 

I'm the one who was dfined with the 'header=None' statement, that's my output: 
 Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], dtype='int64')


In [67]:
# Select a single column by its label; the result is a Series.
print(df.loc[:, "Name"])

print("\n -------------------- \n")

# First five entries of that column (positions 0 to 4).
print(df["Name"].iloc[:5])

print("\n -------------------- \n")

# Several columns at once: pass a list of labels to get a narrower DataFrame
# containing only "Name", "Type 1" and "HP".
print(df.loc[:, ["Name", "Type 1", "HP"]])

0                  Bulbasaur
1                    Ivysaur
2                   Venusaur
3      VenusaurMega Venusaur
4                 Charmander
               ...          
795                  Diancie
796      DiancieMega Diancie
797      HoopaHoopa Confined
798       HoopaHoopa Unbound
799                Volcanion
Name: Name, Length: 800, dtype: object

 -------------------- 

0                Bulbasaur
1                  Ivysaur
2                 Venusaur
3    VenusaurMega Venusaur
4               Charmander
Name: Name, dtype: object
                      Name   Type 1  HP
0                Bulbasaur    Grass  45
1                  Ivysaur    Grass  60
2                 Venusaur    Grass  80
3    VenusaurMega Venusaur    Grass  80
4               Charmander     Fire  39
..                     ...      ...  ..
795                Diancie     Rock  50
796    DiancieMega Diancie     Rock  50
797    HoopaHoopa Confined  Psychic  80
798     HoopaHoopa Unbound  Psychic  80
799             

In [79]:
# Read a single row by position.
# iloc = "integer location"; positions start at 0, so iloc[1] is the second row
# (one row, returned as a Series whose index is the column names).
print(df.iloc[1])

print("\n -------------------- \n")

# Multiple rows: a positional slice excludes its end, so 1:4 yields rows 1, 2 and 3.
print(df.iloc[1:4])

print("\n -------------------- \n")

# Read a specific location (row, column); both are 0-based positions.
print(df.iloc[2, 1]) # row 2, column 1 -> the "Name" of the third row

print("\n -------------------- \n")

# Iterate over the rows.
# itertuples() yields one lightweight namedtuple per row and is used here instead
# of iterrows(), which builds a full Series for every row and is noticeably slower.
# Row-by-row loops are a last resort in pandas: prefer column-wise (vectorised)
# operations whenever the task allows it.
for row in df.itertuples():
    # row.Index is the row label, row.Name the value of the "Name" column.
    print(row.Index, row.Name)

print("\n -------------------- \n")

# Filter rows with a boolean mask: keep only the rows whose "Type 1" is "Fire".
firePokemons = df.loc[df['Type 1'] == "Fire"]
print(firePokemons)

print("\n -------------------- \n")

# Same filter for the "Grass" type.
grassPokemons = df.loc[df['Type 1'] == "Grass"]
print(grassPokemons)


#                   2
Name          Ivysaur
Type 1          Grass
Type 2         Poison
HP                 60
Attack             62
Defense            63
Sp. Atk            80
Sp. Def            80
Speed              60
Generation          1
Legendary       False
Name: 1, dtype: object

 -------------------- 

   #                   Name Type 1  Type 2  HP  Attack  Defense  Sp. Atk  \
1  2                Ivysaur  Grass  Poison  60      62       63       80   
2  3               Venusaur  Grass  Poison  80      82       83      100   
3  3  VenusaurMega Venusaur  Grass  Poison  80     100      123      122   

   Sp. Def  Speed  Generation  Legendary  
1       80     60           1      False  
2      100     80           1      False  
3      120     80           1      False  

 -------------------- 

Venusaur

 -------------------- 

0 Bulbasaur
1 Ivysaur
2 Venusaur
3 VenusaurMega Venusaur
4 Charmander
5 Charmeleon
6 Charizard
7 CharizardMega Charizard X
8 CharizardMega Charizard Y
9