# Lesson 18c: Data Series - continuation 

## Import libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math as math

## Load the data

In [7]:
# "usecols" selects the columns to read and "index_col" chooses which of them
# becomes the index (so the index column must also be listed in "usecols").
# squeeze("columns") turns the resulting one-column DataFrame into a Series;
# naming the axis keeps the result a Series even if the file had a single row.
ser_1 = pd.read_csv("pokemon.csv", usecols=["Attack", "#"], index_col="#").squeeze("columns")

In [9]:
# The index of ser_1 holds integers, so a plain [2:7] is easy to misread as
# "labels 2 to 7". It is a positional slice (rows 2..6); .iloc says so explicitly.
ser_1.iloc[2:7]

#
3     82
3    100
4     52
5     64
6     84
Name: Attack, dtype: int64

In [12]:
# Attack values indexed by the "Name" column.
# squeeze("columns") converts the one-column DataFrame into a Series and,
# unlike a bare squeeze(), never collapses a single-row result to a scalar.
ser_2 = pd.read_csv("pokemon.csv", usecols=["Name", "Attack"], index_col="Name").squeeze("columns")
ser_2.head()

Name
Bulbasaur                 49
Ivysaur                   62
Venusaur                  82
VenusaurMega Venusaur    100
Charmander                52
Name: Attack, dtype: int64

In [13]:
# count() returns the number of non-missing values in the series.
ser_2.count()

800

In [19]:
# value_counts() shows how many times each distinct value occurs, most frequent first.
ser_2.value_counts()

100    40
65     39
50     37
80     37
85     33
       ..
46      1
190     1
106     1
132     1
33      1
Name: Attack, Length: 111, dtype: int64

In [16]:
ser_2.idxmin()  # Finding the index label for which the value is minimal.

'Chansey'

In [21]:
ser_2.loc[ser_2.idxmin()]  # Looking up the value stored under the label returned by idxmin().

5

In [20]:
ser_2.idxmax()  # Finding the index label for which the value is maximal.

'MewtwoMega Mewtwo X'

In [22]:
ser_2.loc[ser_2.idxmax()]  # Looking up the value stored under the label returned by idxmax().

190

In [23]:
# Arithmetic mean of the values.
ser_2.mean()

79.00125

In [25]:
# Median (middle value) of the values.
ser_2.median()

75.0

In [26]:
# Standard deviation; pandas computes the sample version (ddof=1) by default.
ser_2.std()

32.45736586949843

## Data modification

In [27]:
# A look at the first values before we start modifying them.
ser_2.head()

Name
Bulbasaur                 49
Ivysaur                   62
Venusaur                  82
VenusaurMega Venusaur    100
Charmander                52
Name: Attack, dtype: int64

In [29]:
# Arithmetic on a series is applied to every element; here each value is scaled by 100.
ser_2_mult = ser_2.mul(100)
ser_2_mult

Name
Bulbasaur                 4900
Ivysaur                   6200
Venusaur                  8200
VenusaurMega Venusaur    10000
Charmander                5200
                         ...  
Diancie                  10000
DiancieMega Diancie      16000
HoopaHoopa Confined      11000
HoopaHoopa Unbound       16000
Volcanion                11000
Name: Attack, Length: 800, dtype: int64

In [33]:
# Working not with numbers but with categorical (text) values:
# squeeze("columns") converts the one-column DataFrame into a Series and,
# unlike a bare squeeze(), never collapses a single-row result to a scalar.

ser_3 = pd.read_csv("pokemon.csv", usecols=["Type 1", "Name"], index_col="Name").squeeze("columns")
ser_3.head()

Name
Bulbasaur                Grass
Ivysaur                  Grass
Venusaur                 Grass
VenusaurMega Venusaur    Grass
Charmander                Fire
Name: Type 1, dtype: object

In [35]:
# When the values are strings rather than numbers, string methods are reached
# through the ".str" accessor placed after the series.
# Then we can modify the values, for example change all letters to upper case:

ser_3.str.upper()

Name
Bulbasaur                  GRASS
Ivysaur                    GRASS
Venusaur                   GRASS
VenusaurMega Venusaur      GRASS
Charmander                  FIRE
                          ...   
Diancie                     ROCK
DiancieMega Diancie         ROCK
HoopaHoopa Confined      PSYCHIC
HoopaHoopa Unbound       PSYCHIC
Volcanion                   FIRE
Name: Type 1, Length: 800, dtype: object

In [39]:
# To put the same prefix in front of every value, add a plain string to the series;
# the "+" is applied to each element:

"TYPE:" + ser_3.str.upper()

Name
Bulbasaur                  TYPE:GRASS
Ivysaur                    TYPE:GRASS
Venusaur                   TYPE:GRASS
VenusaurMega Venusaur      TYPE:GRASS
Charmander                  TYPE:FIRE
                             ...     
Diancie                     TYPE:ROCK
DiancieMega Diancie         TYPE:ROCK
HoopaHoopa Confined      TYPE:PSYCHIC
HoopaHoopa Unbound       TYPE:PSYCHIC
Volcanion                   TYPE:FIRE
Name: Type 1, Length: 800, dtype: object

In [42]:
# How many entries there are of each type, most frequent first.
ser_3.value_counts()

Water       112
Normal       98
Grass        70
Bug          69
Psychic      57
Fire         52
Electric     44
Rock         44
Dragon       32
Ground       32
Ghost        32
Dark         31
Poison       28
Steel        27
Fighting     27
Ice          24
Fairy        17
Flying        4
Name: Type 1, dtype: int64

In [51]:
# To turn some values of the series into something different we can define a function:

def Replace(old_word):
    """Return "Nature" for the types "Grass" and "Ground"; pass anything else through.

    Parameters
    ----------
    old_word : the value to translate (normally a string).

    Returns
    -------
    "Nature" when old_word equals "Grass" or "Ground", otherwise old_word unchanged.
    """
    return "Nature" if old_word in ("Grass", "Ground") else old_word

In [53]:
# Quick check of the function on a single value.
Replace("Ground")

'Nature'

In [54]:
# To run this newly defined function on every value of the series we use the method apply();
# it calls the function once per element and returns a new series with the results:
ser_3.apply(Replace)

Name
Bulbasaur                 Nature
Ivysaur                   Nature
Venusaur                  Nature
VenusaurMega Venusaur     Nature
Charmander                  Fire
                          ...   
Diancie                     Rock
DiancieMega Diancie         Rock
HoopaHoopa Confined      Psychic
HoopaHoopa Unbound       Psychic
Volcanion                   Fire
Name: Type 1, Length: 800, dtype: object

In [56]:
# Now we again want to change the letters of the values into upper case,
# this time with a "lambda" expression: instead of defining a named function
# beforehand, the small anonymous function is written directly inside apply().
# Note: the lambda calls .upper() on every element, so it only works while all
# values are strings.

ser_3.apply(lambda word: word.upper())

Name
Bulbasaur                  GRASS
Ivysaur                    GRASS
Venusaur                   GRASS
VenusaurMega Venusaur      GRASS
Charmander                  FIRE
                          ...   
Diancie                     ROCK
DiancieMega Diancie         ROCK
HoopaHoopa Confined      PSYCHIC
HoopaHoopa Unbound       PSYCHIC
Volcanion                   FIRE
Name: Type 1, Length: 800, dtype: object

## Map method

In [62]:
# map() behaves like a lookup (similar to a JOIN in SQL).

# Two series are needed. The first one holds a number for each name:

data_1 = [3, 3, 4, 1, 3, 2]
index_1 = ["Andy", "Bod", "Kate", "Dirk", "Emma", "Greg"]

team = pd.Series(data=data_1, index=index_1)
team

Andy    3
Bod     3
Kate    4
Dirk    1
Emma    3
Greg    2
dtype: int64

In [60]:
# The second series is the lookup table: its index holds the numbers and
# its values hold the note that belongs to each number.
data_2 = ["C", "B", "A", "A+"]
index_2 = [1, 2, 3, 4]

notes = pd.Series(data=data_2, index=index_2)
notes

1     C
2     B
3     A
4    A+
dtype: object

In [64]:
team.map(notes)  # Each value of "team" is looked up in the index of "notes" and replaced by the matching note.

# Instead of the series "notes" I could pass a dictionary (number -> note) and I would get the same result.

Andy     A
Bod      A
Kate    A+
Dirk     C
Emma     A
Greg     B
dtype: object