In [8]:
import pandas as pd



# A Series is a kind of mix between a Python list and a dictionary

## Generate Series from a list

In [4]:
# Building a Series from a list yields an object holding (index, value) pairs.
# With no index given, pandas assigns a zero-based integer index.
# The resulting dtype is `object`, which is how these Python strings are stored.
weekdays = [
    'Monday',
    'Tuesday',
    'Wednesday',
    'Thursday',
    'Friday',
]
pd.Series(data=weekdays)


0       Monday
1      Tuesday
2    Wednesday
3     Thursday
4       Friday
dtype: object

In [5]:
numbers = [1, 5, 65, 78]
pd.Series(numbers)

0     1
1     5
2    65
3    78
dtype: int64

In [6]:
x = [True, False, True, False, True]
pd.Series(x)

0     True
1    False
2     True
3    False
4     True
dtype: bool

## Generate Series from Dictionary

In [7]:
# When a Series is built from a dict, the dict keys become the index labels
# (instead of 0, 1, 2, ...) and the dict values become the Series values.
d = {
    "Mon": 1223,
    "Tue": 3432,
    "Wed": 2213,
}
pd.Series(d)

Mon    1223
Tue    3432
Wed    2213
dtype: int64

## Some Attributes


In [8]:
weekdays = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']
s = pd.Series(weekdays)
# returns an array of values
s.values


array(['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday'],
      dtype=object)

In [9]:
# index is a RangeIndex that starts at 0, stops before 5 (stop is exclusive), and increments by 1
s.index


RangeIndex(start=0, stop=5, step=1)

In [10]:
# dtype attribute
# dtype('O') is short for Object
s.dtype

dtype('O')

In [39]:
# returns whether all the values are unique
s.is_unique

True

In [40]:
# dimensions
s.ndim

1

In [41]:
# shape is a tuple of axis lengths; a Series has a single axis, so it is (n_rows,) with no column count
s.shape

(5,)

In [42]:
# get size of series
s.size

5

In [44]:
# name is a settable attribute: assign a name to the series, then read it back
s.name = 'MyName'
s.name

'MyName'

## Some methods


In [13]:
prices = [1.45, 2.42, 2.55, 3.14, 5.33]
s = pd.Series(prices)
s

0    1.45
1    2.42
2    2.55
3    3.14
4    5.33
dtype: float64

In [14]:
# get sum of all values in series
s.sum()

6.42

In [15]:
# get a product of all values
s.product()

8.947949999999999

In [16]:
# get average of all values
s.mean()

2.14

In [17]:
# get median of all values
s.median()


2.55

In [18]:
# A fuller example: supply the values, the index labels, the dtype and the
# name of the Series explicitly through keyword arguments.
weekdays = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday']
data = [1.45, 2.42, 2.55, 3.14, 5.33]
s = pd.Series(
    data=data,        # the values
    index=weekdays,   # one label per value, in the same order
    dtype=float,
    name="Price",
)
s

Monday       1.45
Tuesday      2.42
Wednesday    2.55
Thursday     3.14
Friday       5.33
Name: Price, dtype: float64

## Read csv

In [20]:
# Use read_csv to import only the 'Name' column, then squeeze the resulting
# one-column DataFrame into a Series.
# (read_csv's squeeze=True keyword was deprecated in pandas 1.4 and removed in
# 2.0; DataFrame.squeeze("columns") is the replacement.)
s = pd.read_csv("data/pokemon.csv", usecols=['Name']).squeeze("columns")
s

0                  Bulbasaur
1                    Ivysaur
2                   Venusaur
3      VenusaurMega Venusaur
4                 Charmander
               ...          
795                  Diancie
796      DiancieMega Diancie
797      HoopaHoopa Confined
798       HoopaHoopa Unbound
799                Volcanion
Name: Name, Length: 800, dtype: object

In [22]:
# Load the stock-price file and squeeze the resulting DataFrame along the
# column axis so that a single-column result becomes a Series.
# (read_csv's squeeze=True keyword was deprecated in pandas 1.4 and removed in
# 2.0; DataFrame.squeeze("columns") is the replacement.)
s = pd.read_csv('data/google_stock_price.csv').squeeze("columns")
s


0        50.12
1        54.10
2        54.65
3        52.38
4        52.95
         ...  
3007    772.88
3008    771.07
3009    773.18
3010    771.61
3011    782.22
Name: Stock Price, Length: 3012, dtype: float64

In [26]:
# return a new series with the first 5 rows
s.head()

0    50.12
1    54.10
2    54.65
3    52.38
4    52.95
Name: Stock Price, dtype: float64

In [27]:
# return a new series with last 8 rows
s.tail(8)

3004    741.77
3005    745.91
3006    768.79
3007    772.88
3008    771.07
3009    773.18
3010    771.61
3011    782.22
Name: Stock Price, dtype: float64

In [28]:
# get the mean of last 8 values
s.tail(8).mean()


765.92875

## Interaction with Python functions

In [32]:
# Read only the 'Name' column and squeeze the one-column DataFrame into a Series.
# (read_csv's squeeze=True keyword was deprecated in pandas 1.4 and removed in
# 2.0; DataFrame.squeeze("columns") is the replacement.)
s = pd.read_csv("data/pokemon.csv", usecols=['Name']).squeeze("columns")
# get type
type(s)

pandas.core.series.Series

In [33]:
# get length of series
len(s)

800

In [31]:
# use the built-in dir() function to list the attribute and method names of the object
dir(s)

['T',
 '_AXIS_ALIASES',
 '_AXIS_IALIASES',
 '_AXIS_LEN',
 '_AXIS_NAMES',
 '_AXIS_NUMBERS',
 '_AXIS_ORDERS',
 '_AXIS_REVERSED',
 '_HANDLED_TYPES',
 '__abs__',
 '__add__',
 '__and__',
 '__annotations__',
 '__array__',
 '__array_priority__',
 '__array_ufunc__',
 '__array_wrap__',
 '__bool__',
 '__class__',
 '__contains__',
 '__copy__',
 '__deepcopy__',
 '__delattr__',
 '__delitem__',
 '__dict__',
 '__dir__',
 '__div__',
 '__divmod__',
 '__doc__',
 '__eq__',
 '__finalize__',
 '__float__',
 '__floordiv__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__iand__',
 '__ifloordiv__',
 '__imod__',
 '__imul__',
 '__init__',
 '__init_subclass__',
 '__int__',
 '__invert__',
 '__ior__',
 '__ipow__',
 '__isub__',
 '__iter__',
 '__itruediv__',
 '__ixor__',
 '__le__',
 '__len__',
 '__long__',
 '__lt__',
 '__matmul__',
 '__mod__',
 '__module__',
 '__mul__',
 '__ne__',
 '__neg__',
 '__new__',
 '__nonzero__',
 '__or__

In [34]:
# convert series into a dict
dict(s)


{0: 'Bulbasaur',
 1: 'Ivysaur',
 2: 'Venusaur',
 3: 'VenusaurMega Venusaur',
 4: 'Charmander',
 5: 'Charmeleon',
 6: 'Charizard',
 7: 'CharizardMega Charizard X',
 8: 'CharizardMega Charizard Y',
 9: 'Squirtle',
 10: 'Wartortle',
 11: 'Blastoise',
 12: 'BlastoiseMega Blastoise',
 13: 'Caterpie',
 14: 'Metapod',
 15: 'Butterfree',
 16: 'Weedle',
 17: 'Kakuna',
 18: 'Beedrill',
 19: 'BeedrillMega Beedrill',
 20: 'Pidgey',
 21: 'Pidgeotto',
 22: 'Pidgeot',
 23: 'PidgeotMega Pidgeot',
 24: 'Rattata',
 25: 'Raticate',
 26: 'Spearow',
 27: 'Fearow',
 28: 'Ekans',
 29: 'Arbok',
 30: 'Pikachu',
 31: 'Raichu',
 32: 'Sandshrew',
 33: 'Sandslash',
 34: 'Nidoran♀',
 35: 'Nidorina',
 36: 'Nidoqueen',
 37: 'Nidoran♂',
 38: 'Nidorino',
 39: 'Nidoking',
 40: 'Clefairy',
 41: 'Clefable',
 42: 'Vulpix',
 43: 'Ninetales',
 44: 'Jigglypuff',
 45: 'Wigglytuff',
 46: 'Zubat',
 47: 'Golbat',
 48: 'Oddish',
 49: 'Gloom',
 50: 'Vileplume',
 51: 'Paras',
 52: 'Parasect',
 53: 'Venonat',
 54: 'Venomoth',
 55: 'D

In [35]:
# convert series to list
list(s)


['Bulbasaur',
 'Ivysaur',
 'Venusaur',
 'VenusaurMega Venusaur',
 'Charmander',
 'Charmeleon',
 'Charizard',
 'CharizardMega Charizard X',
 'CharizardMega Charizard Y',
 'Squirtle',
 'Wartortle',
 'Blastoise',
 'BlastoiseMega Blastoise',
 'Caterpie',
 'Metapod',
 'Butterfree',
 'Weedle',
 'Kakuna',
 'Beedrill',
 'BeedrillMega Beedrill',
 'Pidgey',
 'Pidgeotto',
 'Pidgeot',
 'PidgeotMega Pidgeot',
 'Rattata',
 'Raticate',
 'Spearow',
 'Fearow',
 'Ekans',
 'Arbok',
 'Pikachu',
 'Raichu',
 'Sandshrew',
 'Sandslash',
 'Nidoran♀',
 'Nidorina',
 'Nidoqueen',
 'Nidoran♂',
 'Nidorino',
 'Nidoking',
 'Clefairy',
 'Clefable',
 'Vulpix',
 'Ninetales',
 'Jigglypuff',
 'Wigglytuff',
 'Zubat',
 'Golbat',
 'Oddish',
 'Gloom',
 'Vileplume',
 'Paras',
 'Parasect',
 'Venonat',
 'Venomoth',
 'Diglett',
 'Dugtrio',
 'Meowth',
 'Persian',
 'Psyduck',
 'Golduck',
 'Mankey',
 'Primeape',
 'Growlithe',
 'Arcanine',
 'Poliwag',
 'Poliwhirl',
 'Poliwrath',
 'Abra',
 'Kadabra',
 'Alakazam',
 'AlakazamMega Alakaz

In [47]:
# sort values using series
# sort_values returns a new series
s.sort_values(ascending=False).tail()

393            AbsolMega Absol
392                      Absol
68                        Abra
511    AbomasnowMega Abomasnow
510                  Abomasnow
Name: Name, dtype: object

In [36]:
# sort series
sorted(s)

['Abomasnow',
 'AbomasnowMega Abomasnow',
 'Abra',
 'Absol',
 'AbsolMega Absol',
 'Accelgor',
 'AegislashBlade Forme',
 'AegislashShield Forme',
 'Aerodactyl',
 'AerodactylMega Aerodactyl',
 'Aggron',
 'AggronMega Aggron',
 'Aipom',
 'Alakazam',
 'AlakazamMega Alakazam',
 'Alomomola',
 'Altaria',
 'AltariaMega Altaria',
 'Amaura',
 'Ambipom',
 'Amoonguss',
 'Ampharos',
 'AmpharosMega Ampharos',
 'Anorith',
 'Arbok',
 'Arcanine',
 'Arceus',
 'Archen',
 'Archeops',
 'Ariados',
 'Armaldo',
 'Aromatisse',
 'Aron',
 'Articuno',
 'Audino',
 'AudinoMega Audino',
 'Aurorus',
 'Avalugg',
 'Axew',
 'Azelf',
 'Azumarill',
 'Azurill',
 'Bagon',
 'Baltoy',
 'Banette',
 'BanetteMega Banette',
 'Barbaracle',
 'Barboach',
 'Basculin',
 'Bastiodon',
 'Bayleef',
 'Beartic',
 'Beautifly',
 'Beedrill',
 'BeedrillMega Beedrill',
 'Beheeyem',
 'Beldum',
 'Bellossom',
 'Bellsprout',
 'Bergmite',
 'Bibarel',
 'Bidoof',
 'Binacle',
 'Bisharp',
 'Blastoise',
 'BlastoiseMega Blastoise',
 'Blaziken',
 'BlazikenMe

In [37]:
# built-in max() returns the largest value; strings compare lexicographically,
# so for this series it is the name that sorts last (for numbers, the numeric maximum)
max(s)


'Zygarde50% Forme'

In [38]:
# get an array of values
s.values

array(['Bulbasaur', 'Ivysaur', 'Venusaur', 'VenusaurMega Venusaur',
       'Charmander', 'Charmeleon', 'Charizard',
       'CharizardMega Charizard X', 'CharizardMega Charizard Y',
       'Squirtle', 'Wartortle', 'Blastoise', 'BlastoiseMega Blastoise',
       'Caterpie', 'Metapod', 'Butterfree', 'Weedle', 'Kakuna',
       'Beedrill', 'BeedrillMega Beedrill', 'Pidgey', 'Pidgeotto',
       'Pidgeot', 'PidgeotMega Pidgeot', 'Rattata', 'Raticate', 'Spearow',
       'Fearow', 'Ekans', 'Arbok', 'Pikachu', 'Raichu', 'Sandshrew',
       'Sandslash', 'Nidoran♀', 'Nidorina', 'Nidoqueen', 'Nidoran♂',
       'Nidorino', 'Nidoking', 'Clefairy', 'Clefable', 'Vulpix',
       'Ninetales', 'Jigglypuff', 'Wigglytuff', 'Zubat', 'Golbat',
       'Oddish', 'Gloom', 'Vileplume', 'Paras', 'Parasect', 'Venonat',
       'Venomoth', 'Diglett', 'Dugtrio', 'Meowth', 'Persian', 'Psyduck',
       'Golduck', 'Mankey', 'Primeape', 'Growlithe', 'Arcanine',
       'Poliwag', 'Poliwhirl', 'Poliwrath', 'Abra', 'Kadabra',

In [37]:
# get an array of index values
s.index


## apply, map, applymap

In [17]:
# Load the stock prices and squeeze the resulting DataFrame along the column
# axis so that a single-column result becomes a Series.
# (read_csv's squeeze=True keyword was deprecated in pandas 1.4 and removed in
# 2.0; DataFrame.squeeze("columns") is the replacement.)
google = pd.read_csv('data/google_stock_price.csv').squeeze("columns")

# apply calls the function / lambda once per value and returns a new series
google.apply(lambda stock_price: stock_price + 1).head()  # increment each value by 1


0    51.12
1    55.10
2    55.65
3    53.38
4    53.95
Name: Stock Price, dtype: float64

In [16]:
# map also calls the function once per value; here each price is increased by 2
google.map(lambda stock_price: stock_price + 2).head()

0    52.12
1    56.10
2    56.65
3    54.38
4    54.95
Name: Stock Price, dtype: float64

## inplace Parameter


In [13]:
# Reload both data sets; squeeze("columns") turns a one-column DataFrame into a Series.
# (read_csv's squeeze=True keyword was deprecated in pandas 1.4 and removed in
# 2.0; DataFrame.squeeze("columns") is the replacement.)
pokemon = pd.read_csv("data/pokemon.csv", usecols=['Name']).squeeze("columns")
google = pd.read_csv('data/google_stock_price.csv').squeeze("columns")

In [60]:
google.head()

0    50.12
1    54.10
2    54.65
3    52.38
4    52.95
Name: Stock Price, dtype: float64

In [61]:
# sort_values returns a new sorted series; the result is not assigned here,
# so google itself keeps its original order
google.sort_values()
google.head()

0    50.12
1    54.10
2    54.65
3    52.38
4    52.95
Name: Stock Price, dtype: float64

In [62]:
# setting inplace True will sort the original series
google.sort_values(inplace=True)
google.head()

11    49.95
9     50.07
0     50.12
10    50.70
12    50.74
Name: Stock Price, dtype: float64

## Python's `in` keyword to check if a value exists


In [66]:
# Reload both data sets; squeeze("columns") turns a one-column DataFrame into a Series.
# (read_csv's squeeze=True keyword was deprecated in pandas 1.4 and removed in
# 2.0; DataFrame.squeeze("columns") is the replacement.)
pokemon = pd.read_csv("data/pokemon.csv", usecols=['Name']).squeeze("columns")
google = pd.read_csv('data/google_stock_price.csv').squeeze("columns")

In [67]:
# by default in will look in index
100 in pokemon

True

In [69]:
"Ivysaur" in pokemon

False

In [70]:
# use values attribute to search in values
"Ivysaur" in pokemon.values

True

## extract value by index


In [71]:
# get the value at index 3
pokemon[3]


'VenusaurMega Venusaur'

In [73]:
# get multiple values (pass an array of index)
pokemon[[1, 4, 500]]

1         Ivysaur
4      Charmander
500     Hippowdon
Name: Name, dtype: object

In [74]:
# get by range of index
pokemon[10:25]

10                  Wartortle
11                  Blastoise
12    BlastoiseMega Blastoise
13                   Caterpie
14                    Metapod
15                 Butterfree
16                     Weedle
17                     Kakuna
18                   Beedrill
19      BeedrillMega Beedrill
20                     Pidgey
21                  Pidgeotto
22                    Pidgeot
23        PidgeotMega Pidgeot
24                    Rattata
Name: Name, dtype: object

In [75]:
# get everything up to an index
pokemon[:8]


0                    Bulbasaur
1                      Ivysaur
2                     Venusaur
3        VenusaurMega Venusaur
4                   Charmander
5                   Charmeleon
6                    Charizard
7    CharizardMega Charizard X
Name: Name, dtype: object

In [76]:
# a negative start counts from the end: get the last 10 values
pokemon[-10:]

790                 Noibat
791                Noivern
792                Xerneas
793                Yveltal
794       Zygarde50% Forme
795                Diancie
796    DiancieMega Diancie
797    HoopaHoopa Confined
798     HoopaHoopa Unbound
799              Volcanion
Name: Name, dtype: object

## get series by index label

In [10]:
# Read the 'Name' and 'Type 1' columns, use 'Name' as the index, and squeeze the
# remaining single data column into a Series.
# (read_csv's squeeze=True keyword was deprecated in pandas 1.4 and removed in
# 2.0; DataFrame.squeeze("columns") is the replacement.)
pokemon = pd.read_csv("data/pokemon.csv", index_col='Name', usecols=['Name', 'Type 1']).squeeze("columns")
pokemon.head()

Name
Bulbasaur                Grass
Ivysaur                  Grass
Venusaur                 Grass
VenusaurMega Venusaur    Grass
Charmander                Fire
Name: Type 1, dtype: object

In [81]:
# use index label to get value
pokemon["Charmander"]

'Fire'

In [82]:
pokemon[["Ivysaur", "Charmander"]]

Name
Ivysaur       Grass
Charmander     Fire
Name: Type 1, dtype: object

In [2]:
# will generate an exception if label doesn't exist
# https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#deprecate-loc-reindex-listlike
try:
    pokemon[["Ivysaur", "Charmander", "hahahah"]]
except KeyError as ke:
    print(ke)
    print("one of the index labels doesn't exist")



NameError: name 'pokemon' is not defined

In [92]:
# get values by range
pokemon['Bulbasaur': 'Charizard']

Name
Bulbasaur                Grass
Ivysaur                  Grass
Venusaur                 Grass
VenusaurMega Venusaur    Grass
Charmander                Fire
Charmeleon                Fire
Charizard                 Fire
Name: Type 1, dtype: object

## value using get method

In [93]:
# use get method
pokemon.get('Bulbasaur')

'Grass'

In [95]:
# with get, if key doesn't exist it returns None
x = pokemon.get('hahaaha')
print(x)  # None

None


In [98]:
pokemon.get(['Bulbasaur', 'Charizard'])


Name
Bulbasaur    Grass
Charizard     Fire
Name: Type 1, dtype: object

In [101]:
# even if one of the index labels is invalid, the entire result is None
x = pokemon.get(['Bulbasaur', 'Charizard', 'hahahah'])
print(x)

None


In [104]:
# pass default= to choose what get returns (instead of None) when a label is missing
pokemon.get(['Bulbasaur', 'Charizard', 'hahahah'], default='no!!!!')


'no!!!!'

## count vs len (count will ignore missing values)

## describe method

In [107]:
google.describe()

count    3012.000000
mean      334.310093
std       173.187205
min        49.950000
25%       218.045000
50%       283.315000
75%       443.000000
max       782.220000
Name: Stock Price, dtype: float64

## idxmax, idxmin

In [108]:
google.idxmax()

3011

In [109]:
google.idxmin()

11

In [110]:
# get min value using idxmin
google[google.idxmin()]

49.95

## value_counts

In [12]:
pokemon.value_counts()

Water       112
Normal       98
Grass        70
Bug          69
Psychic      57
Fire         52
Rock         44
Electric     44
Ground       32
Ghost        32
Dragon       32
Dark         31
Poison       28
Fighting     27
Steel        27
Ice          24
Fairy        17
Flying        4
Name: Type 1, dtype: int64