# The Series

In [2]:
import pandas as pd

In [3]:
# IPython help syntax: a trailing `?` shows the docstring for pd.Series.
pd.Series?

In [4]:
# A list of strings becomes a Series with dtype `object` and a default
# integer index starting at 0.
animals = ['Tiger', 'Bear', 'Dog']
pd.Series(data=animals)

0    Tiger
1     Bear
2      Dog
dtype: object

In [5]:
# A list of whole numbers becomes a Series with dtype int64.
numbers = [1, 2, 3]
pd.Series(data=numbers)

0    1
1    2
2    3
dtype: int64

In [6]:
# In a Series of strings (dtype object), a missing value stays as None.
animals = ['Dog', 'Cat', None]
pd.Series(data=animals)

0     Dog
1     Cat
2    None
dtype: object

In [7]:
# In a numeric Series, None is converted to NaN and the dtype becomes float64.
numbers = [1, 2, None]
pd.Series(data=numbers)

0    1.0
1    2.0
2    NaN
dtype: float64

In [8]:
import numpy as np

In [9]:
np.nan == None  # NaN is not equal to None, so this comparison is False

False

In [10]:
np.nan == np.nan  # NaN never compares equal, not even to itself

False

In [12]:
np.isnan(np.nan)  # to test for NaN, use np.isnan() instead of ==

True

In [15]:
# Build a Series from a dict: the keys become the index labels and the
# values become the data.
sports = dict(
    Archery='Bhutan',
    Golf='Scotland',
    Sumo='Japan',
    Taekwondo='South Korea',
)
s = pd.Series(data=sports)
s

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [16]:
# The dict keys are now the index labels of the Series.
s.index

Index(['Archery', 'Golf', 'Sumo', 'Taekwondo'], dtype='object')

In [19]:
# The index labels can be supplied explicitly through the `index` parameter.
s2 = pd.Series(
    data=['Tiger', 'Bear', 'Moose'],
    index=['India', 'America', 'Canada'],
)
s2

India      Tiger
America     Bear
Canada     Moose
dtype: object

In [20]:
# Show the source dict again for comparison with the next cell.
sports

{'Archery': 'Bhutan',
 'Golf': 'Scotland',
 'Sumo': 'Japan',
 'Taekwondo': 'South Korea'}

In [23]:
# When a dict and an explicit index are both given, only the listed labels
# are kept. 'Hockey' is not a key of `sports`, so its value is NaN.
s3 = pd.Series(data=sports, index=['Golf', 'Sumo', 'Hockey'])
s3

Golf      Scotland
Sumo         Japan
Hockey         NaN
dtype: object

# Querying a Series

In [24]:
# Rebuild the sports Series used by the lookup examples in this section.
sports = dict(
    Archery='Bhutan',
    Golf='Scotland',
    Sumo='Japan',
    Taekwondo='South Korea',
)

s = pd.Series(data=sports)
s

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [25]:
s.iloc[3]  # position-based lookup: the element at position 3 (the fourth)

'South Korea'

In [26]:
s.loc['Golf']  # label-based lookup: the element whose index label is 'Golf'

'Scotland'

In [29]:
# Plain s[3] only behaves like .iloc[3] here because this index holds
# strings. With integer labels pandas treats the key as a label instead, and
# the positional fallback is deprecated in pandas 2.x. Use .iloc[] whenever
# you mean "by position".
s.iloc[3]

'South Korea'

In [28]:
# Plain [] with a label gives the same result as .loc['Golf'].
s['Golf']

'Scotland'

In [30]:
# Same countries, now keyed by integers, so the index labels are 99..102.
sports = {
    99: 'Bhutan',
    100: 'Scotland',
    101: 'Japan',
    102: 'South Korea',
}
s = pd.Series(data=sports)

In [32]:
# s[0] would raise a KeyError here: the index labels are the integers
# 99..102, so 0 is looked up as a label and is not found.
# .iloc[0] is unambiguous and returns the first element by position.
s.iloc[0]

'Bhutan'

In [33]:
# Four floats, so the Series gets dtype float64 and a default 0..3 index.
s = pd.Series(data=[100.0, 120.0, 101.0, 3.0])
s

0    100.0
1    120.0
2    101.0
3      3.0
dtype: float64

In [34]:
# Sum the Series the slow way: iterate over the values in a Python loop
# and accumulate them one at a time.
total = 0
for value in s:
    total = total + value
print(total)

324.0


In [50]:
# np.sum adds up the Series without an explicit Python loop.
# (numpy is already imported as `np` earlier in the notebook, so it is not
# re-imported here.)
total = np.sum(s)
print(total)

324.0


In [38]:
# 10,000 random integers drawn from 0 (inclusive) to 1000 (exclusive).
# NOTE(review): no random seed is set, so the values shown below will differ
# on every run.
s = pd.Series(np.random.randint(0, 1000, 10000))
s.head() # return the first n rows, default is 5

0    906
1    174
2    262
3    754
4    785
dtype: int64

In [39]:
# Number of elements in the Series.
len(s)

10000

In [51]:
# %% specify magic function. timeit times the execution.
%%timeit -n 100 # how many iterations
summary = 0
for item in s:
    summary += item

UsageError: Line magic function `%%timeit` not found.


In [42]:
%%timeit -n 100
# Time the same sum computed by NumPy instead of a Python-level loop.
summary = np.sum(s)

96 µs ± 9.83 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [43]:
# Adds 2 to every element in a single operation.
# Re-running this cell adds 2 again, so the output depends on how many
# times it has been executed.
s += 2
s.head()

0    908
1    176
2    264
3    756
4    787
dtype: int64

In [46]:
# Another approach: update each element through its label.
# Series.items() yields (label, value) pairs; it replaces iteritems(),
# which was removed in pandas 2.0.
for label, value in s.items():
    s.loc[label] = value + 2
s.head()

0    910
1    178
2    266
3    758
4    789
dtype: int64

In [54]:
# Let's time the 2 approaches!
%%timeit -n 10
s = pd.Series(np.random.randint(0, 1000, 10000))
for label, value in s.iteritems():
    s.loc[label] = value + 2

UsageError: Line magic function `%%timeit` not found.


In [52]:
%%timeit -n 10
# Second approach: add 2 to every element in a single operation.
s = pd.Series(np.random.randint(0,1000,10000))
s+=2

395 µs ± 51.6 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [55]:
# Start from a small integer Series with the default 0..2 index.
s = pd.Series(data=[1, 2, 3])

In [56]:
# Three integers: dtype int64, index labels 0..2.
s

0    1
1    2
2    3
dtype: int64

In [57]:
s.loc['Animal'] = 'Dog'  # assigning to a label that doesn't exist yet adds a new entry to s

In [58]:
# The values now mix integers and a string, so the dtype becomes object;
# the index mixes integer and string labels too.
s

0           1
1           2
2           3
Animal    Dog
dtype: object

In [59]:
# Two Series to combine: one with unique labels, and one in which every
# entry shares the same 'Cricket' label (index labels need not be unique).
original_sports = pd.Series({
    'Archery': 'Bhutan',
    'Golf': 'Scotland',
    'Sumo': 'Japan',
    'Taekwondo': 'South Korea',
})

cricket_loving_countries = pd.Series(
    ['Australia', 'Barbados', 'Pakistan', 'England'],
    index=['Cricket'] * 4,
)
original_sports

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [60]:
# Every entry carries the same 'Cricket' index label.
cricket_loving_countries

Cricket    Australia
Cricket     Barbados
Cricket     Pakistan
Cricket      England
dtype: object

In [64]:
# pd.concat doesn't change either Series in place; it returns a new object.
# (Series.append, which behaved the same way, was removed in pandas 2.0.)
all_countries = pd.concat([original_sports, cricket_loving_countries])

In [65]:
# The combined Series holds all eight entries, including the four that share
# the 'Cricket' label.
all_countries

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
Cricket        Australia
Cricket         Barbados
Cricket         Pakistan
Cricket          England
dtype: object

In [66]:
# original_sports still has its four entries: building all_countries did not
# modify it.
original_sports

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [67]:
# cricket_loving_countries was not modified by building all_countries either.
cricket_loving_countries

Cricket    Australia
Cricket     Barbados
Cricket     Pakistan
Cricket      England
dtype: object

In [68]:
# When a label occurs more than once, .loc returns a Series with every
# matching entry rather than a single value.
all_countries.loc['Cricket']

Cricket    Australia
Cricket     Barbados
Cricket     Pakistan
Cricket      England
dtype: object