# The Series Data Structure

In [1]:
import pandas as pd
pd.Series?

In [2]:
animals = ['Tiger', 'Bear', 'Moose']
pd.Series(animals)

0    Tiger
1     Bear
2    Moose
dtype: object

In [3]:
numbers = [1, 2, 3]
pd.Series(numbers)

0    1
1    2
2    3
dtype: int64

In [4]:
animals = ['Tiger', 'Bear', None]
pd.Series(animals)

0    Tiger
1     Bear
2     None
dtype: object

In [5]:
numbers = [1, 2, None]
pd.Series(numbers)

0    1.0
1    2.0
2    NaN
dtype: float64

In [6]:
import numpy as np
np.nan == None

False

In [7]:
np.nan == np.nan

False

In [8]:
np.isnan(np.nan)

True

In [9]:
# Build a Series from a mapping: the keys become the index labels and the
# values become the data.
sports = dict(
    Archery='Bhutan',
    Golf='Scotland',
    Sumo='Japan',
    Taekwondo='South Korea',
)
s = pd.Series(sports)
s

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [10]:
s.index

Index([u'Archery', u'Golf', u'Sumo', u'Taekwondo'], dtype='object')

In [11]:
# Supply the index explicitly: the labels in `index` are paired positionally
# with the values in the list.
s = pd.Series(['Tiger', 'Bear', 'Moose'], index=['India', 'America', 'Canada'])

In [12]:
sports = {'Archery': 'Bhutan',
          'Golf': 'Scotland',
          'Sumo': 'Japan',
          'Taekwondo': 'South Korea'}
s = pd.Series(sports, index=['Golf', 'Sumo', 'Hockey'])
s

Golf      Scotland
Sumo         Japan
Hockey         NaN
dtype: object

# Querying a Series

In [13]:
sports = {'Archery': 'Bhutan',
          'Golf': 'Scotland',
          'Sumo': 'Japan',
          'Taekwondo': 'South Korea'}
s = pd.Series(sports)
s

Archery           Bhutan
Golf            Scotland
Sumo               Japan
Taekwondo    South Korea
dtype: object

In [14]:
s.iloc[3]

'South Korea'

In [15]:
s.loc['Golf']

'Scotland'

In [16]:
s[3]

'South Korea'

In [17]:
s['Golf']

'Scotland'

In [18]:
# Key the countries by consecutive integers starting at 99 so that the
# resulting Series carries an integer index.
sports = dict(enumerate(['Bhutan', 'Scotland', 'Japan', 'South Korea'], start=99))
s = pd.Series(sports)

In [19]:
# s[0]  # Raises KeyError: the index holds the integer labels 99-102, so s[0] looks up the label 0 rather than calling s.iloc[0] as one might expect

In [20]:
s = pd.Series([100.00, 120.00, 101.00, 3.00])
s

0    100.0
1    120.0
2    101.0
3      3.0
dtype: float64

In [21]:
total = 0
for item in s :
    total += item
print(total)

324.0


In [22]:
import numpy as np

total = np.sum(s)
print(total)

324.0


In [23]:
# This creates a big series of random numbers
s = pd.Series(np.random.randint(0, 1000, 10000))
s.head()

0    352
1    321
2    131
3      8
4    444
dtype: int64

In [24]:
len(s)

10000

In [25]:
%%timeit -n 100
summary = 0
for item in s:
    summary += item

100 loops, best of 3: 2.76 ms per loop


In [26]:
%%timeit -n 100
summary = np.sum(s)

100 loops, best of 3: 61.3 µs per loop


In [32]:
s += 2 # Adds two to each item in s using broadcasting
s.head()

0    358
1    327
2    137
3     14
4    450
dtype: int64

In [28]:
# Element-by-element way of adding two to every item, shown for comparison
# with the vectorised `s += 2`.
# `Series.items()` yields (label, value) pairs and `.at` writes a single value
# by label; they stand in for `iteritems()` and `set_value()`, which current
# pandas releases no longer provide.
for label, value in s.items():
    s.at[label] = value + 2
s.head()

0    356
1    325
2    135
3     12
4    448
dtype: int64

In [31]:
%%timeit -n 10
s = pd.Series(np.random.randint(0, 1000, 10000))
# Deliberately slow label-by-label update, timed so it can be compared with
# the vectorised `s += 2`.
# `Series.items()` replaces `iteritems()`, which was removed in pandas 2.0.
for label, value in s.items():
    s.loc[label] = value + 2

10 loops, best of 3: 1.24 s per loop


In [30]:
%%timeit -n 10
s = pd.Series(np.random.randint(0, 1000, 10000))
s += 2

10 loops, best of 3: 387 µs per loop
