Pandas Series //
Create a Series

In [1]:
from pandas import Series, DataFrame
import pandas as pd
import numpy as np

In [2]:
# Create a Series from a one-dimensional NumPy array.
# Like an ndarray, a Series has a single dtype (mixed values are stored as
# dtype=object); here the NaN makes the whole Series float64.
# Without an explicit index, the default labels 0, 1, ..., n-1 are used.
arr = np.array([1, 3, 5, np.nan, 10])   # np.nan: the np.NaN alias was removed in NumPy 2.0
series1 = pd.Series(arr)
series1

0     1.0
1     3.0
2     5.0
3     NaN
4    10.0
dtype: float64

In [3]:
# Build a Series from a mapping: every key is used as an index label and
# every value becomes the element stored under that label.
series2 = pd.Series(dict(a=10, b=20, c=30))
series2

a    10
b    20
c    30
dtype: int64

In [4]:
# Assigning to a label that is not yet in the index appends a new entry,
# so the index may contain gaps (there is no label 5 here).
s = pd.Series([1, 2, 3, 4, 5])
s.loc[6] = 10
s.index

Int64Index([0, 1, 2, 3, 4, 6], dtype='int64')

There are two ways to assign index labels to a Series.
One is to set the index after the Series has been created.
The other is to pass the index when creating the Series.

In [5]:
# Label an existing Series by replacing its index attribute.
series1 = pd.Series([1, 2, 3, 4])
series1.index = list('abcd')
series1

a    1
b    2
c    3
d    4
dtype: int64

In [6]:
# Supply the index labels at construction time.
series1 = pd.Series(
    np.array([1, 2, 4, 3]),
    index=list('mnop'),
    dtype=np.float64,
)
series1

m    1.0
n    2.0
o    4.0
p    3.0
dtype: float64

In [7]:
# Index labels do not have to be unique: 'm' appears twice below.
series2 = pd.Series(
    np.array([1, 2, 4, 3, 77]),
    index=list('mnopm'),
    dtype=np.float64,
)
series2

m     1.0
n     2.0
o     4.0
p     3.0
m    77.0
dtype: float64

In [8]:
# Other ways to create a Series.
# NOTE: a notebook cell echoes only its final expression, so of the three bare
# names in this cell only series5 appears in the output.
# 1) From a NumPy range, using the default integer index.
series3 = pd.Series(np.arange(5))
series3

# 2) From separate value and index arrays; the index labels run 9, 8, 7, 6, 5.
s_values = np.arange(5)
s_index = np.arange(9, 4, -1)
series4 = pd.Series(s_values, index = s_index)
series4

# 3) From a scalar: the value is repeated for every label in the index.
#    Without an index, pd.Series(10) creates only ONE entry.
series5 = pd.Series(10, index = ['a', 'b','c'])
series5

a    10
b    10
c    10
dtype: int64

##### Select value from Series

In [9]:
series1['m']    # label-based selection: returns the value stored under index label 'm'

1.0

In [10]:
series2['m']   # 'm' is a duplicated label, so every matching entry is returned as a Series

m     1.0
m    77.0
dtype: float64

In [11]:
# Positional selection. Every Series also has implicit integer positions
# alongside its labels; .iloc addresses them explicitly. Plain series2[2] on a
# string-labelled Series relies on a positional fallback deprecated in pandas 2.1.
series2.iloc[2]

4.0

In [12]:
# Negative positions count from the end; .iloc makes the positional lookup
# explicit instead of relying on the deprecated integer fallback of [].
series2.iloc[-2]

3.0

In [13]:
series2[:]   # full slice: selects every element of the Series

m     1.0
n     2.0
o     4.0
p     3.0
m    77.0
dtype: float64

In [14]:
series2.iloc[1:3]      # positional slice: the stop position 3 is excluded

n    2.0
o    4.0
dtype: float64

In [15]:
# Same values as before, but with unique labels so a label slice is unambiguous.
series3 = pd.Series(
    np.array([1, 2, 4, 3, 77]),
    index=list('mnops'),
    dtype=np.float64,
)
series3

m     1.0
n     2.0
o     4.0
p     3.0
s    77.0
dtype: float64

In [16]:
series3.loc['m':'p']    # label slice: unlike a positional slice, the end label 'p' is included

m    1.0
n    2.0
o    4.0
p    3.0
dtype: float64

In [17]:
# Small integer Series used by the following operation examples.
series4 = pd.Series(dict(a=-1, b=2, c=3))
series4


a   -1
b    2
c    3
dtype: int64

Most NumPy array operations also work on a pd.Series.

In [18]:
# Boolean-mask selection: keep only the entries whose value is greater than 1.
series4[series4 > 1]

b    2
c    3
dtype: int64

In [19]:
# Scalar arithmetic is applied to every element; the index labels are kept.
series5 = series4 + 1
series5

a    0
b    3
c    4
dtype: int64

In [20]:
# Element-wise division; the integer Series yields a float64 result.
series4 / 10

a   -0.1
b    0.2
c    0.3
dtype: float64

In [21]:
# NumPy functions apply element-wise and return a Series with the same index.
np.exp(series4)

a     0.367879
b     7.389056
c    20.085537
dtype: float64

In [22]:
# Element-wise absolute value; np.fabs returns floats even for integer input.
np.fabs(series4)

a    1.0
b    2.0
c    3.0
dtype: float64

In [23]:
# Reductions such as median() collapse the Series to a single scalar.
series4.median()

2.0

In [24]:
series4.get('c', 88)   # label 'c' exists, so its value is returned and the default 88 is ignored

3

In [25]:
series4.get('d',99)    # label 'd' does not exist, so get() returns the default 99 (result discarded here, not echoed)
series4                # get() never modifies the Series: series4 is unchanged

a   -1
b    2
c    3
dtype: int64

In [26]:
# Work on an independent copy so that relabelling it leaves series4 untouched.
series5 = series4.copy()     # copy: updates on series5 have no impact on the original series4
series5.index = ['aa', 'bb', 'cc']   # replace the labels of the copy only
series5

aa   -1
bb    2
cc    3
dtype: int64

In [27]:
# Series.map() calls a user-defined function once for *every* element.
def f(x):
    """Return ``x`` doubled, plus two."""
    doubled = x * 2
    return doubled + 2

# Apply f to each element of series4; the result keeps the same index labels.
series4.map(f)

a    0
b    6
c    8
dtype: int64

Missing Value

In [28]:
# Re-index series4 onto a label set containing the extra label 'd'.
# 'd' has no value in series4, so it is filled with NaN (the missing-value
# marker), which also turns the result into float64.
new_index = list('abcd')
series6 = series4.reindex(new_index)
series6

a   -1.0
b    2.0
c    3.0
d    NaN
dtype: float64

In [29]:
# NOTE(review): series5 has no missing values, so every entry is False;
# series6 (which holds a NaN under 'd') was probably the intended example.
pd.isnull(series5)  # boolean mask: True where a value is NaN

aa    False
bb    False
cc    False
dtype: bool

In [30]:
# NOTE(review): series5 has no missing values, so every entry is True;
# series6 (which holds a NaN under 'd') was probably the intended example.
pd.notnull(series5)  # boolean mask: True where a value is not NaN

aa    True
bb    True
cc    True
dtype: bool

In [31]:
# NOTE(review): series5 has no NaN, so the result shown equals series5;
# series6 (which holds a NaN under 'd') was probably the intended example.
series5.fillna(0)  # fillna() replaces each NaN with the given value (0 here)

aa   -1
bb    2
cc    3
dtype: int64

Indexes are aligned automatically in arithmetic (+/-) operations

In [32]:
# Two Series that share only the labels 'idx2' and 'idx3'.
series6 = pd.Series(dict(idx1=10, idx2=20, idx3=30))
series7 = pd.Series(dict(idx4=70, idx2=50, idx3=50))

# Addition pairs values by label rather than by position. Labels present in
# only one operand ('idx1' and 'idx4') have no partner and come out as NaN.
series6 + series7

idx1     NaN
idx2    70.0
idx3    80.0
idx4     NaN
dtype: float64

In [33]:
print(series7.name)    # a Series and its index each have a 'name' attribute, None by default; print() is used because a bare None is not echoed

None


In [34]:
print(series7.index.name)   # the index has its own 'name' attribute, also None until it is set

None


In [35]:
# Name the Series and, separately, its index.
series7.name = "Series"
series7.index.name = "index"
series7.name            # not echoed: a cell displays only its final expression
series7.index.name      # final expression, so this is the value shown

'index'

##### axis on Series Operations

In [42]:
# Three Series with disjoint labels, used to demonstrate pd.concat along each axis.
s1 = pd.Series([0, 1], index=list('ab'))
s2 = pd.Series([2, 3, 4], index=list('cde'))
s3 = pd.Series([5, 6], index=list('fg'))
s3

f    5
g    6
dtype: int64

In [51]:
s4 = pd.concat([s1, s2, s3])   # default axis=0: the Series are stacked end to end into one longer Series
s4

a    0
b    1
c    2
d    3
e    4
f    5
g    6
dtype: int64

In [48]:
# Passing axis=0 explicitly gives the same result as the default call.
s5 = pd.concat([s1, s2, s3], axis = 0)
s5

a    0
b    1
c    2
d    3
e    4
f    5
g    6
dtype: int64

In [50]:
# axis=1 places each Series in its own column (0, 1, 2) of a DataFrame.
# Rows are the union of all labels; a label missing from a Series is NaN in
# that Series' column. sort=False asks pandas not to sort the combined row labels.
s6 = pd.concat([s1, s2, s3], axis = 1, sort = False)
s6

Unnamed: 0,0,1,2
a,0.0,,
b,1.0,,
c,,2.0,
d,,3.0,
e,,4.0,
f,,,5.0
g,,,6.0
