In [1]:
import pandas as pd
import numpy as np

In [2]:
# Dict-based stand-in for a Series: parallel label/value lists plus a name.
series = dict(
    index=[0, 1, 2, 3],
    data=[145, 142, 38, 13],
    name='songs',
)

In [3]:
series

{'index': [0, 1, 2, 3], 'data': [145, 142, 38, 13], 'name': 'songs'}

In [4]:
def get(series, idx):
    """Return the value stored under label ``idx`` in a dict-based series.

    ``series`` is a mapping holding parallel ``'index'`` and ``'data'`` lists.
    The label is located in ``series['index']`` and the element at the same
    position of ``series['data']`` is returned.

    Raises ``ValueError`` (from ``list.index``) when ``idx`` is not a label.
    """
    position = series['index'].index(idx)
    values = series['data']
    return values[position]

In [5]:
get(series,1)

142

In [6]:
# Same structure as before, but the labels are strings instead of integers.
songs = dict(
    index=['Paul', 'John', 'George', 'Ringo'],
    data=[145, 142, 38, 13],
    name='counts',
)

In [7]:
get(songs, 'John')

142

Pandas series

In [8]:
# No index is passed, so pandas supplies a default RangeIndex.
songs2 = pd.Series(data=[145, 142, 38, 13], name='counts')

In [9]:
songs2

0    145
1    142
2     38
3     13
Name: counts, dtype: int64

In [10]:
songs2.index

RangeIndex(start=0, stop=4, step=1)

The index can also be string based

In [11]:
# Same counts, labelled by band member rather than by position.
songs3 = pd.Series(
    data=[145, 142, 38, 13],
    index=['Paul', 'John', 'George', 'Ringo'],
    name='counts',
)

In [12]:
songs3

Paul      145
John      142
George     38
Ringo      13
Name: counts, dtype: int64

In [13]:
songs3.index

Index(['Paul', 'John', 'George', 'Ringo'], dtype='object')

The data inside a Series does not have to be numeric or homogeneous. Arbitrary Python objects can also be stored in a Series.

In [14]:
class Foo:
    """Empty placeholder class; its instances stand in for an arbitrary Python object."""

In [15]:
# Mixed contents (str, int, arbitrary object) are stored with the generic object dtype.
ringo = pd.Series(['Richard', 'Stacy', 13, Foo()], name='ringo')

In [16]:
ringo

0                                    Richard
1                                      Stacy
2                                         13
3    <__main__.Foo object at 0x7f927bdd6cd0>
Name: ringo, dtype: object

In [17]:
# Mixing an integer with np.nan yields a float64 Series (2 is stored as 2.0).
nan_series = pd.Series(data=[2, np.nan], index=['Ono', 'Clapton'])
nan_series

Ono        2.0
Clapton    NaN
dtype: float64

In [18]:
nan_series.count()

1

In [19]:
nan_series.size

2

If a Series or DataFrame contains `np.nan`, pandas usually stores the values as `float64` rather than `int64`, because the plain integer dtype cannot represent NaN.

- To keep missing values in integer data, pass `None` for the missing entries and set the nullable integer dtype explicitly (`dtype='Int64'`, with a capital `I`).

In [20]:
# The nullable 'Int64' dtype (capital I) keeps the integers and shows the missing entry as <NA>.
nan_Series2 = pd.Series(data=[2, None], index=['axe', 'Ronn'], dtype='Int64')
nan_Series2

axe        2
Ronn    <NA>
dtype: Int64

In [21]:
nan_Series2.count()

1

Converting nan_series to the nullable `Int64` dtype

In [22]:
nan_series.astype('Int64')

Ono           2
Clapton    <NA>
dtype: Int64

A Series behaves much like a NumPy array

In [23]:
numpy_ser = np.array([145,142,38,13])

In [24]:
# songs3 is labelled with strings, so an integer inside [] is not one of its labels.
# Use .iloc for an explicit positional lookup: plain songs3[1] relies on a positional
# fallback that is deprecated in pandas 2.x and no longer positional in pandas 3.0.
songs3.iloc[1], numpy_ser[1]

(142, 142)

In [25]:
songs3.mean(), numpy_ser.mean()

(84.5, 84.5)

Filtering a series with boolean array

In [26]:
mask = songs3 > songs3.median()

In [27]:
songs3[mask]

Paul    145
John    142
Name: counts, dtype: int64

Equivalent version in numpy

In [28]:
numpy_ser[numpy_ser >np.median(numpy_ser)]

array([145, 142])

Categorical data

In [30]:
# With dtype='category' the categories are inferred from the data and are unordered.
s = pd.Series(['m', 'l', 'xs', 's', 'xl'], dtype='category')
s

0     m
1     l
2    xs
3     s
4    xl
dtype: category
Categories (5, object): ['l', 'm', 's', 'xl', 'xs']

By default, categories don't have any ordering

In [32]:
s.cat.ordered # it can be verified by using .cat attribute

False

To convert non-categorical series into categorical

In [34]:
s2 = pd.Series(['m','l','xs','s','xl'])
s2

0     m
1     l
2    xs
3     s
4    xl
dtype: object

In [35]:
# Ordered categorical dtype with the ranking 'm' < 'l' < 'xl'.
size_type = pd.CategoricalDtype(categories=['m', 'l', 'xl'], ordered=True)

In [36]:
s3 = s2.astype(size_type)
s3

0      m
1      l
2    NaN
3    NaN
4     xl
dtype: category
Categories (3, object): ['m' < 'l' < 'xl']

In [40]:
# Comparison operators work here because s3 has an ordered categorical dtype
# ('m' < 'l' < 'xl'); the NaN entries compare as False.
s3 > 'm'

0    False
1     True
2    False
3    False
4     True
dtype: bool

In [41]:
s.cat.reorder_categories(['xs','s','m','l','xl'], ordered=True)

0     m
1     l
2    xs
3     s
4    xl
dtype: category
Categories (5, object): ['xs' < 's' < 'm' < 'l' < 'xl']

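String, datetime and categorical Series expose the `.str`, `.dt` and `.cat` accessors respectively. A categorical Series whose categories are strings can still use `.str`: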

In [43]:
s3.str.upper()

0      M
1      L
2    NaN
3    NaN
4     XL
dtype: object