# Essential Functionality

In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

## Reindexing

In [2]:
# Four floats labelled with a deliberately unsorted index (d, b, a, c).
obj = pd.Series(data=[4.5, 7.2, -5.3, 3.6], index=['d', 'b', 'a', 'c'])
obj

d    4.5
b    7.2
a   -5.3
c    3.6
dtype: float64

In [3]:
# Reindex to the labels a-e: existing values are rearranged to follow the new
# label order, and 'e', which obj does not contain, comes back as NaN.
obj2 = obj.reindex(list('abcde'))
obj2

a   -5.3
b    7.2
c    3.6
d    4.5
e    NaN
dtype: float64

### reindex with forward-fill

In [5]:
# String values on a gappy integer index (0, 2, 4) so there is something to fill.
obj3 = pd.Series({0: 'blue', 2: 'purple', 4: 'yellow'})
obj3

0      blue
2    purple
4    yellow
dtype: object

In [6]:
# Reindexing to 0-5 without a fill method leaves the new labels 1, 3 and 5 as NaN.
obj3.reindex(range(6))

0      blue
1       NaN
2    purple
3       NaN
4    yellow
5       NaN
dtype: object

In [7]:
# method='ffill' forward-fills: each new label takes the value of the closest
# preceding existing label (1 <- 0, 3 <- 2, 5 <- 4).
obj3.reindex(range(6),method='ffill')

0      blue
1      blue
2    purple
3    purple
4    yellow
5    yellow
dtype: object

## Dropping Entries from an Axis

### drop from pd.Series

In [9]:
# Floats 0.0 through 4.0 labelled a-e; used as the input for the drop examples.
obj = pd.Series(
    data=np.arange(5, dtype=float),
    index=['a', 'b', 'c', 'd', 'e'],
)
obj

a    0.0
b    1.0
c    2.0
d    3.0
e    4.0
dtype: float64

In [11]:
# drop with a single label: the result has every entry of obj except 'c'.
# The result is bound to new_obj; obj is not reassigned.
new_obj = obj.drop('c')
new_obj

a    0.0
b    1.0
d    3.0
e    4.0
dtype: float64

In [12]:
# drop with a list removes several labels at once ('d' and 'c');
# new_obj is rebound to this result.
new_obj = obj.drop(['d', 'c'])
new_obj

a    0.0
b    1.0
e    4.0
dtype: float64

### drop from a dataframe

In [15]:
# 4x4 frame holding 0..15 row by row; rows are labelled by state name and
# columns by ordinal word.
data = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    columns=['one', 'two', 'three', 'four'],
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
)
data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [16]:
# With no axis argument, drop removes rows by index label (here Colorado and Ohio).
data.drop(['Colorado', 'Ohio'])

Unnamed: 0,one,two,three,four
Utah,8,9,10,11
New York,12,13,14,15


In [17]:
# axis=1 makes drop remove a column ('two') instead of a row.
data.drop('two', axis=1)

Unnamed: 0,one,three,four
Ohio,0,2,3
Colorado,4,6,7
Utah,8,10,11
New York,12,14,15


In [18]:
# axis='columns' is the named spelling used here in place of axis=1;
# both listed columns are removed from the result.
data.drop(['two', 'four'], axis='columns')

Unnamed: 0,one,three
Ohio,0,2
Colorado,4,6
Utah,8,10
New York,12,14


## Indexing, Selection, and Filtering

In [19]:
# Example frame for the selection demos: the integers 0..15 laid out as 4 rows
# of 4, with state names as row labels.
data = pd.DataFrame(
    np.arange(16).reshape(4, -1),
    columns=['one', 'two', 'three', 'four'],
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
)
data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [20]:
# A single column label inside [] returns that column as a Series.
data['two']

Ohio         1
Colorado     5
Utah         9
New York    13
Name: two, dtype: int64

In [21]:
# A list of column labels returns a DataFrame with the columns in the
# requested order ('three' before 'two').
data[['three', 'two']]

Unnamed: 0,three,two
Ohio,2,1
Colorado,6,5
Utah,10,9
New York,14,13


In [22]:
# A slice inside [] selects rows by position: here the first two rows.
data[:2]

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7


In [26]:
# loc with a single row label returns that row as a Series indexed by column name.
# NOTE(review): this cell carries execution count 26, i.e. it was run after the
# masked-assignment cell (In [25]); the recorded all-zero output reflects that
# mutated frame. On a top-to-bottom run the Ohio row would be 0, 1, 2, 3.
data.loc['Ohio']

one      0
two      0
three    0
four     0
Name: Ohio, dtype: int64

In [23]:
# A boolean Series inside [] filters rows: keep rows whose 'three' value exceeds 5.
data[data['three']>5]

Unnamed: 0,one,two,three,four
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [24]:
# A boolean DataFrame mask keeps the cells where the condition holds and shows
# the rest as missing (the recorded output displays the kept values as floats).
data[data < 5]

Unnamed: 0,one,two,three,four
Ohio,0.0,1.0,2.0,3.0
Colorado,4.0,,,
Utah,,,,
New York,,,,


In [25]:
# Masked assignment: every cell below 5 is overwritten with 0.
# This mutates data in place, so any cell executed after this one sees the
# modified values until data is rebuilt.
data[data < 5] = 0
data

Unnamed: 0,one,two,three,four
Ohio,0,0,0,0
Colorado,0,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


## Selection with loc and iloc

In [27]:
# Fresh 4x4 frame of the integers 0..15 (row-major) for the loc/iloc demos.
data = pd.DataFrame(
    np.reshape(np.arange(16), (4, 4)),
    columns=['one', 'two', 'three', 'four'],
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
)
data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [28]:
# loc is label-based: row 'Colorado', columns 'two' and 'three'.
data.loc['Colorado', ['two', 'three']]

two      5
three    6
Name: Colorado, dtype: int64

In [29]:
# iloc is position-based: row 2 (Utah) and columns 3, 0, 1 in that order
# (four, one, two).
data.iloc[2, [3,0,1]]

four    11
one      8
two      9
Name: Utah, dtype: int64

In [30]:
# A single integer selects the whole row at position 2 (Utah) as a Series.
data.iloc[2]

one       8
two       9
three    10
four     11
Name: Utah, dtype: int64

In [31]:
# Position lists on both axes: rows 1 and 2, columns 3, 0 and 1 -> a 2x3 DataFrame.
data.iloc[[1,2], [3,0,1]]

Unnamed: 0,four,one,two
Colorado,7,4,5
Utah,11,8,9


In [32]:
# Label slices with loc include the end label: rows up to and including 'Utah',
# restricted to column 'two'.
data.loc[:'Utah', 'two']

Ohio        1
Colorado    5
Utah        9
Name: two, dtype: int64

In [33]:
# Two steps: take the first three columns by position, then keep only the rows
# where column 'three' is greater than 5.
data.iloc[:, :3][data.three > 5]

Unnamed: 0,one,two,three
Colorado,4,5,6
Utah,8,9,10
New York,12,13,14


## Function Application and Mapping

In [34]:
# Standard-normal 4x3 example frame drawn from a seeded generator, so the values
# are identical on every Restart & Run All.
# The outputs recorded in this notebook came from an earlier unseeded
# np.random.randn call, so the displayed numbers will change once on the next
# run and stay fixed afterwards.
frame = pd.DataFrame(
    np.random.default_rng(seed=42).standard_normal((4, 3)),
    columns=list('bde'),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
)
frame

Unnamed: 0,b,d,e
Utah,-0.31646,-1.773229,1.003906
Ohio,0.48369,0.017337,-1.076568
Texas,-1.187031,-1.812141,-0.806381
Oregon,0.780538,1.271751,0.223149


In [35]:
# NumPy ufuncs apply element-wise to a DataFrame; the result keeps the same
# row and column labels.
np.abs(frame)

Unnamed: 0,b,d,e
Utah,0.31646,1.773229,1.003906
Ohio,0.48369,0.017337,1.076568
Texas,1.187031,1.812141,0.806381
Oregon,0.780538,1.271751,0.223149


In [36]:
def f(x):
    """Return the spread of ``x``: its largest value minus its smallest value."""
    highest = x.max()
    lowest = x.min()
    return highest - lowest

In [37]:
# By default apply calls f once per column, giving one result per column label.
frame.apply(f)

b    1.967569
d    3.083892
e    2.080474
dtype: float64

In [38]:
# axis='columns' makes apply call f once per row instead, giving one result
# per row label.
frame.apply(f, axis='columns')

Utah      2.777135
Ohio      1.560258
Texas     1.005760
Oregon    1.048602
dtype: float64

### return non-scalar

In [42]:
def f(x):
    """Return the minimum and maximum of ``x`` as a Series labelled 'min' and 'max'."""
    lowest, highest = x.min(), x.max()
    return pd.Series(data=[lowest, highest], index=['min', 'max'])

In [43]:
# f returns a Series here, so apply assembles the per-column results into a
# DataFrame with rows 'min' and 'max'.
frame.apply(f)

Unnamed: 0,b,d,e
min,-1.187031,-1.812141,-1.076568
max,0.780538,1.271751,1.003906


### element-wise 

In [47]:
def format(x):
    """Render ``x`` as a string with exactly two digits after the decimal point."""
    return f'{x:.2f}'

In [48]:
# Apply `format` to every individual cell of the frame.
# DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 and applymap is
# deprecated, so prefer map when the installed pandas has it and fall back to
# applymap on older versions.
elementwise = frame.map if hasattr(frame, 'map') else frame.applymap
elementwise(format)

Unnamed: 0,b,d,e
Utah,-0.32,-1.77,1.0
Ohio,0.48,0.02,-1.08
Texas,-1.19,-1.81,-0.81
Oregon,0.78,1.27,0.22


## Sorting and Ranking

In [52]:
# Integers counting down from 3 to 0, labelled with an unsorted index (d, a, b, c).
obj = pd.Series(list(range(3, -1, -1)), index=['d', 'a', 'b', 'c'])
obj

d    3
a    2
b    1
c    0
dtype: int64

In [53]:
# sort_index orders the entries by label (a, b, c, d); values travel with their labels.
obj.sort_index()

a    2
b    1
c    0
d    3
dtype: int64

In [54]:
# sort_values orders the entries by value, smallest first.
obj.sort_values()

c    0
b    1
a    2
d    3
dtype: int64

In [55]:
# Two integer columns ('b' then 'a') on the default 0..3 row index,
# used as the input for the sort_values examples.
frame = pd.DataFrame(data={'b': [4, 7, -3, 2], 'a': [0, 1, 0, 1]})
frame

Unnamed: 0,b,a
0,4,0
1,7,1
2,-3,0
3,2,1


In [56]:
# Sort the rows by the values in column 'b', smallest first.
frame.sort_values(by='b')

Unnamed: 0,b,a
2,-3,0
3,2,1
0,4,0
1,7,1


In [57]:
# Multiple sort keys: order by 'a' first, then by 'b' among rows with equal 'a'.
frame.sort_values(by=['a', 'b'])

Unnamed: 0,b,a
2,-3,0
0,4,0
3,2,1
1,7,1


In [59]:
# Integer Series containing ties (two 7s and two 4s) to show how rank handles them.
obj = pd.Series(data=[7, -5, 7, 4, 2, 0, 4])
obj

0    7
1   -5
2    7
3    4
4    2
5    0
6    4
dtype: int64

In [60]:
# With no arguments, tied values share the mean of the ranks they span:
# the two 7s both get 6.5 and the two 4s both get 4.5.
obj.rank()

0    6.5
1    1.0
2    6.5
3    4.5
4    3.0
5    2.0
6    4.5
dtype: float64

In [61]:
# method='first' breaks ties by order of appearance: the 7 at position 0 ranks
# 6.0 and the 7 at position 2 ranks 7.0.
obj.rank(method='first')

0    6.0
1    1.0
2    7.0
3    4.0
4    3.0
5    2.0
6    5.0
dtype: float64

In [62]:
# method='max' gives every member of a tie the highest rank in its group
# (both 7s -> 7.0, both 4s -> 5.0).
obj.rank(method='max')

0    7.0
1    1.0
2    7.0
3    5.0
4    3.0
5    2.0
6    5.0
dtype: float64