In [1]:
import pandas as pd
import numpy as np

### reindex

In [2]:
# Float Series whose labels are deliberately not in alphabetical order,
# so the reindex examples that follow visibly rearrange it.
obj = pd.Series(data=[4.5, 6.2, 9.1, 8.8], index=list('dcab'))
obj

d    4.5
c    6.2
a    9.1
b    8.8
dtype: float64

In [3]:
# Rearrange to the new label order; 'e' does not exist in obj, so it takes fill_value.
obj.reindex(index=['a', 'b', 'c', 'd', 'e'], fill_value=0)

a    9.1
b    8.8
c    6.2
d    4.5
e    0.0
dtype: float64

In [4]:
obj_2 = pd.Series(data=['blue', 'purple', 'yellow'], index=[0, 2, 4])
# method='ffill' fills values forward: each missing label takes the previous label's value.
# method='bfill' fills values backward: each missing label takes the next label's value.
obj_2.reindex(index=range(6), method='ffill')

0      blue
1      blue
2    purple
3    purple
4    yellow
5    yellow
dtype: object

In [5]:
# 3x3 frame holding 0..8; the row labels are 'a', 'c', 'd' (there is no 'b').
frame = pd.DataFrame(
    data=np.arange(9).reshape(3, 3),
    index=list('acd'),
    columns=['Wuhan', 'Beijing', 'Guangzhou'],
)
frame

Unnamed: 0,Wuhan,Beijing,Guangzhou
a,0,1,2
c,3,4,5
d,6,7,8


In [6]:
# Row 'b' is missing from frame; bfill copies the next existing row ('c') into it.
frame.reindex(index=list('abcd'), method='bfill')

Unnamed: 0,Wuhan,Beijing,Guangzhou
a,0,1,2
b,3,4,5
c,3,4,5
d,6,7,8


In [7]:
# Reorder the columns and request one that does not exist yet:
# 'Hangzhou' is added with missing values (shown as empty cells in the output).
frame.reindex(columns = ['Wuhan', 'Guangzhou', 'Beijing', 'Hangzhou'])

Unnamed: 0,Wuhan,Guangzhou,Beijing,Hangzhou
a,0,2,1,
c,3,5,4,
d,6,8,7,


### drop

In [8]:
# drop returns a new Series without the given entry; obj itself is not modified.
obj.drop(labels='c')

d    4.5
a    9.1
b    8.8
dtype: float64

In [9]:
# DataFrame drop: with axis=0 the label refers to a row.
frame.drop(labels='c', axis=0)

Unnamed: 0,Wuhan,Beijing,Guangzhou
a,0,1,2
d,6,7,8


In [10]:
# Drop along the column axis instead of the rows.
frame.drop(labels=['Beijing', 'Guangzhou'], axis='columns')

Unnamed: 0,Wuhan
a,0
c,3
d,6


### indexing, selection, filtering

In [11]:
# Add row 'b' and column 'Hangzhou' in one reindex; every newly created cell gets fill_value.
frame_2 = frame.reindex(
    index=list('abcd'),
    columns=['Wuhan', 'Beijing', 'Guangzhou', 'Hangzhou'],
    fill_value=0,
)
frame_2

Unnamed: 0,Wuhan,Beijing,Guangzhou,Hangzhou
a,0,1,2,0
b,0,0,0,0
c,3,4,5,0
d,6,7,8,0


In [12]:
# Select a subset of columns by label (all rows).
frame_2.loc[:, ['Wuhan', 'Beijing']]

Unnamed: 0,Wuhan,Beijing
a,0,1
b,0,0
c,3,4
d,6,7


In [13]:
# Select rows by label (all columns).
frame_2.loc[['a', 'b'], :]

Unnamed: 0,Wuhan,Beijing,Guangzhou,Hangzhou
a,0,1,2,0
b,0,0,0,0


In [14]:
# Select the first two rows by integer position (all columns).
frame_2.iloc[:2, :]

Unnamed: 0,Wuhan,Beijing,Guangzhou,Hangzhou
a,0,1,2,0
b,0,0,0,0


In [15]:
# Boolean filtering: keep only the rows whose 'Wuhan' value is 0.
frame_2.loc[frame_2['Wuhan'] == 0]

Unnamed: 0,Wuhan,Beijing,Guangzhou,Hangzhou
a,0,1,2,0
b,0,0,0,0


In [16]:
# Boolean-mask assignment: every cell equal to 0 is overwritten with 9.
# This modifies frame_2 in place, so later cells see the updated values.
frame_2[frame_2.eq(0)] = 9
frame_2

Unnamed: 0,Wuhan,Beijing,Guangzhou,Hangzhou
a,9,1,2,9
b,9,9,9,9
c,3,4,5,9
d,6,7,8,9


In [17]:
# Label-based slicing with .loc includes the end label, so rows 'a' and 'b' are both returned;
# the list after the comma picks the columns.
frame_2.loc[:'b', ['Wuhan', 'Beijing']]

Unnamed: 0,Wuhan,Beijing
a,9,1
b,9,9


### Axis indexes with duplicate values

In [18]:
# Labels 'a' and 'b' each occur twice, so the index is not unique.
obj_3 = pd.Series(data=range(5), index=list('aabbc'))
obj_3.index.is_unique

False

In [19]:
# Selecting a duplicated label returns a Series with every matching entry, not a scalar.
obj_3.loc['a']

a    0
a    1
dtype: int64

### Function application and mapping

In [30]:
# 4x3 frame of standard-normal samples.
# NOTE(review): no random seed is set in this cell, so the values (and every output
# derived from them below) change between runs unless a seed was set earlier.
frame_4 = pd.DataFrame(
    data=np.random.randn(4, 3),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
    columns=['b', 'd', 'e'],
)
frame_4

Unnamed: 0,b,d,e
Utah,-0.47684,-1.384932,-1.858661
Ohio,-0.541811,-1.103029,-0.901488
Texas,1.261412,-0.419383,1.519637
Oregon,-0.70291,1.689866,-0.640856


In [31]:
# NumPy ufuncs can be called directly on a DataFrame: np.abs is applied element-wise
# and the result keeps the same row and column labels.
np.abs(frame_4)

Unnamed: 0,b,d,e
Utah,0.47684,1.384932,1.858661
Ohio,0.541811,1.103029,0.901488
Texas,1.261412,0.419383,1.519637
Oregon,0.70291,1.689866,0.640856


In [32]:
def f(x):
    """Return the spread of x: its maximum minus its minimum."""
    return x.max() - x.min()
# axis=0 (the default) passes each column to f, giving one result per column.
frame_4.apply(f, axis=0)

b    1.964322
d    3.074798
e    3.378298
dtype: float64

In [33]:
# apply can work along either axis: here f receives each row (spanning the columns),
# giving one result per row label.
frame_4.apply(f, axis='columns')

Utah      1.381820
Ohio      0.561218
Texas     1.939020
Oregon    2.392776
dtype: float64

In [34]:
# NOTE(review): this name shadows the built-in format() for the rest of the notebook;
# it is kept unchanged because other cells may refer to it.
def format(x):
    """Render x as a string with two digits after the decimal point."""
    return '%.2f' % x
# applymap applies the function to every element of the DataFrame.
# NOTE(review): pandas 2.1+ deprecates DataFrame.applymap in favour of DataFrame.map;
# switch once the pandas version used for this notebook is confirmed.
frame_4.applymap(format)

Unnamed: 0,b,d,e
Utah,-0.48,-1.38,-1.86
Ohio,-0.54,-1.1,-0.9
Texas,1.26,-0.42,1.52
Oregon,-0.7,1.69,-0.64


### Sorting and ranking

In [35]:
# sort_index(): sort a Series or DataFrame by its index labels
# sort_values(): sort by values (replaces Series.order(), which has been removed from pandas)
# rank(): return the rank of each value