In [None]:
#Reindexing changes the row labels and column labels of a DataFrame. 
#To reindex means to conform the data to match a given set of 
#labels along a particular axis.

#Multiple operations can be accomplished through reindexing, such as:

#Reorder the existing data to match a new set of labels.

#Insert missing value (NA) markers in label locations where 
#no data for the label existed.

In [5]:
import pandas as pd
import numpy as np
# Seed the global NumPy RNG so the random columns below (and any later
# np.random draws in this notebook) are identical after Restart & Run All.
SEED = 42
np.random.seed(SEED)

# Number of rows in the demo frame.
N = 20

# Demo frame: a daily date column, an evenly spaced numeric column,
# and three randomly generated columns.
df = pd.DataFrame({
    'A': pd.date_range(start='2016-01-01', periods=N, freq='D'),   # N consecutive days
    'x': np.linspace(0, stop=N - 1, num=N),                        # 0.0, 1.0, ..., N-1
    'y': np.random.rand(N),                                        # uniform draws on [0, 1)
    'C': np.random.choice(['Low', 'Medium', 'High'], N).tolist(),  # random category labels
    'D': np.random.normal(100, 10, size=N).tolist(),               # normal, mean 100, std 10
})
df

Unnamed: 0,A,C,D,x,y
0,2016-01-01,Low,106.327025,0.0,0.222001
1,2016-01-02,High,90.017946,1.0,0.007988
2,2016-01-03,High,99.325655,2.0,0.715747
3,2016-01-04,Low,113.567042,3.0,0.379605
4,2016-01-05,Low,94.607217,4.0,0.1545
5,2016-01-06,Medium,101.8613,5.0,0.929979
6,2016-01-07,High,100.71547,6.0,0.878553
7,2016-01-08,High,96.577014,7.0,0.447894
8,2016-01-09,Medium,105.695138,8.0,0.638014
9,2016-01-10,Medium,120.328792,9.0,0.063358


In [14]:
# Reindex on both axes at once: keep only the row labels 0, 2 and 5 and
# the column labels 'A', 'C' and 'B'. A requested label that is absent
# from df is still created and filled with NaN.
wanted_rows = [0, 2, 5]
wanted_columns = ['A', 'C', 'B']
df_reindexed = df.reindex(index=wanted_rows, columns=wanted_columns)
df_reindexed

Unnamed: 0,A,C,B
0,2016-01-01,Low,
2,2016-01-03,High,
5,2016-01-06,Medium,


In [15]:
# Reindex again with a larger selection: seven row labels and five
# column labels. Only the labels listed here are kept; any requested
# label that df does not have is created and filled with NaN.
wanted_rows = [0, 2, 5, 7, 8, 9, 10]
wanted_columns = ['A', 'C', 'B', 'x', 'y']
df_reindexed = df.reindex(index=wanted_rows, columns=wanted_columns)
df_reindexed

Unnamed: 0,A,C,B,x,y
0,2016-01-01,Low,,0.0,0.222001
2,2016-01-03,High,,2.0,0.715747
5,2016-01-06,Medium,,5.0,0.929979
7,2016-01-08,High,,7.0,0.447894
8,2016-01-09,Medium,,8.0,0.638014
9,2016-01-10,Medium,,9.0,0.063358
10,2016-01-11,Low,,10.0,0.601869


In [22]:
# Reindex to align with another object
# reindex_like() relabels the axes of one object so that they match the
# axes of another object.

column_names = ['col1', 'col2', 'col3']
df1 = pd.DataFrame(np.random.randn(10, 3), columns=column_names)
df2 = pd.DataFrame(np.random.randn(7, 3), columns=column_names)

# Note: df1 is rebound to a copy reindexed like df2, so it now has the
# same number of rows as df2 (7 instead of 10). Column names must match
# between the two frames; a column label that exists only in df2 would
# come back as an all-NaN column.
df1 = df1.reindex_like(df2)
df1

Unnamed: 0,col1,col2,col3
0,-0.103311,0.701785,-0.693445
1,-1.120773,-0.026215,1.556497
2,0.295748,0.309299,1.322607
3,-0.765473,1.45032,0.277289
4,-0.188495,0.455449,-0.765019
5,1.104974,-0.453515,0.244623
6,1.052556,-1.539519,0.097856


In [29]:
# Filling while reindexing
# reindex() / reindex_like() accept an optional `method` argument that
# chooses how newly created labels are filled:
#
#   pad / ffill      - fill values forward
#   bfill / backfill - fill values backward
#   nearest          - fill from the nearest index value

column_names = ['col1', 'col2', 'col3']
df1 = pd.DataFrame(np.random.randn(6, 3), columns=column_names)
df2 = pd.DataFrame(np.random.randn(2, 3), columns=column_names)

# No fill method is given here, so the row labels that df2 lacks
# (2 through 5) appear as NaN.
df2.reindex_like(df1)

Unnamed: 0,col1,col2,col3
0,-0.516223,-1.031144,-0.174424
1,-1.407283,0.988463,0.853809
2,,,
3,,,
4,,,
5,,,


In [27]:
# Align df2 with df1 again, this time forward-filling: every row label
# that df2 does not have takes the values of the last row before it.
print("Data Frame with Forward Fill:")
forward_filled = df2.reindex_like(df1, method='ffill')
print(forward_filled)
# Note: the last four rows are padded with the values of the row above them.

Data Frame with Forward Fill:
       col1      col2      col3
0 -1.476632 -0.474715  0.455691
1 -1.157286 -1.039969  0.039157
2 -1.157286 -1.039969  0.039157
3 -1.157286 -1.039969  0.039157
4 -1.157286 -1.039969  0.039157
5 -1.157286 -1.039969  0.039157


In [31]:
# Limits on filling while reindexing
# The `limit` argument gives extra control over filling: it caps how
# many consecutive missing labels are filled from one existing value.

column_names = ['col1', 'col2', 'col3']
df1 = pd.DataFrame(np.random.randn(6, 3), columns=column_names)
df2 = pd.DataFrame(np.random.randn(2, 3), columns=column_names)

# Without a fill method, the row labels missing from df2 come back as NaN.
df2.reindex_like(df1)

Unnamed: 0,col1,col2,col3
0,0.72343,0.188471,0.040334
1,-0.420147,0.479378,2.2243
2,,,
3,,,
4,,,
5,,,


In [32]:
# Forward-fill while aligning df2 with df1, but fill at most one
# consecutive missing row (limit=1).
print("Data Frame with Forward Fill limiting to 1:")
limited_fill = df2.reindex_like(df1, method='ffill', limit=1)
print(limited_fill)
# Note: only the first missing row (label 2) is filled, from the row
# directly above it (label 1). The remaining rows stay NaN.

Data Frame with Forward Fill limiting to 1:
       col1      col2      col3
0  0.723430  0.188471  0.040334
1 -0.420147  0.479378  2.224300
2 -0.420147  0.479378  2.224300
3       NaN       NaN       NaN
4       NaN       NaN       NaN
5       NaN       NaN       NaN


In [33]:
# Renaming
# rename() relabels an axis using a mapping (a dict or a Series) or an
# arbitrary function.
# Build a fresh 6x3 frame of random values to rename.
df1 = pd.DataFrame(
    np.random.randn(6, 3),
    columns=['col1', 'col2', 'col3'],
)
df1



Unnamed: 0,col1,col2,col3
0,-0.054883,-0.877456,-0.57094
1,-1.95007,-0.657739,0.512889
2,-0.842349,-1.23703,-0.003818
3,2.22463,0.385197,-2.251532
4,-0.879922,0.420647,-1.799155
5,-0.723534,-0.642503,0.157744


In [35]:
# Relabel two of the columns and the first three row labels. Labels
# that are not mentioned in a mapping are left unchanged.
column_map = {'col1': 'c1', 'col2': 'c2'}
row_map = {0: 'apple', 1: 'banana', 2: 'durian'}

print("After renaming the rows and columns:")
print(df1.rename(columns=column_map, index=row_map))

# rename() has an `inplace` parameter that defaults to False, in which
# case a renamed copy is returned. Pass inplace=True to rename the data
# in place instead.

After renaming the rows and columns:
              c1        c2      col3
apple  -0.054883 -0.877456 -0.570940
banana -1.950070 -0.657739  0.512889
durian -0.842349 -1.237030 -0.003818
3       2.224630  0.385197 -2.251532
4      -0.879922  0.420647 -1.799155
5      -0.723534 -0.642503  0.157744
