## DataFrames:

#### We can think of a DataFrame as a collection of Series that share a common index. Let's explore DataFrames in detail.

In [2]:
import numpy as np
import pandas as pd

In [6]:
# Create a small 5x4 DataFrame of standard-normal random values.
# NumPy's global RNG is seeded first so that "Restart & Run All" produces the
# same numbers on every run. NOTE: the outputs saved in this notebook came
# from an unseeded run, so they will differ until the notebook is re-executed.

np.random.seed(101)
df = pd.DataFrame(data = np.random.randn(5,4), index = 'A B C D E'.split(), columns = 'W X Y Z'.split())

In [7]:
df

Unnamed: 0,W,X,Y,Z
A,-0.41009,0.162665,0.388813,-0.567844
B,0.443548,-0.457618,-0.617701,1.273738
C,-1.273016,0.336221,1.457579,-0.196704
D,-2.23389,-1.149824,1.914845,-0.206768
E,-0.28342,-1.084131,0.510533,1.645422


## Selections And Indexing:

In [13]:
# A Series of five names; no index is passed, so pandas assigns the default 0..4 index.
ser = pd.Series(['umair', 'obaid', 'junaid', 'farhan', 'tahir'])

In [14]:
ser

0     umair
1     obaid
2    junaid
3    farhan
4     tahir
dtype: object

In [17]:
ser[2] # returns the value stored under index label 2 (with the default 0..4 index, label and position coincide)

'junaid'

In [18]:
df

Unnamed: 0,W,X,Y,Z
A,-0.41009,0.162665,0.388813,-0.567844
B,0.443548,-0.457618,-0.617701,1.273738
C,-1.273016,0.336221,1.457579,-0.196704
D,-2.23389,-1.149824,1.914845,-0.206768
E,-0.28342,-1.084131,0.510533,1.645422


In [20]:
df['W'] # reports back 'W' column from DataFrame

A   -0.410090
B    0.443548
C   -1.273016
D   -2.233890
E   -0.283420
Name: W, dtype: float64

In [23]:
df['X']  # reports back the 'X' column from DataFrame

# Note: every column shares the same index (A-E)

A    0.162665
B   -0.457618
C    0.336221
D   -1.149824
E   -1.084131
Name: X, dtype: float64

In [24]:
# we can also pass a list of column names

df[['W','Y']]

Unnamed: 0,W,Y
A,-0.41009,0.388813
B,0.443548,-0.617701
C,-1.273016,1.457579
D,-2.23389,1.914845
E,-0.28342,0.510533


In [27]:
df[['W','Z']]

Unnamed: 0,W,Z
A,-0.41009,-0.567844
B,0.443548,1.273738
C,-1.273016,-0.196704
D,-2.23389,-0.206768
E,-0.28342,1.645422


In [30]:
df[['W','Y','Z']]

Unnamed: 0,W,Y,Z
A,-0.41009,0.388813,-0.567844
B,0.443548,-0.617701,1.273738
C,-1.273016,1.457579,-0.196704
D,-2.23389,1.914845,-0.206768
E,-0.28342,0.510533,1.645422


In [36]:
# attribute (dot) access to a column -- this is ordinary Python attribute syntax, not SQL

df.W  # not recommended: only works when the label is a valid identifier that does not clash with a DataFrame attribute/method; prefer df['W']

A   -0.410090
B    0.443548
C   -1.273016
D   -2.233890
E   -0.283420
Name: W, dtype: float64

In [38]:
type(df['W'])  # reports back that its a pandas series

pandas.core.series.Series

In [40]:
type(df[['W','X']])  # reports back its a pandas DataFrame

pandas.core.frame.DataFrame

## Creating a new column:

In [41]:
df['Column Name'] = df['W'] + df['X']

In [51]:
df

Unnamed: 0,W,X,Y,Z,Column Name
A,-0.41009,0.162665,0.388813,-0.567844,-0.247426
B,0.443548,-0.457618,-0.617701,1.273738,-0.014069
C,-1.273016,0.336221,1.457579,-0.196704,-0.936795
D,-2.23389,-1.149824,1.914845,-0.206768,-3.383714
E,-0.28342,-1.084131,0.510533,1.645422,-1.367551


In [52]:
df['Other'] = df['Y'] + df['Z']

In [53]:
df

Unnamed: 0,W,X,Y,Z,Column Name,Other
A,-0.41009,0.162665,0.388813,-0.567844,-0.247426,-0.179032
B,0.443548,-0.457618,-0.617701,1.273738,-0.014069,0.656037
C,-1.273016,0.336221,1.457579,-0.196704,-0.936795,1.260875
D,-2.23389,-1.149824,1.914845,-0.206768,-3.383714,1.708077
E,-0.28342,-1.084131,0.510533,1.645422,-1.367551,2.155955


## Removing columns:

In [54]:
df.drop('Column Name', axis = 1)

Unnamed: 0,W,X,Y,Z,Other
A,-0.41009,0.162665,0.388813,-0.567844,-0.179032
B,0.443548,-0.457618,-0.617701,1.273738,0.656037
C,-1.273016,0.336221,1.457579,-0.196704,1.260875
D,-2.23389,-1.149824,1.914845,-0.206768,1.708077
E,-0.28342,-1.084131,0.510533,1.645422,2.155955


In [60]:
# Actually remove 'Column Name' from df. The drop() call in the previous cell
# only returned a new frame and left df untouched. Without this reassignment a
# fresh "Restart & Run All" keeps six columns, and the later five-label
# `df.columns = [1,2,3,4,5]` assignment fails with a length mismatch.
# Reassigning is preferred over inplace=True: same result, no hidden mutation.
df = df.drop('Column Name', axis = 1)

In [61]:
df

Unnamed: 0,W,X,Y,Z,Other
A,-0.41009,0.162665,0.388813,-0.567844,-0.179032
B,0.443548,-0.457618,-0.617701,1.273738,0.656037
C,-1.273016,0.336221,1.457579,-0.196704,1.260875
D,-2.23389,-1.149824,1.914845,-0.206768,1.708077
E,-0.28342,-1.084131,0.510533,1.645422,2.155955


In [62]:
# can also drop rows this way

df.drop('E', axis = 0) # axis = 0,1 for rows,columns

Unnamed: 0,W,X,Y,Z,Other
A,-0.41009,0.162665,0.388813,-0.567844,-0.179032
B,0.443548,-0.457618,-0.617701,1.273738,0.656037
C,-1.273016,0.336221,1.457579,-0.196704,1.260875
D,-2.23389,-1.149824,1.914845,-0.206768,1.708077


In [64]:
df.loc['B'] # grabs the row at index 'B'

W        0.443548
X       -0.457618
Y       -0.617701
Z        1.273738
Other    0.656037
Name: B, dtype: float64

In [66]:
df.iloc[1] # returns the row at integer position 1 --> iloc = integer location (purely position-based)

W        0.443548
X       -0.457618
Y       -0.617701
Z        1.273738
Other    0.656037
Name: B, dtype: float64

In [67]:
# one more time

# to grab a column we have

df['W']

A   -0.410090
B    0.443548
C   -1.273016
D   -2.233890
E   -0.283420
Name: W, dtype: float64

In [68]:
# to grab a list of columns we have

df[['W','X']]

Unnamed: 0,W,X
A,-0.41009,0.162665
B,0.443548,-0.457618
C,-1.273016,0.336221
D,-2.23389,-1.149824
E,-0.28342,-1.084131


In [69]:
# to drop a column we have

df.drop('W',axis = 1)

Unnamed: 0,X,Y,Z,Other
A,0.162665,0.388813,-0.567844,-0.179032
B,-0.457618,-0.617701,1.273738,0.656037
C,0.336221,1.457579,-0.196704,1.260875
D,-1.149824,1.914845,-0.206768,1.708077
E,-1.084131,0.510533,1.645422,2.155955


In [70]:
# to drop a list of columns we have

df.drop(['X','Y'], axis = 1)

Unnamed: 0,W,Z,Other
A,-0.41009,-0.567844,-0.179032
B,0.443548,1.273738,0.656037
C,-1.273016,-0.196704,1.260875
D,-2.23389,-0.206768,1.708077
E,-0.28342,1.645422,2.155955


In [71]:
# to grab a row we have

df.loc['B']

W        0.443548
X       -0.457618
Y       -0.617701
Z        1.273738
Other    0.656037
Name: B, dtype: float64

In [72]:
# again

df.loc['C']

W       -1.273016
X        0.336221
Y        1.457579
Z       -0.196704
Other    1.260875
Name: C, dtype: float64

In [74]:
# we can also grab rows from index locations

df.iloc[2]

W       -1.273016
X        0.336221
Y        1.457579
Z       -0.196704
Other    1.260875
Name: C, dtype: float64

In [75]:
df.iloc[3]

W       -2.233890
X       -1.149824
Y        1.914845
Z       -0.206768
Other    1.708077
Name: D, dtype: float64

In [77]:
# to grab list of rows we have

df.loc[['E','D']]


Unnamed: 0,W,X,Y,Z,Other
E,-0.28342,-1.084131,0.510533,1.645422,2.155955
D,-2.23389,-1.149824,1.914845,-0.206768,1.708077


In [79]:
# to grab a list of rows using integer positions

df.iloc[[2,3]]

Unnamed: 0,W,X,Y,Z,Other
C,-1.273016,0.336221,1.457579,-0.196704,1.260875
D,-2.23389,-1.149824,1.914845,-0.206768,1.708077


In [80]:
# to drop a row

df.drop('E', axis = 0)

Unnamed: 0,W,X,Y,Z,Other
A,-0.41009,0.162665,0.388813,-0.567844,-0.179032
B,0.443548,-0.457618,-0.617701,1.273738,0.656037
C,-1.273016,0.336221,1.457579,-0.196704,1.260875
D,-2.23389,-1.149824,1.914845,-0.206768,1.708077


In [81]:
# to drop list of rows

df.drop(['E','D'], axis = 0)

Unnamed: 0,W,X,Y,Z,Other
A,-0.41009,0.162665,0.388813,-0.567844,-0.179032
B,0.443548,-0.457618,-0.617701,1.273738,0.656037
C,-1.273016,0.336221,1.457579,-0.196704,1.260875


In [83]:
# grab a row

df.loc['E']

W       -0.283420
X       -1.084131
Y        0.510533
Z        1.645422
Other    2.155955
Name: E, dtype: float64

In [84]:
# grab a column

df['W']

A   -0.410090
B    0.443548
C   -1.273016
D   -2.233890
E   -0.283420
Name: W, dtype: float64

In [85]:
# grab the row at integer position 3 (the fourth row)

df.iloc[3]

W       -2.233890
X       -1.149824
Y        1.914845
Z       -0.206768
Other    1.708077
Name: D, dtype: float64

In [86]:
# grab a list of rows

df.loc[['E','D']]

Unnamed: 0,W,X,Y,Z,Other
E,-0.28342,-1.084131,0.510533,1.645422,2.155955
D,-2.23389,-1.149824,1.914845,-0.206768,1.708077


In [87]:
# grab a list of columns

df[['W','X']]

Unnamed: 0,W,X
A,-0.41009,0.162665
B,0.443548,-0.457618
C,-1.273016,0.336221
D,-2.23389,-1.149824
E,-0.28342,-1.084131


## Selecting subsets of rows and columns:

In [88]:
df.loc['B','Y'] # 'B' represents row 'B' and 'Y' represents columns 'Y'

-0.6177010607121989

In [90]:
df # just for confirmation

Unnamed: 0,W,X,Y,Z,Other
A,-0.41009,0.162665,0.388813,-0.567844,-0.179032
B,0.443548,-0.457618,-0.617701,1.273738,0.656037
C,-1.273016,0.336221,1.457579,-0.196704,1.260875
D,-2.23389,-1.149824,1.914845,-0.206768,1.708077
E,-0.28342,-1.084131,0.510533,1.645422,2.155955


In [91]:
# similarly 

df.columns

Index(['W', 'X', 'Y', 'Z', 'Other'], dtype='object')

In [92]:
# let's relabel the columns with integers so the selections below are easier to follow

In [94]:
df.columns = [1,2,3,4,5]

In [95]:
df

Unnamed: 0,1,2,3,4,5
A,-0.41009,0.162665,0.388813,-0.567844,-0.179032
B,0.443548,-0.457618,-0.617701,1.273738,0.656037
C,-1.273016,0.336221,1.457579,-0.196704,1.260875
D,-2.23389,-1.149824,1.914845,-0.206768,1.708077
E,-0.28342,-1.084131,0.510533,1.645422,2.155955


In [96]:
df.head()

Unnamed: 0,1,2,3,4,5
A,-0.41009,0.162665,0.388813,-0.567844,-0.179032
B,0.443548,-0.457618,-0.617701,1.273738,0.656037
C,-1.273016,0.336221,1.457579,-0.196704,1.260875
D,-2.23389,-1.149824,1.914845,-0.206768,1.708077
E,-0.28342,-1.084131,0.510533,1.645422,2.155955


In [98]:
# now lets grab the chunks of values from our DataFrame

# row --> D, column --> 4

df.loc['D',4]  # reports back the above mentioned value

-0.20676825721008243

In [99]:
# row -> E, column -> 2

df.loc['E',2]

-1.084131487310038

In [102]:
type(df.loc['D',1])  # a single cell is returned as a 64-bit floating-point scalar

numpy.float64

In [103]:
# now we want to grab chunks of our DataFrame rather than individual values

df.loc[['A','B'],[1,2]] # reports back rows --> A,B and columns --> 1,2

Unnamed: 0,1,2
A,-0.41009,0.162665
B,0.443548,-0.457618


In [104]:
df

Unnamed: 0,1,2,3,4,5
A,-0.41009,0.162665,0.388813,-0.567844,-0.179032
B,0.443548,-0.457618,-0.617701,1.273738,0.656037
C,-1.273016,0.336221,1.457579,-0.196704,1.260875
D,-2.23389,-1.149824,1.914845,-0.206768,1.708077
E,-0.28342,-1.084131,0.510533,1.645422,2.155955


In [105]:
# lets grab another DataFrame chunk

# rows --> B,C
# columns --> 2,3

df.loc[['B','C'],[2,3]]

Unnamed: 0,2,3
B,-0.457618,-0.617701
C,0.336221,1.457579


In [106]:
df

Unnamed: 0,1,2,3,4,5
A,-0.41009,0.162665,0.388813,-0.567844,-0.179032
B,0.443548,-0.457618,-0.617701,1.273738,0.656037
C,-1.273016,0.336221,1.457579,-0.196704,1.260875
D,-2.23389,-1.149824,1.914845,-0.206768,1.708077
E,-0.28342,-1.084131,0.510533,1.645422,2.155955


In [107]:
# rows --> B,D
# columns --> 2,4

df.loc[['B','D'],[2,4]]

Unnamed: 0,2,4
B,-0.457618,1.273738
D,-1.149824,-0.206768


In [109]:
df.iloc[2,3]

-0.19670448466085977

In [112]:
df.iloc[[2,3],[3,4]]  # returns the rows at integer positions 2,3 and the columns at integer positions 3,4

Unnamed: 0,4,5
C,-0.196704,1.260875
D,-0.206768,1.708077


## Conditional selections:

In [113]:
# selections using conditional operators

df

Unnamed: 0,1,2,3,4,5
A,-0.41009,0.162665,0.388813,-0.567844,-0.179032
B,0.443548,-0.457618,-0.617701,1.273738,0.656037
C,-1.273016,0.336221,1.457579,-0.196704,1.260875
D,-2.23389,-1.149824,1.914845,-0.206768,1.708077
E,-0.28342,-1.084131,0.510533,1.645422,2.155955


In [118]:
df > 0 # element-wise comparison: returns a DataFrame of True / False values

Unnamed: 0,1,2,3,4,5
A,False,True,True,False,False
B,True,False,False,True,True
C,False,True,True,False,True
D,False,False,True,False,True
E,False,False,True,True,True


In [119]:
df[df > 0] # reports back actual values, where conditions is True, else NaN

Unnamed: 0,1,2,3,4,5
A,,0.162665,0.388813,,
B,0.443548,,,1.273738,0.656037
C,,0.336221,1.457579,,1.260875
D,,,1.914845,,1.708077
E,,,0.510533,1.645422,2.155955


In [120]:
df < 0

Unnamed: 0,1,2,3,4,5
A,True,False,False,True,True
B,False,True,True,False,False
C,True,False,False,True,False
D,True,True,False,True,False
E,True,True,False,False,False


In [121]:
df[df < 0]

Unnamed: 0,1,2,3,4,5
A,-0.41009,,,-0.567844,-0.179032
B,,-0.457618,-0.617701,,
C,-1.273016,,,-0.196704,
D,-2.23389,-1.149824,,-0.206768,
E,-0.28342,-1.084131,,,


In [122]:
df > 1

Unnamed: 0,1,2,3,4,5
A,False,False,False,False,False
B,False,False,False,True,False
C,False,False,True,False,True
D,False,False,True,False,True
E,False,False,False,True,True


In [123]:
df[df > 1]

Unnamed: 0,1,2,3,4,5
A,,,,,
B,,,,1.273738,
C,,,1.457579,,1.260875
D,,,1.914845,,1.708077
E,,,,1.645422,2.155955


In [127]:
# we can also select values for particular columns or rows

# for column --> 1

df[df[1]>0]

Unnamed: 0,1,2,3,4,5
B,0.443548,-0.457618,-0.617701,1.273738,0.656037


In [128]:
df[1] > 0

A    False
B     True
C    False
D    False
E    False
Name: 1, dtype: bool

In [133]:
df.columns = 'V W X Y Z'.split()

In [134]:
df

Unnamed: 0,V,W,X,Y,Z
A,-0.41009,0.162665,0.388813,-0.567844,-0.179032
B,0.443548,-0.457618,-0.617701,1.273738,0.656037
C,-1.273016,0.336221,1.457579,-0.196704,1.260875
D,-2.23389,-1.149824,1.914845,-0.206768,1.708077
E,-0.28342,-1.084131,0.510533,1.645422,2.155955


In [135]:
df['W'] > 0

A     True
B    False
C     True
D    False
E    False
Name: W, dtype: bool

In [137]:
df[df['W'] > 0]  # reports back values where 'W' column is greater than 0

Unnamed: 0,V,W,X,Y,Z
A,-0.41009,0.162665,0.388813,-0.567844,-0.179032
C,-1.273016,0.336221,1.457579,-0.196704,1.260875


In [139]:
df[df['X'] > 1]

Unnamed: 0,V,W,X,Y,Z
C,-1.273016,0.336221,1.457579,-0.196704,1.260875
D,-2.23389,-1.149824,1.914845,-0.206768,1.708077


In [140]:
df

Unnamed: 0,V,W,X,Y,Z
A,-0.41009,0.162665,0.388813,-0.567844,-0.179032
B,0.443548,-0.457618,-0.617701,1.273738,0.656037
C,-1.273016,0.336221,1.457579,-0.196704,1.260875
D,-2.23389,-1.149824,1.914845,-0.206768,1.708077
E,-0.28342,-1.084131,0.510533,1.645422,2.155955


In [143]:
df[df['Y'] < 0] # returns the rows of the DataFrame where column 'Y' is less than 0

Unnamed: 0,V,W,X,Y,Z
A,-0.41009,0.162665,0.388813,-0.567844,-0.179032
C,-1.273016,0.336221,1.457579,-0.196704,1.260875
D,-2.23389,-1.149824,1.914845,-0.206768,1.708077


In [144]:
df[df['Z'] > 0]  # returns the rows of the DataFrame where column 'Z' is greater than 0

Unnamed: 0,V,W,X,Y,Z
B,0.443548,-0.457618,-0.617701,1.273738,0.656037
C,-1.273016,0.336221,1.457579,-0.196704,1.260875
D,-2.23389,-1.149824,1.914845,-0.206768,1.708077
E,-0.28342,-1.084131,0.510533,1.645422,2.155955


In [145]:
df

Unnamed: 0,V,W,X,Y,Z
A,-0.41009,0.162665,0.388813,-0.567844,-0.179032
B,0.443548,-0.457618,-0.617701,1.273738,0.656037
C,-1.273016,0.336221,1.457579,-0.196704,1.260875
D,-2.23389,-1.149824,1.914845,-0.206768,1.708077
E,-0.28342,-1.084131,0.510533,1.645422,2.155955


In [147]:
df[df['V'] < 1] # returns the rows of the DataFrame where column 'V' is less than 1

Unnamed: 0,V,W,X,Y,Z
A,-0.41009,0.162665,0.388813,-0.567844,-0.179032
B,0.443548,-0.457618,-0.617701,1.273738,0.656037
C,-1.273016,0.336221,1.457579,-0.196704,1.260875
D,-2.23389,-1.149824,1.914845,-0.206768,1.708077
E,-0.28342,-1.084131,0.510533,1.645422,2.155955


In [148]:
# Grabbing the values of one column based on a condition on another column.
# A single .loc[row_mask, column] call does the row filter and the column
# selection in one step instead of chaining two [] lookups.

df.loc[df['W'] > 0, 'Y'] # returns the values of column 'Y' where column 'W' is greater than 0

A   -0.567844
C   -0.196704
Name: Y, dtype: float64

In [149]:
df.loc[df['X'] < 1, 'Z'] # returns the values of column 'Z' where column 'X' is less than 1 (one .loc call instead of chained [] lookups)

A   -0.179032
B    0.656037
E    2.155955
Name: Z, dtype: float64

In [151]:
# Similarly, we can pass a list of columns as the second argument to .loc:
# the boolean mask picks the rows, the list picks the columns, in one step.

df.loc[df['Y'] > 0, ['V','Z']] # returns columns 'V' and 'Z' for the rows where column 'Y' is greater than 0

Unnamed: 0,V,Z
B,0.443548,0.656037
E,-0.28342,2.155955


In [154]:
df.loc[df['W'] < 1, ['X','Y']] # columns 'X' and 'Y' for the rows where column 'W' is less than 1

Unnamed: 0,X,Y
A,0.388813,-0.567844
B,-0.617701,1.273738
C,1.457579,-0.196704
D,1.914845,-0.206768
E,0.510533,1.645422


In [155]:
df.loc[df['X'] > 0, ['Y','Z']] # columns 'Y' and 'Z' for the rows where column 'X' is greater than 0

Unnamed: 0,Y,Z
A,-0.567844,-0.179032
C,-0.196704,1.260875
D,-0.206768,1.708077
E,1.645422,2.155955


In [156]:
# for two conditions we can use '&' and '|' operators

In [159]:
df[(df['W'] > 0) & (df['Y'] < 1)] # will report back the DataFrame where, column 'W' > 0 AND column 'Y' < 1

Unnamed: 0,V,W,X,Y,Z
A,-0.41009,0.162665,0.388813,-0.567844,-0.179032
C,-1.273016,0.336221,1.457579,-0.196704,1.260875


In [158]:
df

Unnamed: 0,V,W,X,Y,Z
A,-0.41009,0.162665,0.388813,-0.567844,-0.179032
B,0.443548,-0.457618,-0.617701,1.273738,0.656037
C,-1.273016,0.336221,1.457579,-0.196704,1.260875
D,-2.23389,-1.149824,1.914845,-0.206768,1.708077
E,-0.28342,-1.084131,0.510533,1.645422,2.155955


In [163]:
df[(df['W'] > 0) & (df['Z'] < 0)] # reports back the DataFrame where column 'W' > 0 AND column 'Z' < 0

Unnamed: 0,V,W,X,Y,Z
A,-0.41009,0.162665,0.388813,-0.567844,-0.179032


In [166]:
df[(df['V'] < 0) | (df['Z'] > 1)]

Unnamed: 0,V,W,X,Y,Z
A,-0.41009,0.162665,0.388813,-0.567844,-0.179032
C,-1.273016,0.336221,1.457579,-0.196704,1.260875
D,-2.23389,-1.149824,1.914845,-0.206768,1.708077
E,-0.28342,-1.084131,0.510533,1.645422,2.155955


In [168]:
df[(df['V'] > 1) & (df['X'] > 1)]  # reports back the DataFrame where column 'V' > 1 AND column 'X' > 1

Unnamed: 0,V,W,X,Y,Z


## More indexing details:

In [169]:
df

Unnamed: 0,V,W,X,Y,Z
A,-0.41009,0.162665,0.388813,-0.567844,-0.179032
B,0.443548,-0.457618,-0.617701,1.273738,0.656037
C,-1.273016,0.336221,1.457579,-0.196704,1.260875
D,-2.23389,-1.149824,1.914845,-0.206768,1.708077
E,-0.28342,-1.084131,0.510533,1.645422,2.155955


In [170]:
# reset index to default 0,1,2,..

df.reset_index()

Unnamed: 0,index,V,W,X,Y,Z
0,A,-0.41009,0.162665,0.388813,-0.567844,-0.179032
1,B,0.443548,-0.457618,-0.617701,1.273738,0.656037
2,C,-1.273016,0.336221,1.457579,-0.196704,1.260875
3,D,-2.23389,-1.149824,1.914845,-0.206768,1.708077
4,E,-0.28342,-1.084131,0.510533,1.645422,2.155955


In [171]:
df.reset_index()

Unnamed: 0,index,V,W,X,Y,Z
0,A,-0.41009,0.162665,0.388813,-0.567844,-0.179032
1,B,0.443548,-0.457618,-0.617701,1.273738,0.656037
2,C,-1.273016,0.336221,1.457579,-0.196704,1.260875
3,D,-2.23389,-1.149824,1.914845,-0.206768,1.708077
4,E,-0.28342,-1.084131,0.510533,1.645422,2.155955


In [178]:
# Adding new index

new_ind = 'KHI LHR PSW RWP ISB'.split()

In [173]:
df['Cities'] = new_ind

In [177]:
df.set_index('Cities')

Unnamed: 0_level_0,V,W,X,Y,Z
Cities,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
KHI,-0.41009,0.162665,0.388813,-0.567844,-0.179032
LHR,0.443548,-0.457618,-0.617701,1.273738,0.656037
PSW,-1.273016,0.336221,1.457579,-0.196704,1.260875
RWP,-2.23389,-1.149824,1.914845,-0.206768,1.708077
ISB,-0.28342,-1.084131,0.510533,1.645422,2.155955


In [179]:
# lets say we want to set column 'V' as index

df.set_index('V')

Unnamed: 0_level_0,W,X,Y,Z,Cities
V,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
-0.41009,0.162665,0.388813,-0.567844,-0.179032,KHI
0.443548,-0.457618,-0.617701,1.273738,0.656037,LHR
-1.273016,0.336221,1.457579,-0.196704,1.260875,PSW
-2.23389,-1.149824,1.914845,-0.206768,1.708077,RWP
-0.28342,-1.084131,0.510533,1.645422,2.155955,ISB


In [180]:
# again, setting the 'Cities' column as the index (this returns a new DataFrame; df itself is unchanged)

df.set_index('Cities')

Unnamed: 0_level_0,V,W,X,Y,Z
Cities,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
KHI,-0.41009,0.162665,0.388813,-0.567844,-0.179032
LHR,0.443548,-0.457618,-0.617701,1.273738,0.656037
PSW,-1.273016,0.336221,1.457579,-0.196704,1.260875
RWP,-2.23389,-1.149824,1.914845,-0.206768,1.708077
ISB,-0.28342,-1.084131,0.510533,1.645422,2.155955


In [181]:
# Make 'Cities' the index of df for good. Reassigning the returned frame has
# the same effect as inplace = True but avoids mutating df behind the scenes.
df = df.set_index('Cities')

In [182]:
df

Unnamed: 0_level_0,V,W,X,Y,Z
Cities,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
KHI,-0.41009,0.162665,0.388813,-0.567844,-0.179032
LHR,0.443548,-0.457618,-0.617701,1.273738,0.656037
PSW,-1.273016,0.336221,1.457579,-0.196704,1.260875
RWP,-2.23389,-1.149824,1.914845,-0.206768,1.708077
ISB,-0.28342,-1.084131,0.510533,1.645422,2.155955


## Multi-Index And Index Hierarchy:

In [184]:
# Build the (outer, inner) label pairs that will become a two-level index.

outer_index = ['G1'] * 3 + ['G2'] * 3
inner_index = [1, 2, 3] * 2
hier_index = [(group, number) for group, number in zip(outer_index, inner_index)]

In [185]:
hier_index

[('G1', 1), ('G1', 2), ('G1', 3), ('G2', 1), ('G2', 2), ('G2', 3)]

In [186]:
hier_index = pd.MultiIndex.from_tuples(hier_index)

In [187]:
hier_index

MultiIndex(levels=[['G1', 'G2'], [1, 2, 3]],
           codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]])

In [190]:
df = pd.DataFrame(np.random.randn(6,2),index=hier_index,columns=['A','B'])

In [192]:
df

Unnamed: 0,Unnamed: 1,A,B
G1,1,-0.891374,-1.073333
G1,2,0.173286,-0.839165
G1,3,1.143653,-0.176153
G2,1,0.535412,1.956043
G2,2,-0.983348,0.0484
G2,3,0.488516,-1.415226


In [194]:
# A second set of (outer, inner) label pairs, this time with string inner labels.

outer = ['O1'] * 3 + ['O2'] * 3
inner = list('ABCXYZ')
hier = [pair for pair in zip(outer, inner)]

In [195]:
hier = pd.MultiIndex.from_tuples(hier)

In [196]:
hier

MultiIndex(levels=[['O1', 'O2'], ['A', 'B', 'C', 'X', 'Y', 'Z']],
           codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 3, 4, 5]])

In [197]:
df_1 = pd.DataFrame(data = np.random.randn(6,3), columns = 'A B C'.split(), index = hier)

In [199]:
df_1 # that's how you can create a multi index DataFrame

Unnamed: 0,Unnamed: 1,A,B,C
O1,A,-1.33238,1.224214,-0.190375
O1,B,0.161885,0.640056,-0.282391
O1,C,-0.957009,-1.391016,-0.303468
O2,X,-0.287981,0.442585,0.132143
O2,Y,-0.611567,-0.313358,-1.030987
O2,Z,-1.105768,-1.355911,1.246052


In [200]:
# now lets grab some values off of our hier_indexed DataFrame

# for rows we use df.loc
# for columns we simply index with the column label, e.g. df['A']

df.loc['G1']

Unnamed: 0,A,B
1,-0.891374,-1.073333
2,0.173286,-0.839165
3,1.143653,-0.176153


In [201]:
df.loc['G2']

Unnamed: 0,A,B
1,0.535412,1.956043
2,-0.983348,0.0484
3,0.488516,-1.415226


In [202]:
df.loc['G1'].loc[2]

A    0.173286
B   -0.839165
Name: 2, dtype: float64

In [203]:
df

Unnamed: 0,Unnamed: 1,A,B
G1,1,-0.891374,-1.073333
G1,2,0.173286,-0.839165
G1,3,1.143653,-0.176153
G2,1,0.535412,1.956043
G2,2,-0.983348,0.0484
G2,3,0.488516,-1.415226


In [211]:
df.loc['G2'].loc[3] # returns the row with outer label 'G2' and inner label 3 (.loc is label-based, not positional)

A    0.488516
B   -1.415226
Name: 3, dtype: float64

In [210]:
df.loc['G2']['A']  # reports back label 'G2' column 'A'

1    0.535412
2   -0.983348
3    0.488516
Name: A, dtype: float64

In [212]:
df.index.names  # a FrozenList of level names; both entries are None because we haven't named the index levels yet

FrozenList([None, None])

In [215]:
df.index.names = 'Groups Numbers'.split()

In [216]:
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Groups,Numbers,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,-0.891374,-1.073333
G1,2,0.173286,-0.839165
G1,3,1.143653,-0.176153
G2,1,0.535412,1.956043
G2,2,-0.983348,0.0484
G2,3,0.488516,-1.415226


In [217]:
df.index.names

FrozenList(['Groups', 'Numbers'])

In [218]:
df.loc['G1']

Unnamed: 0_level_0,A,B
Numbers,Unnamed: 1_level_1,Unnamed: 2_level_1
1,-0.891374,-1.073333
2,0.173286,-0.839165
3,1.143653,-0.176153


In [219]:
# we can use 'xs' (cross-section) instead of 'loc' as well

df.xs('G1')

Unnamed: 0_level_0,A,B
Numbers,Unnamed: 1_level_1,Unnamed: 2_level_1
1,-0.891374,-1.073333
2,0.173286,-0.839165
3,1.143653,-0.176153


In [221]:
df.xs(('G1',2)) # returns the row with outer label 'G1' and inner label 2; a multi-level key must be a tuple (list keys are deprecated / rejected by newer pandas)

A    0.173286
B   -0.839165
Name: (G1, 2), dtype: float64

In [224]:
df.xs(('G2',1)) # returns the row with outer label 'G2' and inner label 1 (tuple key; both entries are labels, not positions)

A    0.535412
B    1.956043
Name: (G2, 1), dtype: float64

In [225]:
df.xs(('G2',3)) # row with outer label 'G2' and inner label 3 (tuple key, as xs requires for multi-level lookups)

A    0.488516
B   -1.415226
Name: (G2, 3), dtype: float64

In [227]:
# and we can use .loc notation as well

df.loc['G2'].loc[3]

A    0.488516
B   -1.415226
Name: 3, dtype: float64