# Section 6: Python for Data Analysis - Pandas

# 04 - DataFrames Part III

In [33]:
## In this lecture: multi-index DataFrames and index hierarchy
import numpy as np 
import pandas as pd

from numpy.random import randn
## Seed NumPy's global random generator so the random values drawn below are reproducible
np.random.seed(101)

In [34]:
## Index levels
## Build a two-level (hierarchical) index: `outside` holds the outer labels,
## `inside` holds the inner labels, paired position by position.
outside = ['G1', 'G1', 'G1', 'G2', 'G2', 'G2']
inside = [1, 2, 3, 1, 2, 3]

## Pair each outer label with its inner label -> [('G1', 1), ('G1', 2), ...].
## The list gets its own name so that `hier_index` only ever refers to a MultiIndex.
index_tuples = list(zip(outside, inside))

## MultiIndex.from_tuples takes a list of tuples and creates a MultiIndex from it
## (not used much during this course).
hier_index = pd.MultiIndex.from_tuples(index_tuples)

hier_index

MultiIndex([('G1', 1),
            ('G1', 2),
            ('G1', 3),
            ('G2', 1),
            ('G2', 2),
            ('G2', 3)],
           )

In [35]:
## Materialise the zipped (outside, inside) pairs as a list so they can be inspected
[*zip(outside, inside)]

[('G1', 1), ('G1', 2), ('G1', 3), ('G2', 1), ('G2', 2), ('G2', 3)]

In [36]:
## Print the built-in documentation for zip
help(zip)

Help on class zip in module builtins:

class zip(object)
 |  zip(iter1 [,iter2 [...]]) --> zip object
 |  
 |  Return a zip object whose .__next__() method returns a tuple where
 |  the i-th element comes from the i-th iterable argument.  The .__next__()
 |  method continues until the shortest iterable in the argument sequence
 |  is exhausted and then it raises StopIteration.
 |  
 |  Methods defined here:
 |  
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |  
 |  __iter__(self, /)
 |      Implement iter(self).
 |  
 |  __next__(self, /)
 |      Implement next(self).
 |  
 |  __reduce__(...)
 |      Return state information for pickling.
 |  
 |  ----------------------------------------------------------------------
 |  Static methods defined here:
 |  
 |  __new__(*args, **kwargs) from builtins.type
 |      Create and return a new object.  See help(type) for accurate signature.



In [37]:
## 6x2 frame of random normal draws, indexed by hier_index, with columns A and B
df = pd.DataFrame(data=randn(6, 2), index=hier_index, columns=['A', 'B'])

In [38]:
df  ## This DataFrame has an index hierarchy (a multi-level index)

Unnamed: 0,Unnamed: 1,A,B
G1,1,2.70685,0.628133
G1,2,0.907969,0.503826
G1,3,0.651118,-0.319318
G2,1,-0.848077,0.605965
G2,2,-2.018168,0.740122
G2,3,0.528813,-0.589001


In [39]:
## Selecting data from a multi-level index
df.loc['G1']  ## Select from the outside level inward: all rows under the outer label 'G1'

Unnamed: 0,A,B
1,2.70685,0.628133
2,0.907969,0.503826
3,0.651118,-0.319318


In [40]:
## Chain a second .loc to pick inner label 1 within group 'G1' (returns that row as a Series)
df.loc['G1'].loc[1]

A    2.706850
B    0.628133
Name: 1, dtype: float64

In [51]:
## The index levels currently have no names; assign them through the .index.names attribute

df.index.names = ['Groups', 'Num']  ## Before this assignment the names are a FrozenList of [None, None] (None objects, not strings)

In [52]:
df  ## Level names make the multi-level index information easier to read alongside the DataFrame

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Groups,Num,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,2.70685,0.628133
G1,2,0.907969,0.503826
G1,3,0.651118,-0.319318
G2,1,-0.848077,0.605965
G2,2,-2.018168,0.740122
G2,3,0.528813,-0.589001


In [53]:
## Additional selection example: group 'G2', number 2, column 'B'.
## A single .loc call takes the row as an (outer, inner) tuple and the column label,
## replacing the chained .loc[...].loc[...][...] lookups with one indexing operation.

df.loc[('G2', 2), 'B']

0.7401220570561068

In [54]:
## Suggested exercise -> select the element for group 'G1', Num 3, column 'A'  # Sol. 0.651118
## A single .loc call addresses the row by its (outer, inner) tuple and the column by label.
df.loc[('G1', 3), 'A']

0.6511179479432686

In [55]:
## Using cross section: .xs() returns a cross section of rows or columns from a Series or DataFrame
df

Unnamed: 0_level_0,Unnamed: 1_level_0,A,B
Groups,Num,Unnamed: 2_level_1,Unnamed: 3_level_1
G1,1,2.70685,0.628133
G1,2,0.907969,0.503826
G1,3,0.651118,-0.319318
G2,1,-0.848077,0.605965
G2,2,-2.018168,0.740122
G2,3,0.528813,-0.589001


In [56]:
## Selecting everything from G1

## Using the .loc[] indexer
df.loc['G1']

Unnamed: 0_level_0,A,B
Num,Unnamed: 1_level_1,Unnamed: 2_level_1
1,2.70685,0.628133
2,0.907969,0.503826
3,0.651118,-0.319318


In [58]:
## Cross section with .xs(): it can skip over a level of a multi-level index,
## e.g. grab Num 1 from both G1 and G2, which is tricky with .loc[].

df.xs(key=1, level='Num')  ## Cross section taken from the requested index level

Unnamed: 0_level_0,A,B
Groups,Unnamed: 1_level_1,Unnamed: 2_level_1
G1,2.70685,0.628133
G2,-0.848077,0.605965
