## Hierarchical Indexing

In [1]:
import numpy as np 
import pandas as pd 

In [None]:
## Multi-indexing
# Five rows and two columns; the row index has two levels
# (outer: integers, inner: letters).
data = pd.DataFrame(
    [[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]],
    index=[[1, 1, 2, 2, 3], ['a', 'b', 'c', 'd', 'e']],
    columns=['c1', 'c2'],
)
data

# Select a single row by giving both index levels as ONE tuple. The bare
# shorthand data.loc[1, 'a'] also works on this frame, but in general it can
# be ambiguous with (row, column) indexing, so the tuple form is safer.
data.loc[(1, 'a')]

# Two levels + a column in a single lookup: tuple for the row, label for the
# column. This avoids the chained data.loc[...].c1 attribute access.
data.loc[(1, 'a'), 'c1']

## Selecting on the inner level only
# On a Series the inner level can be reached positionally with a slice ...
data.c1.loc[:, 'a']
# ... and on the DataFrame itself a cross-section on level 1 does the same
# without first decomposing to a Series.
data.xs('a', level=1)

# Pivot the inner index level into the columns.
data.unstack()

## Summary stats by level
## Uses pandas group by operations
# Rows and columns both carry a two-level index.
frame = pd.DataFrame(
    np.arange(12).reshape((4, 3)),
    index=[['a', 'a', 'b', 'b'], [1, 2, 1, 2]],
    columns=[['Ohio', 'Ohio', 'Colorado'], ['Green', 'Red', 'Green']],
)

# Sum the columns that share the same outer column label.
# The old spelling frame.sum(level=0, axis=1) no longer works: the `level`
# argument of reductions was removed in pandas 2.0. Grouping the transposed
# frame is the equivalent; sort=False keeps the labels in order of first
# appearance (Ohio, Colorado) as the old call did.
# level=0 is the position of the outer level; a level *name* can be used
# instead only when the index levels are named (they are not here).
level_sums = frame.T.groupby(level=0, sort=False).sum().T
level_sums


## Indexing using a dataframe's columns
# Four plain columns; 'c' and 'd' together identify each row.
frame = pd.DataFrame({
    'a': range(7),
    'b': range(7, 0, -1),
    'c': ['one'] * 3 + ['two'] * 4,
    'd': [0, 1, 2, 0, 1, 2, 3],
})

# Build a two-level row index from columns 'c' and 'd'. With the default
# drop=True the columns are moved into the index; pass drop=False to keep
# them as regular columns as well.
frame.set_index(['c', 'd'])

## Combining and Merging Datasets 


In [68]:
# Left table: repeated keys, one of which ('c') has no partner on the right.
df1 = pd.DataFrame({
    'key': list('bbacaab'),
    'data1': range(7),
})
# Right table: unique keys, one of which ('d') has no partner on the left.
df2 = pd.DataFrame({
    'key': list('abd'),
    'data2': range(3),
})

# Default (inner) merge on the shared 'key' column, then fix the column order.
df1.merge(df2, on='key')[['key', 'data1', 'data2']]

## Can use left_on, right_on args if the key column name is different. 
## @note: Can also merge using an index - using the left_index or right_index args of merge or join() function

## Concatenating along an axis
# @note: reshape accepts the new shape either as a single tuple, reshape((3, 4)),
# or as separate ints, reshape(3, 4); both give the same 3x4 array.
arr = np.arange(12).reshape(3, 4)

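## Questions to settle before concatenating:
## - If the other axes are indexed differently, combine all of their labels (union) or keep only the shared ones (intersection)?
## - Do the concatenated pieces need to be identifiable in the result?
## - Does the concatenation axis hold labels worth preserving, or can it be replaced by a fresh index?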

# Three Series with non-overlapping indexes.
s1 = pd.Series([0, 1], index=['a', 'b'])
s2 = pd.Series([2, 3, 4], index=['c', 'd', 'e'])
s3 = pd.Series([5, 6], index=['f', 'g'])

pd.concat([s1, s2, s3])  # regular concat along the rows, like rbind
pd.concat([s1, s2, s3], axis=1)  # like cbind - rows are ALIGNED ON THE INDEX

# s4 shares labels 'a' and 'b' with s1.
s4 = pd.concat([s1, s3])
pd.concat([s1, s4], axis=1)  # default outer join: union of both indexes

# Restrict/reorder the rows of the result to a chosen set of labels.
# The join_axes argument that used to do this was removed from pd.concat in
# pandas 1.0; reindexing the concatenated frame gives the same result
# (labels absent from both inputs, here 'c' and 'e', become all-NaN rows).
joined = pd.concat([s1, s4], axis=1).reindex(['a', 'c', 'b', 'e'])
joined

## How to identify concatenated pieces in the result - use hierarchical indexing!
# keys= adds an outer index level naming the piece each value came from.
result = pd.concat([s1, s2, s3], axis=0, keys=['uno', 'dos', 'tres'])
result.unstack()

## Now concat dataframes:
# df1 covers rows a, b, c; df2 covers only a and c and has different columns.
df1 = pd.DataFrame(
    np.arange(6).reshape(3, 2),
    index=list('abc'),
    columns=['one', 'two'],
)
df2 = pd.DataFrame(
    np.arange(5, 9).reshape(2, 2),
    index=list('ac'),
    columns=['three', 'four'],
)

# Side-by-side concat; keys become the outer level of the column index.
pd.concat([df1, df2], axis=1, keys=['l1', 'l2'])

## A dict works too: its keys are used as the outer level, its values are the frames
pd.concat({'l1': df1, 'l2': df2}, axis=1)



Unnamed: 0_level_0,l1,l1,l2,l2
Unnamed: 0_level_1,one,two,three,four
a,0,1,5.0,6.0
b,2,3,,
c,4,5,7.0,8.0


## Reshaping

In [6]:
## Stacking and unstacking - reshaping with hierarchical indexing
# 2x3 frame whose row axis is named 'state' and column axis 'number'.
data = pd.DataFrame(
    np.arange(6).reshape((2, 3)),
    index=['OH', 'CO'],
    columns=['one', 'two', 'three'],
).rename_axis(index='state', columns='number')

# stack() moves the column labels into an inner row-index level (-> Series).
result = data.stack()

# unstack() pivots a level back out; naming 'state' puts the states in the columns.
result.unstack('state')

## Pivoting long to wide - with pivot 
## Melting wide to long - with melt 
