In [1]:
import pandas as pd
import numpy as np

## stack 

    데이터의 칼럼을 로우로 피벗(회전)시킨다.
    

In [42]:
# Print pandas' built-in documentation for DataFrame.stack.
help(pd.DataFrame.stack)

Help on function stack in module pandas.core.frame:

stack(self, level=-1, dropna=True)
    Pivot a level of the (possibly hierarchical) column labels, returning a
    DataFrame (or Series in the case of an object with a single level of
    column labels) having a hierarchical index with a new inner-most level
    of row labels.
    The level involved will automatically get sorted.
    
    Parameters
    ----------
    level : int, string, or list of these, default last level
        Level(s) to stack, can pass level name
    dropna : boolean, default True
        Whether to drop rows in the resulting Frame/Series with no valid
        values
    
    Examples
    ----------
    >>> s
         a   b
    one  1.  2.
    two  3.  4.
    
    >>> s.stack()
    one a    1
        b    2
    two a    3
        b    4
    
    Returns
    -------
    stacked : DataFrame or Series



In [3]:
# 2x3 integer frame holding 0..5. Both axes are given a name ('state' for the
# rows, 'number' for the columns) so that levels can later be addressed by name.
data = pd.DataFrame(
    data=np.arange(6).reshape(2, 3),
    index=pd.Index(['Ohio', 'Colorado'], name='state'),
    columns=pd.Index(['one', 'two', 'three'], name='number'),
)

In [4]:
# Display the frame built above.
data

number,one,two,three
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ohio,0,1,2
Colorado,3,4,5


In [5]:
# Pivot the column labels into the innermost row-index level (-1 is the
# default level), which turns the frame into a Series.
result = data.stack(level=-1)

In [6]:
# Display the stacked Series.
result

state     number
Ohio      one       0
          two       1
          three     2
Colorado  one       3
          two       4
          three     5
dtype: int64

### 색인 확인하기

In [8]:
# Inspect the row index of the stacked Series (a two-level MultiIndex).
result.index

MultiIndex(levels=[['Ohio', 'Colorado'], ['one', 'two', 'three']],
           labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]],
           names=['state', 'number'])

## unstack

    로우를 칼럼으로 피벗시킨다.

In [18]:
# Print pandas' built-in documentation for Series.unstack.
help(result.unstack)

Help on method unstack in module pandas.core.series:

unstack(level=-1, fill_value=None) method of pandas.core.series.Series instance
    Unstack, a.k.a. pivot, Series with MultiIndex to produce DataFrame.
    The level involved will automatically get sorted.
    
    Parameters
    ----------
    level : int, string, or list of these, default last level
        Level(s) to unstack, can pass level name
    fill_value : replace NaN with this value if the unstack produces
        missing values
    
        .. versionadded: 0.18.0
    
    Examples
    --------
    >>> s = pd.Series([1, 2, 3, 4],
    ...     index=pd.MultiIndex.from_product([['one', 'two'], ['a', 'b']]))
    >>> s
    one  a    1
         b    2
    two  a    3
         b    4
    dtype: int64
    
    >>> s.unstack(level=-1)
         a  b
    one  1  2
    two  3  4
    
    >>> s.unstack(level=0)
       one  two
    a    1    3
    b    2    4
    
    Returns
    -------
    unstacked : DataFrame



### 멀티 색인된 시리즈를 다시 데이터프레임으로 변환하기

In [10]:
# Move the innermost index level (-1 is the default level) back to the columns.
reset = result.unstack(level=-1)

In [15]:
# Display the unstacked object.
reset

number,one,two,three
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ohio,0,1,2
Colorado,3,4,5


In [13]:
# Check which type unstack() returned.
type(reset)

pandas.core.frame.DataFrame

In [11]:
# Inspect the row index of the unstacked object.
reset.index

Index(['Ohio', 'Colorado'], dtype='object', name='state')

In [12]:
# Inspect the column index of the unstacked object.
reset.columns

Index(['one', 'two', 'three'], dtype='object', name='number')

### unstack 처리


In [33]:
# Rebuild the stacked Series so the unstack examples below start from a
# known value (-1, the innermost column level, is the default).
result = data.stack(level=-1)

In [34]:
# Display the stacked Series.
result

state     number
Ohio      one       0
          two       1
          three     2
Colorado  one       3
          two       4
          three     5
dtype: int64

### 멀티 인덱스의 첫 번째 레벨(state)로 unstack 처리

In [35]:
# Select the level to pivot by its name.
result.unstack(level='state')

state,Ohio,Colorado
number,Unnamed: 1_level_1,Unnamed: 2_level_1
one,0,3
two,1,4
three,2,5


In [14]:
# Select the level to pivot by its position (0 = outermost level).
result.unstack(level=0)

state,Ohio,Colorado
number,Unnamed: 1_level_1,Unnamed: 2_level_1
one,0,3
two,1,4
three,2,5


### 멀티 인덱스의 두 번째 레벨(number)로 unstack 처리

In [37]:
# Pivot the level named 'number' into the columns.
result.unstack(level='number')

number,one,two,three
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ohio,0,1,2
Colorado,3,4,5


## 시리즈를 멀티 인덱스로 만들기

In [39]:
# First input Series: labels 'a'..'d' mapped to 0..3.
s1 = pd.Series({'a': 0, 'b': 1, 'c': 2, 'd': 3})

In [20]:
# Display the first Series.
s1

a    0
b    1
c    2
d    3
dtype: int64

In [21]:
# Second input Series: labels 'c'..'e' mapped to 4..6 (shares 'c' and 'd' with s1).
s2 = pd.Series({'c': 4, 'd': 5, 'e': 6})

In [22]:
# Display the second Series.
s2

c    4
d    5
e    6
dtype: int64

In [24]:
# Concatenate both Series end to end; the mapping keys become the outer level
# of the resulting MultiIndex and the original labels become the inner level.
data2 = pd.concat({'one': s1, 'two': s2})

In [26]:
# Check which type the concatenation returned.
type(data2)

pandas.core.series.Series

In [27]:
# Inspect the two-level index created by the concat keys.
data2.index

MultiIndex(levels=[['one', 'two'], ['a', 'b', 'c', 'd', 'e']],
           labels=[[0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 3, 2, 3, 4]])

In [25]:
# Display the concatenated Series.
data2

one  a    0
     b    1
     c    2
     d    3
two  c    4
     d    5
     e    6
dtype: int64

## 시리즈를 데이터프레임으로 처리하기

In [28]:
# Pivot the inner labels (-1 is the default level) into columns; label pairs
# that do not exist in data2 show up as NaN in the output.
data2.unstack(level=-1)

Unnamed: 0,a,b,c,d,e
one,0.0,1.0,2.0,3.0,
two,,,4.0,5.0,6.0


In [29]:
# Round trip: unstack to a wide frame, then stack back to a Series.
# The legacy stack implementation drops the NaN rows by itself, but the
# reimplemented stack (pandas 2.1 `future_stack`) keeps NaN values that are
# present in its input. Dropping them explicitly yields the same seven rows
# regardless of which implementation is active.
data2.unstack().stack().dropna()

one  a    0.0
     b    1.0
     c    2.0
     d    3.0
two  c    4.0
     d    5.0
     e    6.0
dtype: float64

## NaN을 포함한 전체 인덱스로 생성하기

In [31]:
# Same round trip, but keep the rows whose value is NaN so that every
# (outer, inner) label combination appears in the result.
# NOTE(review): `dropna` belongs to the legacy stack implementation, which
# pandas 2.1+ deprecates in favour of `future_stack` — confirm against the
# installed pandas version before relying on this argument.
(
    data2
    .unstack()
    .stack(dropna=False)
)

one  a    0.0
     b    1.0
     c    2.0
     d    3.0
     e    NaN
two  a    NaN
     b    NaN
     c    4.0
     d    5.0
     e    6.0
dtype: float64