In [1]:
import pandas as pd

## 예제 5-1 시리즈의 값 정렬 

In [2]:
# Example Series: four integers labelled with Korean syllables, deliberately
# out of order so the sorting examples below have something to do.
obj1 = pd.Series(
    data=[40, 10, 20, 30],
    index=['가', '다', '나', '라'],
)

In [3]:
# Display the Series; rows appear in the order given to the constructor.
obj1

가    40
다    10
나    20
라    30
dtype: int64

In [4]:
# Sort by value, smallest first (ascending is the default, spelled out here).
obj1.sort_values(ascending=True)

다    10
나    20
라    30
가    40
dtype: int64

In [5]:
# Sort by value, largest first.
obj1.sort_values(ascending=False)

가    40
라    30
나    20
다    10
dtype: int64

In [6]:
# obj1 itself is unchanged: the sort_values() calls above returned new Series
# instead of sorting in place.
obj1

가    40
다    10
나    20
라    30
dtype: int64

In [7]:
# argsort() returns the integer positions that would sort the values in
# ascending order; the result keeps obj1's original index labels.
obj1.argsort()

가    1
다    2
나    3
라    0
dtype: int64

In [8]:
# Index label of the smallest value.
obj1.idxmin()

'다'

In [9]:
# Index label of the largest value.
obj1.idxmax()

'가'

## 예제 5-2 시리즈의 인덱스 정렬 

In [10]:
# Same values as obj1, but with out-of-order ASCII labels for the
# index-sorting examples.
obj2 = pd.Series(
    data=[40, 10, 20, 30],
    index=['c', 'a', 'b', 'd'],
)

In [11]:
# Display the Series in construction order.
obj2

c    40
a    10
b    20
d    30
dtype: int64

In [12]:
# Sort by index label, a -> d (ascending is the default, spelled out here).
obj2.sort_index(ascending=True)

a    10
b    20
c    40
d    30
dtype: int64

In [13]:
# Sort by index label in reverse order, d -> a.
obj2.sort_index(ascending=False)

d    30
c    40
b    20
a    10
dtype: int64

## 예제 5-3 데이터프레임의 값 정렬

In [14]:
import numpy as np

In [15]:
# Integers 0..7 laid out as a 2-row, 4-column array.
data = np.arange(8).reshape(2, 4)

In [16]:
# Show the 2x4 integer array.
data

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [17]:
# Wrap the array in a DataFrame whose row and column labels are both out of
# alphabetical order.
frame = pd.DataFrame(
    data=data,
    index=['three', 'one'],
    columns=['d', 'a', 'b', 'c'],
)

In [18]:
# Display the frame with its labels in construction order.
frame

Unnamed: 0,d,a,b,c
three,0,1,2,3
one,4,5,6,7


In [19]:
# Order the rows by the values in column 'a', smallest first.
frame.sort_values(by='a', ascending=True)

Unnamed: 0,d,a,b,c
three,0,1,2,3
one,4,5,6,7


In [20]:
# Order the rows by column 'a', largest first.
frame.sort_values(by='a', ascending=False)

Unnamed: 0,d,a,b,c
one,4,5,6,7
three,0,1,2,3


In [21]:
# Float copy of `data` so that a NaN can be stored in it (integer arrays
# cannot hold NaN). The builtin `float` is used because the `np.float` alias
# was deprecated in NumPy 1.20 and removed in 1.24.
data1 = data.astype(float)

In [22]:
# Put a missing value at row 0, column 1 (this becomes column 'a' of row
# 'three' once the array is wrapped in a DataFrame below).
data1[0,1] = np.nan

In [23]:
# Same labels as `frame`, but built from the float array that contains a NaN.
frame1 = pd.DataFrame(
    data=data1,
    index=['three', 'one'],
    columns=['d', 'a', 'b', 'c'],
)

In [24]:
# The NaN shows up as a missing value in column 'a' of row 'three'.
frame1

Unnamed: 0,d,a,b,c
three,0.0,,2.0,3.0
one,4.0,5.0,6.0,7.0


In [25]:
# With default arguments the row whose sort key is NaN is placed last.
frame1.sort_values(by='a')

Unnamed: 0,d,a,b,c
one,4.0,5.0,6.0,7.0
three,0.0,,2.0,3.0


In [26]:
# Same result as the default, with the NaN placement stated explicitly.
frame1.sort_values(by='a',na_position='last')

Unnamed: 0,d,a,b,c
one,4.0,5.0,6.0,7.0
three,0.0,,2.0,3.0


In [27]:
# Put the row whose sort key is NaN at the top instead.
frame1.sort_values(by='a',na_position='first')

Unnamed: 0,d,a,b,c
three,0.0,,2.0,3.0
one,4.0,5.0,6.0,7.0


In [28]:
# Five-row frame with repeated values in '가' so that a secondary sort key
# matters. Written column-wise; the rows are identical to the record form.
frame2 = pd.DataFrame({
    '가': [3, 3, 1, 2, 2],
    '나': [15, 10, 20, 15, 100],
    '다': [3, 5, 5, 7, 9],
})

In [29]:
# Display the frame with its default 0..4 row labels.
frame2

Unnamed: 0,가,나,다
0,3,15,3
1,3,10,5
2,1,20,5
3,2,15,7
4,2,100,9


In [30]:
# Two-key sort: '가' descending first, then '나' ascending to break ties
# among rows that share the same '가'.
frame2.sort_values(by=['가', '나'],
                   ascending=[False, True])

Unnamed: 0,가,나,다
1,3,10,5
0,3,15,3
3,2,15,7
4,2,100,9
2,1,20,5


## 예제 5-4 데이터프레임의 인덱스 정렬

In [31]:
# Same data as frame2, but `columns=` forces the column order '다', '가', '나'
# so that sorting the column labels has a visible effect.
frame3 = pd.DataFrame(
    {
        '가': [3, 3, 1, 2, 2],
        '나': [15, 10, 20, 15, 100],
        '다': [3, 5, 5, 7, 9],
    },
    columns=['다', '가', '나'],
)

In [32]:
# Columns appear in the order requested through `columns=`.
frame3

Unnamed: 0,다,가,나
0,3,3,15
1,5,3,10
2,5,1,20
3,7,2,15
4,9,2,100


In [33]:
# Sort along the row axis by row label; the labels are already 0..4, so the
# result looks the same as the input.
frame3.sort_index(axis='index')

Unnamed: 0,다,가,나
0,3,3,15
1,5,3,10
2,5,1,20
3,7,2,15
4,9,2,100


In [34]:
# Sort the columns (axis=1) by label. frame3 is used because its columns were
# created out of order ('다', '가', '나'); frame2's columns are already sorted,
# so sorting frame2 would demonstrate nothing.
frame3.sort_index(axis=1)

Unnamed: 0,가,나,다
0,3,15,3
1,3,10,5
2,1,20,5
3,2,15,7
4,2,100,9


In [35]:
# Chain both sorts: rows by label first, then columns by label. frame3 is
# used because its columns are out of order, so the column sort is visible.
frame3.sort_index().sort_index(axis=1)

Unnamed: 0,가,나,다
0,3,15,3
1,3,10,5
2,1,20,5
3,2,15,7
4,2,100,9


## 예제 5-5 데이터프레임의 순위 및 이동 처리

In [36]:
# Student records, one list per column: name, student-id year, number of
# assignments, and score. Note that this rebinds `data1`.
data1 = {
    '이름': ['길동', '옥주', '현웅', '주몽', '지원'],
    '학번': [2012, 2012, 2013, 2014, 2014],
    '과제건수': [1, 5, 2, 3, 4],
    '점수': [25, 94, 57, 62, 70],
}

In [37]:
# Build the student table from the column dict. This rebinds `frame3`,
# replacing the frame of the same name from the previous example.
frame3 = pd.DataFrame(data=data1)

In [38]:
# Display the student table.
# NOTE(review): the recorded output lists the columns alphabetically rather
# than in dict order; this presumably depends on the pandas/Python version in
# use when the notebook was run. Confirm on the current environment.
frame3

Unnamed: 0,과제건수,이름,점수,학번
0,1,길동,25,2012
1,5,옥주,94,2012
2,2,현웅,57,2013
3,3,주몽,62,2014
4,4,지원,70,2014


In [39]:
# Rank the scores with the highest score receiving rank 1.0.
frame3['점수'].rank(ascending=False)

0    5.0
1    1.0
2    4.0
3    3.0
4    2.0
Name: 점수, dtype: float64

In [40]:
# Store the descending score rank as a new '순위' column on frame3.
frame3['순위'] = frame3['점수'].rank(ascending=False)

In [41]:
# The table now carries the extra '순위' column.
frame3

Unnamed: 0,과제건수,이름,점수,학번,순위
0,1,길동,25,2012,5.0
1,5,옥주,94,2012,1.0
2,2,현웅,57,2013,4.0
3,3,주몽,62,2014,3.0
4,4,지원,70,2014,2.0


In [42]:
# Order the rows by rank, best first; the original row labels travel with
# their rows.
frame3.sort_values(by="순위")

Unnamed: 0,과제건수,이름,점수,학번,순위
1,5,옥주,94,2012,1.0
4,4,지원,70,2014,2.0
3,3,주몽,62,2014,3.0
2,2,현웅,57,2013,4.0
0,1,길동,25,2012,5.0


In [45]:
# Keep the rank-ordered table under its own name so frame3 stays untouched.
frame4 = frame3.sort_values(by="순위")

In [46]:
# Renumber the rows 0..n-1 after sorting. reset_index(drop=True) discards the
# old labels and does not hard-code the number of rows.
frame4 = frame4.reset_index(drop=True)

In [47]:
# Rank-ordered table with fresh sequential row labels.
frame4

Unnamed: 0,과제건수,이름,점수,학번,순위
0,5,옥주,94,2012,1.0
1,4,지원,70,2014,2.0
2,3,주몽,62,2014,3.0
3,2,현웅,57,2013,4.0
4,1,길동,25,2012,5.0


In [62]:
# Three Series of different lengths that share a prefix of the labels
# 'a'..'d'. Note that this rebinds `data`.
data = {
    '가': pd.Series([1.0], index=['a']),
    '나': pd.Series([1.0, 2.0], index=list('ab')),
    '다': pd.Series([1.0, 2.0, 3.0, 4.0], index=list('abcd')),
}


In [63]:
# Combine the Series into one frame; pandas aligns them on their index labels.
df = pd.DataFrame(data=data)

In [64]:
# Labels missing from the shorter Series show up as NaN.
df

Unnamed: 0,가,나,다
a,1.0,1.0,1.0
b,,2.0,2.0
c,,,3.0
d,,,4.0


In [65]:
# Replace column '나' with a full-length float Series covering every row label.
df['나'] = pd.Series([3.0, 4.0, 5.0, 6.0], index=['a', 'b', 'c', 'd'])

In [66]:
# Column '나' no longer contains missing values.
df

Unnamed: 0,가,나,다
a,1.0,3.0,1.0
b,,4.0,2.0
c,,5.0,3.0
d,,6.0,4.0


In [67]:
# Take an independent copy of the two columns. Later cells assign into df1;
# without .copy() those writes target a slice of df and trigger pandas'
# SettingWithCopyWarning.
df1 = df[["나", "다"]].copy()


In [68]:
# Display the two-column subset.
df1

Unnamed: 0,나,다
a,3.0,1.0
b,4.0,2.0
c,5.0,3.0
d,6.0,4.0


In [69]:
# .at sets a single cell addressed by (row label, column label).
df1.at['a','나'] = 100

In [70]:
# Row 'a' of column '나' now holds 100.0.
df1

Unnamed: 0,나,다
a,100.0,1.0
b,4.0,2.0
c,5.0,3.0
d,6.0,4.0


In [80]:
# Set several cells at once. .loc accepts lists of labels; .at is a scalar
# accessor (exactly one row label and one column label), so it is the wrong
# tool for list keys.
df1.loc[['b', 'c'], ['나']] = 99

In [81]:
# Display df1 after the multi-cell assignment.
# NOTE(review): the recorded output shows NaN at ('a', '나'), which none of
# the visible cells produce; the execution counts are out of sequence, so the
# output is presumably stale. Re-run the notebook top to bottom to confirm.
df1

Unnamed: 0,나,다
a,,1.0
b,99.0,2.0
c,99.0,3.0
d,6.0,4.0


In [74]:
# Silence pandas' chained-assignment (SettingWithCopy) warning for the
# assignments made on df1. The option name is spelled out in full: a short
# pattern such as 'chained' only works while it matches exactly one option.
pd.set_option('mode.chained_assignment', None)

In [82]:
# Shift column '나' up by one row: each row takes the value of the row below
# it, and the last row becomes NaN.
a = df1['나'].shift(periods=-1)

In [83]:
# Show the upward-shifted column; the last row is NaN.
a

a    99.0
b    99.0
c     6.0
d     NaN
Name: 나, dtype: float64

In [84]:
# Overwrite column '나' with the shifted values held in `a`.
df1['나'] = a

In [86]:
# Shift column '나' down by one row: each row takes the value of the row above
# it, and the first row becomes NaN.
a = df1['나'].shift(periods=1)

In [87]:
# Show the downward-shifted column; the first row is NaN.
a

a     NaN
b    99.0
c    99.0
d     6.0
Name: 나, dtype: float64

In [88]:
# Overwrite column '나' with the downward-shifted values held in `a`.
df1['나'] = a