In [25]:
import pandas as pd
import numpy as np

#### DataFrames

In [3]:
# Build a small demo frame: three integer columns, default 0..3 row index.
df = pd.DataFrame(
    {
        'AAA': [4, 5, 6, 7],
        'BBB': [10, 20, 30, 40],
        'CCC': [100, 50, -30, -50],
    }
)
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [4]:
# Keep the rows where AAA is at most 6 AND the row's index label is one of 0, 2, 4.
# isin() is True only for labels that actually exist in the index, so the
# absent label 4 simply matches nothing.
df.loc[df['AAA'].le(6) & df.index.isin([0, 2, 4])]

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
2,6,30,-30


In [11]:
# Column data reused by the DataFrame examples that follow.
data = {
    'AAA': [4, 5, 6, 7],
    'BBB': [10, 20, 30, 40],
    'CCC': [100, 50, -30, -50],
}

In [15]:
# Same data, but the rows are identified by string labels instead of row numbers.
df = pd.DataFrame(data, index=['foo', 'bar', 'boo', 'kar'])
df

Unnamed: 0,AAA,BBB,CCC
foo,4,10,100
bar,5,20,50
boo,6,30,-30
kar,7,40,-50


In [16]:
# Label-based range selection: with .loc the end label is included,
# so the row 'kar' is part of the result.
df.loc[slice('bar', 'kar')]

Unnamed: 0,AAA,BBB,CCC
bar,5,20,50
boo,6,30,-30
kar,7,40,-50


In [17]:
# Position-based range selection: rows at positions 0, 1 and 2
# (the stop position 3 is excluded).
df.iloc[:3]

Unnamed: 0,AAA,BBB,CCC
foo,4,10,100
bar,5,20,50
boo,6,30,-30


In [18]:
# Label-based range selection from 'bar' through 'kar' (inclusive), all columns.
df.loc['bar':'kar', :]

Unnamed: 0,AAA,BBB,CCC
bar,5,20,50
boo,6,30,-30
kar,7,40,-50


In [19]:
# Same data once more, this time with the integer labels 1..4 as the row index.
df2 = pd.DataFrame(data, index=[1, 2, 3, 4])

In [21]:
# iloc selects by position and ignores the labels given via `index`:
# positions 1 and 2 are the rows labelled 2 and 3.
df2.iloc[slice(1, 3)]

Unnamed: 0,AAA,BBB,CCC
2,5,20,50
3,6,30,-30


In [22]:
# loc selects by the index labels themselves: labels 1 through 3, end label included.
df2.loc[slice(1, 3)]

Unnamed: 0,AAA,BBB,CCC
1,4,10,100
2,5,20,50
3,6,30,-30


In [23]:
# Rebuild the demo frame with the default integer row index.
df = pd.DataFrame(
    {
        'AAA': [4, 5, 6, 7],
        'BBB': [10, 20, 30, 40],
        'CCC': [100, 50, -30, -50],
    }
)
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [24]:
# Complement of the combined condition: drop the rows where AAA <= 6 AND the
# index label is in [0, 2, 4]; every other row is kept (so a row with
# AAA <= 6 survives when its label is not in the list).
df[~(df.AAA <= 6) | ~df.index.isin([0, 2, 4])]

Unnamed: 0,AAA,BBB,CCC
1,5,20,50
3,7,40,-50


#### Panels

In [26]:
# 100 consecutive daily timestamps starting on 2013-01-01.
rng = pd.date_range(start='1/1/2013', periods=100, freq='D')

In [27]:
# Seed NumPy's global generator so the random sample is identical on every
# Restart & Run All; without a seed the notebook is not reproducible.
np.random.seed(0)
# 100 x 4 array of standard-normal samples.
data = np.random.randn(100, 4)

In [28]:
# Column labels for the random-data frames.
cols = list('ABCD')

In [30]:
# Three DataFrames, each built from the same array, date index and column labels.
df1, df2, df3 = (pd.DataFrame(data, index=rng, columns=cols) for _ in range(3))

In [33]:
# Panel holding the three DataFrames, keyed by name ('df1', 'df2', 'df3').
# NOTE(review): pd.Panel was deprecated and later removed from pandas, so this
# cell presumably only runs on an old pandas release — confirm the target version.
pf = pd.Panel({'df1':df1,'df2':df2,'df3':df3});pf

<class 'pandas.core.panel.Panel'>
Dimensions: 3 (items) x 100 (major_axis) x 4 (minor_axis)
Items axis: df1 to df3
Major_axis axis: 2013-01-01 00:00:00 to 2013-04-10 00:00:00
Minor_axis axis: A to D

In [34]:
# Add a new entry 'F' along the panel's third (minor) axis, then display the panel.
# NOTE(review): relies on pd.Panel, which newer pandas releases no longer
# provide — confirm the pandas version this notebook targets.
pf.loc[:,:,'F'] = pd.DataFrame(data, rng, cols);pf

<class 'pandas.core.panel.Panel'>
Dimensions: 3 (items) x 100 (major_axis) x 5 (minor_axis)
Items axis: df1 to df3
Major_axis axis: 2013-01-01 00:00:00 to 2013-04-10 00:00:00
Minor_axis axis: A to F

#### New Columns

In [35]:
# Frame of small integer codes (1..3) that will be translated to category names.
df = pd.DataFrame(
    {
        'AAA': [1, 2, 1, 3],
        'BBB': [1, 1, 2, 2],
        'CCC': [2, 1, 3, 1],
    }
)
df

Unnamed: 0,AAA,BBB,CCC
0,1,1,2
1,2,1,1
2,1,2,3
3,3,2,1


In [36]:
# Capture the current column labels of df.
source_cols = df.columns

In [37]:
# One "<column>_cat" label for every source column.
new_cols = ['{}_cat'.format(col) for col in source_cols]

In [39]:
# Lookup table from integer code to category name.
categories = dict([(1, 'Alpha'), (2, 'Beta'), (3, 'Charlie')])

In [41]:
# Add one "<column>_cat" column per source column by translating every value
# through the `categories` lookup (a code without an entry becomes None).
# The translation is applied column by column with Series.map rather than
# DataFrame.applymap, which newer pandas releases deprecate; the element-wise
# result is the same.
df[new_cols] = df[source_cols].apply(lambda col: col.map(categories.get))
df

Unnamed: 0,AAA,BBB,CCC,AAA_cat,BBB_cat,CCC_cat
0,1,1,2,Alpha,Alpha,Beta
1,2,1,1,Beta,Alpha,Alpha
2,1,2,3,Alpha,Beta,Charlie
3,3,2,1,Charlie,Beta,Alpha


In [43]:
# Frame with a grouping key (AAA) and a value column (BBB) for the groupby examples.
df = pd.DataFrame(
    {
        'AAA': [1, 1, 1, 2, 2, 2, 3, 3],
        'BBB': [2, 1, 3, 4, 5, 1, 2, 3],
    }
)
df

Unnamed: 0,AAA,BBB
0,1,2
1,1,1
2,1,3
3,2,4
4,2,5
5,2,1
6,3,2
7,3,3


In [45]:
# For each AAA group, idxmin() yields the index label of the row holding the
# smallest BBB value; .loc then pulls exactly those rows from the frame.
df.loc[
    df.groupby(by='AAA')['BBB'].idxmin()
]

Unnamed: 0,AAA,BBB
1,1,1
5,2,1
6,3,2


In [52]:
# Sort by BBB, then take the first row of every AAA group, i.e. the row with
# the smallest BBB per group.
# as_index=False keeps AAA as a regular column; without it the group key
# becomes the index of the result.
# NOTE(review): the recorded output shows AAA as the index, which does not look
# like an as_index=False result — re-run the cell to confirm.
(
    df.sort_values(by='BBB')
      .groupby('AAA', as_index=False)
      .first()
)

Unnamed: 0_level_0,BBB
AAA,Unnamed: 1_level_1
1,1
2,1
3,2
