## 예제 2-20 명시적 인덱서 검색

In [1]:
import pandas as pd

In [2]:
# Prefer the explicit (label-based) indexer over the implicit (position-based) one,
# because the column layout is not guaranteed to stay fixed.
import seaborn as sns

In [3]:
diamonds = sns.load_dataset('diamonds')

In [4]:
diamonds.head()

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
1,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
2,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
3,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63
4,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75


In [5]:
diamonds.shape

(53940, 10)

In [6]:
# Count the null (missing) values in each column.
diamonds.isnull().sum()

carat      0
cut        0
color      0
clarity    0
depth      0
table      0
price      0
x          0
y          0
z          0
dtype: int64

In [7]:
type(diamonds.loc)

pandas.core.indexing._LocIndexer

In [8]:
# loc[row selector, column selector]

In [9]:
# Select a single row by its index label; the row is returned as a Series.
diamonds.loc[0]

carat       0.23
cut        Ideal
color          E
clarity      SI2
depth       61.5
table       55.0
price        326
x           3.95
y           3.98
z           2.43
Name: 0, dtype: object

In [10]:
# Row label 0, column 'carat': a row label plus a column label yields a single
# scalar value, not a Series.
diamonds.loc[0,'carat']

0.23

In [11]:
# Every row of the 'carat' column, returned as a Series.
diamonds.loc[:,'carat']

0        0.23
1        0.21
2        0.23
3        0.29
4        0.31
         ... 
53935    0.72
53936    0.72
53937    0.70
53938    0.86
53939    0.75
Name: carat, Length: 53940, dtype: float64

In [12]:
diamonds['carat']

0        0.23
1        0.21
2        0.23
3        0.29
4        0.31
         ... 
53935    0.72
53936    0.72
53937    0.70
53938    0.86
53939    0.75
Name: carat, Length: 53940, dtype: float64

In [None]:
# Fancy indexing (passing a list of labels) returns a DataFrame instead of a Series.

In [13]:
diamonds.loc[:,['carat']]

Unnamed: 0,carat
0,0.23
1,0.21
2,0.23
3,0.29
4,0.31
...,...
53935,0.72
53936,0.72
53937,0.70
53938,0.86


In [14]:
# With plain [] indexing, a list of labels is treated as a column selection,
# so the result is a one-column DataFrame.
diamonds[['carat']]

Unnamed: 0,carat
0,0.23
1,0.21
2,0.23
3,0.29
4,0.31
...,...
53935,0.72
53936,0.72
53937,0.70
53938,0.86


In [15]:
diamonds.head()

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
1,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
2,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
3,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63
4,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75


In [None]:
# The xs() method can also be used to pick out a cross-section.
# By default its key is matched against the row index.

In [16]:
diamonds.xs(0)

carat       0.23
cut        Ideal
color          E
clarity      SI2
depth       61.5
table       55.0
price        326
x           3.95
y           3.98
z           2.43
Name: 0, dtype: object

In [17]:
diamonds.xs(1)

carat         0.21
cut        Premium
color            E
clarity        SI1
depth         59.8
table         61.0
price          326
x             3.89
y             3.84
z             2.31
Name: 1, dtype: object

In [None]:
# To select by column, pass the column name as the first argument and 1 (axis=1) as the second.

In [18]:
# Passing axis=1 by keyword makes xs() match the key against column labels.
diamonds.xs("carat", axis=1).head()

0    0.23
1    0.21
2    0.23
3    0.29
4    0.31
Name: carat, dtype: float64

## 예제 2-21 암묵적 인덱서 검색

In [None]:
# Even when the index holds labels, iloc addresses rows and columns by integer position.
# iloc[row selector, column selector]

In [19]:
diamonds.iloc[0]

carat       0.23
cut        Ideal
color          E
clarity      SI2
depth       61.5
table       55.0
price        326
x           3.95
y           3.98
z           2.43
Name: 0, dtype: object

In [20]:
# Row at position 0, column at position 0 (the 'carat' column) -> scalar value.
diamonds.iloc[0,0]

0.23

In [22]:
%config Completer.use_jedi = False

In [24]:
diamonds.iloc[0,[0]]

carat    0.23
Name: 0, dtype: object

In [25]:
type(diamonds.iloc[0,[0]])

pandas.core.series.Series

In [26]:
diamonds.iloc[0,[0,1,2]]

carat     0.23
cut      Ideal
color        E
Name: 0, dtype: object

In [29]:
# Slicing both axes returns a DataFrame.
# Rows at positions 0-4 and columns at positions 0-2 (the end of an iloc slice is exclusive).

In [30]:
diamonds.iloc[:5,:3]

Unnamed: 0,carat,cut,color
0,0.23,Ideal,E
1,0.21,Premium,E
2,0.23,Good,E
3,0.29,Premium,I
4,0.31,Good,J


## 예제 2-22 명시적 인덱서 복합 검색

In [None]:
# The explicit indexer works with labels and can be combined with slices and
# boolean (logical) conditions to express a wide range of selections.

In [32]:
diamonds.columns

Index(['carat', 'cut', 'color', 'clarity', 'depth', 'table', 'price', 'x', 'y',
       'z'],
      dtype='object')

In [33]:
((diamonds['carat'] == 0.23) & (diamonds['cut'] == 'Very Good')).head()

0    False
1    False
2    False
3    False
4    False
dtype: bool

In [34]:
# Row conditions: carat exactly 0.23 AND cut equal to 'Very Good'.
carat_is_023 = diamonds['carat'] == 0.23
cut_is_very_good = diamonds['cut'] == 'Very Good'
selected_columns = ['carat', 'cut', 'color', 'price']

# loc takes the combined boolean mask as the row selector and the label list
# as the column selector.
carat_cut = diamonds.loc[carat_is_023 & cut_is_very_good, selected_columns]

In [35]:
carat_cut.shape

(197, 4)

In [36]:
carat_cut.head()

Unnamed: 0,carat,cut,color,price
9,0.23,Very Good,H,338
21,0.23,Very Good,E,352
22,0.23,Very Good,H,353
25,0.23,Very Good,G,354
28,0.23,Very Good,D,357


In [37]:
diamonds.loc[:,['carat','cut','color','price']].head()

Unnamed: 0,carat,cut,color,price
0,0.23,Ideal,E,326
1,0.21,Premium,E,326
2,0.23,Good,E,327
3,0.29,Premium,I,334
4,0.31,Good,J,335


## 예제 2-23 문자열값 순서 검색

In [38]:
diamonds.head()

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
1,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
2,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
3,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63
4,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75


In [39]:
# Use the 'color' column as the row index.
# The resulting frame no longer has 'color' among its columns.
set_i = diamonds.set_index('color')

In [40]:
set_i.head()

Unnamed: 0_level_0,carat,cut,clarity,depth,table,price,x,y,z
color,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
E,0.23,Ideal,SI2,61.5,55.0,326,3.95,3.98,2.43
E,0.21,Premium,SI1,59.8,61.0,326,3.89,3.84,2.31
E,0.23,Good,VS1,56.9,65.0,327,4.05,4.07,2.31
I,0.29,Premium,VS2,62.4,58.0,334,4.2,4.23,2.63
J,0.31,Good,SI2,63.3,58.0,335,4.34,4.35,2.75


In [None]:
# The row index contains duplicate labels and is not sorted, so a label slice raises an exception.

In [41]:
# Attempt a label slice on set_i and print the error instead of letting it
# propagate.
try:
    set_i.loc['E':'F']
except (ValueError, KeyError) as err:
    # Prefix the message with the exception family, checking ValueError first.
    print('ValueError' if isinstance(err, ValueError) else 'KeyError', err)

KeyError "Cannot get left slice bound for non-unique label: 'E'"


In [42]:
set_i

Unnamed: 0_level_0,carat,cut,clarity,depth,table,price,x,y,z
color,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
E,0.23,Ideal,SI2,61.5,55.0,326,3.95,3.98,2.43
E,0.21,Premium,SI1,59.8,61.0,326,3.89,3.84,2.31
E,0.23,Good,VS1,56.9,65.0,327,4.05,4.07,2.31
I,0.29,Premium,VS2,62.4,58.0,334,4.20,4.23,2.63
J,0.31,Good,SI2,63.3,58.0,335,4.34,4.35,2.75
...,...,...,...,...,...,...,...,...,...
D,0.72,Ideal,SI1,60.8,57.0,2757,5.75,5.76,3.50
D,0.72,Good,SI1,63.1,55.0,2757,5.69,5.75,3.61
D,0.70,Very Good,SI1,62.8,60.0,2757,5.66,5.68,3.56
H,0.86,Premium,SI2,61.0,58.0,2757,6.15,6.12,3.74


In [43]:
# inplace=True sorts set_i's own index rather than returning a sorted copy.
set_i.sort_index(inplace=True)

In [44]:
set_i

Unnamed: 0_level_0,carat,cut,clarity,depth,table,price,x,y,z
color,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
D,0.75,Ideal,SI2,62.2,55.0,2757,5.83,5.87,3.64
D,1.00,Very Good,SI2,63.5,59.0,4295,6.35,6.32,4.02
D,1.00,Good,SI2,57.8,58.0,4295,6.61,6.55,3.80
D,1.00,Very Good,SI2,61.5,63.0,4295,6.32,6.27,3.87
D,0.56,Ideal,IF,61.9,57.0,4293,5.28,5.31,3.28
...,...,...,...,...,...,...,...,...,...
J,1.00,Premium,SI1,60.8,58.0,3712,6.39,6.44,3.90
J,1.52,Ideal,SI1,61.9,57.0,7491,7.37,7.33,4.55
J,1.50,Very Good,VS2,62.6,58.0,7492,7.25,7.29,4.55
J,0.65,Ideal,SI1,61.4,55.0,1276,5.58,5.62,3.44


In [45]:
# Label-slice the index from 'E' through 'F' and keep the result; any
# ValueError/KeyError is printed rather than propagated.
try:
    set_i_str = set_i.loc['E':'F']
except (ValueError, KeyError) as err:
    # Prefix the message with the exception family, checking ValueError first.
    print('ValueError' if isinstance(err, ValueError) else ' KeyError', err)

In [46]:
set_i_str.shape

(19339, 9)

In [47]:
set_i_str.head()

Unnamed: 0_level_0,carat,cut,clarity,depth,table,price,x,y,z
color,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
E,2.01,Ideal,SI2,62.2,57.0,17849,8.09,8.04,5.02
E,1.51,Very Good,VVS2,60.2,61.0,17689,7.34,7.4,4.44
E,0.5,Ideal,VVS2,61.5,57.0,2236,5.09,5.12,3.14
E,1.13,Ideal,VVS1,60.6,57.0,14525,6.72,6.77,4.09
E,1.51,Ideal,VS2,62.7,57.0,14482,7.31,7.26,4.57


In [49]:
set_i_str.tail()

Unnamed: 0_level_0,carat,cut,clarity,depth,table,price,x,y,z
color,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
F,1.01,Ideal,VS1,62.5,54.0,7602,6.46,6.43,4.03
F,1.01,Premium,VS1,62.5,58.0,7602,6.42,6.37,4.0
F,1.05,Ideal,VS2,59.1,57.0,7697,6.67,6.66,3.94
F,1.01,Good,VS2,57.3,62.0,5487,6.63,6.59,3.79
F,1.52,Ideal,SI1,62.0,54.0,12071,7.36,7.44,4.59
