# 명시적 인덱서 검색

In [1]:
import pandas as pd
import seaborn as sns

diamonds = sns.load_dataset('diamonds')
diamonds.head()

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
1,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
2,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
3,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63
4,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75


# loc

* 데이터프레임객체.loc[ 행검색조건 , 열검색조건 ]

첫번째 행 데이터

In [3]:
diamonds.loc[0]

carat       0.23
cut        Ideal
color          E
clarity      SI2
depth       61.5
table       55.0
price        326
x           3.95
y           3.98
z           2.43
Name: 0, dtype: object

In [4]:
diamonds.loc[2]

carat      0.23
cut        Good
color         E
clarity     VS1
depth      56.9
table      65.0
price       327
x          4.05
y          4.07
z          2.31
Name: 2, dtype: object

세번째행의 price열 데이터 조회

In [5]:
diamonds.loc[2,'price']


np.int64(327)

In [8]:
diamonds.loc[3,'clarity']

'VS2'

* 슬라이싱 적용  
[주의] loc에서는 마지막 인덱스를 포함

In [9]:
diamonds.head(26)

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
1,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
2,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
3,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63
4,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75
5,0.24,Very Good,J,VVS2,62.8,57.0,336,3.94,3.96,2.48
6,0.24,Very Good,I,VVS1,62.3,57.0,336,3.95,3.98,2.47
7,0.26,Very Good,H,SI1,61.9,55.0,337,4.07,4.11,2.53
8,0.22,Fair,E,VS2,65.1,61.0,337,3.87,3.78,2.49
9,0.23,Very Good,H,VS1,59.4,61.0,338,4.0,4.05,2.39


10행에서 25행 사이의 모든 'carat' 데이터 조회

In [16]:
diamonds.loc[10:25,'carat']

10    0.30
11    0.23
12    0.22
13    0.31
14    0.20
15    0.32
16    0.30
17    0.30
18    0.30
19    0.30
20    0.30
21    0.23
22    0.23
23    0.31
24    0.31
25    0.23
Name: carat, dtype: float64

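- 단일 열 조회: `diamonds['carat']`처럼 열 이름 하나만 지정하면 시리즈(Series)로 반환된다. 데이터프레임 형태로 받으려면 `diamonds[['carat']]`처럼 열 이름을 리스트로 감싼다.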

In [18]:
diamonds['carat'].head(3)

0    0.23
1    0.21
2    0.23
Name: carat, dtype: float64

* loc에서 팬시 검색(fancy indexing) 적용  
  (아래 셀은 loc을 쓰지 않은 위치 기반 슬라이싱 예시로, 끝 위치 26은 결과에 포함되지 않는다.)

In [15]:
diamonds['carat'][10:26]

10    0.30
11    0.23
12    0.22
13    0.31
14    0.20
15    0.32
16    0.30
17    0.30
18    0.30
19    0.30
20    0.30
21    0.23
22    0.23
23    0.31
24    0.31
25    0.23
Name: carat, dtype: float64

# 암묵적(Implicit) 인덱서

* 인덱스 또는 columns가 레이블(문자)로 존재하더라도 암묵적인 정수 위치로 접근이 가능하다.
* iloc[행필터링조건, 열필터링조건]
* 암묵적 인덱스는 행과 열 모두 0부터 시작한다.

* 마이너스 인덱스 활용

In [20]:
diamonds.shape

(53940, 10)

수집한 데이터의 가운데 위치(53940 // 2 = 26970)의 행을 샘플링하여 조회하고 싶은 경우

In [22]:
diamonds.iloc[26970]

carat        1.6
cut        Ideal
color          G
clarity      VS1
depth       62.0
table       54.0
price      17050
x           7.53
y            7.5
z            4.6
Name: 26970, dtype: object

In [24]:
diamonds.columns

Index(['carat', 'cut', 'color', 'clarity', 'depth', 'table', 'price', 'x', 'y',
       'z'],
      dtype='object')

In [28]:
diamonds.iloc[:,2] # the row selector cannot be omitted when picking a column by position; ':' means all rows

0        E
1        E
2        E
3        I
4        J
        ..
53935    D
53936    D
53937    D
53938    H
53939    D
Name: color, Length: 53940, dtype: category
Categories (7, object): ['D', 'E', 'F', 'G', 'H', 'I', 'J']

In [29]:
diamonds.iloc[:,[2]].head()

Unnamed: 0,color
0,E
1,E
2,E
3,I
4,J


* 마이너스 인덱스 접근  
  마지막 열 조회

In [31]:
diamonds.iloc[:,[-1]]

Unnamed: 0,z
0,2.43
1,2.31
2,2.31
3,2.63
4,2.75
...,...
53935,3.50
53936,3.61
53937,3.56
53938,3.74


* 마이너스 인덱스를 활용한 행,열 슬라이싱  
마지막 3개열 중위수부터 10개

In [33]:
diamonds.iloc[26970:26980,-3:]

Unnamed: 0,x,y,z
0,3.95,3.98,2.43
1,3.89,3.84,2.31
2,4.05,4.07,2.31
3,4.20,4.23,2.63
4,4.34,4.35,2.75
...,...,...,...
53935,5.75,5.76,3.50
53936,5.69,5.75,3.61
53937,5.66,5.68,3.56
53938,6.15,6.12,3.74


* 열이름을 통한 슬라이싱  

In [34]:
# Note: a label slice with loc includes both endpoints ('x' through 'z').
# The column labels do not need to be sorted for this to work: the labels of
# this frame are unique, and the slice follows the existing column order.
diamonds.loc[:,'x':'z'].head()

Unnamed: 0,x,y,z
0,3.95,3.98,2.43
1,3.89,3.84,2.31
2,4.05,4.07,2.31
3,4.2,4.23,2.63
4,4.34,4.35,2.75


# 인덱스 변경

In [36]:
diamonds.head(60)

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
1,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
2,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
3,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63
4,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75
5,0.24,Very Good,J,VVS2,62.8,57.0,336,3.94,3.96,2.48
6,0.24,Very Good,I,VVS1,62.3,57.0,336,3.95,3.98,2.47
7,0.26,Very Good,H,SI1,61.9,55.0,337,4.07,4.11,2.53
8,0.22,Fair,E,VS2,65.1,61.0,337,3.87,3.78,2.49
9,0.23,Very Good,H,VS1,59.4,61.0,338,4.0,4.05,2.39


In [40]:
set_i =diamonds.set_index('color').head(20)
set_i

Unnamed: 0_level_0,carat,cut,clarity,depth,table,price,x,y,z
color,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
E,0.23,Ideal,SI2,61.5,55.0,326,3.95,3.98,2.43
E,0.21,Premium,SI1,59.8,61.0,326,3.89,3.84,2.31
E,0.23,Good,VS1,56.9,65.0,327,4.05,4.07,2.31
I,0.29,Premium,VS2,62.4,58.0,334,4.2,4.23,2.63
J,0.31,Good,SI2,63.3,58.0,335,4.34,4.35,2.75
J,0.24,Very Good,VVS2,62.8,57.0,336,3.94,3.96,2.48
I,0.24,Very Good,VVS1,62.3,57.0,336,3.95,3.98,2.47
H,0.26,Very Good,SI1,61.9,55.0,337,4.07,4.11,2.53
E,0.22,Fair,VS2,65.1,61.0,337,3.87,3.78,2.49
H,0.23,Very Good,VS1,59.4,61.0,338,4.0,4.05,2.39


In [41]:
set_i.sort_index().head(20) # returns a new DataFrame sorted by its index; set_i itself is not modified

Unnamed: 0_level_0,carat,cut,clarity,depth,table,price,x,y,z
color,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
E,0.23,Ideal,SI2,61.5,55.0,326,3.95,3.98,2.43
E,0.21,Premium,SI1,59.8,61.0,326,3.89,3.84,2.31
E,0.23,Good,VS1,56.9,65.0,327,4.05,4.07,2.31
E,0.32,Premium,I1,60.9,58.0,345,4.38,4.42,2.68
E,0.2,Premium,SI2,60.2,62.0,345,3.79,3.75,2.27
E,0.22,Fair,VS2,65.1,61.0,337,3.87,3.78,2.49
F,0.22,Premium,SI1,60.4,61.0,342,3.88,3.84,2.33
H,0.23,Very Good,VS1,59.4,61.0,338,4.0,4.05,2.39
H,0.26,Very Good,SI1,61.9,55.0,337,4.07,4.11,2.53
I,0.24,Very Good,VVS1,62.3,57.0,336,3.95,3.98,2.47


* 인덱스 정렬

In [43]:
sorted_set_i = set_i.sort_index().head(20)

* 인덱스 정렬 후 인덱스 기반 행데이터 필터링

E-F등급 다이아몬드 데이터 추출

In [44]:
sorted_set_i.loc['E':'F'].head(20)

Unnamed: 0_level_0,carat,cut,clarity,depth,table,price,x,y,z
color,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
E,0.23,Ideal,SI2,61.5,55.0,326,3.95,3.98,2.43
E,0.21,Premium,SI1,59.8,61.0,326,3.89,3.84,2.31
E,0.23,Good,VS1,56.9,65.0,327,4.05,4.07,2.31
E,0.32,Premium,I1,60.9,58.0,345,4.38,4.42,2.68
E,0.2,Premium,SI2,60.2,62.0,345,3.79,3.75,2.27
E,0.22,Fair,VS2,65.1,61.0,337,3.87,3.78,2.49
F,0.22,Premium,SI1,60.4,61.0,342,3.88,3.84,2.33


In [None]:
인덱스가 정렬되지 않았고 레이블이 중복된 경우 => 레이블 슬라이싱 시 KeyError 발생

In [45]:
#set_i.loc['E':'F'].head(20) 

KeyError: "Cannot get left slice bound for non-unique label: 'E'"

* 열의 순서 바꾸기

In [54]:
diamonds.columns

Index(['carat', 'cut', 'color', 'clarity', 'depth', 'table', 'price', 'x', 'y',
       'z'],
      dtype='object')

기존 열에서 price열을 제일 앞에 배치하고 싶을 경우  
[주의] 아래처럼 columns 속성에 이름 목록을 직접 대입하면 열 이름만 바뀌고 데이터는 이동하지 않는다 (잘못된 방법의 예).

In [55]:
# NOTE: assigning to .columns only renames the labels in place; the data does
# not move. After this line the column labelled 'price' still holds the carat
# values (0.23, 0.21, ...), so this does NOT reorder the columns.
diamonds.columns = ['price', 'carat', 'cut', 'color', 'clarity', 'depth', 'table', 'x', 'y', 'z']
diamonds

Unnamed: 0,price,carat,cut,color,clarity,depth,table,x,y,z
0,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
1,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
2,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
3,0.29,Premium,I,VS2,62.4,58.0,334,4.20,4.23,2.63
4,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75
...,...,...,...,...,...,...,...,...,...,...
53935,0.72,Ideal,D,SI1,60.8,57.0,2757,5.75,5.76,3.50
53936,0.72,Good,D,SI1,63.1,55.0,2757,5.69,5.75,3.61
53937,0.70,Very Good,D,SI1,62.8,60.0,2757,5.66,5.68,3.56
53938,0.86,Premium,H,SI2,61.0,58.0,2757,6.15,6.12,3.74


In [59]:
# Restore the original column labels so each label matches its data again.
diamonds.columns =['carat', 'cut', 'color', 'clarity', 'depth', 'table', 'price', 'x', 'y', 'z']

In [62]:
# Reorder columns by selecting them with a list of labels: each column's data
# moves together with its label, and the result is bound to a new name.
diamonds_priority = diamonds[['price', 'carat', 'cut', 'color', 'clarity', 'depth', 'table', 'x', 'y','z']]
diamonds_priority

Unnamed: 0,price,carat,cut,color,clarity,depth,table,x,y,z
0,326,0.23,Ideal,E,SI2,61.5,55.0,3.95,3.98,2.43
1,326,0.21,Premium,E,SI1,59.8,61.0,3.89,3.84,2.31
2,327,0.23,Good,E,VS1,56.9,65.0,4.05,4.07,2.31
3,334,0.29,Premium,I,VS2,62.4,58.0,4.20,4.23,2.63
4,335,0.31,Good,J,SI2,63.3,58.0,4.34,4.35,2.75
...,...,...,...,...,...,...,...,...,...,...
53935,2757,0.72,Ideal,D,SI1,60.8,57.0,5.75,5.76,3.50
53936,2757,0.72,Good,D,SI1,63.1,55.0,5.69,5.75,3.61
53937,2757,0.70,Very Good,D,SI1,62.8,60.0,5.66,5.68,3.56
53938,2757,0.86,Premium,H,SI2,61.0,58.0,6.15,6.12,3.74


In [63]:
diamonds_priority.loc[:,'price':'cut']

Unnamed: 0,price,carat,cut
0,326,0.23,Ideal
1,326,0.21,Premium
2,327,0.23,Good
3,334,0.29,Premium
4,335,0.31,Good
...,...,...,...
53935,2757,0.72,Ideal
53936,2757,0.72,Good
53937,2757,0.70,Very Good
53938,2757,0.86,Premium


In [None]:
ms_prior

* 열 이름으로 슬라이싱할 경우에는 열 이름이 정렬되어 있을 필요는 없다.  
  왜냐하면 이 데이터프레임의 열 이름은 중복 없이 고유하기 때문이다 (레이블 슬라이싱은 레이블이 고유하거나 정렬되어 있으면 동작한다).