
## 검색하는 방법

    판다스는 다양한 방법으로 검색이 가능하므로 실제 검색에 필요한 방법을 명확히 이해해야 한다.
    

    Operation	                     Syntax    	      Result
    Select column	                df[col]	          Series
    Select row by label location    df.loc[label]	  Series
    Select row by integer location	df.iloc[loc]	  Series
    Slice rows	                    df[5:10]	      DataFrame
    Select rows by boolean vector	df[bool_vec]	  DataFrame
    
    

In [3]:
import pandas as pd
import numpy as np

### 딕셔너리로 데이터 프레임 생성하기

    바깥쪽 딕셔너리의 키는 칼럼명이 된다.
    값으로 들어 있는 안쪽 딕셔너리의 키는 인덱스가 되며, 한쪽 칼럼에만 있는 인덱스의 빈 자리는 NaN으로 채워진다.

In [4]:
# Nested dict: the outer keys name the columns, the inner keys label the rows.
# 'one' has no 'e' entry, so that cell comes out as NaN in the frame.
d = {
    'one': {'a': 1, 'b': 2, 'c': 3, 'd': 4},
    'two': {'a': 10, 'b': 20, 'c': 30, 'd': 40, 'e': 50},
}

df = pd.DataFrame(data=d)

df

Unnamed: 0,one,two
a,1.0,10
b,2.0,20
c,3.0,30
d,4.0,40
e,,50


##  시리즈로 데이터 프레임 생성

In [57]:
# Two labelled Series; each Series' name later serves as its column label.
s1 = pd.Series(data=[1, 2, 3, 4], index=['a', 'b', 'c', 'd'], name='one')
s2 = pd.Series(data=[10, 20, 30, 40, 50], index=['a', 'b', 'c', 'd', 'e'], name='two')

# A single Series turns into a one-column DataFrame.
df_s1 = pd.DataFrame(data=s1)


In [58]:
df_s1

Unnamed: 0,one
a,1
b,2
c,3
d,4


### 인덱싱을 이용해서 칼럼(열) 추가하기

In [59]:
# Column assignment aligns s2 to df_s1's index, so s2's extra label 'e' is dropped.
df_s1['two'] = s2

In [60]:
df_s1

Unnamed: 0,one,two
a,1,10
b,2,20
c,3,30
d,4,40


### 여러 개의 시리즈를 리스트에 넣어서 데이터 프레임을 생성하면 각 시리즈가 하나의 행이 되므로 행과 열이 바뀌어 나온다.

In [65]:


# A list of Series is read row-wise: each Series becomes one row, labelled by its name.
df_s = pd.DataFrame(data =[s1,s2])


In [52]:
df_s

Unnamed: 0,a,b,c,d,e
one,1.0,2.0,3.0,4.0,
two,10.0,20.0,30.0,40.0,50.0


In [53]:
# The Series index labels ('a'..'e') ended up as the column labels.
df_s.columns

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [54]:
# The Series names ('one', 'two') ended up as the row labels.
df_s.index

Index(['one', 'two'], dtype='object')

### 여러 개의 시리즈로 데이터프레임을 생성할 때는 딕셔너리로 처리해야 함

In [66]:
# Keying each Series by its own name makes every Series a column.
df_sd = pd.DataFrame(data={series.name: series for series in (s1, s2)})

In [67]:
df_sd

Unnamed: 0,one,two
a,1.0,10
b,2.0,20
c,3.0,30
d,4.0,40
e,,50


## 인덱싱으로 열 단위 검색을 하면 결과는 시리즈로 반환된다.


In [4]:
# A single column label returns that column as a Series.
df['one']

a    1.0
b    2.0
c    3.0
d    4.0
e    NaN
Name: one, dtype: float64

## 슬라이싱은 행단위 처리

### index를 가지고 행단위 처리

In [5]:
# Slicing with index labels selects rows; here from label 'a' through the end.
df['a':]

Unnamed: 0,one,two
a,1.0,10
b,2.0,20
c,3.0,30
d,4.0,40
e,,50


### 암묵적인 인덱스를 가지고 행단위 처리

In [6]:
# Slicing with integers uses row positions; this keeps only the first row.
df[:1]

Unnamed: 0,one,two
a,1.0,10


## 팬시 검색은 열단위 검색

   하나만 검색할 때도 팬시 검색은 데이터프레임을 유지한다.

In [5]:
# A list of labels (even a single one) selects columns and returns a DataFrame.
df[['one']]

Unnamed: 0,one
a,1.0
b,2.0
c,3.0
d,4.0
e,


In [12]:
# Several labels in the list select several columns at once.
df[['one','two']]

Unnamed: 0,one,two
a,1.0,10
b,2.0,20
c,3.0,30
d,4.0,40
e,,50


## 데이터 프레임의 속성을 가지고 인덱싱 하기

###  loc 속성으로 행단위 검색

In [6]:
# .loc looks a row up by its index label and returns it as a Series.
df.loc['a']

one     1.0
two    10.0
Name: a, dtype: float64

### iloc 속성으로 암묵적 인덱스(정수 위치)를 이용한 행단위 검색

In [9]:
# .iloc looks a row up by integer position; position 0 is the row labelled 'a'.
df.iloc[0]

one     1.0
two    10.0
Name: a, dtype: float64

## 논리값(불리언) 리스트를 이용한 마스킹 검색

In [15]:
# Boolean mask with one flag per row: rows flagged True ('a' and 'b') are kept.
a = [True,True,False,False,False]
df[a]

Unnamed: 0,one,two
a,1.0,10
b,2.0,20


### 산술연산

    산술연산 시 두 데이터프레임의 행 또는 열 레이블이 서로 맞지 않는 위치는 NaN으로 채워진다.
    
    NaN과의 산술연산 결과는 항상 NaN이다.
    
    

In [24]:
# Two frames of random normal values with different shapes: 10x4 (A-D) and 7x3 (A-C).
df41 = pd.DataFrame(data=np.random.randn(10, 4), columns=list('ABCD'))
df42 = pd.DataFrame(data=np.random.randn(7, 3), columns=list('ABC'))


In [22]:
df41

Unnamed: 0,A,B,C,D
0,-1.507135,0.466916,-0.315949,-0.5736
1,0.56341,-1.177166,-1.752189,-0.690239
2,-0.490187,-0.209071,-0.550648,0.397061
3,-0.002017,0.824661,-0.286008,-1.684201
4,-0.457883,-0.619099,-1.138082,0.526128
5,0.770187,0.641377,-1.195515,1.490851
6,-0.045369,1.192389,-1.475684,-0.181737
7,0.040508,0.160315,1.376298,-1.55172
8,-1.118513,-0.10007,1.152629,-0.554373
9,-1.306904,-0.152442,-0.125609,0.025845


In [23]:
df42

Unnamed: 0,A,B,C
0,0.826454,0.407493,-1.907073
1,0.34408,-0.334105,0.091379
2,-0.097356,-0.10907,1.322995
3,-1.74395,0.340916,1.112972
4,-0.411908,-0.556011,-1.19181
5,-0.067637,1.040361,0.935761
6,-0.009117,-0.700442,-0.448997


In [21]:
# Addition aligns on both row and column labels; rows 7-9 and column D exist
# only in df41, so those cells come out as NaN.
df41+df42

Unnamed: 0,A,B,C,D
0,-0.680681,0.874409,-2.223022,
1,0.90749,-1.511271,-1.660811,
2,-0.587544,-0.31814,0.772346,
3,-1.745967,1.165577,0.826964,
4,-0.869791,-1.17511,-2.329892,
5,0.702549,1.681738,-0.259754,
6,-0.054486,0.491947,-1.924681,
7,,,,
8,,,,
9,,,,


### 칼럼이 동일한 데이터프레임과 시리즈의 산술연산

    시리즈의 인덱스가 데이터프레임의 칼럼에 맞춰지고, 모든 행에 브로드캐스팅되어 연산된다.
    

   

In [26]:
df41

Unnamed: 0,A,B,C,D
0,-0.118481,-1.866403,0.653264,-0.879601
1,-0.744037,-0.397195,0.15662,-0.246577
2,0.159854,1.350118,1.188513,-0.290258
3,1.078684,0.927654,1.108263,0.100919
4,0.952951,-1.448608,-0.042317,0.854896
5,-0.653134,1.135487,0.020741,0.134808
6,1.171664,-0.392296,0.593198,-1.755945
7,0.692198,-1.766466,1.252581,0.666322
8,-2.119302,-2.4351,-0.068338,-0.863518
9,0.161086,-0.291292,0.01115,-0.641749


In [27]:
# First row of df41 as a Series indexed by the column labels A-D.
df41.iloc[0]

A   -0.118481
B   -1.866403
C    0.653264
D   -0.879601
Name: 0, dtype: float64

In [25]:
# The Series is matched to df41's columns and subtracted from every row,
# so row 0 becomes all zeros.
df41 - df41.iloc[0]

Unnamed: 0,A,B,C,D
0,0.0,0.0,0.0,0.0
1,-0.625556,1.469208,-0.496643,0.633024
2,0.278335,3.216521,0.535249,0.589343
3,1.197165,2.794057,0.454999,0.98052
4,1.071432,0.417795,-0.695581,1.734497
5,-0.534653,3.001891,-0.632523,1.014409
6,1.290145,1.474107,-0.060066,-0.876344
7,0.810679,0.099937,0.599317,1.545923
8,-2.000821,-0.568697,-0.721602,0.016083
9,0.279567,1.575111,-0.642114,0.237852


In [41]:
# Build by hand the frame that broadcasting creates implicitly: df41's first
# row repeated once per row of df41. Shape and column labels are taken from
# df41 itself (instead of a hard-coded 10x4 / A-D) so the demo keeps working
# if df41 is regenerated with a different size.
df411 = pd.DataFrame(np.zeros(df41.shape), columns=df41.columns)

first_row = df41.iloc[0]  # read once; the same row is written to every position
for i in range(len(df411)):
    df411.iloc[i] = first_row
print(df411)

          A         B         C         D
0 -0.118481 -1.866403  0.653264 -0.879601
1 -0.118481 -1.866403  0.653264 -0.879601
2 -0.118481 -1.866403  0.653264 -0.879601
3 -0.118481 -1.866403  0.653264 -0.879601
4 -0.118481 -1.866403  0.653264 -0.879601
5 -0.118481 -1.866403  0.653264 -0.879601
6 -0.118481 -1.866403  0.653264 -0.879601
7 -0.118481 -1.866403  0.653264 -0.879601
8 -0.118481 -1.866403  0.653264 -0.879601
9 -0.118481 -1.866403  0.653264 -0.879601


In [42]:
# Subtracting the hand-expanded frame gives the same values as df41 - df41.iloc[0].
print(df41 - df411)

          A         B         C         D
0  0.000000  0.000000  0.000000  0.000000
1 -0.625556  1.469208 -0.496643  0.633024
2  0.278335  3.216521  0.535249  0.589343
3  1.197165  2.794057  0.454999  0.980520
4  1.071432  0.417795 -0.695581  1.734497
5 -0.534653  3.001891 -0.632523  1.014409
6  1.290145  1.474107 -0.060066 -0.876344
7  0.810679  0.099937  0.599317  1.545923
8 -2.000821 -0.568697 -0.721602  0.016083
9  0.279567  1.575111 -0.642114  0.237852


In [45]:
import collections.abc as cols

# range counts as an Iterable (the check evaluates to True).
issubclass(range,cols.Iterable)

True

In [46]:
# Walk through every clause of a try statement: `else` runs only when the
# body raised nothing, and `finally` runs in either case.
x = 10
try:
    x += 1
except Exception as err:
    print(err)
else:
    print("no exception ")
finally:
    print(x)

no exception 
11
