# 조건식과 메소드를 이용해서 DataFrame 내부의 값을 검색하고 갱신하기



In [32]:
import pandas as pd
import numpy as np

In [12]:
# Build the 4-row sample frame with three integer columns (AAA, BBB, CCC).
df = pd.DataFrame(
    {
        "AAA": [4, 5, 6, 7],
        "BBB": [10, 20, 30, 40],
        "CCC": [100, 50, -30, -50],
    }
)

# Bare expression so the notebook renders the frame.
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


### if 제어 처리

    loc 속성 내에서 조건식을 사용해서 처리한다.
    

### 행으로 접근하기

In [13]:
# Select every row whose AAA value is at least 5; all columns are kept.
df.loc[df["AAA"] >= 5]

Unnamed: 0,AAA,BBB,CCC
1,5,20,50
2,6,30,-30
3,7,40,-50


## 행으로 접근한 결과에서 열을 제한하기


### 한 열만 선택하기 

In [16]:
# Same row filter, restricted to column BBB; a single column label yields a Series.
df.loc[df["AAA"] >= 5, "BBB"]

1    20
2    30
3    40
Name: BBB, dtype: int64

### 두 열 선택하기


In [17]:
# Same row filter, restricted to a list of columns; the result stays a DataFrame.
df.loc[df["AAA"] >= 5, ["AAA", "BBB"]]

Unnamed: 0,AAA,BBB
1,5,20
2,6,30
3,7,40


### 선택된 값을 스칼라로 갱신하기

In [9]:
# Assign the scalar -1 to BBB on every row where AAA is at least 5 (df is modified in place).
df.loc[df["AAA"] >= 5, "BBB"] = -1

# Show the updated frame.
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,-1,50
2,6,-1,-30
3,7,-1,-50


In [10]:
# Broadcast the scalar 555 into both BBB and CCC on the rows where AAA is at least 5.
df.loc[df["AAA"] >= 5, ["BBB", "CCC"]] = 555

# Show the updated frame.
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,555,555
2,6,555,555
3,7,555,555


In [18]:
# Overwrite BBB and CCC with 2000 on the rows where AAA is below 5.
# NOTE(review): the execution counters (In [9], In [10], In [18]) suggest this cell was run
# against a freshly created df, so the recorded output does not include the 555 updates.
df.loc[df["AAA"] < 5, ["BBB", "CCC"]] = 2000

# Show the updated frame.
df

Unnamed: 0,AAA,BBB,CCC
0,4,2000,2000
1,5,20,50
2,6,30,-30
3,7,40,-50


### index 검색 시 조건식을 사용해서 처리하기

    지정한 열의 값을 행별로 조건식으로 평가해서, 결과가 참인 행만 선택한다.
    선택된 결과는 원본과 별개의 객체로 반환되는 것을 알 수 있다.
    

In [35]:
# Recreate the sample frame so the examples below start from the original values.
df = pd.DataFrame(
    dict(
        AAA=[4, 5, 6, 7],
        BBB=[10, 20, 30, 40],
        CCC=[100, 50, -30, -50],
    )
)

# Bare expression so the notebook renders the frame.
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [36]:
# Boolean indexing with [] keeps only the rows where AAA is at most 5;
# the selection is bound to its own name, dflow.
dflow = df[df["AAA"] <= 5]

dflow

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50


In [38]:
# Complementary selection: rebind dflow to the rows where AAA is strictly greater than 5.
dflow = df[df["AAA"] > 5]

dflow

Unnamed: 0,AAA,BBB,CCC
2,6,30,-30
3,7,40,-50


### 메소드로 처리하기: where


In [21]:
# Print the built-in documentation of DataFrame.where (signature and parameter descriptions).
# NOTE(review): the recorded output comes from whatever pandas version ran this notebook;
# the signature shown may differ in the installed version - confirm before relying on it.
help(pd.DataFrame.where)

Help on function where in module pandas.core.generic:

where(self, cond, other=nan, inplace=False, axis=None, level=None, try_cast=False, raise_on_error=True)
    Return an object of same shape as self and whose corresponding
    entries are from self where cond is True and otherwise are from
    other.
    
    Parameters
    ----------
    cond : boolean NDFrame, array or callable
        If cond is callable, it is computed on the NDFrame and
        should return boolean NDFrame or array.
        The callable must not change input NDFrame
        (though pandas doesn't check it).
    
        .. versionadded:: 0.18.1
    
        A callable can be used as cond.
    
    other : scalar, NDFrame, or callable
        If other is callable, it is computed on the NDFrame and
        should return scalar or NDFrame.
        The callable must not change input NDFrame
        (though pandas doesn't check it).
    
        .. versionadded:: 0.18.1
    
        A callable can be used as other.

In [25]:
# Fresh copy of the sample frame for the where() examples that follow.
df = pd.DataFrame(
    {
        "AAA": [4, 5, 6, 7],
        "BBB": [10, 20, 30, 40],
        "CCC": [100, 50, -30, -50],
    }
)

# Bare expression so the notebook renders the frame.
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


#### 조건이 False인 위치의 값을 변경하기


In [26]:
# Boolean mask with the same column labels and row count as df:
# True keeps the existing value, False marks the cell for replacement.
df_mask = pd.DataFrame(
    {
        "AAA": [True, True, True, True],
        "BBB": [False, False, False, False],
        "CCC": [True, False, True, False],
    }
)

# where() keeps entries where the mask is True and takes -1000 elsewhere;
# inplace=True writes the result back into df instead of returning a new frame.
df.where(df_mask, other=-1000, inplace=True)

df

Unnamed: 0,AAA,BBB,CCC
0,4,-1000,100
1,5,-1000,-1000
2,6,-1000,-30
3,7,-1000,-1000


### if-then-else 처리는 np.where를 사용한다



In [34]:
# Boolean mask: one column all True, one all False, one alternating True/False.
df_mask = pd.DataFrame(
    dict(
        AAA=[True, True, True, True],
        BBB=[False, False, False, False],
        CCC=[True, False, True, False],
    )
)

# Element-wise if/else: 9999 where the mask is True, -7777 where it is False.
# The result is a plain NumPy array, not a DataFrame.
dfx = np.where(df_mask, 9999, -7777)

dfx

array([[ 9999, -7777,  9999],
       [ 9999, -7777, -7777],
       [ 9999, -7777,  9999],
       [ 9999, -7777, -7777]])