# 주요 함수 

In [1]:
import pandas as pd
import numpy as np
import types

## None 데이터 처리

In [12]:
# A list mixing ints and None can only be stored as Python objects,
# so the object dtype is spelled out rather than left to inference.
s = np.array([1, 2, None, 4], dtype=object)

In [13]:
# Echo the array; the dtype in the repr shows how NumPy stored the None entry
s

array([1, 2, None, 4], dtype=object)

### None 타입 연산은 예외가 발생한다.


In [4]:
# The failure is the point of this cell, so catch it and print the message
# instead of letting the traceback stop a Restart & Run All.
try:
    print(np.sum(s))
except TypeError as exc:
    print("TypeError:", exc)

TypeError: unsupported operand type(s) for +: 'int' and 'NoneType'

In [5]:
# nansum skips NaN only; a None entry still breaks the addition. Catch the
# expected error and print it so the rest of the notebook keeps running.
try:
    print(np.nansum(s))
except TypeError as exc:
    print("TypeError:", exc)

TypeError: unsupported operand type(s) for +: 'int' and 'NoneType'

### nan은 float 자료형이다

In [17]:
# Check the Python type of np.nan against the built-in float
isinstance(np.nan, float)

True

In [14]:
def none_nan(x):
    """Map ``None`` to ``np.nan`` and pass every other value through.

    Parameters
    ----------
    x : object
        Any value.

    Returns
    -------
    object
        ``np.nan`` when ``x`` is ``None``; otherwise ``x`` itself, unchanged.
    """
    # Identity check instead of ``x == None``: ``==`` dispatches to the
    # operand's ``__eq__``, which for NumPy arrays compares element-wise and
    # makes the ``if`` raise on an ambiguous truth value.
    if x is None:
        return np.nan
    return x
    
# Walk the array by position, echo each (index, value) pair, and overwrite
# the slot in place with its None-free counterpart.
for i in range(len(s)):
    v = s[i]
    print(i, v)
    s[i] = none_nan(v)

s

0 1
1 2
2 None
3 4


array([1, 2, nan, 4], dtype=object)

In [15]:
# Retry nansum now that the None entry has been replaced by nan
np.nansum(s)

7

## nan 값 처리 


In [18]:
# 3x3 integer frame holding 0..8
dfna = pd.DataFrame(np.arange(9).reshape(3, 3))
# Blank out the whole first column. One label is enough; the original
# duplicated list [0, 0] only addressed the same column twice.
dfna[0] = np.nan
print(dfna)

    0  1  2
0 NaN  1  2
1 NaN  4  5
2 NaN  7  8


In [19]:

# Boolean mask of the frame: True wherever a cell is missing
print(dfna.isnull())


      0      1      2
0  True  False  False
1  True  False  False
2  True  False  False


In [20]:
# Discard every column that contains a missing value; dfna is left as is
b = dfna.dropna(axis="columns")
print(b)


   1  2
0  1  2
1  4  5
2  7  8


In [21]:
# Inverse mask on the trimmed frame: True wherever a value is present
print(b.notnull())

      1     2
0  True  True
1  True  True
2  True  True


### nan 값을 다른 값으로 채우기 (fillna)

In [25]:
# Fresh 3x3 frame holding 0..8, with column 0 then replaced by NaN
dfna1 = pd.DataFrame(np.arange(9).reshape((3, 3)))
dfna1[0] = np.nan

In [23]:
# Print the fillna docstring to review its parameters (value, method, axis, inplace, ...)
help(dfna1.fillna)

Help on method fillna in module pandas.core.frame:

fillna(value=None, method=None, axis=None, inplace=False, limit=None, downcast=None, **kwargs) method of pandas.core.frame.DataFrame instance
    Fill NA/NaN values using the specified method
    
    Parameters
    ----------
    value : scalar, dict, Series, or DataFrame
        Value to use to fill holes (e.g. 0), alternately a
        dict/Series/DataFrame of values specifying which value to use for
        each index (for a Series) or column (for a DataFrame). (values not
        in the dict/Series/DataFrame will not be filled). This value cannot
        be a list.
    method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
        Method to use for filling holes in reindexed Series
        pad / ffill: propagate last valid observation forward to next valid
        backfill / bfill: use NEXT valid observation to fill gap
    axis : {0 or 'index', 1 or 'columns'}
    inplace : boolean, default False
        If True, fil

In [27]:
# No inplace flag here: the filled frame is only returned for display
dfna1.fillna(999)


Unnamed: 0,0,1,2
0,999.0,1,2
1,999.0,4,5
2,999.0,7,8


In [28]:
# Display dfna1 again to check whether the previous fillna call modified it
dfna1

Unnamed: 0,0,1,2
0,,1,2
1,,4,5
2,,7,8


### 데이터프레임 자체를 갱신하려면 inplace=True를 지정해야 한다

In [29]:
# Fill the missing cells directly in dfna1 instead of returning a copy
dfna1.fillna(value=888, inplace=True)

In [30]:
# Display dfna1 to confirm the in-place fill took effect
dfna1

Unnamed: 0,0,1,2
0,888.0,1,2
1,888.0,4,5
2,888.0,7,8


### 판다스에서 숫자형 열에 None을 대입하면 object 타입의 None으로 저장된다

In [32]:
# Fresh 3x3 frame holding 0..8, with column 0 then overwritten by None
dfna2 = pd.DataFrame(np.arange(9).reshape((3, 3)))
dfna2[0] = None

In [40]:
# Inspect column 0 after assigning None to it (values and resulting dtype)
dfna2[0]

0    None
1    None
2    None
Name: 0, dtype: object

In [48]:
# Print the set_value docstring.
# NOTE(review): DataFrame.set_value was deprecated in pandas 0.21 and removed
# in 1.0, so this cell only runs on older pandas; .at / .loc are the
# current replacements.
help(dfna2.set_value)

Help on method set_value in module pandas.core.frame:

set_value(index, col, value, takeable=False) method of pandas.core.frame.DataFrame instance
    Put single value at passed column and index
    
    Parameters
    ----------
    index : row label
    col : column label
    value : scalar value
    takeable : interpret the index/col as indexers, default False
    
    Returns
    -------
    frame : DataFrame
        If label pair is contained, will be reference to calling DataFrame,
        otherwise a new object



### None을 np.nan으로 변경

In [51]:
# Overwrite rows 0-2 of column 0 with NaN via label-based assignment.
# DataFrame.set_value (a scalar setter, here handed a list of rows) was
# deprecated in pandas 0.21 and removed in 1.0; .loc takes the row list.
dfna2.loc[[0, 1, 2], 0] = np.nan
# .loc assignment returns nothing, so name the frame to display it
dfna2

Unnamed: 0,0,1,2
0,,1,2
1,,4,5
2,,7,8


In [52]:
# Print the frame to verify column 0 after the replacement
print(dfna2)

     0  1  2
0  NaN  1  2
1  NaN  4  5
2  NaN  7  8
