In [1]:
import pandas as pd
import numpy as np

# 주요 메소드 처리



In [2]:
# Build a 4x4 demo frame holding the integers 0..15,
# with row labels a-d and column labels f-i.
df11 = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=list('abcd'),
    columns=list('fghi'),
)
print(df11)


    f   g   h   i
a   0   1   2   3
b   4   5   6   7
c   8   9  10  11
d  12  13  14  15


### 삽입하기  

    DataFrame에는 지정한 위치에 새 칼럼을 추가하기 위한 insert 메소드가 존재한다.
    

In [4]:
# Show the built-in documentation of DataFrame.insert (signature and parameters).
help(df11.insert)

Help on method insert in module pandas.core.frame:

insert(loc, column, value, allow_duplicates=False) method of pandas.core.frame.DataFrame instance
    Insert column into DataFrame at specified location.
    
    If `allow_duplicates` is False, raises Exception if column
    is already contained in the DataFrame.
    
    Parameters
    ----------
    loc : int
        Must have 0 <= loc <= len(columns)
    column : object
    value : scalar, Series, or array-like



In [3]:

# Add a new column 'j' at position 4 (after the four existing columns),
# giving one value per row.
df11.insert(loc=4, column='j', value=[99, 99, 99, 99])
print(df11)

    f   g   h   i   j
a   0   1   2   3  99
b   4   5   6   7  99
c   8   9  10  11  99
d  12  13  14  15  99


### 값의 개수가 인덱스 길이와 다르면(부족하거나 많으면) 오류가 발생함

In [5]:
# Demonstration: a list with 3 values cannot be inserted into a frame with
# 4 rows. The expected ValueError is caught and displayed so that the notebook
# still runs top to bottom instead of stopping at this cell.
try:
    df11.insert(5,'k',[199,199,199])
except ValueError as err:
    print('ValueError:', err)
else:
    # Only reached if the insert unexpectedly succeeds.
    print(df11)

ValueError: Length of values does not match length of index

In [6]:
# Demonstration: a list with 5 values is rejected as well, because the frame
# has 4 rows. The expected ValueError is caught and displayed so that the
# notebook still runs top to bottom instead of stopping at this cell.
try:
    df11.insert(5,'k',[199,199,199,199,199])
except ValueError as err:
    print('ValueError:', err)
else:
    # Only reached if the insert unexpectedly succeeds.
    print(df11)

ValueError: Length of values does not match length of index

### Series의 인덱스 명칭이 DataFrame의 인덱스와 같을 경우에 처리가 됨

In [10]:
# A Series whose index labels (a-d) match the frame's row labels
# is inserted as column 'k' at position 5.
ser = pd.Series(data=[99, 99, 99, 99], index=list('abcd'))
df11.insert(loc=5, column='k', value=ser)
print(df11)

    f   g   h   i   j   k
a   0   1   2   3  99  99
b   4   5   6   7  99  99
c   8   9  10  11  99  99
d  12  13  14  15  99  99


### Series의 index 이름이 DataFrame의 index와 다른 경우, 일치하지 않는 행은 NaN으로 처리된다.



In [11]:
# Display the current state of df11 (it now includes the inserted 'j' and 'k' columns).
df11

Unnamed: 0,f,g,h,i,j,k
a,0,1,2,3,99,99
b,4,5,6,7,99,99
c,8,9,10,11,99,99
d,12,13,14,15,99,99


In [12]:
# The Series is labelled a, b, c, f while the frame's rows are a, b, c, d.
# The recorded output shows row 'd' receiving NaN in the new column 'l'.
ser = pd.Series(data=[99, 99, 99, 99], index=list('abcf'))
df11.insert(loc=6, column='l', value=ser)
print(df11)

    f   g   h   i   j   k     l
a   0   1   2   3  99  99  99.0
b   4   5   6   7  99  99  99.0
c   8   9  10  11  99  99  99.0
d  12  13  14  15  99  99   NaN


### 삭제 처리 

In [13]:
ser1 = pd.Series([991,992,993,994],index=['a','b','c','f'])

# Demonstration: pop() needs the label of the item to remove. Calling it with
# no argument raises TypeError; the error is caught and displayed so that the
# notebook still runs top to bottom instead of stopping at this cell.
try:
    print(ser1.pop())
except TypeError as err:
    print('TypeError:', err)

TypeError: pop() missing 1 required positional argument: 'item'

In [18]:
# Show the built-in documentation of DataFrame.pop (it takes a single `item` argument).
help(pd.DataFrame.pop)

Help on function pop in module pandas.core.generic:

pop(self, item)
    Return item and drop from frame. Raise KeyError if not found.



#### series 는 원소 하나를 삭제

In [58]:
# Start again from a fresh four-element Series labelled a, b, c, f.
ser1 = pd.Series(data=[991, 992, 993, 994], index=list('abcf'))

# pop() returns the value stored under the label and removes it from the Series.
print(ser1.pop(item='a'))
print(ser1)

991
b    992
c    993
f    994
dtype: int64


#### dataframe 은 칼럼을 삭제한다.


In [16]:
# Fresh 4x4 frame (values 0..15, rows a-d, columns f-i) for the pop() examples.
df211 = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=list('abcd'),
    columns=list('fghi'),
)
print(df211)


    f   g   h   i
a   0   1   2   3
b   4   5   6   7
c   8   9  10  11
d  12  13  14  15


In [17]:
# On a DataFrame, pop() removes the named column and returns it as a Series;
# the frame printed afterwards no longer contains 'f'.
print(df211.pop(item='f'))
print(df211)

a     0
b     4
c     8
d    12
Name: f, dtype: int64
    g   h   i
a   1   2   3
b   5   6   7
c   9  10  11
d  13  14  15


#### dataframe의 pop은 칼럼 이름만 받으므로 index(행) 레이블로는 삭제가 불가하다.


In [19]:
# Demonstration: 'a' is a row label, not a column name, so pop() raises
# KeyError. The error is caught and displayed so that the notebook still runs
# top to bottom instead of stopping at this cell.
try:
    print(df211.pop('a'))
except KeyError as err:
    print('KeyError:', err)
else:
    # Only reached if a column named 'a' exists and was removed.
    print(df211)

KeyError: 'a'

### 원소 값에 대한 변경 


In [66]:
# Show the built-in documentation of DataFrame.replace (to_replace / value / inplace / regex ...).
help(pd.DataFrame.replace)

Help on function replace in module pandas.core.generic:

replace(self, to_replace=None, value=None, inplace=False, limit=None, regex=False, method='pad', axis=None)
    Replace values given in 'to_replace' with 'value'.
    
    Parameters
    ----------
    to_replace : str, regex, list, dict, Series, numeric, or None
    
        * str or regex:
    
            - str: string exactly matching `to_replace` will be replaced
              with `value`
            - regex: regexs matching `to_replace` will be replaced with
              `value`
    
        * list of str, regex, or numeric:
    
            - First, if `to_replace` and `value` are both lists, they
              **must** be the same length.
            - Second, if ``regex=True`` then all of the strings in **both**
              lists will be interpreted as regexs otherwise they will match
              directly. This doesn't matter much for `value` since there
              are only a few possible substitution regexes yo

#### to_replace에 넣은 값에 대해서만 value에서 처리

In [20]:
# Fresh 4x4 frame (values 0..15, rows a-d, columns f-i) for the replace() examples.
df311 = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    index=list('abcd'),
    columns=list('fghi'),
)
print(df311)


    f   g   h   i
a   0   1   2   3
b   4   5   6   7
c   8   9  10  11
d  12  13  14  15


In [25]:
# Replace every occurrence of 15 with 999 directly inside df311.
# With inplace=True the call returns None, which is why the first printed
# line of the output is "None".
# NOTE(review): the recorded output also shows the original 0 and 1 replaced by
# 999, which this cell alone does not do -- presumably left over from earlier
# in-place runs of this cell with other values; confirm on a fresh kernel.
print(df311.replace((15,), 999, inplace=True))
print(df311)

None
     f    g   h    i
a  999  999   2    3
b    4    5   6    7
c    8    9  10   11
d   12   13  14  999


#### 딕셔너리일 때 변경값을 넣으면 전체를 검색해서 변경한다.

In [22]:
# Two string columns; some of the cells end with a newline character.
df = pd.DataFrame(
    {
        'a': ['1\n', '2\n', '3'],
        'b': ['4\n', '5', '6\n'],
    }
)


In [24]:
# Display the frame as created; several cells end with a newline character.
df

Unnamed: 0,a,b
0,1\n,4\n
1,2\n,5
2,3,6\n


In [23]:
# Treat the dict key as a regular expression and substitute '<br>' for each
# match in every cell. The result is a new frame; df itself is not modified.
df.replace(to_replace={'\n': '<br>'}, regex=True)

Unnamed: 0,a,b
0,1<br>,4<br>
1,2<br>,5
2,3,6<br>


In [26]:
# Display df again: it still holds the original newline values, because the
# previous replace() call returned a new frame instead of modifying df.
df

Unnamed: 0,a,b
0,1\n,4\n
1,2\n,5
2,3,6\n


### 변경된 값을 원본 DataFrame에 반영하려면 inplace=True를 추가해야 한다

In [27]:
# Same regex substitution as before, but applied to df itself (inplace=True),
# so the change is kept in the original frame.
df.replace(to_replace={'\n': '<br>'}, inplace=True, regex=True)

In [28]:
# Display df: the newline characters have now been replaced by '<br>' in the original frame.
df

Unnamed: 0,a,b
0,1<br>,4<br>
1,2<br>,5
2,3,6<br>
