## 예제 1-1  연산자와 메소드

In [1]:
import pandas as pd

In [2]:
pd.__version__

'0.23.0'

In [3]:
# Three-element integer Series with the default 0..2 positional index.
ser = pd.Series(data=[1, 2, 3])

In [4]:
ser

0    1
1    2
2    3
dtype: int64

In [5]:
ser + ser

0    2
1    4
2    6
dtype: int64

In [6]:
ser.add(ser)

0    2
1    4
2    6
dtype: int64

## 예제 1-2  검색과 슬라이싱 편리성

In [62]:
# Wrap the Series in a DataFrame; the output below shows it becomes a
# single column labelled 0 that keeps the Series' row index.
df = pd.DataFrame(ser)

In [63]:
df

Unnamed: 0,0
0,1
1,2
2,3


In [64]:
df[0]

0    1
1    2
2    3
Name: 0, dtype: int64

In [65]:
df.iloc[0]

0    1
Name: 0, dtype: int64

In [66]:
df.iloc[0,0]

1

In [67]:
df[0]

0    1
1    2
2    3
Name: 0, dtype: int64

In [68]:
df[0][0]

1

## 예제 1-3  인스턴스를 관리하는 속성

In [69]:
# 2x3 frame with explicit row labels ('a', 'b') and column labels
# ('c', 'd', 'e'); used below to inspect the index/columns/values attributes.
df = pd.DataFrame(
    [[0, 1, 2], [3, 4, 5]],
    index=list('ab'),
    columns=list('cde'),
)

In [70]:
df

Unnamed: 0,c,d,e
a,0,1,2
b,3,4,5


In [14]:
type(df.index)

pandas.core.indexes.base.Index

In [15]:
type(df.columns)

pandas.core.indexes.base.Index

In [16]:
type(df.values)

numpy.ndarray

In [17]:
type(df.iloc)

pandas.core.indexing._iLocIndexer

In [18]:
type(df.loc)

pandas.core.indexing._LocIndexer

## 예제 1-4 변수에 할당

In [71]:
# 3x3 integer frame with columns 'a', 'b', 'c' and the default row index.
df = pd.DataFrame(
    data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    columns=list('abc'),
)

In [72]:
# Plain assignment: df_a becomes another name for the same DataFrame object
# (the id() values printed below are identical); no copy is made.
df_a = df

In [73]:
# Writing through the alias mutates the single shared DataFrame: the `df`
# output below shows column 'a' set to 100 as well.
df_a['a'] = 100

In [74]:
df_a

Unnamed: 0,a,b,c
0,100,2,3
1,100,5,6
2,100,8,9


In [75]:
df

Unnamed: 0,a,b,c
0,100,2,3
1,100,5,6
2,100,8,9


In [77]:
id(df),id(df_a)

(2237706011816, 2237706011816)

In [76]:
df.values.base is df_a.values.base

True

## 예제 1-5  연산을 수행할 경우 새로운 인스턴스 생성

In [37]:
# Arithmetic returns a new DataFrame bound to df_b; df keeps its values and
# the two objects have different id() values (see the outputs below).
df_b = df + 99

In [38]:
df_b

Unnamed: 0,a,b,c
0,199,101,102
1,199,104,105
2,199,107,108


In [78]:
id(df_b), id(df)

(2237701314600, 2237706011816)

In [39]:
df_b.values.base is df.values.base

False

In [40]:
df

Unnamed: 0,a,b,c
0,100,2,3
1,100,5,6
2,100,8,9


In [41]:
import numpy as np

In [42]:
# Series holding 0..3, labelled 'a' through 'd'.
s1 = pd.Series(np.arange(4), index=list('abcd'))

In [43]:
# Plain assignment binds a second name to the same Series object
# (`s1 is s2` evaluates to True below).
s2 = s1

In [44]:
s1 is s2

True

In [45]:
# copy() returns a distinct Series object, so `s1 is s3` is False below.
s3 = s1.copy()

In [46]:
s1 is s3

False

## 예제 1-6  `__getitem__` 이해하기

In [79]:
pd.Series.__getitem__

<function pandas.core.series.Series.__getitem__(self, key)>

In [80]:
pd.DataFrame.__getitem__

<function pandas.core.frame.DataFrame.__getitem__(self, key)>

In [81]:
pd.Index.__getitem__

<function pandas.core.indexes.base.Index.__getitem__(self, key)>

In [82]:
pd.MultiIndex.__getitem__

<function pandas.core.indexes.multi.MultiIndex.__getitem__(self, key)>

In [83]:
# 2x4 frame with row labels 'a', 'b' and Korean column labels; used below to
# show that [] indexing and __getitem__ are the same operation.
df = pd.DataFrame(
    data=[[1, 2, 3, 4], [5, 6, 7, 8]],
    index=['a', 'b'],
    columns=['가', '나', '다', '라'],
)

In [84]:
df.loc.__getitem__

<bound method _LocationIndexer.__getitem__ of <pandas.core.indexing._LocIndexer object at 0x0000020901ACB278>>

In [85]:
df.iloc.__getitem__

<bound method _LocationIndexer.__getitem__ of <pandas.core.indexing._iLocIndexer object at 0x0000020901ACB818>>

In [86]:
df

Unnamed: 0,가,나,다,라
a,1,2,3,4
b,5,6,7,8


In [87]:
df['나']

a    2
b    6
Name: 나, dtype: int64

In [88]:
df.__getitem__('나')

a    2
b    6
Name: 나, dtype: int64

In [89]:
# Select column '나' as a Series (it keeps the row labels 'a', 'b' and is
# named after the column, as the output below shows).
ser = df['나']

In [90]:
ser

a    2
b    6
Name: 나, dtype: int64

In [91]:
ser['a']

2

In [92]:
ser.__getitem__('a')

2

In [93]:
ser.__getitem__(ser > 3)

b    6
Name: 나, dtype: int64

In [94]:
df.__getitem__(df > 3)

Unnamed: 0,가,나,다,라
a,,,,4
b,5.0,6.0,7.0,8


## 예제 1-7  브로드캐스팅과 벡터화 연산

In [19]:
# 3x3 integer frame (columns 'a', 'b', 'c') used as the left operand in the
# broadcasting examples that follow.
df = pd.DataFrame(
    data=[[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    columns=list('abc'),
)

In [20]:
df

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6
2,7,8,9


In [21]:
df + 3

Unnamed: 0,a,b,c
0,4,5,6
1,7,8,9
2,10,11,12


In [22]:
# Constant 3x3 frame of threes with columns 'a', 'b', 'c' and the default
# row index.
df1 = pd.DataFrame([[3] * 3 for _ in range(3)], columns=list('abc'))

In [23]:
df1

Unnamed: 0,a,b,c
0,3,3,3
1,3,3,3
2,3,3,3


In [24]:
df + 3

Unnamed: 0,a,b,c
0,4,5,6
1,7,8,9
2,10,11,12


In [25]:
df + df1

Unnamed: 0,a,b,c
0,4,5,6
1,7,8,9
2,10,11,12


In [26]:
type(df.__add__)

method

In [27]:
type(df.__add__.__func__)

function

In [28]:
type(df.add)

method

In [29]:
type(df.add.__func__)

function

In [30]:
df.__add__.__func__ is df.add.__func__

False

## 예제 1-8 결측값으로 인스턴스 생성

In [47]:
import numpy as np

In [48]:
# Single-column frame whose last entry is missing (NaN); the output below
# shows the column stored as float64.
df_na = pd.DataFrame(data=[1, 2, np.nan])

In [49]:
df_na

Unnamed: 0,0
0,1.0
1,2.0
2,


In [50]:
df_na.sum()

0    3.0
dtype: float64

In [51]:
df_na.sum(skipna=False)

0   NaN
dtype: float64

In [52]:
df_na < 3

Unnamed: 0,0
0,True
1,True
2,False


In [53]:
df_na < np.nan

Unnamed: 0,0
0,False
1,False
2,False


## 예제 1-9 축 알아보기

In [54]:
# Four-element Series; a Series has a single axis (axis=0).
df_sax = pd.Series(data=[1, 2, 3, 4])

In [55]:
df_sax

0    1
1    2
2    3
3    4
dtype: int64

In [56]:
df_sax.sum(axis=0)

10

In [57]:
# 2x4 frame used to contrast sums along axis=0 and axis=1.
df_dax = pd.DataFrame(data=[[1, 2, 3, 4], [5, 6, 7, 8]])

In [58]:
df_dax

Unnamed: 0,0,1,2,3
0,1,2,3,4
1,5,6,7,8


In [59]:
df_dax.sum(axis=0)

0     6
1     8
2    10
3    12
dtype: int64

In [60]:
df_dax.sum(axis=1)

0    10
1    26
dtype: int64