## Series

- pandas에서 사용하는 1차원 배열
- index를 사용할 수 있다.

In [8]:
import pandas as pd
import numpy as np

In [9]:
arr = np.arange(100, 110)
print(arr)

[100 101 102 103 104 105 106 107 108 109]


In [10]:
s = pd.Series(arr)        # -> 대문자? 클래스.
print(s)

0    100
1    101
2    102
3    103
4    104
5    105
6    106
7    107
8    108
9    109
dtype: int64


In [18]:
s = pd.Series(arr, dtype='float32')
print(s)

0    100.0
1    101.0
2    102.0
3    103.0
4    104.0
5    105.0
6    106.0
7    107.0
8    108.0
9    109.0
dtype: float32


In [19]:
s = pd.Series(['kim', 'lee', 'pyo'])
print(s)

0    kim
1    lee
2    pyo
dtype: object


In [23]:
s = pd.Series([1, 2, 3, '1', '2', 'hello'])
print(s)

0        1
1        2
2        3
3        1
4        2
5    hello
dtype: object


In [24]:
s[5]

'hello'

In [26]:
# 음수 인덱싱은 지원하지 않음
# s[-1]

In [27]:
s.index

RangeIndex(start=0, stop=6, step=1)

In [35]:
names = pd.Series(['kim', 'lee', 'park'], index=['a', 'b', 'c'])   ##-> 인덱스를 문자로도 기입 가능.

In [42]:
# names[0]
names.iloc[0]

'kim'

In [43]:
names['a']

'kim'

In [44]:
names.index

Index(['a', 'b', 'c'], dtype='object')

In [46]:
names.values

array(['kim', 'lee', 'park'], dtype=object)

In [47]:
names.ndim

1

In [48]:
names.shape

(3,)

## NaN(Not a Number)

In [51]:
s = pd.Series([1, 2, 3, np.nan])
print(s)

0    1.0
1    2.0
2    3.0
3    NaN
dtype: float64


## fancy indexing

In [55]:
f = ['banna', 'apple', 'grape', np.nan]
s = pd.Series(f, index=list('abcd'))
print(s)

a    banna
b    apple
c    grape
d      NaN
dtype: object


In [57]:
s[['d', 'a']]

d      NaN
a    banna
dtype: object

In [58]:
s[[3, 1]]

  s[[3, 1]]


d      NaN
b    apple
dtype: object

## bool indexing

In [59]:
s

a    banna
b    apple
c    grape
d      NaN
dtype: object

In [60]:
s[[True, False, True, False]]

a    banna
c    grape
dtype: object

In [61]:
s == 'banana'

a    False
b    False
c    False
d    False
dtype: bool

In [62]:
s[s == 'banana']

Series([], dtype: object)

In [65]:
s = pd.Series([1, 5, 3, 8, 9, 6])
s[s > 3]

1    5
3    8
4    9
5    6
dtype: int64

## 결측치(NaN) 처리

In [68]:
s = pd.Series([1, 3, np.nan, 10, 11, np.nan])
print(s)

0     1.0
1     3.0
2     NaN
3    10.0
4    11.0
5     NaN
dtype: float64


In [70]:
s.isnull()

0    False
1    False
2     True
3    False
4    False
5     True
dtype: bool

In [77]:
# s[s.isnull()]
s.isna()

0    False
1    False
2     True
3    False
4    False
5     True
dtype: bool

In [78]:
# s[s.notnull()]
s[s.notna()]

0     1.0
1     3.0
3    10.0
4    11.0
dtype: float64

## Slicing

In [79]:
s[1:3]

1    3.0
2    NaN
dtype: float64

In [81]:
s = pd.Series([1, 2, 3], index=list('abc'))
s[1:2]
s['a':'b']

a    1
b    2
dtype: int64

## Dataframe
- 2차원 데이터 구조(excel, sheet와 유사)
- 행(row), 열(column) 구조

In [89]:
d = pd.DataFrame([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
d

Unnamed: 0,0,1,2
0,1,2,3
1,4,5,6
2,7,8,9


In [90]:
d = pd.DataFrame([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
], columns=['가', '나', '다'])
d

Unnamed: 0,가,나,다
0,1,2,3
1,4,5,6
2,7,8,9


In [95]:
info = {
    'name' : ['lee', 'pyo', 'kim'],
    'age' : [20, 22, 21]
}

In [96]:
pd.DataFrame(info)

Unnamed: 0,name,age
0,lee,20
1,pyo,22
2,kim,21


In [100]:
info_df = pd.DataFrame(info)

In [101]:
info_df.index

RangeIndex(start=0, stop=3, step=1)

In [102]:
info_df.columns

Index(['name', 'age'], dtype='object')

In [104]:
info_df.values  # -> 안에 실제 데이터에 접근 가능.

array([['lee', 20],
       ['pyo', 22],
       ['kim', 21]], dtype=object)

In [106]:
info_df.dtypes

name    object
age      int64
dtype: object

In [107]:
info_df.T

Unnamed: 0,0,1,2
name,lee,pyo,kim
age,20,22,21


## index 지정

In [111]:
info_df.index = list('abc')

In [114]:
info_df

Unnamed: 0,name,age
a,lee,20
b,pyo,22
c,kim,21


## column 다루기

In [116]:
info_df.columns

Index(['name', 'age'], dtype='object')

In [118]:
print(info_df['name'])
print(type(info_df['name']))

a    lee
b    pyo
c    kim
Name: name, dtype: object
<class 'pandas.core.series.Series'>


In [120]:
info_df[      ['age', 'name']       ]

Unnamed: 0,age,name
a,20,lee
b,22,pyo
c,21,kim


In [131]:
info_df = info_df.rename(columns = {'name': '알고싶다 너의 이름...'})

In [132]:
info_df

Unnamed: 0,알고싶다 너의 이름...,age
a,lee,20
b,pyo,22
c,kim,21


In [136]:
info_df.rename(columns={'이름': 'last_name'}, inplace=True)

In [137]:
info_df

Unnamed: 0,알고싶다 너의 이름...,age
a,lee,20
b,pyo,22
c,kim,21
