##### Pandas
- 데이터 분석을 위한 사용이 쉽고 성능이 좋은 오픈소스 Python 라이브러리
- Pandas의 데이터 타입
    - Series
        - index와 value로 이루어진 데이터 타입입니다.
    - Dataframe
        - index와 column과 value로 이루어진 데이터 타입입니다.
        - column은 Series로 구성이 되어 있습니다.
        - Dataframe은 여러개의 Series가 column을 구성하는 모양으로 만들어졌습니다.
- `$ pip3 install pandas`

In [None]:
import numpy as np
import pandas as pd

##### Series

In [17]:
# make - draw five random integers from 0..9 (the upper bound 10 is exclusive)
# and wrap them in a Series; the index defaults to 0..4.
data = pd.Series(data=np.random.randint(low=0, high=10, size=5))
data

0    5
1    5
2    9
3    9
4    7
dtype: int64

In [21]:
# Give the Series explicit string labels "A".."E" instead of the default 0..4 index.
data = pd.Series(np.random.randint(10, size=5), index=list("ABCDE"))
data

A    6
B    8
C    3
D    0
E    8
dtype: int64

In [15]:
data.index, data.values

(Index(['A', 'B', 'C', 'D', 'E'], dtype='object'), array([0, 4, 8, 6, 1]))

In [16]:
# 속성(data.A) 방식의 접근은 인덱스 라벨이 유효한 파이썬 식별자인 문자열일 때만 가능합니다.
# index를 문자 "1","2","3"으로 지정하면 data.1 처럼 쓸 수 없으므로 data["1"] 형태로 접근해야 합니다.
data.A, data.C

(0, 8)

In [23]:
# series에는 인덱스의 이름과 Series 데이터의 이름을 부여할수 있습니다.
data.name = "random_number"
data.index.name = "index_str"
data

index_str
A    6
B    8
C    3
D    0
E    8
Name: random_number, dtype: int64

In [25]:
# 연산도 가능합니다. 연산은 브로드캐스팅으로 연산됩니다.
data * 10

index_str
A    60
B    80
C    30
D     0
E    80
Name: random_number, dtype: int64

In [26]:
# index로 여러개의 값 가져오기
data[["B","D"]]

index_str
B    8
D    0
Name: random_number, dtype: int64

In [28]:
# offset 사용 가능
data[2:]

index_str
C    3
D    0
E    8
Name: random_number, dtype: int64

In [29]:
data[2:4]

index_str
C    3
D    0
Name: random_number, dtype: int64

In [30]:
data[:4]

index_str
A    6
B    8
C    3
D    0
Name: random_number, dtype: int64

In [31]:
data[::-1]

index_str
E    8
D    0
C    3
B    8
A    6
Name: random_number, dtype: int64

In [32]:
# 비교 연산 가능

In [35]:
data[data > 6]

index_str
B    8
E    8
Name: random_number, dtype: int64

In [38]:
for idx, value in data.items():
    print(idx, value)

A 6
B 8
C 3
D 0
E 8


In [39]:
# A Series can be built straight from a dictionary: keys become the index, values the data.
dic = dict(D=7, E=5, F=9)
data2 = pd.Series(data=dic)
data2

D    7
E    5
F    9
dtype: int64

In [41]:
data

index_str
A    6
B    8
C    3
D    0
E    8
Name: random_number, dtype: int64

In [43]:
# series 사이의 연산 - index가 매핑이 돼서 연산이 됩니다. 한쪽에만 있는 index의 결과는 NaN이 됩니다.
result = data + data2
result

A     NaN
B     NaN
C     NaN
D     7.0
E    13.0
F     NaN
dtype: float64

In [45]:
# NaN 제거 - notnull 사용
print(result.notnull())
result = result[result.notnull()]
result

A    False
B    False
C    False
D     True
E     True
F    False
dtype: bool


D     7.0
E    13.0
dtype: float64

In [46]:
# Take the first element by position. The index labels here are strings ("D", "E"),
# so an integer key must go through .iloc; result[0] relies on a deprecated
# positional fallback of Series.__getitem__.
result.iloc[0]

7.0

##### Dataframe
- series(index, value), column으로 이루어진 pandas의 데이터 타입
- table 모양으로 구성 됩니다. row와 column이 있습니다.
- make
- insert
    - row
    - column
- append (pandas 2.0에서 제거되었으므로 최신 버전에서는 concat을 사용합니다.)
- concat
- groupby, aggregate
- select
- merge : join - 'inner', 'outer'

In [48]:
# make
df = pd.DataFrame(columns=["Email","Name"])
df

Unnamed: 0,Email,Name


In [49]:
df["Name"] = ["fcamp", "dss"]
df["Email"] = ["fcamp@gmail.com", "dss@gmail.com"]
df

Unnamed: 0,Email,Name
0,fcamp@gmail.com,fcamp
1,dss@gmail.com,dss


In [50]:
df["Name"]

0    fcamp
1      dss
Name: Name, dtype: object

In [61]:
# Build a DataFrame from a dictionary: each key becomes a column name and
# each list supplies that column's values.
name = ["fcamp", "dss"]
email = ["{}@gmail.com".format(user) for user in name]
dic = dict(Name=name, Email=email)
df = pd.DataFrame(data=dic)
df

Unnamed: 0,Email,Name
0,fcamp@gmail.com,fcamp
1,dss@gmail.com,dss


In [54]:
# 인덱스를 설정 : 중복된 값도 허용되지만, 행을 구분하기 쉽도록 unique 값을 사용하는 것이 좋습니다.
index_list = ["one", "two"]
df = pd.DataFrame(dic, index=index_list)
df

Unnamed: 0,Email,Name
one,fcamp@gmail.com,fcamp
two,dss@gmail.com,dss


In [58]:
df.index, df.columns, df.values

(Index(['one', 'two'], dtype='object'),
 Index(['Email', 'Name'], dtype='object'),
 array([['fcamp@gmail.com', 'fcamp'],
        ['dss@gmail.com', 'dss']], dtype=object))

##### insert
- row
- column

In [62]:
# row
df

Unnamed: 0,Email,Name
0,fcamp@gmail.com,fcamp
1,dss@gmail.com,dss


In [63]:
# row 접근 - loc
df.loc[1]

Email    dss@gmail.com
Name               dss
Name: 1, dtype: object

In [64]:
# column 접근
df["Name"]

0    fcamp
1      dss
Name: Name, dtype: object

In [65]:
# insert row 
df.loc[2] = {'Email':'data@gmail.com', 'Name':'data'}
df

Unnamed: 0,Email,Name
0,fcamp@gmail.com,fcamp
1,dss@gmail.com,dss
2,data@gmail.com,data


In [67]:
df.loc[len(df)] = {'Email':'data2@gmail.com', 'Name':'data2'}

In [68]:
df

Unnamed: 0,Email,Name
0,fcamp@gmail.com,fcamp
1,dss@gmail.com,dss
2,data@gmail.com,data
3,data2@gmail.com,data2


In [71]:
# insert column
df["Address"] = "dss"
df

Unnamed: 0,Email,Name,Address
0,fcamp@gmail.com,fcamp,dss
1,dss@gmail.com,dss,dss
2,data@gmail.com,data,dss
3,data2@gmail.com,data2,dss


In [76]:
# 들어가는 데이터는 하나의 값을 넣어 브로드캐스팅이 되도록 하거나
# 데이터 프레임의 row 수와 같은 개수의 데이터를 넣어야 합니다.
df["Address"] = ["Seoul","Pusan","Jeju","Deajeon"]
df

Unnamed: 0,Email,Name,Address
0,fcamp@gmail.com,fcamp,Seoul
1,dss@gmail.com,dss,Pusan
2,data@gmail.com,data,Jeju
3,data2@gmail.com,data2,Deajeon


In [77]:
# apply - Name 컬럼 데이터의 문자수를 세서 name(count) 형태로 출력되는 새로운 컬럼을 만들겁니다.
def name_count(name):
    """Return ``name`` followed by its length in parentheses, e.g. ``"dss"`` -> ``"dss(3)"``."""
    length = len(name)
    return "{text}({size})".format(text=name, size=length)

name_count("doojin")

'doojin(6)'

In [81]:
df["Name_Count"] = df["Name"].apply(name_count)

In [82]:
df

Unnamed: 0,Email,Name,Address,Name_Count
0,fcamp@gmail.com,fcamp,Seoul,fcamp(5)
1,dss@gmail.com,dss,Pusan,dss(3)
2,data@gmail.com,data,Jeju,data(4)
3,data2@gmail.com,data2,Deajeon,data2(5)


In [83]:
df["Address_Count"] = df["Address"].apply(lambda addr:"{}({})".format(addr, len(addr)))
df

Unnamed: 0,Email,Name,Address,Name_Count,Address_Count
0,fcamp@gmail.com,fcamp,Seoul,fcamp(5),Seoul(5)
1,dss@gmail.com,dss,Pusan,dss(3),Pusan(5)
2,data@gmail.com,data,Jeju,data(4),Jeju(4)
3,data2@gmail.com,data2,Deajeon,data2(5),Deajeon(7)


In [114]:
# append
# 사람이름 나이 데이터가 들어가는 데이터 프레임을 만들겠습니다.

import random, string

def get_name():
    """Return one name picked at random from a fixed pool of ten first names."""
    pool = (
        "Adam", "Alan", "Alex", "Alvin", "Andrew",
        "Anthony", "Arnold", "Jin", "Anchal", "Peter",
    )
    return random.choice(pool)

def get_age(start=20, end=40):
    """Return a random integer age between ``start`` and ``end``, both inclusive."""
    # randrange excludes its stop value, hence end + 1 to keep ``end`` reachable.
    return random.randrange(start, end + 1)

def make_data(rows=10):
    """Return a list of ``rows`` random records, each a dict with "Age" and "Name" keys."""
    # Within each record get_age() is called before get_name().
    return [{"Age": get_age(), "Name": get_name()} for _ in range(rows)]

In [115]:
make_data()

[{'Age': 31, 'Name': 'Jin'},
 {'Age': 24, 'Name': 'Anchal'},
 {'Age': 32, 'Name': 'Anchal'},
 {'Age': 37, 'Name': 'Alex'},
 {'Age': 27, 'Name': 'Alvin'},
 {'Age': 23, 'Name': 'Peter'},
 {'Age': 33, 'Name': 'Adam'},
 {'Age': 32, 'Name': 'Alvin'},
 {'Age': 33, 'Name': 'Anthony'},
 {'Age': 25, 'Name': 'Arnold'}]

In [118]:
data1 = make_data()
df1 = pd.DataFrame(data1)
df1

Unnamed: 0,Age,Name
0,38,Alan
1,26,Alan
2,21,Alex
3,36,Alex
4,29,Andrew
5,27,Andrew
6,33,Anchal
7,21,Jin
8,37,Alan
9,37,Andrew


In [119]:
data2 = make_data()
df2 = pd.DataFrame(data2)
df2

Unnamed: 0,Age,Name
0,38,Arnold
1,34,Anthony
2,36,Peter
3,33,Adam
4,40,Arnold
5,30,Jin
6,25,Anthony
7,37,Anchal
8,33,Arnold
9,24,Alan


In [120]:
# append: stack the rows of df2 underneath the rows of df1.
# DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call.
# The original row labels are kept, so labels 0..9 occur twice in df3.
df3 = pd.concat([df1, df2])
df3

Unnamed: 0,Age,Name
0,38,Alan
1,26,Alan
2,21,Alex
3,36,Alex
4,29,Andrew
5,27,Andrew
6,33,Anchal
7,21,Jin
8,37,Alan
9,37,Andrew


In [122]:
df3.reset_index(drop=True)

Unnamed: 0,Age,Name
0,38,Alan
1,26,Alan
2,21,Alex
3,36,Alex
4,29,Andrew
5,27,Andrew
6,33,Anchal
7,21,Jin
8,37,Alan
9,37,Andrew


In [125]:
# Same row-wise append, but ignore_index=True renumbers the combined rows from 0.
# (pd.concat replaces DataFrame.append, which was removed in pandas 2.0.)
df3 = pd.concat([df1, df2], ignore_index=True)
df3

Unnamed: 0,Age,Name
0,38,Alan
1,26,Alan
2,21,Alex
3,36,Alex
4,29,Andrew
5,27,Andrew
6,33,Anchal
7,21,Jin
8,37,Alan
9,37,Andrew


###### Concat
- rows
- columns

In [130]:
# concat rows
df3 = pd.concat([df1, df2]).reset_index(drop=True)
df3

Unnamed: 0,Age,Name
0,38,Alan
1,26,Alan
2,21,Alex
3,36,Alex
4,29,Andrew
5,27,Andrew
6,33,Anchal
7,21,Jin
8,37,Alan
9,37,Andrew


In [137]:
# concat columns
# axis = 1 설정하면 가로로 합쳐집니다.
pd.concat([df1, df3], axis=1)

Unnamed: 0,Age,Name,Age.1,Name.1
0,38.0,Alan,38,Alan
1,26.0,Alan,26,Alan
2,21.0,Alex,21,Alex
3,36.0,Alex,36,Alex
4,29.0,Andrew,29,Andrew
5,27.0,Andrew,27,Andrew
6,33.0,Anchal,33,Anchal
7,21.0,Jin,21,Jin
8,37.0,Alan,37,Alan
9,37.0,Andrew,37,Andrew


In [136]:
pd.concat([df1, df3], axis=1, join='inner')

Unnamed: 0,Age,Name,Age.1,Name.1
0,38,Alan,38,Alan
1,26,Alan,26,Alan
2,21,Alex,21,Alex
3,36,Alex,36,Alex
4,29,Andrew,29,Andrew
5,27,Andrew,27,Andrew
6,33,Anchal,33,Anchal
7,21,Jin,21,Jin
8,37,Alan,37,Alan
9,37,Andrew,37,Andrew


##### Groupby
- 이름별로 평균 나이를 나타내는 데이터 프레임을 만들겠습니다.

In [151]:
g_df = pd.DataFrame(make_data(20))
g_df.tail()

Unnamed: 0,Age,Name
15,28,Jin
16,21,Alan
17,39,Adam
18,35,Anchal
19,20,Adam


In [152]:
result = list(g_df["Name"].unique())
len(result), result

(9,
 ['Peter',
  'Anchal',
  'Alex',
  'Arnold',
  'Anthony',
  'Adam',
  'Andrew',
  'Alan',
  'Jin'])

In [154]:
# groupby 를 이용하여 각 이름별로 몇번 나왔는지 counts 컬럼이 추가된 데이터 프레임 만들겠습니다.
result_df = g_df.groupby("Name").size().reset_index(name="counts")
result_df

Unnamed: 0,Name,counts
0,Adam,3
1,Alan,2
2,Alex,1
3,Anchal,5
4,Andrew,1
5,Anthony,2
6,Arnold,2
7,Jin,1
8,Peter,3


In [158]:
# agg : min
g_df.groupby("Name").agg('min').reset_index()

Unnamed: 0,Name,Age
0,Adam,20
1,Alan,21
2,Alex,37
3,Anchal,21
4,Andrew,32
5,Anthony,25
6,Arnold,30
7,Jin,28
8,Peter,24


In [159]:
# agg : max
g_df.groupby("Name").agg('max').reset_index()

Unnamed: 0,Name,Age
0,Adam,39
1,Alan,23
2,Alex,37
3,Anchal,36
4,Andrew,32
5,Anthony,29
6,Arnold,37
7,Jin,28
8,Peter,38


In [160]:
# agg : mean
g_df.groupby("Name").agg('mean').reset_index()

Unnamed: 0,Name,Age
0,Adam,30.0
1,Alan,22.0
2,Alex,37.0
3,Anchal,30.2
4,Andrew,32.0
5,Anthony,27.0
6,Arnold,33.5
7,Jin,28.0
8,Peter,30.333333


In [162]:
# 여러개를 agg 할수 있습니다.
df = g_df.groupby("Name").agg(['min','max','mean']).reset_index()
df

Unnamed: 0_level_0,Name,Age,Age,Age
Unnamed: 0_level_1,Unnamed: 1_level_1,min,max,mean
0,Adam,20,39,30.0
1,Alan,21,23,22.0
2,Alex,37,37,37.0
3,Anchal,21,36,30.2
4,Andrew,32,32,32.0
5,Anthony,25,29,27.0
6,Arnold,30,37,33.5
7,Jin,28,28,28.0
8,Peter,24,38,30.333333


##### select

In [164]:
df.head()

Unnamed: 0_level_0,Name,Age,Age,Age
Unnamed: 0_level_1,Unnamed: 1_level_1,min,max,mean
0,Adam,20,39,30.0
1,Alan,21,23,22.0
2,Alex,37,37,37.0
3,Anchal,21,36,30.2
4,Andrew,32,32,32.0


In [168]:
df.tail(3)

Unnamed: 0_level_0,Name,Age,Age,Age
Unnamed: 0_level_1,Unnamed: 1_level_1,min,max,mean
6,Arnold,30,37,33.5
7,Jin,28,28,28.0
8,Peter,24,38,30.333333


In [170]:
# offset index
df[3:6]

Unnamed: 0_level_0,Name,Age,Age,Age
Unnamed: 0_level_1,Unnamed: 1_level_1,min,max,mean
3,Anchal,21,36,30.2
4,Andrew,32,32,32.0
5,Anthony,25,29,27.0


In [171]:
df[3:]

Unnamed: 0_level_0,Name,Age,Age,Age
Unnamed: 0_level_1,Unnamed: 1_level_1,min,max,mean
3,Anchal,21,36,30.2
4,Andrew,32,32,32.0
5,Anthony,25,29,27.0
6,Arnold,30,37,33.5
7,Jin,28,28,28.0
8,Peter,24,38,30.333333


In [172]:
df[::-1]

Unnamed: 0_level_0,Name,Age,Age,Age
Unnamed: 0_level_1,Unnamed: 1_level_1,min,max,mean
8,Peter,24,38,30.333333
7,Jin,28,28,28.0
6,Arnold,30,37,33.5
5,Anthony,25,29,27.0
4,Andrew,32,32,32.0
3,Anchal,21,36,30.2
2,Alex,37,37,37.0
1,Alan,21,23,22.0
0,Adam,20,39,30.0


In [174]:
# row select 하기 위해서는 loc 사용합니다.
df.loc[3]

Name          Anchal
Age   min         21
      max         36
      mean      30.2
Name: 3, dtype: object

In [177]:
# One .loc lookup instead of three chained [] steps:
# row label 3, column ("Age", "min") of the two-level column index.
df.loc[3, ("Age", "min")]

21

In [181]:
data = {
    "Name": df["Name"],
    "Min": df["Age"]["min"],
    "Max": df["Age"]["max"],
    "Mean": df["Age"]["mean"],
}
n_df = pd.DataFrame(data)
n_df

Unnamed: 0,Max,Mean,Min,Name
0,39,30.0,20,Adam
1,23,22.0,21,Alan
2,37,37.0,37,Alex
3,36,30.2,21,Anchal
4,32,32.0,32,Andrew
5,29,27.0,25,Anthony
6,37,33.5,30,Arnold
7,28,28.0,28,Jin
8,38,30.333333,24,Peter


In [188]:
# Keep the rows whose mean age is strictly greater than 30 (a mean of exactly 30.0 is
# excluded), sort them by Mean in descending order with sort_values, and renumber the
# index from 0.
(
    n_df[n_df["Mean"] > 30]
    .sort_values(by="Mean", ascending=False)
    .reset_index(drop=True)
)

Unnamed: 0,Max,Mean,Min,Name
0,37,37.0,37,Alex
1,37,33.5,30,Arnold
2,32,32.0,32,Andrew
3,38,30.333333,24,Peter
4,36,30.2,21,Anchal


In [192]:
n_df

Unnamed: 0,Max,Mean,Min,Name
0,39,30.0,20,Adam
1,23,22.0,21,Alan
2,37,37.0,37,Alex
3,36,30.2,21,Anchal
4,32,32.0,32,Andrew
5,29,27.0,25,Anthony
6,37,33.5,30,Arnold
7,28,28.0,28,Jin
8,38,30.333333,24,Peter


In [193]:
# Add a column holding how many people share each name.
# The counts are assigned by position, not by name: groupby("Name").size() returns one
# count per name in sorted name order, and n_df was itself derived from a
# groupby("Name") aggregation of g_df, so the two are expected to line up row by row.
# NOTE(review): this assumes g_df has not changed since n_df was built.
n_df["Count"] = g_df.groupby("Name").size().tolist()
n_df

Unnamed: 0,Max,Mean,Min,Name,Count
0,39,30.0,20,Adam,3
1,23,22.0,21,Alan,2
2,37,37.0,37,Alex,1
3,36,30.2,21,Anchal,5
4,32,32.0,32,Andrew,1
5,29,27.0,25,Anthony,2
6,37,33.5,30,Arnold,2
7,28,28.0,28,Jin,1
8,38,30.333333,24,Peter,3


In [195]:
# Mean 컬럼을 가장 뒤로 옮기기
# drop - 삭제
mean = n_df["Mean"]
n_df.drop("Mean", axis=1, inplace=True)
n_df

Unnamed: 0,Max,Min,Name,Count
0,39,20,Adam,3
1,23,21,Alan,2
2,37,37,Alex,1
3,36,21,Anchal,5
4,32,32,Andrew,1
5,29,25,Anthony,2
6,37,30,Arnold,2
7,28,28,Jin,1
8,38,24,Peter,3


In [196]:
n_df["Mean"] = mean
n_df

Unnamed: 0,Max,Min,Name,Count,Mean
0,39,20,Adam,3,30.0
1,23,21,Alan,2,22.0
2,37,37,Alex,1,37.0
3,36,21,Anchal,5,30.2
4,32,32,Andrew,1,32.0
5,29,25,Anthony,2,27.0
6,37,30,Arnold,2,33.5
7,28,28,Jin,1,28.0
8,38,24,Peter,3,30.333333


In [199]:
# 컬럼이름을 재설정 - rename
n_df.rename(columns={"Name":"Unique_Name", "Max":"Maximum"}, inplace=True)
n_df

Unnamed: 0,Maximum,Min,Unique_Name,Count,Mean
0,39,20,Adam,3,30.0
1,23,21,Alan,2,22.0
2,37,37,Alex,1,37.0
3,36,21,Anchal,5,30.2
4,32,32,Andrew,1,32.0
5,29,25,Anthony,2,27.0
6,37,30,Arnold,2,33.5
7,28,28,Jin,1,28.0
8,38,24,Peter,3,30.333333


##### merge
- sql의 join과 같은 개념입니다.
- 두개의 데이터 프레임을 하나로 합쳐서 데이터를 보여줄수 있습니다.
- user_df : 아이디, 이름, 나이 데이터 프레임
- money_df : 아이디, 돈 데이터 프레임

In [201]:
# make user_df: eight users with IDs 1..8, each with a distinct random name and a random age.
user_df = pd.DataFrame(columns=["UserID", "Name", "Age"])

for idx in range(1, 9):
    # Redraw until the candidate name is not already present in the frame.
    name = get_name()
    while name in user_df["Name"].tolist():
        name = get_name()

    # Insert the record as the next row; its label is the current row count.
    data = dict(Name=name, UserID=idx, Age=get_age())
    user_df.loc[len(user_df)] = data

user_df

Unnamed: 0,UserID,Name,Age
0,1,Andrew,22
1,2,Anchal,23
2,3,Alan,31
3,4,Peter,28
4,5,Arnold,26
5,6,Anthony,39
6,7,Alex,21
7,8,Adam,20


In [211]:
# make money_df: 15 random (ID, Money) records.
# Money is a multiple of 1000 between 1000 and 20000. IDs are drawn from 1..9,
# i.e. a wider range than the user IDs produced by range(1, 9) above.
money_df = pd.DataFrame(columns=["ID", "Money"])
for _ in range(15):
    money = 1000 * random.randint(1, 20)
    data = dict(Money=money, ID=random.randint(1, 9))
    # Insert the record as the next row; its label is the current row count.
    money_df.loc[len(money_df)] = data
money_df

Unnamed: 0,ID,Money
0,9,1000
1,4,12000
2,7,12000
3,9,1000
4,2,9000
5,5,2000
6,9,6000
7,6,11000
8,1,19000
9,1,9000


In [212]:
money_df.merge(user_df, left_on="ID", right_on="UserID" )

Unnamed: 0,ID,Money,UserID,Name,Age
0,4,12000,4,Peter,28
1,4,4000,4,Peter,28
2,4,5000,4,Peter,28
3,7,12000,7,Alex,21
4,7,18000,7,Alex,21
5,2,9000,2,Anchal,23
6,5,2000,5,Arnold,26
7,5,15000,5,Arnold,26
8,6,11000,6,Anthony,39
9,1,19000,1,Andrew,22


In [213]:
money_df.merge(user_df, left_on="ID", right_on="UserID", how='outer' )

Unnamed: 0,ID,Money,UserID,Name,Age
0,9.0,1000.0,,,
1,9.0,1000.0,,,
2,9.0,6000.0,,,
3,9.0,6000.0,,,
4,4.0,12000.0,4.0,Peter,28.0
5,4.0,4000.0,4.0,Peter,28.0
6,4.0,5000.0,4.0,Peter,28.0
7,7.0,12000.0,7.0,Alex,21.0
8,7.0,18000.0,7.0,Alex,21.0
9,2.0,9000.0,2.0,Anchal,23.0


In [215]:
user_df.rename(columns={"UserID":"ID"}, inplace=True)
user_df

Unnamed: 0,ID,Name,Age
0,1,Andrew,22
1,2,Anchal,23
2,3,Alan,31
3,4,Peter,28
4,5,Arnold,26
5,6,Anthony,39
6,7,Alex,21
7,8,Adam,20


In [219]:
result_df = pd.merge(money_df, user_df)
result_df

Unnamed: 0,ID,Money,Name,Age
0,4,12000,Peter,28
1,4,4000,Peter,28
2,4,5000,Peter,28
3,7,12000,Alex,21
4,7,18000,Alex,21
5,2,9000,Anchal,23
6,5,2000,Arnold,26
7,5,15000,Arnold,26
8,6,11000,Anthony,39
9,1,19000,Andrew,22


In [222]:
# Total money per name. Select the Money column before aggregating so the other
# columns (ID, Age) are not summed and then thrown away.
result_df.groupby("Name")["Money"].sum().reset_index()

Unnamed: 0,Name,Money
0,Alex,30000
1,Anchal,9000
2,Andrew,28000
3,Anthony,11000
4,Arnold,17000
5,Peter,21000


In [225]:
# fillna - replaces NaN with the given value.
# A left join keeps every user and drops money rows whose ID matches no user, so no
# hard-coded row count (the former [:13], valid only for one random draw) is needed.
# Users without any money row get NaN in Money, which fillna turns into 0.
result = pd.merge(user_df, money_df, how='left').fillna(value=0)
result

Unnamed: 0,ID,Name,Age,Money
0,1.0,Andrew,22,19000
1,1.0,Andrew,22,9000
2,2.0,Anchal,23,9000
3,3.0,Alan,31,0
4,4.0,Peter,28,12000
5,4.0,Peter,28,4000
6,4.0,Peter,28,5000
7,5.0,Arnold,26,2000
8,5.0,Arnold,26,15000
9,6.0,Anthony,39,11000


In [231]:
# 컬럼의 형변환
result["ID"] = result["ID"].astype("int")
result

Unnamed: 0,ID,Name,Age,Money
0,1,Andrew,22,19000
1,1,Andrew,22,9000
2,2,Anchal,23,9000
3,3,Alan,31,0
4,4,Peter,28,12000
5,4,Peter,28,4000
6,4,Peter,28,5000
7,5,Arnold,26,2000
8,5,Arnold,26,15000
9,6,Anthony,39,11000


##### input / output
- csv
    - , 로 값을 구분하는 파일 타입
- excel
    - 엑셀 프로그램에서 CSV 파일을 열 때는 기본 인코딩이 UTF-8이 아닐 수 있습니다. (예: 한글 Windows의 cp949)
    - 영어가 아닌 다른 언어를 저장하거나 로드할 때 인코딩에 주의해야 합니다.
- `$ pip3 install xlrd` (구형 .xls 파일용, xlrd 2.0 이상은 .xlsx를 지원하지 않습니다.)
- `$ pip3 install openpyxl` (.xlsx 파일용)

In [234]:
result.to_csv("foo.csv", index=False)

In [235]:
pd.read_csv("foo.csv")

Unnamed: 0,ID,Name,Age,Money
0,1,Andrew,22,19000
1,1,Andrew,22,9000
2,2,Anchal,23,9000
3,3,Alan,31,0
4,4,Peter,28,12000
5,4,Peter,28,4000
6,4,Peter,28,5000
7,5,Arnold,26,2000
8,5,Arnold,26,15000
9,6,Anthony,39,11000


In [239]:
# excel: write the frame to sheet "dss" without the index column (as in the CSV export),
# so that reading the sheet back does not yield an extra "Unnamed: 0" column.
result.to_excel("foo.xlsx", sheet_name="dss", index=False)

In [240]:
pd.read_excel("foo.xlsx", "dss")

Unnamed: 0,ID,Name,Age,Money
0,1,Andrew,22,19000
1,1,Andrew,22,9000
2,2,Anchal,23,9000
3,3,Alan,31,0
4,4,Peter,28,12000
5,4,Peter,28,4000
6,4,Peter,28,5000
7,5,Arnold,26,2000
8,5,Arnold,26,15000
9,6,Anthony,39,11000
