### pandas 입출력
Pandas는 데이터 파일을 읽어 데이터프레임을 만들 수 있다. 다음처럼 여러 가지 포맷을 지원한다.

CSV

Excel

HTML

JSON

HDF5

SAS

STATA

SQL

In [5]:
import pandas as pd
import numpy as np

In [6]:
%%writefile sample1.csv
c1, c2, c3
1, 1.11, one
2, 2.22, two
3, 3.33, three

Overwriting sample1.csv


In [8]:
# Load the CSV file into a DataFrame; the first line (c1, c2, c3) is used as
# the column header and a default integer index is generated for the rows.
pd.read_csv('sample1.csv')

Unnamed: 0,c1,c2,c3
0,1,1.11,one
1,2,2.22,two
2,3,3.33,three


In [9]:
%%writefile sample2.csv
1, 1.11, one
2, 2.22, two
3, 3.33, three

Writing sample2.csv


In [10]:
# sample2.csv has no header line, so the column names are supplied explicitly
# through the names= argument.
pd.read_csv('sample2.csv', names=['c1', 'c2', 'c3'])

Unnamed: 0,c1,c2,c3
0,1,1.11,one
1,2,2.22,two
2,3,3.33,three


In [11]:
# Use the c1 column as the row index instead of a generated integer index.
pd.read_csv('sample1.csv', index_col='c1')

c1,c2,c3
1,1.11,one
2,2.22,two
3,3.33,three


In [12]:
%%writefile sample3.txt
c1        c2        c3        c4
0.179181 -1.538472  1.347553  0.43381
1.024209  0.087307 -1.281997  0.49265
0.417899 -2.002308  0.255245 -1.10515

Writing sample3.txt


In [13]:
# The columns in sample3.txt are separated by runs of whitespace, so a regular
# expression is used as the separator. The pattern is written as a raw string
# so that "\s" is not interpreted as an (invalid) string escape sequence.
pd.read_table('sample3.txt', sep=r'\s+')

Unnamed: 0,c1,c2,c3,c4
0,0.179181,-1.538472,1.347553,0.43381
1,1.024209,0.087307,-1.281997,0.49265
2,0.417899,-2.002308,0.255245,-1.10515


In [14]:
%%writefile sample4.txt
파일 제목: sample4.txt
데이터 포맷의 설명:
c1, c2, c3
1, 1.11, one
2, 2.22, two
3, 3.33, three

Writing sample4.txt


In [15]:
# Skip the first two lines of the file (line indices 0 and 1), which hold a
# title and a description rather than data; the next line becomes the header.
pd.read_csv('sample4.txt', skiprows=[0, 1])

Unnamed: 0,c1,c2,c3
0,1,1.11,one
1,2,2.22,two
2,3,3.33,three


In [16]:
%%writefile sample5.csv
c1, c2, c3
1, 1.11, one
2, , two
누락, 3.33, three

Writing sample5.csv


In [17]:
# Treat the literal string '누락' as a missing value (NaN) while parsing.
df = pd.read_csv('sample5.csv', na_values=['누락'])
df

Unnamed: 0,c1,c2,c3
0,1.0,1.11,one
1,2.0,,two
2,,3.33,three


### CSV 파일 출력

In [18]:
# Write the DataFrame to a CSV file; the row index is written as the first
# (unnamed) column.
df.to_csv('sample6.csv')

In [19]:
!cat sample6.csv  # 윈도우에서는 !type sample6.csv 함수를 사용

,c1, c2, c3
0,1.0, 1.11, one
1,2.0, , two
2,, 3.33, three
,c1, c2, c3
0,1.0, 1.11, one
1,2.0, , two
2,, 3.33, three


cat: '#': No such file or directory
cat: 윈도우에서는: No such file or directory
cat: '!type': No such file or directory
cat: 함수를: No such file or directory
cat: 사용: No such file or directory


In [20]:
# Use '|' instead of the default ',' as the field delimiter.
df.to_csv('sample7.txt', sep='|')

In [21]:
# Print the written file to check the '|' delimiter.
!cat sample7.txt

|c1| c2| c3
0|1.0| 1.11| one
1|2.0| | two
2|| 3.33| three


In [22]:
# Write missing values (NaN) as the string '누락' instead of an empty field.
df.to_csv('sample8.csv', na_rep='누락')

In [23]:
# Print the written file to check how missing values were rendered.
!cat sample8.csv

,c1, c2, c3
0,1.0, 1.11, one
1,2.0, , two
2,누락, 3.33, three


In [24]:
# Replace the default integer row labels with the strings "a", "b", "c".
df.index = ["a", "b", "c"]
df

Unnamed: 0,c1,c2,c3
a,1.0,1.11,one
b,2.0,,two
c,,3.33,three


In [25]:
# Omit both the row index and the header line, so only data rows are written.
df.to_csv('sample9.csv', index=False, header=False)

In [26]:
!cat sample9.csv  # 윈도우에서는 !type sample6.csv 함수를 사용

1.0, 1.11, one
2.0, , two
, 3.33, three
,c1, c2, c3
0,1.0, 1.11, one
1,2.0, , two
2,, 3.33, three


cat: '#': No such file or directory
cat: 윈도우에서는: No such file or directory
cat: '!type': No such file or directory
cat: 함수를: No such file or directory
cat: 사용: No such file or directory
