# Data Loading, Storage, and File Formats

In [1]:
# Load the `watermark` IPython extension, then report the last-updated date
# (-u -d) and the Python / IPython versions (-v) used to run this notebook.
%load_ext watermark
%watermark -u -d -v

last updated: 2017-01-24 

CPython 3.5.2
IPython 5.1.0


In [27]:
import datetime

# Current local wall-clock time, rendered as "YYYY-MM-DD HH:MM:SS".
datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

In [2]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [3]:
import numpy as np
import matplotlib.pyplot as plt
from pandas import Series, DataFrame
import pandas as pd

## Reading and Writing Data in Text Format

In [10]:
# Show the raw contents of the sample CSV. The context manager closes the
# file handle as soon as the text has been read instead of leaking it.
with open('pydata-book/ch06/ex1.csv') as f:
    print(f.read())

a,b,c,d,message
1,2,3,4,hello
5,6,7,8,world
9,10,11,12,foo


In [4]:
# Parse the comma-separated file; its first row supplies the column names.
df = pd.read_csv(
    'pydata-book/ch06/ex1.csv',
)
df

Unnamed: 0,a,b,c,d,message
0,1,2,3,4,hello
1,5,6,7,8,world
2,9,10,11,12,foo


区切り文字の指定（`read_table` に `sep` を渡す）

In [6]:
# read_table defaults to tab-separated input, so the comma delimiter must be
# passed explicitly to parse a CSV file.
pd.read_table(
    'pydata-book/ch06/ex1.csv',
    sep=',',
)

Unnamed: 0,a,b,c,d,message
0,1,2,3,4,hello
1,5,6,7,8,world
2,9,10,11,12,foo


ヘッダー行なし

In [11]:
# Show the raw contents of the header-less CSV. The context manager closes
# the file handle once the text has been read instead of leaking it.
with open('pydata-book/ch06/ex2.csv') as f:
    print(f.read())

1,2,3,4,hello
5,6,7,8,world
9,10,11,12,foo


In [13]:
# header=None: treat every row as data and let pandas assign integer
# column labels (0, 1, 2, ...).
pd.read_csv(
    'pydata-book/ch06/ex2.csv',
    header=None,
)

Unnamed: 0,0,1,2,3,4
0,1,2,3,4,hello
1,5,6,7,8,world
2,9,10,11,12,foo


In [14]:
# Supply the column labels ourselves and promote 'message' to the row index.
names = list('abcd') + ['message']
pd.read_csv(
    'pydata-book/ch06/ex2.csv',
    names=names,
    index_col='message',
)

Unnamed: 0_level_0,a,b,c,d
message,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
hello,1,2,3,4
world,5,6,7,8
foo,9,10,11,12


In [15]:
# Show the raw contents of the two-key CSV. The context manager closes the
# file handle once the text has been read instead of leaking it.
with open('pydata-book/ch06/csv_mindex.csv') as f:
    print(f.read())

key1,key2,value1,value2
one,a,1,2
one,b,3,4
one,c,5,6
one,d,7,8
two,a,9,10
two,b,11,12
two,c,13,14
two,d,15,16



In [16]:
# Passing a list of column names to index_col builds a hierarchical
# (key1, key2) row index.
parsed = pd.read_csv(
    'pydata-book/ch06/csv_mindex.csv',
    index_col=['key1', 'key2'],
)
parsed

Unnamed: 0_level_0,Unnamed: 1_level_0,value1,value2
key1,key2,Unnamed: 2_level_1,Unnamed: 3_level_1
one,a,1,2
one,b,3,4
one,c,5,6
one,d,7,8
two,a,9,10
two,b,11,12
two,c,13,14
two,d,15,16


区切り文字を正規表現で指定する（空白の数が一定でないファイル向け）

In [17]:
# Read the whitespace-aligned text file into a list of raw lines. The
# context manager closes the file handle instead of leaking it.
with open('pydata-book/ch06/ex3.txt') as f:
    ex3_lines = f.readlines()
ex3_lines

['            A         B         C\n',
 'aaa -0.264438 -1.026059 -0.619500\n',
 'bbb  0.927272  0.302904 -0.032399\n',
 'ccc -0.264273 -0.386314 -0.217601\n',
 'ddd -0.871858 -0.348382  1.100491\n']

In [18]:
# Split fields on runs of whitespace. The pattern is a raw string because
# '\s' in a plain string literal is an invalid escape sequence
# (DeprecationWarning since Python 3.6, SyntaxWarning since 3.12).
# The header row has one field fewer than the data rows, so the first
# column of each data row becomes the index.
result = pd.read_table('pydata-book/ch06/ex3.txt', sep=r'\s+')
result

Unnamed: 0,A,B,C
aaa,-0.264438,-1.026059,-0.6195
bbb,0.927272,0.302904,-0.032399
ccc,-0.264273,-0.386314,-0.217601
ddd,-0.871858,-0.348382,1.100491


`skiprows` で任意の行をスキップできる

In [19]:
# Read the commented CSV into a list of raw lines. The context manager
# closes the file handle instead of leaking it.
with open('pydata-book/ch06/ex4.csv') as f:
    ex4_lines = f.readlines()
ex4_lines

['# hey!\n',
 'a,b,c,d,message\n',
 '# just wanted to make things more difficult for you\n',
 '# who reads CSV files with computers, anyway?\n',
 '1,2,3,4,hello\n',
 '5,6,7,8,world\n',
 '9,10,11,12,foo']

In [20]:
# Drop the three '#' comment lines (0-based file rows 0, 2 and 3) so that
# only the header row and the data rows are parsed.
pd.read_csv(
    'pydata-book/ch06/ex4.csv',
    skiprows=[0, 2, 3],
)

Unnamed: 0,a,b,c,d,message
0,1,2,3,4,hello
1,5,6,7,8,world
2,9,10,11,12,foo


欠測値

In [21]:
# Show the raw contents of the CSV with missing entries. The context
# manager closes the file handle once the text has been read.
with open('pydata-book/ch06/ex5.csv') as f:
    print(f.read())

something,a,b,c,d,message
one,1,2,3,4,NA
two,5,6,,8,world
three,9,10,11,12,foo


In [22]:
# With default settings, both the literal 'NA' and the empty field are
# parsed as missing values.
pd.read_csv(
    'pydata-book/ch06/ex5.csv',
)

Unnamed: 0,something,a,b,c,d,message
0,one,1,2,3.0,4,
1,two,5,6,,8,world
2,three,9,10,11.0,12,foo


In [23]:
# na_values takes extra strings that should also be read as missing.
result = pd.read_csv(
    'pydata-book/ch06/ex5.csv',
    na_values=['NULL'],
)
result

Unnamed: 0,something,a,b,c,d,message
0,one,1,2,3.0,4,
1,two,5,6,,8,world
2,three,9,10,11.0,12,foo


In [24]:
# Per-column missing-value markers: the dict maps a column name to the
# strings that count as missing in that column only.
sentinels = dict(
    message=['foo', 'NA'],
    something=['two'],
)
result = pd.read_csv(
    'pydata-book/ch06/ex5.csv',
    na_values=sentinels,
)
result

Unnamed: 0,something,a,b,c,d,message
0,one,1,2,3.0,4,
1,,5,6,,8,world
2,three,9,10,11.0,12,


### Reading Text Files in Pieces

In [41]:
# Current local wall-clock time, rendered as "YYYY-MM-DD HH:MM:SS".
datetime.datetime.strftime(datetime.datetime.today(), "%Y-%m-%d %H:%M:%S")

'2017-01-24 12:29:52'

In [42]:
# NOTE(review): `%imports_` is not a built-in IPython magic — presumably a
# user-defined magic that returns the names of the imported packages; confirm
# it is registered before this cell runs on a fresh kernel.
imports = %imports_
# Same watermark report as at the top of the notebook, plus the versions of
# the packages named in `imports` (-p).
%watermark -u -d -v -p $imports

last updated: 2017-01-24 

CPython 3.5.2
IPython 5.1.0

numpy 1.11.3
datetime n
pandas 0.19.2
matplotlib 1.5.3
