### 1.文本数据的读写

1.创建csv文件

In [54]:
import csv

headers = ['a','b','c','d','message']

rows = [('1', '2', '3', '4', 'hello'),
        ('5', '6', '7', '8', 'world'),
        ('9', '10', '11', '12', 'foo')]

with open('ex1.csv','w',newline = '') as f:
    f_csv = csv.writer(f)
    f_csv.writerow(headers)
    f_csv.writerows(rows)
    
with open('ex2.csv','w',newline = '') as f:
    f_csv = csv.writer(f)
    f_csv.writerows(rows)

with open('ex1.csv') as f:
    f_csv = csv.reader(f)
    for row in f_csv:
        print(row)

['a', 'b', 'c', 'd', 'message']
['1', '2', '3', '4', 'hello']
['5', '6', '7', '8', 'world']
['9', '10', '11', '12', 'foo']


2.使用pandas库中的read_csv模块读入文件内容

In [55]:
import pandas as pd

df = pd.read_csv('ex1.csv')
df

Unnamed: 0,a,b,c,d,message
0,1,2,3,4,hello
1,5,6,7,8,world
2,9,10,11,12,foo


也可以使用read_table读入，并指定分隔符

In [45]:
pd.read_table('ex1.csv', sep = ',')

Unnamed: 0,a,b,c,d,message
0,1,2,3,4,hello
1,5,6,7,8,world
2,9,10,11,12,foo


3.如果文件中没有表头行，可以自己设置header

In [46]:
pd.read_csv('ex2.csv',header = None)

Unnamed: 0,0,1,2,3,4
0,1,2,3,4,hello
1,5,6,7,8,world
2,9,10,11,12,foo


In [47]:
pd.read_csv('ex2.csv',names = ['a','b','c','d','message'])

Unnamed: 0,a,b,c,d,message
0,1,2,3,4,hello
1,5,6,7,8,world
2,9,10,11,12,foo


4.指定索引内容

In [48]:
names = ['a','b','c','d','message']
pd.read_csv('ex1.csv',names = names, index_col = 'message')

Unnamed: 0_level_0,a,b,c,d
message,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
message,a,b,c,d
hello,1,2,3,4
world,5,6,7,8
foo,9,10,11,12


当指定索引内容是多个列时，形成一个分层索引

In [49]:
pd.read_csv('ex1.csv', index_col = ['message', 'a'])

Unnamed: 0_level_0,Unnamed: 1_level_0,b,c,d
message,a,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
hello,1,2,3,4
world,5,6,7,8
foo,9,10,11,12


5.可以使用skiprows来跳过第二行

In [59]:
pd.read_csv('ex1.csv',skiprows = [2])

Unnamed: 0,a,b,c,d,message
0,1,2,3,4,hello
1,9,10,11,12,foo


### 2.将数据写入文本格式

1.使用to_csv方法，将数据写入文本

In [61]:
data = pd.read_csv('ex1.csv')
data

Unnamed: 0,a,b,c,d,message
0,1,2,3,4,hello
1,5,6,7,8,world
2,9,10,11,12,foo


In [62]:
data.to_csv('out.csv')

2.自定义数据的分隔符

In [63]:
import sys
data.to_csv(sys.stdout,sep='|')

|a|b|c|d|message
0|1|2|3|4|hello
1|5|6|7|8|world
2|9|10|11|12|foo


3.缺失值转化为文本时，默认为空字符串，也可通过na_rep参数自定义格式

In [65]:
data.to_csv(sys.stdout, na_rep = 'NULL')

,a,b,c,d,message
0,1,2,3,4,hello
1,5,6,7,8,world
2,9,10,11,12,foo


4.写入文本时，默认行和列的标签都会被写入，二者可以禁止写入

In [66]:
data.to_csv(sys.stdout, header = False, index = False)

1,2,3,4,hello
5,6,7,8,world
9,10,11,12,foo


5.写入文本中指定的列

In [67]:
data.to_csv(sys.stdout, index = False, columns = ['a', 'b', 'c'])

a,b,c
1,2,3
5,6,7
9,10,11
