# 读写CSV数据

In [23]:
"""把数据写入csv

需要首先创建writer对象
"""

import csv

# Column titles, followed by one tuple per player record.
header = ['Name', 'Number', 'Gender']
rows = [
    ('James', 23, 'male'),
    ('Kobe', 24, 'male'),
    ('Shelly', 11, 'female'),
]

# Mode 'w' creates the file when it does not exist yet.
# newline='' is passed so that no blank lines get written between records.
with open('./files/players.csv', 'w', newline='') as csv_file:
    writer = csv.writer(csv_file)
    # Header row first, then every data row, in one call.
    writer.writerows([header, *rows])

In [16]:
"""写入字典序列数据"""

import csv

# Field names double as the CSV header and as the keys of each record.
header = ['Name', 'Number', 'Gender']
rows = [
    {'Name': 'Curry', 'Number': 30, 'Gender': 'male'},
    {'Name': 'Alita', 'Number': 12, 'Gender': 'female'},
    {'Name': 'Kevin', 'Number': 33, 'Gender': 'male'},
]

# Opening in 'w' mode overwrites whatever the file contained before.
with open('./files/players.csv', 'w', newline='') as csv_file:
    dict_writer = csv.DictWriter(csv_file, fieldnames=header)
    dict_writer.writeheader()
    # Write the dictionaries one at a time, in list order.
    for record in rows:
        dict_writer.writerow(record)

In [17]:
"""读取csv文件

需要首先创建reader()对象
"""

import csv

# The csv module performs its own newline handling, so the file is opened
# with newline='' (as the csv documentation recommends for readers too);
# without it, newlines embedded inside quoted fields may be misinterpreted.
with open('./files/players.csv', newline='') as f:
    fr = csv.reader(f)
    # The first record is the header row; everything after it is data.
    header = next(fr)
    print('Header: ', header)
    print('Rows: ')
    # Each row comes back as a list of strings.
    for row in fr:
        print(row)

Header:  ['Name', 'Number', 'Gender']
Rows: 
['Curry', '30', 'male']
['Alita', '12', 'female']
['Kevin', '33', 'male']


In [24]:
"""使用命名元祖读取csv数据

使用列名代替下标访问，只有在列名为合法python标识符时才有效，否则需要修改原始列名
"""

import csv
from collections import namedtuple

# Open with newline='' so the csv module controls newline handling itself
# (recommended by the csv documentation for reader file objects as well).
with open('./files/players.csv', newline='') as f:
    fr = csv.reader(f)
    header = next(fr)
    # Build a tuple type whose fields are the column names. This only works
    # when every column name is a valid Python identifier.
    Row = namedtuple('Row', header)
    for r in fr:
        row = Row(*r)
        # Columns can now be accessed by name instead of by index.
        print(row.Name)
        print(row)

James
Row(Name='James', Number='23', Gender='male')
Kobe
Row(Name='Kobe', Number='24', Gender='male')
Shelly
Row(Name='Shelly', Number='11', Gender='female')


# 读写JSON数据

In [19]:
"""读写JSON数据

JSON支持的基本数据类型包括None bool int float str
以及包含这些类型数据的列表、元组、字典
"""

import json

raw_data = {'name': 'James', 'number': 23, 'gender': 'male'}

# Serialize the Python object into a JSON-formatted string.
json_str = json.dumps(raw_data)
print(f'{type(json_str)} {json_str}')

# Parse the JSON string back into a Python object.
py_obj = json.loads(json_str)
print(f'{type(py_obj)} {py_obj}')

<class 'str'> {"name": "James", "number": 23, "gender": "male"}
<class 'dict'> {'name': 'James', 'number': 23, 'gender': 'male'}


In [22]:
"""使用json处理文件"""

import json

raw_data = {'name':'James', 'number':23, 'gender':'male'}

# Serialize the data into a JSON file.
with open('./files/players.json', 'w') as f:
    json.dump(raw_data, f)

# Read the data back from the same file that was just written
# (the path must match the one used for writing above).
with open('./files/players.json', 'r') as f:
    d = json.load(f)
    print(type(d), d)

<class 'dict'> {'name': 'James', 'number': 23, 'gender': 'male'}


In [31]:
"""格式化打印"""

import json
from pprint import pprint

raw_data = {'name': 'James', 'number': 23, 'gender': 'male'}

# Option 1: let json.dumps indent the serialized text.
indented_json = json.dumps(raw_data, indent=4)
print(indented_json)

# Option 2: pretty-print the Python object itself with pprint.
pprint(raw_data, width=10, indent=2)

{
    "name": "James",
    "number": 23,
    "gender": "male"
}
{ 'gender': 'male',
  'name': 'James',
  'number': 23}


# 读写XML数据

In [3]:
"""解析简单的XML数据"""

from urllib import request
from xml.etree.ElementTree import parse

# Fetch the RSS feed and build the element tree (doc = document) from the
# response; parse() reads the whole response before returning.
with request.urlopen('http://planet.python.org/rss20.xml') as response:
    doc = parse(response)

# For every feed item print its title, publication date and link,
# followed by an empty line as a separator.
for item in doc.iterfind('channel/item'):
    for tag in ('title', 'pubDate', 'link'):
        print(item.findtext(tag))
    print()

Mike Driscoll: Using Widgets in Jupyter Notebook (Video)
Mon, 06 Jul 2020 01:23:26 +0000
https://www.blog.pythonlibrary.org/2020/07/05/using-widgets-in-jupyter-notebook-video/

Nikola: Nikola v8.1.1 is out!
Sun, 05 Jul 2020 21:44:00 +0000
https://getnikola.com/blog/nikola-v811-is-out.html

Glyph Lefkowitz: Zen Guardian
Sun, 05 Jul 2020 20:44:00 +0000
https://glyph.twistedmatrix.com/2020/07/zen-guardian.html

PSF GSoC students blogs: Weekly Check-In: Week 6
Sun, 05 Jul 2020 17:35:04 +0000
https://blogs.python-gsoc.org/en/lenixlobos-blog/weekly-check-in-week-6/

PSF GSoC students blogs: Weekly Check-in #6
Sun, 05 Jul 2020 16:03:39 +0000
https://blogs.python-gsoc.org/en/nibba2018s-blog/weekly-check-in-6-7/

Ian Ozsvald: Weekish notes
Sun, 05 Jul 2020 15:42:33 +0000
https://ianozsvald.com/2020/07/05/weekish-notes/

PSF GSoC students blogs: [Week 5] Check-in
Sun, 05 Jul 2020 14:21:24 +0000
https://blogs.python-gsoc.org/en/sangyxs-blog/week-5-check-in-1/

The Digital Cat: Flask project setup

In [4]:
"""使用尽可能少的内存，增量式解析大型XML文件"""

from xml.etree.ElementTree import iterparse

In [8]:
"""将字典转换为XML"""

from xml.etree.ElementTree import Element, tostring

def dict_to_xml(tag, dic):
    """Build an XML element from a flat dictionary.

    Args:
        tag: Name of the enclosing element.
        dic: Mapping whose keys become child element names and whose
            values, converted with str(), become the children's text.

    Returns:
        A new Element named *tag* holding one child per dictionary item,
        in the dictionary's iteration order.
    """
    def make_child(name, value):
        # One child element per key; its text is the stringified value.
        node = Element(name)
        node.text = str(value)
        return node

    parent = Element(tag)
    parent.extend([make_child(name, value) for name, value in dic.items()])
    return parent

# Demo: convert a sample record into a <player> element
dic = {
    'name': 'James',
    'age': 33,
    'gender': 'male',
}
el = dict_to_xml('player', dic)

# Attach an attribute to the element
el.set('id', '1')

# Serialize the element to bytes and show the result
xml_bytes = tostring(el)
print(xml_bytes)

b'<player id="1"><name>James</name><age>33</age><gender>male</gender></player>'


In [None]:
"""解析和修改XML"""

from xml.etree.ElementTree import parse, Element

# Load the document and get its root element
doc = parse('test.xml')
root = doc.getroot()

# Remove the first direct child named 'foo'
root.remove(root.find('foo'))

# Look up the position of the 'bar' child among the root's direct children.
# Element.getchildren() was removed in Python 3.9; list(root) produces the
# same list of direct children on every Python 3 version.
list(root).index(root.find('bar'))

# Insert a new element at index 2
el = Element('new')
el.text = 'This is a new node'
root.insert(2, el)

# Write the modified tree back to the XML file
doc.write('test.xml', xml_declaration=True)