# json与jsonl的读取

In [2]:
import json
import jsonlines
import itertools

创建01*test.json文件，将下一行数据粘贴到文件里

{ "id": 1, "name": "Alice", "age": 30, "city": "Beijing" }

In [3]:
import json
with open('01*test.json', 'r', encoding='utf-8') as f:
    data = json.load(f)
data

{'id': 1, 'name': 'Alice', 'age': 30, 'city': 'Beijing'}

此时json为
{ "id": 1, "name": "Alice", "age": 30, "city": "Beijing" }, { "id": 2, "name": "Bob", "age": 25, "city": "Shanghai" }, 
{ "id": 3, "name": "Charlie", "age": 28, "city": "Guangzhou" }
此时json文件自身就已经报错了
加载呢？

In [3]:
with open('01*test.json', 'r', encoding='utf-8') as f:
    data = json.load(f)
data

OSError: [Errno 22] Invalid argument: '01*test.json'

加载自然也会报错
修改一下格式

[ 
{ "id": 1, "name": "Alice", "age": 30, "city": "Beijing" }, { "id": 2, "name": "Bob", "age": 25, "city": "Shanghai" }, 
{ "id": 3, "name": "Charlie", "age": 28, "city": "Guangzhou" } 
]

In [5]:
with open('01^test.json', 'r', encoding='utf-8') as f:
    data = json.load(f)
data

[{'id': 1, 'name': 'Alice', 'age': 30, 'city': 'Beijing'},
 {'id': 2, 'name': 'Bob', 'age': 25, 'city': 'Shanghai'},
 {'id': 3, 'name': 'Charlie', 'age': 28, 'city': 'Guangzhou'}]

使用json.dump可以写入json

In [6]:
with open('01^output.json', 'w', encoding='utf-8') as f:
    json.dump(data, f, ensure_ascii=False, indent=2)

记得我们报错的数据吗
如果把文件后缀改为jsonl 
文件这次不报错了
但我们加载要换方式了

In [14]:
data = []
with open('01^test.jsonl','r',encoding='utf-8') as f:
    for line in f:
        line = json.loads(line) 
        data.append(line)

data


[{'id': 1, 'name': 'Alice', 'age': 30, 'city': 'Beijing'},
 {'id': 2, 'name': 'Bob', 'age': 25, 'city': 'Shanghai'},
 {'id': 3, 'name': 'Charlie', 'age': 28, 'city': 'Guangzhou'}]

思考一下，json.load
和json.loads有什么区别呢

当然也有jsonlines 

In [6]:
import jsonlines
with jsonlines.open('01^test.jsonl') as reader:
    data = reader
data

<jsonlines.Reader at 0x2d3dd19e410 wrapping '01^test.jsonl'>

但这样的话，data只是个迭代器

In [15]:
with jsonlines.open('01^test.jsonl') as reader:
    for obj in reader:
        print(obj)

{'id': 1, 'name': 'Alice', 'age': 30, 'city': 'Beijing'}
{'id': 2, 'name': 'Bob', 'age': 25, 'city': 'Shanghai'}
{'id': 3, 'name': 'Charlie', 'age': 28, 'city': 'Guangzhou'}


In [16]:
with jsonlines.open('01^test.jsonl') as reader:
    data = list(reader)
    print(data)

[{'id': 1, 'name': 'Alice', 'age': 30, 'city': 'Beijing'}, {'id': 2, 'name': 'Bob', 'age': 25, 'city': 'Shanghai'}, {'id': 3, 'name': 'Charlie', 'age': 28, 'city': 'Guangzhou'}]


In [3]:
import itertools
with jsonlines.open('01^test.jsonl') as reader:
    data = list(itertools.islice(reader,2))
data

NameError: name 'jsonlines' is not defined

迭代器特殊之处：

In [3]:
data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
iterator = iter(data)

# 第一次调用
chunk1 = list(itertools.islice(iterator, 3)) # [1, 2, 3]
print("Chunk 1:", chunk1)

# 第二次调用
chunk2 = list(itertools.islice(iterator, 3))  # [4, 5, 6]
print("Chunk 2:", chunk2)

# 第三次调用
chunk3 = list(itertools.islice(iterator, 3))  # [7, 8, 9]
print("Chunk 3:", chunk3)

# 第四次调用
chunk4 = list(itertools.islice(iterator, 3))  # [10]
print("Chunk 4:", chunk4)

# 第五次调用
chunk5 = list(itertools.islice(iterator, 3))  # []
print("Chunk 5:", chunk5)  # 空列表，循环结束

Chunk 1: [1, 2, 3]
Chunk 2: [4, 5, 6]
Chunk 3: [7, 8, 9]
Chunk 4: [10]
Chunk 5: []
