### 迭代器iter，生成器

In [24]:
# iter() returns an iterator object. Iterator objects implement __next__,
# so the built-in next() can be used to pull values out of them.
# A `for x in a:` loop calls iter(a) behind the scenes.
a = list(range(1, 9))
print(f'iter: {iter(a)}')
# Each iter(a) call below builds a brand-new iterator, so both lookups
# return the first element of the list.
first_via_dunder = iter(a).__next__()
print(first_via_dunder)
first_via_builtin = next(iter(a))
print(first_via_builtin)

iter: <list_iterator object at 0x7fbc72248280>
1
1


In [36]:
# What the iterator protocol says about iter(). The rule can be used to make
# sure an argument passed in by a caller is not an iterator object itself:
#   * when an iterator is passed to the built-in iter(), that same iterator
#     is returned;
#   * when a container is passed instead, every call returns a new iterator.
a = list(range(1, 9))
it1 = iter(a)
it2 = iter(it1)
assert it2 is it1
drained_first = list(it1)
print(drained_first)
# it1 and it2 are the same object, and the list() call above already exhausted it.
drained_second = list(it2)
print(drained_second)

it1, it3 = iter(a), iter(a)
try:
    assert it3 is it1
except AssertionError as err:
    # The assertion carries no message, so this prints an empty line.
    print(err)

# it1 and it3 are different iterator objects, but they produce the same values.
values_from_it1 = list(it1)
print(values_from_it1)
values_from_it3 = list(it3)
print(values_from_it3)

[1, 2, 3, 4, 5, 6, 7, 8]
[]

[1, 2, 3, 4, 5, 6, 7, 8]
[1, 2, 3, 4, 5, 6, 7, 8]


### 生成器 generator，next

In [19]:
# A generator is a function that uses a yield expression. Calling it does not
# actually run the body; it returns an iterator instead.
# Every next() call on that iterator advances the generator to its next yield
# expression and returns the value that was yielded.

a = list(range(1, 9))


def generator(data):
    """Yield the elements of ``data`` one at a time, in iteration order."""
    for element in iter(data):
        yield element


it = generator(a)
print(f'it: {it}, type: {type(it)}')
# Two next() calls consume the first two elements of `a`.
print(f'next: {next(it)}')
print(f'next: {next(it)}')

def generator(data):
    """Yield ``float('inf')`` first when ``data`` starts with 1, then every element.

    Args:
        data: An indexable, iterable sequence. It may be empty.

    Yields:
        ``float('inf')`` once if ``data[0] == 1``, followed by each element
        of ``data`` in order. An empty sequence yields nothing.
    """
    # An empty sequence has no data[0]; treat that as "does not start with 1"
    # instead of letting IndexError escape from the generator.
    try:
        starts_with_one = data[0] == 1
    except IndexError:
        starts_with_one = False

    if starts_with_one:
        yield float('inf')

    for x in data:
        yield x
it = generator(a)
print(f'it: {it}, type: {type(it)}')
# Consume the first two yielded values.
print(f'next: {next(it)}')
print(f'next: {next(it)}')

# An iterator can only be consumed once: the first pass collects whatever is
# left, the second pass finds it already exhausted.
b = [*it]
print(f'b: {b}')
c = [*it]
print(f'c: {c}')

it: <generator object generator at 0x7fbc72234d60>, type: <class 'generator'>
next: 1
next: 2
it: <generator object generator at 0x7fbd5b91bd60>, type: <class 'generator'>
next: inf
next: 1
b: [2, 3, 4, 5, 6, 7, 8]
c: []


In [16]:
import torch
import torchvision
import torchvision.transforms as transforms

# NOTE(review): the dataset root is an absolute, machine-specific path.
mnist_root = r'/data/lzh/data/datasets/mnist'
mnist = torchvision.datasets.MNIST(mnist_root)
print(f'mnist: {mnist}, type: {type(mnist)}')
# dir() lists every member name available on the dataset object.
member_names = dir(mnist)
print(member_names)

def load(data):
    """Lazily yield ``(x, y)`` pairs from ``data`` with ``x`` converted by ``ToTensor``.

    Args:
        data: Iterable of ``(x, y)`` pairs. Each ``x`` must be an input that
            ``transforms.ToTensor()`` accepts.

    Yields:
        ``(x, y)`` where ``x`` is the result of ``transforms.ToTensor()`` applied
        to the original ``x`` and ``y`` is passed through unchanged.
    """
    # Build the converter once instead of once per sample.
    to_tensor = transforms.ToTensor()
    # Iterate over the argument. The previous version looped over the global
    # `mnist` and silently ignored `data`.
    for x, y in data:
        yield to_tensor(x), y

# The generator is lazy: only the first (x, y) pair is converted here.
loader = load(mnist)
first_sample = next(loader)
x, y = first_sample
print(x.shape, y)

mnist: Dataset MNIST
    Number of datapoints: 60000
    Root location: /data/lzh/data/datasets/mnist
    Split: Train, type: <class 'torchvision.datasets.mnist.MNIST'>
['__add__', '__annotations__', '__class__', '__class_getitem__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattr__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__orig_bases__', '__parameters__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__slots__', '__str__', '__subclasshook__', '__weakref__', '_check_exists', '_check_legacy_exist', '_format_transform_repr', '_is_protocol', '_load_data', '_load_legacy_data', '_repr_indent', 'class_to_idx', 'classes', 'data', 'download', 'extra_repr', 'functions', 'mirrors', 'processed_folder', 'raw_folder', 'register_datapipe_as_function', 'register_function', 'resources', 'root', 'target_transform'

### 闭包，nonlocal，global

In [5]:
# Scoping gotcha: this behavior keeps local variables inside a function from
# polluting the module outside of it.

def test():
    """Show that a plain assignment in a nested function leaves the outer name alone."""
    ex_a = 1

    def shadow_outer():
        # A plain assignment creates a brand-new local variable here.
        ex_a = 2
        print(ex_a)

    shadow_outer()
    print(ex_a)


test()

2
1


In [6]:
# nonlocal gives a nested function access to data in its closure: the name is
# looked up in the enclosing scope.
# Limitation of nonlocal: it does not reach up to module level, which prevents
# polluting the global scope.
# It is best used only in simple functions.
def test():
    """Show that ``nonlocal`` lets a nested function rebind the enclosing variable."""
    ex_a = 1

    def rebind_outer():
        nonlocal ex_a
        # This assignment modifies the variable that lives outside the closure.
        ex_a = 2
        print(ex_a)

    rebind_outer()
    print(ex_a)


test()

2
2


In [17]:
# `global` makes the function modify the module-scope variable directly.
ex_a = 2


def test():
    """Add 3 to the module-level ``ex_a`` and print the updated value."""
    global ex_a
    ex_a = ex_a + 3
    print(ex_a)


test()
# The module-level name now holds the value assigned inside the function.
print(ex_a)

5
5


### 合理利用try/except/else/finally结构中的每个代码块

In [1]:
# try/finally: use it when an exception must propagate upward while cleanup
#   work still has to run as the exception is raised.
# try/except/else: makes clear which exceptions your own code handles and
#   which ones propagate to the level above; the else block runs only when no
#   exception occurred.

a = 1
try:
    try:
        a /= 0
    finally:
        # Cleanup step: runs even though ZeroDivisionError is propagating.
        # `a` is still 1 because the failed division never rebinds it.
        print(a)
except ZeroDivisionError as exc:
    # The error has already propagated out of the try/finally above. It is
    # caught here only so the cell does not abort "Restart & Run All"; the
    # printed line mirrors the last line of the traceback.
    print(f'{type(exc).__name__}: {exc}')

1


ZeroDivisionError: division by zero

### 了解bytes、str与unicode区别

In [2]:
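# Python 3 has two types that represent sequences of characters: bytes and str.
# bytes: instances of this type contain raw 8-bit values (raw bytes; each byte has 8 binary bits, so these are also called raw, native, or plain 8-bit values).
# str: instances of this type contain Unicode characters.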

In [14]:
# Unicode characters can be represented as binary data (raw 8-bit values) in
# many ways; the most common encoding is UTF-8.
#   Unicode characters -> binary data: encode
#   binary data -> Unicode string: decode
str_ = '你'
# UTF-8 is already the default codec for str.encode(); it is spelled out here.
bytes_ = str_.encode('utf-8')
print(f'bytes: {bytes_}, type: {type(bytes_)}')

# UTF-8 is likewise the default codec for bytes.decode().
str__ = bytes_.decode('utf-8')
print(f'str: {str__}, type: {type(str__)}')

bytes: b'\xe4\xbd\xa0', type: <class 'bytes'>
str: 你, type: <class 'str'>


In [48]:
# Write binary data to a file, then read it back.
import os

# Write: a file opened in 'wb' mode takes bytes, so the text is encoded first.
bytes_ = 'a'.encode()
with open('./random.bin', 'wb') as f:
    f.write(bytes_)

# Read: a file opened in 'rb' mode returns bytes; decode() turns them back into str.
with open('./random.bin', 'rb') as f:
    bytes_ = f.readline()
print(bytes_)
decoded_text = bytes_.decode()
print(decoded_text)

b'a'
a
