# 27. 用列表推到取代map与filter

In [12]:
# 使用map完成一个列表的平方
a = range(0,10,1)
squares_m = list(map(lambda x: x**2, a)) # map函数生成迭代器，需要使用list将其进行生成

In [13]:
squares_l = [x**2 for x in a]
print(f'列表推导：{squares_l}\n'
      f'map推导{squares_m}')

列表推导：[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
map推导[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]


In [14]:
a

range(0, 10)

In [15]:
# 但是在进行条件筛选时列表更方便
even_squares = [x**2 for x in a if x % 2 ==0]
even_squares

[0, 4, 16, 36, 64]

# 28. 控制推导逻辑的子表达式不要超过两个

In [1]:
# 对维度不多的对象可以使用多阶推导进行拆分，但多过三个的对象还是使用for循环写
matrix = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
flat = [x for row in matrix for x in row]
flat

[1, 2, 3, 4, 5, 6, 7, 8, 9]

# 29. 用赋值表达式消除推导中出现的重复代码

In [1]:
stock = {'nails': 125,
         'screws': 35,
         'wingnuts': 8,
         'washers': 24}
order = ['screws', 'wingnuts', 'clips']
def get_batches(count, size = 8):
      return count // size
found = { name: get_batches(stock.get(name, 0)) 
          for name in order if get_batches(stock.get(name, 0))}
found

{'screws': 4, 'wingnuts': 1}

In [2]:
# 使用海豹表达式完成简化
found = {name: batches for name in order
         if (batches := get_batches(stock.get(name, 0)))}
found

{'screws': 4, 'wingnuts': 1}

In [3]:
# 推导顺序很重要，会从条件赋值表达式开始,下式则会报错
result = {name: (tenth := count //10)
          for name, count in stock.items() if tenth >0}

NameError: name 'tenth' is not defined

In [4]:
# 做如下修改就可以正常运行
result = {name: tenth
          for name, count in stock.items() if (tenth := count //10) >0}

In [5]:
result

{'nails': 12, 'screws': 3, 'washers': 2}

In [6]:
# 如果使用了：=赋值，那么在推导中会产生变量泄露
# 即最后一个循环的变量被赋给迭代工具变量（类似for循环）
half = [(last:= count //2) for count in stock.values()]
print(f'last为最后一个值{last}, half列表为{half}')

last为最后一个值12, half列表为[62, 17, 4, 12]


In [7]:
# 如果在推导式中不使用海豹表达式赋值，那么不会产生变量溢出的问题
half = [count //2 for count in stock.values()]
print(half)
print(count)

[62, 17, 4, 12]


NameError: name 'count' is not defined

## 30.不要让函数直接返回列表，应该让他逐渐生成列表的值

In [8]:
def index_words(text):
    result = []
    if text:
        result.append(0)
    for index, letter in enumerate(text):
        if letter == ' ':
            result.append(index + 1)
    return result


In [9]:
address = 'Four score and seven years ago...'
result = index_words(address)
print(result[:10])

[0, 5, 11, 15, 21, 27]


- 上述方法有两个缺点
1. 代码杂乱没有突出重点的index,而是突出了append
2. 既要存列表又要return

In [10]:
# 使用生成器的实现
def index_words_iter(text):
    if text:
        yield 0
    for index, letter in enumerate(text):
        if letter == ' ':
            yield index + 1

In [11]:
[index for index in index_words_iter(address)]

[0, 5, 11, 15, 21, 27]

In [13]:
[index_words_iter(address)]


[<generator object index_words_iter at 0x0000029D9ACA2180>]

In [14]:
it = index_words_iter(address)

In [15]:
# 使用itertools.islice只生成10次
import itertools
itertools.islice(it, 0, 10)

<itertools.islice at 0x29d9ad23470>

In [16]:
print(list(result))

[0, 5, 11, 15, 21, 27]


## 31. 谨慎地迭代函数所收到的参数

In [18]:
# 定义一个归一化函数
def normalize(numbers: list[int]):
    total = sum(numbers)
    result = []
    for value in numbers:
        percent = 100 * value / total
        result.append(percent)
    return result

In [20]:
vitits = [15, 35, 80]
percentages = normalize(vitits)
print(percentages)
assert sum(percentages) == 100.0

[11.538461538461538, 26.923076923076923, 61.53846153846154]


In [22]:
# 生成器/迭代器只能产生一次结果，当嵌套使用时会产生无数据提供的现象。
# 为避免此现象，嵌套时可以只传入生成器的生成函数，在主处理函数内部对生成函数进行实例化
def normalize_func(get_iter):
    total = sum(get_iter())
    reslut = []
    for value in get_iter():
        percent = 100 * value / total
        result.append(percent)
    return result

# 30. 不要让函数直接返回列表，应该让其逐个生成列表里的值

In [3]:
# 此函数将一个字符串中各单词首字母在句子中处在的位置坐标
def index_words(text: str):
    result = []
    if text:
        result.append(0)
    for index, letter in enumerate(text):
        if letter == " " :
            result.append(index+1)
    return result

In [6]:
address = 'Four score and seven years ago...'
result = index_words(address)
print(result[:10])

[0, 5, 11, 15, 21, 27]


In [7]:
# 1. 使用上述代码会模糊重点内容（index+1）
# 2. 函数直接返回列表会占用大量内存，且使用append操作过多
def index_words(text: str):
    if text:
        yield 0
    for index, letter in enumerate(text):
        if letter == " " :
            yield index + 1

In [9]:
it = index_words(address)
print(next(it))
print(next(it))
# 如果对此函数仍想要列表，可以使用list对迭代器完成遍历
list(it)

0
5


[11, 15, 21, 27]

In [10]:
def index_file(handle):
    offset = 0
    for line in handle:
        if line:
            yield offset
        for letter in line:
            offset += 1
            if letter == " ":
                yield offset

In [11]:
import itertools
with open('chapter4.txt', 'r') as f:
    it = index_file(f)
    result = itertools.islice(it, 0, 10)
    print(list(result))

[0, 6, 12, 17, 20, 27]


# 31. 谨慎地迭代函数所收到的参数

In [4]:
# 定义一个归一化函数，函数内先求游客总数，再计算各元素的占比
def normalize(numbers):
    total = sum(numbers)
    result = []
    for value in numbers:
        percent = 100 * value / total
        result.append(percent)
    return result

In [5]:
visits = [15, 35, 80]
percentages = normalize(visits)

In [6]:
print(percentages)

[11.538461538461538, 26.923076923076923, 61.53846153846154]


In [7]:
# 当数据规模更大时，考虑使用迭代器
def read_visits(data_path):
    with open(data_path) as f:
        for line in f:
            yield int(line)

In [8]:
it = read_visits('chapter4.txt')
percentages = normalize(it)
print(percentages)
# 使用上述代码会导致出现空列表，因为normalize中的sum已经完成了对迭代器（read_visit)的迭代，后边的for循环就不再产生元素了

[]


In [9]:
# 避免上述现象的一个方法是再normalize函数中执行一遍迭代器形成新的列表
def normalize(numbers):
    numbers_copy = list(numbers)
    total = sum(numbers_copy)
    result = []
    for value in numbers_copy:
        percent = 100 * value /total
        result.append(percent)
    return result

In [10]:
it = read_visits('chapter4.txt')
percentages = normalize(it)
print(percentages)

[12.037037037037036, 13.88888888888889, 74.07407407407408]


In [11]:
# 但是此方法又出现了一个完整的list，与使用迭代器的初衷相违背
# 使用中间函数在nor函数每次调用迭代器时都提供新的迭代器
def normalize(get_iter):
    total = sum(get_iter())
    result = []
    for value in get_iter():
        percent = 100 * value /total
        result.append(percent)
    return result
# 此时给normalize传参需要使用lambda表达式，使得函数内部每次‘()’都能生成新的迭代器
percentages = normalize(lambda : read_visits('chapter4.txt'))
print(percentages)

[12.037037037037036, 13.88888888888889, 74.07407407407408]


- 除了上述方法,还可以创建自定义容器完成如上需求

In [12]:
class ReadVisits:
    def __init__(self, data_path):
        self.data_path = data_path
    def __iter__(self):  # 定义该容器的迭代器规则，从传入路径中读取每行的数据并取整后作为迭代元素
        with open(self.data_path) as f:
            for line in f:
                yield int(line)
# 上述函数使用最初的nor函数即可
def normalize(numbers):
    total = sum(numbers)  # 第一次触发‘__iter__‘函数分配一个迭代器
    result = []
    for value in numbers:  # 第二次触发‘__iter__‘函数分配另一个迭代器，多个迭代器间并不影响
        percent = 100 * value / total
        result.append(percent)
    return result

In [13]:
visits = ReadVisits('chapter4.txt')
percentages = normalize(visits)
print(percentages)

[12.037037037037036, 13.88888888888889, 74.07407407407408]


In [16]:
# 可以添加类型判断避免普通迭代器传入nor函数
def normalize_defensive(numbers):
    if iter(numbers) is numbers:
        raise TypeError('必须传入一个容器而不是普通迭代器')
    total = sum(numbers)  # 第一次触发‘__iter__‘函数分配一个迭代器
    result = []
    for value in numbers:  # 第二次触发‘__iter__‘函数分配另一个迭代器，多个迭代器间并不影响
        percent = 100 * value / total
        result.append(percent)
    return result

In [17]:
normalize_defensive(iter([15, 35, 80]))

TypeError: 必须传入一个容器而不是普通迭代器

In [18]:
normalize_defensive(visits)

[12.037037037037036, 13.88888888888889, 74.07407407407408]

In [19]:
type(visits)

__main__.ReadVisits

In [20]:
iter(visits)  # 返回自定义类型的迭代器

<generator object ReadVisits.__iter__ at 0x0000019C9DC66650>

In [21]:
iter([0,1,2])

<list_iterator at 0x19c9c7c4340>

In [22]:
# 使用上述函数时，需要传入lambda表达式，这样每次索要get_iter时才能给出一个新生成器
def read_visits(path):
    with open(path) as f:
        for line in f:
            yield int(line)
path = r'./address.txt'
normalize_func(lambda: read_visits(path))

NameError: name 'normalize_func' is not defined

In [None]:
# 可以使用类特性构建一个新容器处理此问题
class ReadVisits:
    def __init__(self, data_path: str) -> None:
        self.data_path = data_path
    
    def __iter__(self):
      with open(self.data_path) as f:
         for line in f:
            yield int(line)
# 如上容器构建后，只需要把新容器传给normalize运行即可。
visits = ReadVisits('./visit.txt')
percentages = normalize(visits)
# 在normalize中,sum方法先调用iter生成一次迭代器,之后的for循环还会触发一次
# 在处理函数中显加入类型判断



In [25]:
from collections.abc import Iterator

def normalize_defensive(numbers: Iterator):
   if isinstance(numbers, Iterator):
      raise TypeError('Must supply a container')
   total = sum(numbers)
   result = []
   for value in numbers:
      percent = 100 * value / total
      result.append(percent)
   return result


In [27]:
visits = [15, 35, 80]
percentages = normalize_defensive(visits)
assert sum(percentages) == 100.0
# 如果是普通迭代器会报错
visits = iter([15, 35, 80])
percentages = normalize_defensive(visits)
assert sum(percentages) == 100.0

TypeError: Must supply a container

**Tips**
- 函数和方法中如果要将一个参数迭代多次,需要注意迭代器状态
- python的迭代器协议规定了容器和迭代器应该如何与iter\next函数\for循环及相关表达式交互
- 要想让自定义类型容器可以跌打.只需要实现'__iter__'方法
- 可以通过iter函数判断对象的小时否是普通迭代器,如果返回是本身则为普通迭代器

# 32. 考虑用生成器表达式改写数据量较大的列表推导

In [1]:
# 使用列表推导式的情况
value = [len(x) for x in open('./data/my_file.txt')]
print(value)

[4, 3, 3, 2, 3, 3, 2, 3, 3, 2]


In [2]:
# 如若数据很大,列表需要读取所有的数据再做处理,
# 会导致内存被过分消耗, 尝试使用生成器改写
it = (len(x) for x in open('./data/my_file.txt'))
print(it)

<generator object <genexpr> at 0x00000190B6435540>


In [3]:
# 生成器还可以实现嵌套
roots = ((x, x**0.5) for x in it)
next(roots)

(4, 2.0)

- 通过列表推导的在大数据时会占用很多内存
- 生成器每次只计算一个数据
- 生成器可以嵌套
- 需要注意生成器的状态, 跑完一轮循环后就不能使用了

# 33. 尝试使用yield from将多个生成器链接

In [4]:
def move(period, speed):
    for _ in range(period):
        yield speed

def pause(delay):
    for _ in range(delay):
        yield 0

In [5]:
def animate():
    for delta in move(4, 5.0):
        yield delta
    for delta in pause(3):
        yield delta
    for delta in move(2, 3.0):
        yield delta

In [6]:
def render(delta):
    print(f'Delta : {delta :.1f}')

def run(func):
    for delta in func():
        render(delta)

In [8]:
run(animate)
# 上文的写法嵌套了2层yield,并在run中执行了一次生成器

Delta : 5.0
Delta : 5.0
Delta : 5.0
Delta : 5.0
Delta : 0.0
Delta : 0.0
Delta : 0.0
Delta : 3.0
Delta : 3.0


In [9]:
def animate_composed():
    yield from move(4, 5.0)
    yield from pause(3)
    yield from move(2,7.3)

In [10]:
run(animate_composed)

Delta : 5.0
Delta : 5.0
Delta : 5.0
Delta : 5.0
Delta : 0.0
Delta : 0.0
Delta : 0.0
Delta : 7.3
Delta : 7.3


In [11]:
import itertools
help(itertools)

Help on built-in module itertools:

NAME
    itertools - Functional tools for creating and using iterators.

DESCRIPTION
    Infinite iterators:
    count(start=0, step=1) --> start, start+step, start+2*step, ...
    cycle(p) --> p0, p1, ... plast, p0, p1, ...
    repeat(elem [,n]) --> elem, elem, elem, ... endlessly or up to n times
    
    Iterators terminating on the shortest input sequence:
    accumulate(p[, func]) --> p0, p0+p1, p0+p1+p2
    chain(p, q, ...) --> p0, p1, ... plast, q0, q1, ...
    chain.from_iterable([p, q, ...]) --> p0, p1, ... plast, q0, q1, ...
    compress(data, selectors) --> (d[0] if s[0]), (d[1] if s[1]), ...
    dropwhile(pred, seq) --> seq[n], seq[n+1], starting when pred fails
    groupby(iterable[, keyfunc]) --> sub-iterators grouped by value of keyfunc(v)
    filterfalse(pred, seq) --> elements of seq where pred(elem) is False
    islice(seq, [start,] stop [, step]) --> elements from
           seq[start:stop:step]
    pairwise(s) --> (s[0],s[1]), (s[

# 36. 考虑用itertools拼装迭代器与生成器

In [13]:
# 链接多个迭代器
# chain, 用于拼接多个迭代器
it = itertools.chain([1, 2, 3], [4, 5, 6])
print(list(it))

[1, 2, 3, 4, 5, 6]


In [15]:

# repeat, 用于将一个迭代器重复输出
it = itertools.repeat([1,2], 3)
print(list(it))
it = itertools.repeat('here', 3)
print(list(it))

[[1, 2], [1, 2], [1, 2]]
['here', 'here', 'here']


In [16]:
# cycle, 用于循环地输出某段内容之中的各项元素
it = itertools.cycle([1, 2])
result = [next(it) for _ in range(10)]# 10代表总数,而不是循环次数
print(result)

[1, 2, 1, 2, 1, 2, 1, 2, 1, 2]


In [18]:
# zip_longest, 用于迭带多个迭代器, 迭代次数取最长的,以指定值对其进行填充
keys = ['one', 'two', 'three']
values = [1, 2]
it = itertools.zip_longest(keys, values, fillvalue='555')
longest = list(it)
print('zip_longest: ', longest)

zip_longest:  [('one', 1), ('two', 2), ('three', '555')]


In [19]:
# 过滤迭代器中的元素
# islice, 按照下表对迭代器进行切片
# takewhile 一直返回结果为true的元素, 直到第一个为false的元素
# dropwhile 反向, 会从第一个true开始返回,直到末尾