# 27. 用列表推导取代map与filter

In [12]:
# Square every element of a range using map().
a = range(10)
# map() returns a lazy iterator, so it is unpacked into a list to materialise the values.
squares_m = [*map(lambda n: n ** 2, a)]

In [13]:
# Build the same squares with a list comprehension and print both results for comparison.
squares_l = [x**2 for x in a]
print(f'列表推导：{squares_l}\n'
      f'map推导{squares_m}')

列表推导：[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
map推导[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]


In [14]:
a

range(0, 10)

In [15]:
# When a filter condition is needed, a list comprehension is the more convenient form.
even_squares = [n ** 2 for n in a if not n % 2]
even_squares

[0, 4, 16, 36, 64]

# 28. 控制推导逻辑的子表达式不要超过两个

In [16]:
# Objects with few dimensions can be flattened with a multi-level comprehension;
# once there are more than three levels, write plain for loops instead.
matrix = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
]
flat = [cell for line in matrix for cell in line]
flat

[1, 2, 3, 4, 5, 6, 7, 8, 9]

# 29. 用赋值表达式消除推导中出现的重复代码

In [22]:
# Counts keyed by item name, and the list of item names to look up.
stock = dict(nails=125, screws=35, wingnuts=8, washers=24)
order = 'screws wingnuts clips'.split()
def get_batches(count, size=8):
    """Return how many complete groups of ``size`` fit into ``count`` (floor division)."""
    full_batches, _remainder = divmod(count, size)
    return full_batches
# Keep only the ordered items whose batch count is non-zero.
# Note that get_batches(...) is written (and evaluated) twice per kept item:
# once in the filter and once for the value.
found = {
    item: get_batches(stock.get(item, 0))
    for item in order
    if get_batches(stock.get(item, 0))
}
found

In [25]:
# Simplify with an assignment (walrus) expression: the batch count is computed once
# in the condition and reused as the value.
found = {
    item: batches
    for item in order
    if (batches := get_batches(stock.get(item, 0)))
}
found

In [27]:
# Evaluation order matters: the `if` condition runs before the value expression,
# so `tenth` is read before the walrus assigns it and this statement raises NameError.
result = {name: (tenth := count //10)
          for name, count in stock.items() if tenth >0}

NameError: name 'tenth' is not defined

In [30]:
# Fix: do the assignment inside the condition (evaluated first) and reuse the
# bound name as the value.
result = {
    item: tenth
    for item, quantity in stock.items()
    if (tenth := quantity // 10) > 0
}

In [31]:
result

{'nails': 12, 'screws': 3, 'washers': 2}

In [32]:
# Using := inside a comprehension leaks the assigned name into the enclosing scope:
# after the comprehension finishes, `last` holds the value from the final iteration
# (similar to how a for-loop variable stays bound after the loop).
half = [(last:= count //2) for count in stock.values()]
print(f'last为最后一个值{last}, half列表为{half}')

last为最后一个值12, half列表为[62, 17, 4, 12]


In [33]:
# Without an assignment expression nothing leaks: the comprehension's loop variable
# `count` is not visible afterwards, so print(count) raises NameError.
half = [count //2 for count in stock.values()]
print(half)
print(count)

[62, 17, 4, 12]


NameError: name 'count' is not defined

# 30. 不要让函数直接返回列表，应该让其逐个生成列表里的值

In [3]:
def index_words(text: str):
    """Return the positions at which words start in ``text``.

    The list holds 0 when ``text`` is non-empty, followed by the index just
    after every space character.
    """
    starts = [0] if text else []
    starts.extend(pos + 1 for pos, char in enumerate(text) if char == " ")
    return starts

In [6]:
# Index a sample sentence and print at most the first ten positions.
address = 'Four score and seven years ago...'
result = index_words(address)
print(result[:10])

[0, 5, 11, 15, 21, 27]


In [7]:
# 1. The list-building version buries the key expression (index + 1) in bookkeeping.
# 2. Returning a whole list uses a lot of memory and needs many append calls.
def index_words(text: str):
    """Yield 0 for non-empty ``text``, then the index just after every space."""
    if not text:
        return
    yield 0
    for position, char in enumerate(text, start=1):
        if char == " ":
            yield position

In [9]:
# Pull the first two positions from the generator one at a time.
it = index_words(address)
print(next(it))
print(next(it))
# If a list is still wanted, list() walks through whatever the iterator has left.
list(it)

0
5


[11, 15, 21, 27]

In [10]:
def index_file(handle):
    """Yield word-start offsets across all lines produced by ``handle``.

    ``handle`` is iterated line by line. For every non-empty line the running
    character offset at the start of that line is yielded, followed by the
    offset just after each space in the line. Offsets count characters seen
    so far over all lines.
    """
    line_start = 0
    for line in handle:
        if line:
            yield line_start
        # enumerate(..., 1) gives the 1-based column, i.e. the offset *after* each char.
        yield from (
            line_start + column
            for column, char in enumerate(line, 1)
            if char == " "
        )
        line_start += len(line)

In [11]:
import itertools
# Stream positions from the file and take at most the first ten with islice.
with open('chapter4.txt', 'r') as f:
    it = index_file(f)
    result = itertools.islice(it, 0, 10)
    print(list(result))

[0, 6, 12, 17, 20, 27]


# 31. 谨慎地迭代函数所收到的参数

In [12]:
def normalize(numbers):
    """Return each value of ``numbers`` as a percentage of their sum.

    The total is computed first, then ``numbers`` is iterated a second time
    to compute each element's share.
    """
    total = sum(numbers)
    return [100 * value / total for value in numbers]

In [13]:
# Sample input: turn three raw counts into percentages of their total.
visits = [15, 35, 80]
percentages = normalize(visits)

In [14]:
print(percentages)

[11.538461538461538, 26.923076923076923, 61.53846153846154]


In [15]:
# For larger data sets, consider streaming the values with a generator.
def read_visits(data_path):
    """Yield every line of the file at ``data_path`` converted with ``int``."""
    with open(data_path) as source:
        yield from map(int, source)

In [17]:
it = read_visits('chapter4.txt')
percentages = normalize(it)
print(percentages)
# This prints an empty list: sum() inside normalize has already exhausted the iterator
# returned by read_visits, so the for loop that follows produces no elements.

[]


In [18]:
# One way to avoid the exhausted-iterator problem is to run through the iterator
# once inside normalize and keep its contents in a new list.
def normalize(numbers):
    """Return each value of ``numbers`` as a percentage of their sum.

    ``numbers`` is copied into a list up front so that it can be walked twice
    even when the caller passes a single-use iterator.
    """
    snapshot = list(numbers)
    total = sum(snapshot)
    return [100 * value / total for value in snapshot]

In [19]:
# Pass a single-use generator to the normalize variant that copies its input into a list.
it = read_visits('chapter4.txt')
percentages = normalize(it)
print(percentages)

[12.037037037037036, 13.88888888888889, 74.07407407407408]


In [20]:
# Copying into a full list defeats the purpose of using an iterator in the first place.
# Instead, accept a factory function so that normalize gets a fresh iterator every
# time it needs one.
def normalize(get_iter):
    """Return percentages of the total for the values produced by ``get_iter()``.

    ``get_iter`` is a zero-argument callable; it is called once to compute the
    sum and once more to compute each value's share.
    """
    total = sum(get_iter())
    return [100 * value / total for value in get_iter()]
# normalize must now be given a callable (here a lambda expression) so that each
# '()' call inside the function produces a new iterator.
percentages = normalize(lambda : read_visits('chapter4.txt'))
print(percentages)

[12.037037037037036, 13.88888888888889, 74.07407407407408]


- 除了上述方法,还可以创建自定义容器完成如上需求

In [24]:
class ReadVisits:
    """Re-iterable container over the integers stored one per line in a file."""

    def __init__(self, data_path):
        self.data_path = data_path

    def __iter__(self):
        # Iteration rule for the container: each call opens the file at the stored
        # path again and yields every line converted with int().
        with open(self.data_path) as source:
            yield from map(int, source)
# A container with its own __iter__ works with the original form of normalize.
def normalize(numbers):
    """Return each value of ``numbers`` as a percentage of their sum."""
    # sum() calls __iter__ a first time and gets its own iterator.
    total = sum(numbers)
    # The comprehension calls __iter__ a second time for another, independent iterator.
    return [100 * value / total for value in numbers]

In [25]:
# Pass a ReadVisits container, rather than a one-shot generator, to normalize.
visits = ReadVisits('chapter4.txt')
percentages = normalize(visits)
print(percentages)

[12.037037037037036, 13.88888888888889, 74.07407407407408]


In [26]:
# A type check can be added so that plain iterators are rejected up front.
def normalize_defensive(numbers):
    """Return each value of ``numbers`` as a percentage of their sum.

    Raises:
        TypeError: if ``iter(numbers)`` returns ``numbers`` itself, i.e. the
            argument is an iterator rather than a container.
    """
    its_own_iterator = iter(numbers) is numbers
    if its_own_iterator:
        raise TypeError('必须传入一个容器而不是普通迭代器')
    # sum() calls __iter__ a first time and gets its own iterator.
    total = sum(numbers)
    # The comprehension calls __iter__ again; the two iterators are independent.
    return [100 * value / total for value in numbers]

In [27]:
normalize_defensive(iter([15, 35, 80]))

TypeError: 必须传入一个容器而不是普通迭代器

In [28]:
normalize_defensive(visits)

[12.037037037037036, 13.88888888888889, 74.07407407407408]

In [35]:
type(visits)

__main__.ReadVisits

In [34]:
iter(visits)  # returns the iterator produced by the custom container's __iter__

<generator object ReadVisits.__iter__ at 0x000001EBEA261150>

In [33]:
iter([0,1,2])

<list_iterator at 0x1ebe9920e50>