In [9]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

### Item 27 Use Comprehension Instead of map and filter

In [2]:
a = [1,2,3,4,5,6,7,8,9]
squares = []
for x in a:
    squares.append(x**2)
squares

[1, 4, 9, 16, 25, 36, 49, 64, 81]

In [3]:
squares = [x ** 2 for x in a]
squares

[1, 4, 9, 16, 25, 36, 49, 64, 81]

In [9]:
# the built-in map function requires the creation of a lambda function for the computation, which is visually noisy.

squares = map(lambda x: x ** 2, a)
print(list(squares))

[1, 4, 9, 16, 25, 36, 49, 64, 81]


In [6]:
# filter by comprehension
even_squares = [x ** 2 for x in a if x % 2 == 0]
even_squares

[4, 16, 36, 64]

In [8]:
# the built-in filter function is much harder to read.
alt = map(lambda x: x ** 2, filter(lambda x: x % 2 == 0, a))
assert even_squares == list(alt)

In [10]:
even_squares_dict = {x: x ** 2 for x in a if x % 2 == 0}
threes_cubed_set = {x ** 3 for x in a if x % 3 == 0}
even_squares_dict
threes_cubed_set

{2: 4, 4: 16, 6: 36, 8: 64}

{27, 216, 729}

In [13]:
alt_dict = dict(map(lambda x: (x, x ** 2),
                   filter(lambda x: x % 2 == 0, a)))
print(alt_dict)

alt_set = set(map(lambda x: x ** 3,
                  filter(lambda x: x % 3 == 0, a)))
print(alt_set)

{2: 4, 4: 16, 6: 36, 8: 64}
{216, 729, 27}


**Things to remember**

- *List Comprehensions* are clearer than the *map* and *filter* built-in functions because they don't require *lambda* expressions.

- *List Comprehensions* allow you to easily skip items from the input list, a behavior that map doesn't support without help from *filter*.


### Item 28 Avoid More than **Two** Control subexpressions in comprehensions

In [14]:
# a list comprehensions with two for subexpressions
matrix = [[1,2,3], [4,5,6], [7,8,9]]
flat = [x for row in matrix for x in row]
flat

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [15]:
squared = [[x ** 2 for x in row] for row in matrix]
squared

[[1, 4, 9], [16, 25, 36], [49, 64, 81]]

In [16]:
my_lists = [
    [[1,2,3], [4,5,6]],
    [[7,8,9], [10,11,12]],
    [[13,14,15], [16,17,18]],
]

flat = [x for sublist1 in my_lists
       for sublist2 in sublist1
       for x in sublist2]
flat

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]

In [18]:
flat = []
for sublist1 in my_lists:
    for sublist2 in sublist1:
        flat.extend(sublist2)    # not append()
flat

[[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], [13, 14, 15], [16, 17, 18]]

In [20]:
# Multiple conditions at the same loop level have an implicit and expression
a = [1,2,3,4,5,6,7,8,9]
b = [x ** 2 for x in a if x > 4 if x % 2 == 0]
c = [x ** 3 for x in a if x > 4 if x % 3 == 0]
b
c 

[36, 64]

[216, 729]

In [21]:
# Conditions can be specified at each level of looping after the for subexpression
matrix = [[1,2,3], [4,5,6], [7,8,9]]
filtered = [[x for x in row if x % 3 == 0]
           for row in matrix if sum(row) >= 10]
filtered

[[6], [9]]

### Item 29 Avoid Repeated Work in Comprehensions by Using Assignment Expressions

A common pattern with comprehensions—including list, dict, and set variants—is the need to reference the same computation in multiple places.

In [6]:
stock = {
    'nails': 125,
    'screws': 35,
    'wingnuts': 8,
    'washers': 24,
}

order = ['screws', 'wingnuts', 'clips']

def get_butches(count, size):
    return count // size

result = {}

for name in order:
    count = stock.get(name, 0)
    batches = get_butches(count, 8)
    
    if batches:
        result[name] = batches
result

{'screws': 4, 'wingnuts': 1}

In [10]:
# use dict comprehensions instead of loop
stock = {
    'nails': 125,
    'screws': 35,
    'wingnuts': 8,
    'washers': 24,
}

order = ['screws', 'wingnuts', 'clips']

def get_butches(count, size):
    return count // size

result = {name: get_butches(stock.get(name,0), 8)
         for name in order
         if get_butches(stock.get(name, 0),8)}
result

{'screws': 4, 'wingnuts': 1}

In [12]:
# the problem with it is that the get_butches(stock.get(name, 0), 8) expression is repeated
# An easy solution to these problems is to use the walrus operator (:=), which was introduced in Python 3.8, to form an assignment expression
# as part of the comprehension

stock = {
    'nails': 125,
    'screws': 35,
    'wingnuts': 8,
    'washers': 24,
}

order = ['screws', 'wingnuts', 'clips']

def get_batches(count, size):
    return count // size

result = {name: batches for name in order
         if (batches := get_batches(stock.get(name,0),8))}

result

{'screws': 4, 'wingnuts': 1}

In [15]:
result = {name: tenth for name, count in stock.items()
         if (tenth := count // 10)> 0}
result

{'nails': 12, 'screws': 3, 'washers': 2}

In [16]:
# If a comprehension uses the walrus operator in the value part of the comprehension and doesn’t have a condition, 
# it’ll leak the loop variable into the containing scope
half = [(last := count // 2) for count in stock.values()]
print(f'Last item of {half} is {last}')

Last item of [62, 17, 4, 12] is 12


In [18]:
# same as a normal for loop 
for count in stock.values():    # leaks loop variable
    pass

print(f'Last item of {list(stock.values())} is {count}')


Last item of [125, 35, 8, 24] is 24


In [22]:
# However, similar leakage doesn't happen for the loop variables from comprehensions
half = [count2 // 2 for count2 in stock.values()]
print(half)
print(count2)

[62, 17, 4, 12]


NameError: name 'count2' is not defined

**THings to remember**

- It’s better not to leak loop variables, so I recommend using assignment
expressions only in the condition part of a comprehension.

In [25]:
# Using an assignment expression also works the same way in generator expressions.
stock = {
    'nails': 125,
    'screws': 35,
    'wingnuts': 8,
    'washers': 24,
}

order = ['screws', 'wingnuts', 'clips']

def get_batches(count, size):
    return count // size

found = ((name, batches) for name in order
        if (batches := get_batches(stock.get(name, 0),8)))
next(found)
next(found)
next(found)

('screws', 4)

('wingnuts', 1)

StopIteration: 

### Item 30 Consider Generators Instead of Returning Lists

In [4]:
def index_words(text):
    result = []
    if text:
        result.append(0)
    for index, letter in enumerate(text):
        if letter == ' ':
            result.append(index + 1)
    return result

In [5]:
address = "The first problem is that the code is a bit dense and noisy"
result = index_words(address)
result

[0, 4, 10, 18, 21, 26, 30, 35, 38, 40, 44, 50, 54]

A better way to write this function is by using a *generator*. Generators are produced by functions that use *yield* expressions. 

When called, a generator function does not actually run but instead immediately returns an *iterator*. 

With each call to the *next* built-in function, the iterator advances the generator to its next yield expression. 

In [6]:
def index_words_iter(text):
    if text:
        yield 0
    for index, letter in enumerate(text):
        if letter == ' ':
             yield index + 1

In [10]:
it = index_words_iter(address)
next(it)
next(it)


0

4

In [11]:
# convert the iterator returned by the generator to a list 
result_ls = list(index_words_iter(address))
result_ls

[0, 4, 10, 18, 21, 26, 30, 35, 38, 40, 44, 50, 54]

In [12]:
def index_file(handle):
    offset = 0 
    for line in handle:
        if line: 
            yield offset
        for letter in line: 
            offset += 1
            if letter == ' ':
                yield offset

In [15]:
import itertools

with open('note.txt','r') as f: 
    it = index_file(f)
    results = itertools.islice(it, 0, 100)
    print(list(results))

[0, 5, 20, 27, 33, 48, 63, 77, 90, 92, 110, 115, 121, 128, 135, 140, 146, 153, 160, 188, 218, 237, 253, 255, 262, 291, 314, 340, 342, 353, 386, 394, 396, 410, 423, 425, 456, 458, 462, 475, 477, 506, 509, 511, 516, 529, 531, 562, 564, 568, 580, 582, 610, 613, 615, 620, 632, 634, 664, 669, 674, 686, 693, 708, 721, 723, 752, 764, 766, 794, 820, 834, 844, 860, 866, 870, 882, 915, 917, 920, 930, 932, 938, 945, 947, 957, 964, 970, 974, 986, 1019, 1021, 1024, 1034, 1036, 1042, 1049, 1051, 1061, 1068]


### Item 31 Be Defensive when Iterating Over Arguments

In [19]:
def normalize(numbers):
    total = sum(numbers)
    result = []
    for value in numbers:
        result.append(100 * value / total)
    return result

In [20]:
visits = [15,35,80]
percentages = normalize(visits)
print(percentages)
assert sum(percentages) == 100.0

[11.538461538461538, 26.923076923076923, 61.53846153846154]


In [23]:
# To scale up, I need to read the data from a file that contains
# every city tourists.

def read_visits(datapath):
    with open(datapath) as f: 
        for line in f: 
            yield int(line)

In [30]:
it = read_visits('tourists.txt')
percentages = normalize(list(it))
print(percentages)
print(sum(percentages))
assert sum(percentages) == 100.0

[5.882352941176471, 13.72549019607843, 35.294117647058826, 26.274509803921568, 18.823529411764707]
100.00000000000001


AssertionError: 

In [35]:
def normalize_copy(numbers):
    numbers_copy = list(numbers)    #copy the iterator
    total = sum(numbers_copy)
    result = []
    for value in numbers_copy:
        result.append(100 * value / total)
    return result

In [36]:
it = read_visits('tourists.txt')
percentages = normalize_copy(it)
print(percentages)
print(sum(percentages))
assert sum(percentages) == 100.0

[5.882352941176471, 13.72549019607843, 35.294117647058826, 26.274509803921568, 18.823529411764707]
100.00000000000001


AssertionError: 

In [39]:
# The problem with this approach is that the copy of the input iterator’s
# contents could be extremely large.

# One way around this is to accept a function
# that returns a new iterator each time it’s called

def normalize_func(get_iter):
    print(f'first iter: {id(get_iter())}')
    total = sum(get_iter())    # new iterator
    print(f'second iter: {id(get_iter())}')
    result = []
    for value in get_iter():   # new iterator
        result.append(100 * value / total)
    return result    

In [40]:
path = 'tourists.txt'
percentages = normalize_func(lambda: read_visits(path))
print(percentages)

first iter: 2280120191904
second iter: 2280120191904
[5.882352941176471, 13.72549019607843, 35.294117647058826, 26.274509803921568, 18.823529411764707]


Although this works, having to pass a lambda function like this is clumsy. 

A better way to achieve the same result is to provide a new container class that implements the ***iterator protocol***.

The iterator protocol is how Python for loops and related expressions traverse the contents of a container type. When Python sees a statement like for x in foo, it actually calls iter(foo). The iter built-in function calls the foo.__iter__ special method in turn. The __iter__ method must return an iterator object (which itself implements the __next__ special method). Then, the for loop repeatedly calls the next built-in function on the iterator object until it’s exhausted (indicated by raising a StopIteration exception).


In [41]:
class ReadVisits:
    def __init__(self, datapath):
        self.datapath = datapath
    
    def __iter__(self):
        with open(self.datapath) as f:
            for line in f: 
                yield int(line)


In [42]:
visits = ReadVisits('tourists.txt')
percentages = normalize(visits)
print(percentages)
assert sum(percentages) == 100.0

[5.882352941176471, 13.72549019607843, 35.294117647058826, 26.274509803921568, 18.823529411764707]


AssertionError: 

This works because the sum method in normalize calls ReadVisits.__iter__ to allocate a new iterator object. The for loop to normalize the numbers also calls __iter__ to allocate a second iterator object. Each of those iterators will be advanced and exhausted independently, ensuring that each unique iteration sees all of the input data values.

The protocol states that when an iterator is passed to the iter built-in function, iter returns the iterator itself. 

In contrast, when a container type is passed to iter, a new iterator object is returned each time. Thus, you can test an input value for this behavior and raise a TypeError to reject arguments that can’t be repeatedly iterated over:


In [43]:
def normalize_defensive(numbers):
    if iter(numbers) is numbers: 
        raise TypeError('Must supply a container')
    total = sum(numbers)
    result = []
    for value in numbers:
        result.append(100 * value / total)
    return result

In [44]:
visits = ReadVisits('tourists.txt')
percentages = normalize_defensive(visits)
print(percentages)
assert sum(percentages) == 100.0

[5.882352941176471, 13.72549019607843, 35.294117647058826, 26.274509803921568, 18.823529411764707]


AssertionError: 

In [45]:
it = read_visits('tourists.txt')
percentages = normalize_defensive(it)
print(percentages)
print(sum(percentages))
assert sum(percentages) == 100.0

TypeError: Must supply a container

In [47]:
# Alternatively, Alternatively, the collections.abc built-in module defines an Iterator
# class that can be used in an isinstance test to recognize the potential problem.

from collections.abc import Iterator

def normalize_defensive2(numbers):
    if isinstance(numbers, Iterator):    #Another way to check Iterator 
        raise TypeError('Must supply a container')
    total = sum(numbers)
    result = []
    for value in numbers:
        result.append(100 * value / total)
    return result

In [49]:
visits = ReadVisits('tourists.txt')
percentages = normalize_defensive2(visits)
print(percentages)
print(sum(percentages))
assert sum(percentages) == 100.0

[5.882352941176471, 13.72549019607843, 35.294117647058826, 26.274509803921568, 18.823529411764707]
100.00000000000001


AssertionError: 

In [50]:
# This function works as expected for list and ReadVisits inputs 
# because they are iterable containers that follow the iterator protocol:

visits = [15,25,90,40]
percentages = normalize_defensive2(visits)
print(percentages)

visits = ReadVisits('tourists.txt')
percentages = normalize_defensive2(visits)
print(percentages)


[8.823529411764707, 14.705882352941176, 52.94117647058823, 23.529411764705884]
[5.882352941176471, 13.72549019607843, 35.294117647058826, 26.274509803921568, 18.823529411764707]


In [51]:
#The function raises an exception if the input is an iterator rather than a container:
visits = [15,25,90,40]
percentages = normalize_defensive2(iter(visits))
print(percentages)

TypeError: Must supply a container