### Yielding and Generators

In [None]:
import math

In [None]:
class FactIter:
    """Iterator over the factorials 0!, 1!, ..., (n-1)!.

    The object is its own iterator, so it can be consumed only once.
    """

    def __init__(self, n):
        self.i = 0  # index of the next factorial to produce
        self.n = n  # number of factorials to produce in total

    def __iter__(self):
        return self

    def __next__(self):
        # Guard clause: stop once n values have been handed out.
        if self.i >= self.n:
            raise StopIteration
        current = self.i
        self.i = current + 1
        return math.factorial(current)

In [None]:
fact_iter = FactIter(5)

In [None]:
for num in fact_iter:
    print(num)

1
1
2
6
24


In [None]:
def fact():
    """Return a zero-argument callable producing 0!, 1!, 2!, ... on successive calls."""
    counter = 0

    def inner():
        # The closure keeps the running index alive between calls.
        nonlocal counter
        current, counter = counter, counter + 1
        return math.factorial(current)

    return inner

In [None]:
fact_iter = iter(fact(), math.factorial(5))

In [None]:
for num in fact_iter:
    print(num)

1
1
2
6
24


In [None]:
def my_func():
    """Generator that prints a marker line before each of its two yielded words."""
    for marker, word in (('line 1', 'Flying'), ('line 2', 'Circus')):
        print(marker)
        yield word

In [None]:
my_func()

<generator object my_func at 0x7f2e1dc38650>

In [None]:
gen_my_func = my_func()

In [None]:
next(gen_my_func)

line 1


'Flying'

In [None]:
next(gen_my_func)

line 2


'Circus'

In [None]:
next(gen_my_func)

StopIteration: 

In [None]:
gen_my_func = my_func()

In [None]:
'__iter__' in dir(gen_my_func)

True

In [None]:
'__next__' in dir(gen_my_func)

True

In [None]:
gen_my_func

<generator object my_func at 0x7f2df0d87ad0>

In [None]:
iter(gen_my_func)

<generator object my_func at 0x7f2df0d87ad0>

In [None]:
next(iter(gen_my_func))

line 1


'Flying'

In [None]:
next(iter(gen_my_func))

line 2


'Circus'

In [None]:
next(iter(gen_my_func))

StopIteration: 

In [None]:
def squares(sentinel):
    """Yield i**2 for i = 0 .. sentinel-1.

    When exhausted the generator returns 'all done!', which becomes the
    value carried by the resulting StopIteration.
    """
    i = 0
    while i < sentinel:
        squared = i ** 2
        i += 1
        yield squared
    return 'all done!'

In [None]:
sq = squares(3)

In [None]:
next(sq)

StopIteration: all done!

In [None]:
def squares(sentinel):
    """Yield i**2 for i = 0 .. sentinel-1, printing i each time the generator resumes.

    Returns 'all done!' (carried by StopIteration) once exhausted.
    """
    i = 0
    while i < sentinel:
        yield i ** 2
        # Execution resumes here on the next request, so the print and the
        # increment both happen **after** the value was handed out.
        print("Value of i:", i)
        i += 1
    return 'all done!'

In [None]:
for num in squares(5):
    print(num)

0
Value of i: 0
1
Value of i: 1
4
Value of i: 2
9
Value of i: 3
16
Value of i: 4


In [None]:
def factorials(n):
    """Lazily yield 0!, 1!, ..., (n-1)!."""
    yield from map(math.factorial, range(n))

In [None]:
for num in factorials(5):
    print(num)

1
1
2
6
24


In [None]:
facts = factorials(5)

In [None]:
list(facts)

[1, 1, 2, 6, 24]

In [None]:
list(facts)

[]

In [None]:
next(facts)

StopIteration: 

Here is the Fibonacci sequence:

```
1 1 2 3 5 8 13 ...
```

As you can see there is a recursive definition of the numbers in this sequence:

```
Fib(n) = Fib(n-1) + Fib(n-2)
```
where 

```
Fib(0) = 1
``` 

and

```
Fib(1) = 1
```

In [None]:
def fib_recursive(n):
    """Return Fib(n) using the naive recursive definition, with Fib(0) = Fib(1) = 1."""
    return 1 if n <= 1 else fib_recursive(n - 1) + fib_recursive(n - 2)

In [None]:
[fib_recursive(i) for i in range(7)]

[1, 1, 2, 3, 5, 8, 13]

In [None]:
from timeit import timeit

In [None]:
timeit('fib_recursive(10)', globals=globals(), number=10)

0.00020973104983568192

In [None]:
timeit('fib_recursive(28)', globals=globals(), number=10)

1.3275823956355453

In [None]:
timeit('fib_recursive(29)', globals=globals(), number=10)

2.251129942946136

In [None]:
from functools import lru_cache

In [None]:
@lru_cache()
def fib_recursive(n):
    """Return Fib(n) recursively (Fib(0) = Fib(1) = 1); results are memoized by lru_cache."""
    return 1 if n <= 1 else fib_recursive(n - 1) + fib_recursive(n - 2)

In [None]:
timeit('fib_recursive(10)', globals=globals(), number=10)

2.1090731024742126e-05

In [None]:
timeit('fib_recursive(29)', globals=globals(), number=10)

5.232449620962143e-05

In [None]:
@lru_cache()
def fib_recursive(n):
    """Memoized recursive Fibonacci with Fib(0) = Fib(1) = 1.

    Caching removes the repeated work but not the recursion depth, so a
    large first call can still hit the interpreter's recursion limit.
    """
    if n <= 1:
        return 1
    smaller = fib_recursive(n - 2)
    larger = fib_recursive(n - 1)
    return larger + smaller

In [None]:
fib_recursive(2000)

RecursionError: maximum recursion depth exceeded in comparison

In [None]:
def fib(n):
    """Return Fib(n) iteratively, with Fib(0) = Fib(1) = 1."""
    previous, current = 1, 1
    for _ in range(n - 1):
        previous, current = current, previous + current
    return current

In [None]:
[fib(i) for i in range(7)]

[1, 1, 2, 3, 5, 8, 13]

In [None]:
timeit('fib(5000)', globals=globals(), number=10)

0.007036538794636726

In [None]:
class Fib:
    """Re-iterable collection of the first n Fibonacci numbers.

    Each call to iter() builds a fresh FibIter, so the same Fib object can
    be looped over repeatedly.
    """

    class FibIter:
        """One-shot iterator that computes each value by calling fib(i)."""

        def __init__(self, n):
            self.i = 0  # index of the next Fibonacci number
            self.n = n  # how many numbers to produce

        def __iter__(self):
            return self

        def __next__(self):
            if self.i >= self.n:
                raise StopIteration
            current = self.i
            value = fib(current)
            self.i = current + 1
            return value

    def __init__(self, n):
        self.n = n

    def __iter__(self):
        return self.FibIter(self.n)

In [None]:
fib_iterable = Fib(7)

In [None]:
for num in fib_iterable:
    print(num)

1
1
2
3
5
8
13


In [None]:
def fib(n):
    """Compute Fib(n) with a simple loop (Fib(0) = Fib(1) = 1)."""
    older = newer = 1
    steps = n - 1
    for _ in range(steps):
        older, newer = newer, older + newer
    return newer
    
def fib_closure():
    """Return a callable that yields fib(0), fib(1), ... on successive calls."""
    position = 0

    def inner():
        # The closure remembers which Fibonacci index comes next.
        nonlocal position
        value = fib(position)
        position += 1
        return value

    return inner

In [None]:
fib_numbers = fib_closure()
fib_iter = iter(fib_numbers, fib(7))
for num in fib_iter:
    print(num)

1
1
2
3
5
8
13


In [None]:
def fib(n):
    """Return the n-th Fibonacci number, counting from Fib(0) = Fib(1) = 1."""
    a, b = 1, 1
    remaining = n - 1
    while remaining > 0:
        a, b = b, a + b
        remaining -= 1
    return b

In [None]:
[fib(i) for i in range(7)]

[1, 1, 2, 3, 5, 8, 13]

In [None]:
def fib_gen(n):
    """First attempt at a Fibonacci generator.

    It only yields the values produced inside the loop, so the two leading
    1s of the sequence are missing (fib_gen(7) starts at 2).
    """
    previous = current = 1
    for _ in range(n - 1):
        previous, current = current, previous + current
        yield current

In [None]:
[num for num in fib_gen(7)]

[2, 3, 5, 8, 13, 21]

In [None]:
def fib_gen(n):
    """Yield the first n Fibonacci numbers: 1, 1, 2, 3, 5, ...

    Each value is derived from the previous two in O(1), so producing the
    whole sequence is linear in n.

    Args:
        n: how many numbers to yield; n <= 0 yields nothing.
    """
    if n <= 0:
        return
    fib_0 = 1
    yield fib_0
    if n == 1:
        return
    fib_1 = 1
    yield fib_1
    # Two values were already yielded above, so only n - 2 remain.
    for _ in range(n - 2):
        fib_0, fib_1 = fib_1, fib_0 + fib_1
        yield fib_1

In [None]:
[num for num in fib_gen(7)]

[1, 1, 2, 3, 5, 8, 13, 21]

In [None]:
some_obg = fib_gen(8)
next(some_obg)

1

In [None]:
next(some_obg)

2

In [None]:
fib_gen(8)

<generator object fib_gen at 0x7f2def3ebed0>

In [None]:
timeit('[num for num in Fib(5_000)]', globals=globals(), number=1)

1.560899200849235

In [None]:
fib_numbers = fib_closure()
sentinel = fib(5_001)

timeit('[num for num in iter(fib_numbers, sentinel)]', globals=globals(),
      number=1)

1.4451701119542122

In [None]:
timeit('[num for num in fib_gen(5_000)]', globals=globals(), number=1)

0.0013967351987957954

In [None]:
def squares_gen(n):
    """Generate the squares of 0, 1, ..., n-1 (one-shot, like any generator)."""
    yield from (number ** 2 for number in range(n))

In [None]:
sq = squares_gen(5)

In [None]:
for num in sq:
    print(num)

0
1
4
9
16


In [None]:
next(sq)

StopIteration: 

In [None]:
sq = squares_gen(5)

In [None]:
[num for num in sq]

[0, 1, 4, 9, 16]

In [None]:
def squares_gen(n):
    """Yield i squared for every i in range(n)."""
    i = 0
    while i < n:
        yield i ** 2
        i += 1

class Squares:
    """Iterable of the first n squares.

    __iter__ hands back a brand-new squares_gen generator on every call,
    which is what lets a Squares object be iterated more than once.
    """

    def __init__(self, n):
        self.n = n

    def __iter__(self):
        fresh_generator = squares_gen(self.n)
        return fresh_generator

sq = Squares(5)

In [None]:
[num for num in sq]

[0, 1, 4, 9, 16]

In [None]:
[num for num in sq]

[0, 1, 4, 9, 16]

In [None]:
class Squares:
    """Iterable of the first n squares, with the generator kept inside the class."""

    def __init__(self, n):
        self.n = n

    def __iter__(self):
        # A new generator per call keeps the object re-iterable.
        return Squares.squares_gen(self.n)

    @staticmethod
    def squares_gen(n):
        """Yield i ** 2 for i in range(n)."""
        yield from (value ** 2 for value in range(n))

In [None]:
sq = Squares(5)

In [None]:
[num for num in sq]

[0, 1, 4, 9, 16]

In [None]:
sq

<__main__.Squares at 0x7f2dee82c510>

In [None]:
s = iter(sq)

In [None]:
next(s)

StopIteration: 

In [None]:
def squares(n):
    """Generator of the squares 0, 1, 4, ... for the integers below n."""
    for base in range(n):
        squared = base ** 2
        yield squared

In [None]:
sq = squares(5)

In [None]:
enum_sq = enumerate(sq)

In [None]:
enum_sq

<enumerate at 0x7f2dee6de2d0>

In [None]:
next(sq)

1

In [None]:
next(enum_sq)

(0, 4)

In [None]:
sq = squares(5)
enum_sq = enumerate(sq)

In [None]:
next(enum_sq)

(0, 4)

In [None]:
next(enum_sq)

(1, 9)

In [None]:
next(enum_sq)

(2, 16)

In [None]:
next(enum_sq)

StopIteration: 

In [None]:
l = [i ** 2 for i in range(5)]

In [None]:
l

[0, 1, 4, 9, 16]

In [None]:
g = (i ** 2 for i in range(5))
g

<generator object <genexpr> at 0x7f2deec74f50>

In [None]:
type(g)

generator

In [None]:
next(g)

4

In [None]:
for item in g:
    print(item)

9
16


In [None]:
next(g)

StopIteration: 

In [None]:
for item in g:
    print(item)

In [None]:
import dis

In [None]:
exp = compile('[i**2 for i in range(5)]', filename='<string>', mode='eval')

In [None]:
dis.dis(exp)

  1           0 LOAD_CONST               0 (<code object <listcomp> at 0x7f2deebe8d20, file "<string>", line 1>)
              2 LOAD_CONST               1 ('<listcomp>')
              4 MAKE_FUNCTION            0
              6 LOAD_NAME                0 (range)
              8 LOAD_CONST               2 (5)
             10 CALL_FUNCTION            1
             12 GET_ITER
             14 CALL_FUNCTION            1
             16 RETURN_VALUE

Disassembly of <code object <listcomp> at 0x7f2deebe8d20, file "<string>", line 1>:
  1           0 BUILD_LIST               0
              2 LOAD_FAST                0 (.0)
        >>    4 FOR_ITER                12 (to 18)
              6 STORE_FAST               1 (i)
              8 LOAD_FAST                1 (i)
             10 LOAD_CONST               0 (2)
             12 BINARY_POWER
             14 LIST_APPEND              2
             16 JUMP_ABSOLUTE            4
        >>   18 RETURN_VALUE


In [None]:
exp = compile('(i ** 2 for i in range(5))', filename='<string>', mode='eval')
dis.dis(exp)

  1           0 LOAD_CONST               0 (<code object <genexpr> at 0x7f2deebe88a0, file "<string>", line 1>)
              2 LOAD_CONST               1 ('<genexpr>')
              4 MAKE_FUNCTION            0
              6 LOAD_NAME                0 (range)
              8 LOAD_CONST               2 (5)
             10 CALL_FUNCTION            1
             12 GET_ITER
             14 CALL_FUNCTION            1
             16 RETURN_VALUE

Disassembly of <code object <genexpr> at 0x7f2deebe88a0, file "<string>", line 1>:
  1           0 LOAD_FAST                0 (.0)
        >>    2 FOR_ITER                14 (to 18)
              4 STORE_FAST               1 (i)
              6 LOAD_FAST                1 (i)
              8 LOAD_CONST               0 (2)
             10 BINARY_POWER
             12 YIELD_VALUE
             14 POP_TOP
             16 JUMP_ABSOLUTE            2
        >>   18 LOAD_CONST               1 (None)
             20 RETURN_VALUE


In [None]:
l = [i * 2 for i in range(5)]

In [None]:
type(l)

list

In [None]:
g = (i ** 2 for i in range(5))
type(g)

generator

In [None]:
start = 1
stop = 10

mult_list = [ [i * j 
               for j in range(start, stop+1)]
             for i in range(start, stop+1)]

In [None]:
mult_list

[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
 [2, 4, 6, 8, 10, 12, 14, 16, 18, 20],
 [3, 6, 9, 12, 15, 18, 21, 24, 27, 30],
 [4, 8, 12, 16, 20, 24, 28, 32, 36, 40],
 [5, 10, 15, 20, 25, 30, 35, 40, 45, 50],
 [6, 12, 18, 24, 30, 36, 42, 48, 54, 60],
 [7, 14, 21, 28, 35, 42, 49, 56, 63, 70],
 [8, 16, 24, 32, 40, 48, 56, 64, 72, 80],
 [9, 18, 27, 36, 45, 54, 63, 72, 81, 90],
 [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]]

In [None]:
start = 1
stop = 10

mult_list = ( (i * j 
               for j in range(start, stop+1))
             for i in range(start, stop+1))

In [None]:
mult_list

<generator object <genexpr> at 0x7f2dee97e650>

In [None]:
table = list(mult_list)

In [None]:
table

[<generator object <genexpr>.<genexpr> at 0x7f2dee97ebd0>,
 <generator object <genexpr>.<genexpr> at 0x7f2dee97ec50>,
 <generator object <genexpr>.<genexpr> at 0x7f2dee97ecd0>,
 <generator object <genexpr>.<genexpr> at 0x7f2dee97ed50>,
 <generator object <genexpr>.<genexpr> at 0x7f2dee97edd0>,
 <generator object <genexpr>.<genexpr> at 0x7f2dee97ee50>,
 <generator object <genexpr>.<genexpr> at 0x7f2dee97eed0>,
 <generator object <genexpr>.<genexpr> at 0x7f2dee97ef50>,
 <generator object <genexpr>.<genexpr> at 0x7f2deee58050>,
 <generator object <genexpr>.<genexpr> at 0x7f2deee580d0>]

In [None]:
table_rows = [list(gen) for gen in table]
table_rows

[[10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
 [10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
 [10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
 [10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
 [10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
 [10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
 [10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
 [10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
 [10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
 [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]]

In [None]:
start = 1
stop = 10

mult_list = ( [i * j 
               for j in range(start, stop+1)]
             for i in range(start, stop+1))

In [None]:
for item in mult_list:
    print(item)

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
[2, 4, 6, 8, 10, 12, 14, 16, 18, 20]
[3, 6, 9, 12, 15, 18, 21, 24, 27, 30]
[4, 8, 12, 16, 20, 24, 28, 32, 36, 40]
[5, 10, 15, 20, 25, 30, 35, 40, 45, 50]
[6, 12, 18, 24, 30, 36, 42, 48, 54, 60]
[7, 14, 21, 28, 35, 42, 49, 56, 63, 70]
[8, 16, 24, 32, 40, 48, 56, 64, 72, 80]
[9, 18, 27, 36, 45, 54, 63, 72, 81, 90]
[10, 20, 30, 40, 50, 60, 70, 80, 90, 100]


In [None]:
k = [k**2 for k in range(10) yield k]


SyntaxError: invalid syntax (<ipython-input-188-8bffa2ee2b54>, line 1)

```
1
1 1
1 2 1
1 3 3 1
1 4 6 4 1
```

We just need to know how to calculate combinations:
```
C(n, k) = n! / (k! (n-k)!)
```

* row 0, column 0: n=0, k=0: c(0, 0) = 0! / 0! 0! = 1/1 = 1
* row 4, column 2: n=4, k=2: c(4, 2) = 4! / 2! 2! = 4x3x2 / 2x2 = 6

In other words, we need to calculate the following list of lists:
```
c(0,0)
c(1,0) c(1,1)
c(2,0) c(2,1) c(2,2)
c(3,0) c(3,1) c(3,2) c(3,3)
...
```

In [None]:
from math import factorial

def combo(n, k):
    """Return C(n, k) = n! / (k! (n-k)!) using exact integer arithmetic."""
    numerator = factorial(n)
    denominator = factorial(k) * factorial(n - k)
    return numerator // denominator

size = 10  # global variable
pascal = [ [combo(n, k) for k in range(n+1)] for n in range(size+1) ]

In [None]:
pascal

[[1],
 [1, 1],
 [1, 2, 1],
 [1, 3, 3, 1],
 [1, 4, 6, 4, 1],
 [1, 5, 10, 10, 5, 1],
 [1, 6, 15, 20, 15, 6, 1],
 [1, 7, 21, 35, 35, 21, 7, 1],
 [1, 8, 28, 56, 70, 56, 28, 8, 1],
 [1, 9, 36, 84, 126, 126, 84, 36, 9, 1],
 [1, 10, 45, 120, 210, 252, 210, 120, 45, 10, 1]]

In [None]:
size = 10  # global variable
pascal = ( (combo(n, k) for k in range(n+1)) for n in range(size+1) )

In [None]:
[list(row) for row in pascal]

[[1],
 [1, 1],
 [1, 2, 1],
 [1, 3, 3, 1],
 [1, 4, 6, 4, 1],
 [1, 5, 10, 10, 5, 1],
 [1, 6, 15, 20, 15, 6, 1],
 [1, 7, 21, 35, 35, 21, 7, 1],
 [1, 8, 28, 56, 70, 56, 28, 8, 1],
 [1, 9, 36, 84, 126, 126, 84, 36, 9, 1],
 [1, 10, 45, 120, 210, 252, 210, 120, 45, 10, 1]]

In [None]:
from timeit import timeit

In [None]:
size = 600

timeit('[[combo(n, k) for k in range(n+1)] for n in range(size+1)]',
      globals=globals(), number=1)

3.7352321669459343

In [None]:
n = 40000000
timeit('((combo(n, k) for k in range(n+1)) for n in range(size+1))',
      globals=globals(), number=1)

6.210990250110626e-06

In [None]:
n = 10
((combo(n, k) for k in range(n+1)))

<generator object <genexpr> at 0x7f2deee58950>

In [None]:
def pascal_list(size):
    """Build Pascal's triangle (rows 0..size) eagerly as nested lists, then walk every entry."""
    triangle = [[combo(n, k) for k in range(n+1)] for n in range(size+1)]
    for triangle_row in triangle:
        for _ in triangle_row:
            pass


In [None]:
def pascal_gen(size):
    """Walk every entry of Pascal's triangle (rows 0..size) using nested generator expressions."""
    lazy_rows = ((combo(n, k) for k in range(n+1)) for n in range(size+1))
    for lazy_row in lazy_rows:
        for _ in lazy_row:
            pass

In [None]:
size = 600
timeit('pascal_list(size)', globals=globals(), number=1)

3.6716219102963805

In [None]:
size = 600
timeit('pascal_gen(size)', globals=globals(), number=1)

3.782093777321279

In [None]:
import tracemalloc

In [None]:
def pascal_list(size):
    """Build Pascal's triangle eagerly, iterate it, then print a memory figure.

    The whole nested list `l` is still referenced when the snapshot is
    taken. Prints the `size` of the first per-line statistic; per the
    tracemalloc documentation statistics are sorted largest first.
    NOTE(review): take_snapshot() needs tracing to be active - the caller
    is expected to have called tracemalloc.start() beforehand.
    """
    l = [[combo(n, k) for k in range(n+1)] for n in range(size+1)]
    # Touch every entry so the work matches the generator variant.
    for row in l:
        for item in row:
            pass
    # 'lineno' groups allocations by the source line that made them.
    stats = tracemalloc.take_snapshot().statistics('lineno')
    print(stats[0].size, 'bytes')

In [None]:
def pascal_gen(size):
    """Iterate Pascal's triangle lazily, then print a memory figure.

    Rows and entries come from nested generator expressions, so no full
    triangle is ever stored. Prints the `size` of the first per-line
    statistic; per the tracemalloc documentation statistics are sorted
    largest first.
    NOTE(review): take_snapshot() needs tracing to be active - the caller
    is expected to have called tracemalloc.start() beforehand.
    """
    g = ((combo(n, k) for k in range(n+1)) for n in range(size+1))
    # Consume each row generator as soon as the outer generator produces it.
    for row in g:
        for item in row:
            pass
    # 'lineno' groups allocations by the source line that made them.
    stats = tracemalloc.take_snapshot().statistics('lineno')
    print(stats[0].size, 'bytes')

In [None]:
tracemalloc.stop()
tracemalloc.clear_traces()
tracemalloc.start()
pascal_list(300)

1998608 bytes


In [None]:
tracemalloc.stop()
tracemalloc.clear_traces()
tracemalloc.start()
pascal_gen(300)

1168 bytes


# Yield From

In [None]:
def matrix(n):
    """Return a generator of row generators for the n x n multiplication table.

    Each row generator reads the outer loop variable lazily, so a row must
    be consumed before the outer generator advances to the next one.
    """
    return (
        (row * col for col in range(1, n + 1))
        for row in range(1, n + 1)
    )

In [None]:
m = list(matrix(5))

In [None]:
m

[<generator object matrix.<locals>.<genexpr>.<genexpr> at 0x7f2deea8fa50>,
 <generator object matrix.<locals>.<genexpr>.<genexpr> at 0x7f2deea8fb50>,
 <generator object matrix.<locals>.<genexpr>.<genexpr> at 0x7f2deea8fbd0>,
 <generator object matrix.<locals>.<genexpr>.<genexpr> at 0x7f2deea8f250>,
 <generator object matrix.<locals>.<genexpr>.<genexpr> at 0x7f2deea8fad0>]

In [None]:
def matrix_iterator(n):
    """Flatten matrix(n) into a single stream of values, row by row."""
    for current_row in matrix(n):
        for value in current_row:
            yield value

In [None]:
for i in matrix_iterator(3):
    print(i)

1
2
3
2
4
6
3
6
9


In [None]:
def matrix_iterator(n):
    """Explicit nested-loop version: yield every value of matrix(n) in row order."""
    for line in matrix(n):
        for cell in line:
            yield cell
            
def matrix_iterator(n):
    """`yield from` version: delegate to each row of matrix(n) in turn."""
    for row_values in matrix(n):
        yield from row_values

In [None]:
for i in matrix_iterator(3):
    print(i)

1
2
3
2
4
6
3
6
9


As you can see we obtain the same result.

We can think of 
```
yield from <iterator>
```
as a replacement for the code:
```
for i in <iterator>:
    yield i
```

In [None]:
# Read every brand from the three files into one list, without the trailing newlines.
brands = []

for file_name in ('car-brands-1.txt', 'car-brands-2.txt', 'car-brands-3.txt'):
    with open(file_name, encoding="latin-1") as f:
        for brand in f:
            brands.append(brand.strip('\n'))

In [None]:
for brand in brands:
    print(brand, end=', ')

Alfa Romeo, Aston Martin, Audi, Bentley, Benz, BMW, Bugatti, Cadillac, Chevrolet, Chrysler, Citroën, Corvette, DAF, Dacia, Daewoo, Daihatsu, Datsun, De Lorean, Dino, Dodge, Farboud, Ferrari, Fiat, Ford, Honda, Hummer, Hyundai, Jaguar, Jeep, KIA, Koenigsegg, Lada, Lamborghini, Lancia, Land Rover, Lexus, Ligier, Lincoln, Lotus, Martini, Maserati, Maybach, Mazda, McLaren, Mercedes-Benz, Mini, Mitsubishi, Nissan, Noble, Opel, Peugeot, Pontiac, Porsche, Renault, Rolls-Royce, Saab, Seat, Å koda, Smart, Spyker, Subaru, Suzuki, Toyota, Vauxhall, Volkswagen, Volvo, 

In [None]:
def brands(*files):
    """Yield every line of each named file in order, with newline characters stripped.

    Files are opened one at a time (latin-1) and only while being read.
    """
    for path in files:
        with open(path, encoding="latin-1") as handle:
            yield from (raw.strip('\n') for raw in handle)

In [None]:
files = 'car-brands-1.txt', 'car-brands-2.txt', 'car-brands-3.txt'
for brand in brands(*files):
    print(brand, end = ', ')

Alfa Romeo, Aston Martin, Audi, Bentley, Benz, BMW, Bugatti, Cadillac, Chevrolet, Chrysler, Citroën, Corvette, DAF, Dacia, Daewoo, Daihatsu, Datsun, De Lorean, Dino, Dodge, Farboud, Ferrari, Fiat, Ford, Honda, Hummer, Hyundai, Jaguar, Jeep, KIA, Koenigsegg, Lada, Lamborghini, Lancia, Land Rover, Lexus, Ligier, Lincoln, Lotus, Martini, Maserati, Maybach, Mazda, McLaren, Mercedes-Benz, Mini, Mitsubishi, Nissan, Noble, Opel, Peugeot, Pontiac, Porsche, Renault, Rolls-Royce, Saab, Seat, Å koda, Smart, Spyker, Subaru, Suzuki, Toyota, Vauxhall, Volkswagen, Volvo, 

In [None]:
def brands(*files):
    """Explicit-loop version: yield each line of each file with its newline removed."""
    for name in files:
        with open(name, encoding="latin-1") as source:
            for text in source:
                cleaned = text.strip('\n')
                yield cleaned

def brands(*files):
    """`yield from` version: delegate straight to each open file.

    Lines are passed through untouched, so each one keeps its trailing
    newline.
    """
    for path in files:
        with open(path,  encoding="latin-1") as handle:
            yield from handle

In [None]:
for brand in brands(*files):
    print(brand, end=', ')

Alfa Romeo
, Aston Martin
, Audi
, Bentley
, Benz
, BMW
, Bugatti
, Cadillac
, Chevrolet
, Chrysler
, Citroën
, Corvette
, DAF
, Dacia
, Daewoo
, Daihatsu
, Datsun
, De Lorean
, Dino
, Dodge, Farboud
, Ferrari
, Fiat
, Ford
, Honda
, Hummer
, Hyundai
, Jaguar
, Jeep
, KIA
, Koenigsegg
, Lada
, Lamborghini
, Lancia
, Land Rover
, Lexus
, Ligier
, Lincoln
, Lotus
, Martini, Maserati
, Maybach
, Mazda
, McLaren
, Mercedes-Benz
, Mini
, Mitsubishi
, Nissan
, Noble
, Opel
, Peugeot
, Pontiac
, Porsche
, Renault
, Rolls-Royce
, Saab
, Seat
, Å koda
, Smart
, Spyker
, Subaru
, Suzuki
, Toyota
, Vauxhall
, Volkswagen
, Volvo, 

In [None]:
def gen_clean_read(file):
    """Yield the lines of one latin-1 text file with newline characters stripped."""
    with open(file, encoding="latin-1") as handle:
        yield from (raw.strip('\n') for raw in handle)

f1 = gen_clean_read('car-brands-1.txt')
for line in f1:
    print(line, end=', ')

Alfa Romeo, Aston Martin, Audi, Bentley, Benz, BMW, Bugatti, Cadillac, Chevrolet, Chrysler, Citroën, Corvette, DAF, Dacia, Daewoo, Daihatsu, Datsun, De Lorean, Dino, Dodge, 

In [None]:
files = 'car-brands-1.txt', 'car-brands-2.txt', 'car-brands-3.txt'

In [None]:
def brands(*files):
    """Chain the cleaned lines of every given file by delegating to gen_clean_read."""
    for path in files:
        cleaned_lines = gen_clean_read(path)
        yield from cleaned_lines

In [None]:
for brand in brands(*files):
    print(brand, end=', ')

Alfa Romeo, Aston Martin, Audi, Bentley, Benz, BMW, Bugatti, Cadillac, Chevrolet, Chrysler, Citroën, Corvette, DAF, Dacia, Daewoo, Daihatsu, Datsun, De Lorean, Dino, Dodge, Farboud, Ferrari, Fiat, Ford, Honda, Hummer, Hyundai, Jaguar, Jeep, KIA, Koenigsegg, Lada, Lamborghini, Lancia, Land Rover, Lexus, Ligier, Lincoln, Lotus, Martini, Maserati, Maybach, Mazda, McLaren, Mercedes-Benz, Mini, Mitsubishi, Nissan, Noble, Opel, Peugeot, Pontiac, Porsche, Renault, Rolls-Royce, Saab, Seat, Å koda, Smart, Spyker, Subaru, Suzuki, Toyota, Vauxhall, Volkswagen, Volvo, 

# Aggregators

In [None]:
def squares(n):
    """Yield the squares of the integers 0 .. n-1."""
    yield from (value ** 2 for value in range(n))

In [None]:
list(squares(5))

[0, 1, 4, 9, 16]

In [None]:
min(squares(5))

0

In [None]:
max(squares(5))

16

In [None]:
sq = squares(5)

max(sq)

16

In [None]:
max(sq)

ValueError: max() arg is an empty sequence

In [None]:
list(squares(5))

[0, 1, 4, 9, 16]

In [None]:
sum(squares(5))

30

In [None]:
prod(squares(5))

NameError: name 'prod' is not defined

In [None]:
import sys

sys.version

'3.7.9 (default, Oct 13 2020, 21:10:49) \n[GCC 8.3.0]'

# Any

The `any` function is a predicate (a function that returns `True` or `False`) that takes an iterable and returns `True` if **any** elements of that iterable are True (or have an associated True truth-value, i.e. **truthy**).

Remember that by default custom objects are always truthy:

In [None]:
class Person:
    """Empty class: defines neither __bool__ nor __len__, so instances are truthy."""

In [None]:
p = Person()
bool(p)


True

In [None]:
class MySeq:
    """Minimal sequence-like class that prints which special method gets called.

    It has no __bool__, so truth testing falls back to __len__.
    """

    def __init__(self, n):
        print('init')
        self.n = n

    def __len__(self):
        print('len')
        size = self.n
        return size

    def __getitem__(self, s):
        # Only reports the call; no element is produced.
        print('getitem')
        return None

In [None]:
my_seq = MySeq(0)

init


In [None]:
bool(my_seq)

len


False

In [None]:
my_seq = MySeq(10)

init


In [None]:
bool(my_seq)

len


True

In [None]:
any([0, '', None])

False

In [None]:
any([0, '', None, 'hello'])

True

# All

The `all` function is very similar to the `any` function, but it determines if **all** the elements of the iterable are truthy.

Basically it is equivalent to doing an `and` between all the elements of the iterable and casting the result to a Boolean.

In [None]:
all([1, 'abc', [1, 2], range(5)])

True

In [None]:
all([1, 'abc', [1, 2], range(5), ''])

False

In [None]:
from numbers import Number

In [None]:
isinstance(10, Number), isinstance(10.5, Number)

(True, True)

In [None]:
isinstance(2+3j, Number)

True

In [None]:
from decimal import Decimal

isinstance(Decimal('10.3'), Number)

True

In [None]:
isinstance(True, Number)

True

In [None]:
isinstance('100', Number)

False

In [None]:
isinstance([10, 20], Number)

False

In [None]:
l = [10, 20, 30, 40]

is_all_numbers = True
for item in l:
    if not isinstance(item, Number):
        is_all_numbers = False
        break
print(is_all_numbers)

True


In [None]:
l = [10, 20, 30, 40, 'hello']

is_all_numbers = True
for item in l:
    if not isinstance(item, Number):
        is_all_numbers = False
        break
print(is_all_numbers)

False


In [None]:
l = [10, 20, 30, 40, 'hello']
is_all_numbers = False
for item in l:
    if not isinstance(item, Number):
        break
else: # nobreak --> all numbers
    is_all_numbers = True
print(is_all_numbers)

False


In [None]:
map(str, [0, 1, 2, 3, 4])

<map at 0x7f2dee7a4450>

In [None]:
list(map(str, [0, 1, 2, 3, 4]))

['0', '1', '2', '3', '4']

In [None]:
def is_number(x):
    """Return True if x is an instance of numbers.Number, else False.

    Number covers int, float, complex, Decimal and - because bool
    subclasses int - True and False as well.
    """
    # The builtin is spelled `isinstance`; `is_instance` does not exist.
    return isinstance(x, Number)

In [None]:
lambda x: isinstance(x, Number)

<function __main__.<lambda>(x)>

In [None]:
l

[10, 20, 30, 40, 'hello']

In [None]:
# Map the predicate over every element to see each individual result.
l = [10, 20, 30, 40, 'hello']
list(map(lambda x: isinstance(x, Number), l))

[True, True, True, True, False]

In [None]:
l = [10, 20, 30, 40, 'hello']
all(map(lambda x: isinstance(x, Number), l))

False

In [None]:
l = [10, 20, 30, 40]
all(map(lambda x: isinstance(x, Number), l))

True

In [None]:
l = [10, 20, 30, 40]
all(isinstance(x, Number) for x in l)

True

In [None]:
l = [10, 20, 30, 40, 'hello']
is_all_numbers = False
for item in l:
    if not isinstance(item, Number):
        break
else: # nobreak --> all numbers
    is_all_numbers = True
print(is_all_numbers)


l = [10, 20, 30, 40, 'hello']
all(isinstance(x, Number) for x in l)

False

In [None]:
with open('car-brands.txt', encoding="latin-1" ) as f:
    for row in f:
        print(len(row), row, end='')

11 Alfa Romeo
13 Aston Martin
5 Audi
8 Bentley
5 Benz
4 BMW
8 Bugatti
9 Cadillac
10 Chevrolet
9 Chrysler
8 Citroën
9 Corvette
4 DAF
6 Dacia
7 Daewoo
9 Daihatsu
7 Datsun
10 De Lorean
5 Dino
5 Dodge

In [None]:
with open('car-brands.txt', encoding="latin-1") as f:
    result = all(map(lambda row: len(row) >= 3, f))
print(result)

True


In [None]:
with open('car-brands.txt', encoding="latin-1") as f:
    result = any(map(lambda row: len(row) > 10, f))
print(result)

True


# Slicing Iterables

In [None]:
l = [1, 2, 3, 4, 5]

In [None]:
l[0:2]

[1, 2]

In [None]:
l[None:None]

[1, 2, 3, 4, 5]

In [None]:
s = slice(0, 2)

In [None]:
l[s]

[1, 2]

In [None]:
import math

def factorials(n):
    """Generate 0!, 1!, ..., (n-1)! one value at a time."""
    index = 0
    while index < n:
        yield math.factorial(index)
        index += 1

In [None]:
facts = factorials(100)

In [None]:
facts[0:2]

TypeError: 'generator' object is not subscriptable

In [None]:
def slice_(iterable, start, stop):
    """Lazily yield the items of `iterable` at positions start .. stop-1.

    Works on any iterable (not only iterators) and simply ends early if
    the input runs out before `stop` is reached.

    Args:
        iterable: any iterable; an iterator is consumed as items are read.
        start: number of leading items to skip.
        stop: position (exclusive) at which to stop yielding.
    """
    # iter() lets lists, ranges, etc. be passed in; an iterator passed in
    # is returned unchanged, so it is still consumed in place.
    iterator = iter(iterable)

    # A StopIteration escaping a generator body becomes RuntimeError
    # (PEP 479), so exhaustion has to be handled explicitly.
    for _ in range(0, start):
        try:
            next(iterator)
        except StopIteration:
            return

    for _ in range(start, stop):
        try:
            item = next(iterator)
        except StopIteration:
            return
        yield item

In [None]:
list(slice_(factorials(100), 1, 5))

[1, 2, 6, 24]

In [None]:
list(factorials(10))

[1, 1, 2, 6, 24, 120, 720, 5040, 40320, 362880]

In [None]:
from itertools import islice

In [None]:
islice(factorials(10), 0, 3)

<itertools.islice at 0x7f2dee9d47d0>

In [None]:
list(islice(factorials(10), 0, 3))

[1, 1, 2]

In [None]:
list(islice(factorials(10), 0, 10, 2))

[1, 2, 24, 720, 40320]

In [None]:
list(islice(factorials(10), None, None, 2))

[1, 2, 24, 720, 40320]

In [None]:
def factorials():
    """Infinite generator of 0!, 1!, 2!, ... - callers decide when to stop."""
    n = 0
    while True:
        value = math.factorial(n)
        n += 1
        yield value

In [None]:
facts = factorials()
for _ in range(5):
    print(next(facts))

1
1
2
6
24


In [None]:
list(islice(factorials(), 5))

[1, 1, 2, 6, 24]

In [None]:
def factorials():
    """Infinite factorial generator that announces each value before yielding it."""
    index = 0
    while True:
        # The message appears whenever a consumer pulls a value.
        print(f'yielding factorial({index})...')
        value = math.factorial(index)
        index += 1
        yield value

list(islice(factorials(), 9))

yielding factorial(0)...
yielding factorial(1)...
yielding factorial(2)...
yielding factorial(3)...
yielding factorial(4)...
yielding factorial(5)...
yielding factorial(6)...
yielding factorial(7)...
yielding factorial(8)...


[1, 1, 2, 6, 24, 120, 720, 5040, 40320]

In [None]:
list(islice(factorials(), None, 10, 2))

yielding factorial(0)...
yielding factorial(1)...
yielding factorial(2)...
yielding factorial(3)...
yielding factorial(4)...
yielding factorial(5)...
yielding factorial(6)...
yielding factorial(7)...
yielding factorial(8)...
yielding factorial(9)...


[1, 2, 24, 720, 40320]

In [None]:
list(islice(factorials(), 5, 10))

yielding factorial(0)...
yielding factorial(1)...
yielding factorial(2)...
yielding factorial(3)...
yielding factorial(4)...
yielding factorial(5)...
yielding factorial(6)...
yielding factorial(7)...
yielding factorial(8)...
yielding factorial(9)...


[120, 720, 5040, 40320, 362880]

In [None]:
l = [1, 2, 3, 4, 5]

In [None]:
s = islice(l, 0, 3)

In [None]:
list(s)

[1, 2, 3]

In [None]:
list(s)

[]

In [None]:
facts = factorials()

In [None]:
next(facts), next(facts), next(facts), next(facts)

yielding factorial(0)...
yielding factorial(1)...
yielding factorial(2)...
yielding factorial(3)...


(1, 1, 2, 6)

In [None]:
list(islice(facts, 0, 3))

yielding factorial(4)...
yielding factorial(5)...
yielding factorial(6)...


[24, 120, 720]

In [None]:
next(facts)

yielding factorial(7)...


5040

In [None]:
def gen_cubes(n):
    """Yield i ** 3 for i in range(n), printing a trace line before each value."""
    i = 0
    while i < n:
        print(f'yielding {i}')
        yield i ** 3
        i += 1

In [None]:
def is_odd(x):
    """Return True when x leaves a remainder of exactly 1 on division by 2."""
    remainder = x % 2
    return remainder == 1

In [None]:
is_odd(4), is_odd(81)

(False, True)

In [None]:
filtered = filter(is_odd, gen_cubes(10))

In [None]:
list(filtered)

yielding 0
yielding 1
yielding 2
yielding 3
yielding 4
yielding 5
yielding 6
yielding 7
yielding 8
yielding 9


[1, 27, 125, 343, 729]

In [None]:
def is_even(x):
    """Return True when x is exactly divisible by 2."""
    remainder = x % 2
    return remainder == 0

In [None]:
list(filter(is_even, gen_cubes(10)))

yielding 0
yielding 1
yielding 2
yielding 3
yielding 4
yielding 5
yielding 6
yielding 7
yielding 8
yielding 9


[0, 8, 64, 216, 512]

In [None]:
from itertools import filterfalse

In [None]:
evens = filterfalse(is_odd, gen_cubes(10))
list(evens)

yielding 0
yielding 1
yielding 2
yielding 3
yielding 4
yielding 5
yielding 6
yielding 7
yielding 8
yielding 9


[0, 8, 64, 216, 512]

In [None]:
from math import sin, pi

def sine_wave(n):
    """Yield n samples of sin(x), rounded to 2 decimals, evenly spaced over [0, 2*pi].

    Args:
        n: number of samples. n == 1 yields the single sample sin(0);
           n <= 0 yields nothing.
    """
    start = 0
    max_ = 2 * pi
    # n - 1 intervals separate n samples; a single sample has no interval,
    # so skip the division that would otherwise be a division by zero.
    step = (max_ - start) / (n - 1) if n > 1 else 0.0
    for _ in range(n):
        yield round(sin(start), 2)
        start += step

In [None]:
list(sine_wave(15))

[0.0,
 0.43,
 0.78,
 0.97,
 0.97,
 0.78,
 0.43,
 0.0,
 -0.43,
 -0.78,
 -0.97,
 -0.97,
 -0.78,
 -0.43,
 -0.0]

In [None]:
from itertools import takewhile

list(takewhile(lambda x: 0 <= x <= 0.9, sine_wave(15)))

[0.0, 0.43, 0.78]

In [None]:
list(filter(lambda x: 0 <= x <= 0.9, sine_wave(15)))

[0.0, 0.43, 0.78, 0.78, 0.43, 0.0, -0.0]

In [None]:
from itertools import dropwhile

In [None]:
l = [1, 3, 5, 2, 1]

In [None]:
list(dropwhile(lambda x: x < 5, l))

[5, 2, 1]

In [None]:
data = ['a', 'b', 'c', 'd', 'e']
selectors = [True, False, 1, 0]

In [None]:
list(zip(data, selectors))

[('a', True), ('b', False), ('c', 1), ('d', 0)]

In [None]:
[item for item, truth_value in zip(data, selectors) if truth_value]

['a', 'c']

In [None]:
from itertools import compress

In [None]:
# [item for item, truth_value in zip(data, selectors) if truth_value]

data = ['a', 'b', 'c', 'd', 'e']
selectors = [True, False, 1, 0]
list(compress(data, selectors))

['a', 'c']

In [None]:
from itertools import (
    count,
    cycle,
    repeat, 
    islice)

In [None]:
g = count(10)

In [None]:
next(g)
f = count(10)
for i in range(10):
    next(f)
f

count(20)

In [None]:
list(islice(g, 5))

[13, 14, 15, 16, 17]

In [None]:
g = count(10, step=2)

In [None]:
list(islice(g, 5))

[10, 12, 14, 16, 18]

In [None]:
g = count(10.5, 0.5)

In [None]:
list(islice(g, 5))

[10.5, 11.0, 11.5, 12.0, 12.5]

In [None]:
g = count(1+1j, 1+2j)

In [None]:
list(islice(g, 5))

[(1+1j), (2+3j), (3+5j), (4+7j), (5+9j)]

In [None]:
from decimal import Decimal

In [None]:
g = count(Decimal('0.0'), Decimal('0.1'))

In [None]:
list(islice(g, 5))

[Decimal('0.0'),
 Decimal('0.1'),
 Decimal('0.2'),
 Decimal('0.3'),
 Decimal('0.4')]

In [None]:
g = cycle(('red', 'green', 'blue'))

In [None]:
list(islice(g, 8))

['red', 'green', 'blue', 'red', 'green', 'blue', 'red', 'green']

In [None]:
def colors():
    """Generate the color names 'red', 'green' and 'blue', in that order.

    Every call returns a fresh generator; once it has been iterated to the
    end it is exhausted and produces nothing further.
    """
    yield from ('red', 'green', 'blue')

In [None]:
cols = colors()

In [None]:
list(cols)

['red', 'green', 'blue']

In [None]:
list(cols)

[]

In [None]:
cols = colors()
# cycle keeps its own copy of the items it sees on the first pass, so it can
# keep repeating them even though cols itself is exhausted after one pass.
g = cycle(cols)

In [None]:
list(islice(g, 10))

['red', 'green', 'blue', 'red', 'green', 'blue', 'red', 'green', 'blue', 'red']

In [None]:
g

<itertools.cycle at 0x7f2def526780>

In [None]:
from collections import namedtuple

In [None]:
Card = namedtuple('Card', 'rank suit')

In [None]:
def card_deck():
    """Lazily yield the 52 cards of a standard deck as Card named tuples.

    Cards are produced suit by suit (Spades, Hearts, Diamonds, Clubs) and,
    within each suit, by rank from '2' up to '10' followed by J, Q, K, A.
    Ranks are strings.
    """
    number_ranks = [str(value) for value in range(2, 11)]
    face_ranks = ['J', 'Q', 'K', 'A']
    rank_order = tuple(number_ranks + face_ranks)
    suit_order = ('Spades', 'Hearts', 'Diamonds', 'Clubs')
    # suit is the outer loop, so one complete suit is produced before the next
    yield from (
        Card(rank_name, suit_name)
        for suit_name in suit_order
        for rank_name in rank_order
    )

In [None]:
hands = [list() for _ in range(4)]

hands

[[], [], [], []]

In [None]:
# Deal round-robin by hand: keep a running index and wrap it with modulo
# so it walks through hands 0, 1, 2, 3, 0, 1, ...
index = 0
for card in card_deck():
    index = index % 4  # wrap back to the first hand after the fourth
    hands[index].append(card)
    index += 1

In [None]:
hands

[[Card(rank='2', suit='Spades'),
  Card(rank='6', suit='Spades'),
  Card(rank='10', suit='Spades'),
  Card(rank='A', suit='Spades'),
  Card(rank='5', suit='Hearts'),
  Card(rank='9', suit='Hearts'),
  Card(rank='K', suit='Hearts'),
  Card(rank='4', suit='Diamonds'),
  Card(rank='8', suit='Diamonds'),
  Card(rank='Q', suit='Diamonds'),
  Card(rank='3', suit='Clubs'),
  Card(rank='7', suit='Clubs'),
  Card(rank='J', suit='Clubs')],
 [Card(rank='3', suit='Spades'),
  Card(rank='7', suit='Spades'),
  Card(rank='J', suit='Spades'),
  Card(rank='2', suit='Hearts'),
  Card(rank='6', suit='Hearts'),
  Card(rank='10', suit='Hearts'),
  Card(rank='A', suit='Hearts'),
  Card(rank='5', suit='Diamonds'),
  Card(rank='9', suit='Diamonds'),
  Card(rank='K', suit='Diamonds'),
  Card(rank='4', suit='Clubs'),
  Card(rank='8', suit='Clubs'),
  Card(rank='Q', suit='Clubs')],
 [Card(rank='4', suit='Spades'),
  Card(rank='8', suit='Spades'),
  Card(rank='Q', suit='Spades'),
  Card(rank='3', suit='Hearts'),


In [None]:
hands = [list() for _ in range(4)]

In [None]:
# Same round-robin dealing, but cycle produces the repeating 0, 1, 2, 3
# hand indices, so no manual counter or modulo is needed.
index_cycle = cycle([0, 1, 2, 3])
for card in card_deck():
    hands[next(index_cycle)].append(card)

In [None]:
hands

[[Card(rank='2', suit='Spades'),
  Card(rank='6', suit='Spades'),
  Card(rank='10', suit='Spades'),
  Card(rank='A', suit='Spades'),
  Card(rank='5', suit='Hearts'),
  Card(rank='9', suit='Hearts'),
  Card(rank='K', suit='Hearts'),
  Card(rank='4', suit='Diamonds'),
  Card(rank='8', suit='Diamonds'),
  Card(rank='Q', suit='Diamonds'),
  Card(rank='3', suit='Clubs'),
  Card(rank='7', suit='Clubs'),
  Card(rank='J', suit='Clubs')],
 [Card(rank='3', suit='Spades'),
  Card(rank='7', suit='Spades'),
  Card(rank='J', suit='Spades'),
  Card(rank='2', suit='Hearts'),
  Card(rank='6', suit='Hearts'),
  Card(rank='10', suit='Hearts'),
  Card(rank='A', suit='Hearts'),
  Card(rank='5', suit='Diamonds'),
  Card(rank='9', suit='Diamonds'),
  Card(rank='K', suit='Diamonds'),
  Card(rank='4', suit='Clubs'),
  Card(rank='8', suit='Clubs'),
  Card(rank='Q', suit='Clubs')],
 [Card(rank='4', suit='Spades'),
  Card(rank='8', suit='Spades'),
  Card(rank='Q', suit='Spades'),
  Card(rank='3', suit='Hearts'),


In [None]:
hands = [list() for _ in range(4)]

# Cycle over the hand lists themselves: cycle hands back the same list
# objects (not copies), so appending to next(hands_cycle) fills hands.
hands_cycle = cycle(hands)
for card in card_deck():
    next(hands_cycle).append(card)

In [None]:
hands

[[Card(rank='2', suit='Spades'),
  Card(rank='6', suit='Spades'),
  Card(rank='10', suit='Spades'),
  Card(rank='A', suit='Spades'),
  Card(rank='5', suit='Hearts'),
  Card(rank='9', suit='Hearts'),
  Card(rank='K', suit='Hearts'),
  Card(rank='4', suit='Diamonds'),
  Card(rank='8', suit='Diamonds'),
  Card(rank='Q', suit='Diamonds'),
  Card(rank='3', suit='Clubs'),
  Card(rank='7', suit='Clubs'),
  Card(rank='J', suit='Clubs')],
 [Card(rank='3', suit='Spades'),
  Card(rank='7', suit='Spades'),
  Card(rank='J', suit='Spades'),
  Card(rank='2', suit='Hearts'),
  Card(rank='6', suit='Hearts'),
  Card(rank='10', suit='Hearts'),
  Card(rank='A', suit='Hearts'),
  Card(rank='5', suit='Diamonds'),
  Card(rank='9', suit='Diamonds'),
  Card(rank='K', suit='Diamonds'),
  Card(rank='4', suit='Clubs'),
  Card(rank='8', suit='Clubs'),
  Card(rank='Q', suit='Clubs')],
 [Card(rank='4', suit='Spades'),
  Card(rank='8', suit='Spades'),
  Card(rank='Q', suit='Spades'),
  Card(rank='3', suit='Hearts'),


In [None]:
# Without a count, repeat yields the same value indefinitely, so take a
# fixed number of items with next() rather than materializing it.
g = repeat('Python')
for _ in range(5):
    print(next(g))

Python
Python
Python
Python
Python


In [None]:
g = repeat('Python', 4)

In [None]:
list(g)

['Python', 'Python', 'Python', 'Python']

In [None]:
l = [1, 2, 3]

In [None]:
# repeat yields the very same object each time: result holds three
# references to l, not three independent copies.
result = list(repeat(l, 3))

In [None]:
result

[[1, 2, 3], [1, 2, 3], [1, 2, 3]]

In [None]:
l is result[0], l is result[1], l is result[2]

(True, True, True)

In [None]:
result[0], result[1], result[2]

([1, 2, 3], [1, 2, 3], [1, 2, 3])

In [None]:
result[0][0] = 100

In [None]:
print(result[0], result[1], result[2])

[100, 2, 3] [100, 2, 3] [100, 2, 3]


In [None]:
l = [1, 2, 3]
# item[:] makes a shallow copy of each repeated reference, so every
# element of result is a distinct list.
result = [item[:] for item in repeat(l, 3)]

In [None]:
result

[[1, 2, 3], [1, 2, 3], [1, 2, 3]]

In [None]:
l is result[0], l is result[1], l is result[2]

(False, False, False)

In [None]:
result[0][0] = 100

result

[[100, 2, 3], [1, 2, 3], [1, 2, 3]]

# Assignment

For this project you are given a file that contains some parking ticket violations for NYC.

(It's just a tiny extract!)

If you're wondering where I get these data sets, Kaggle is an **excellent** source of data sets on a wide variety of topics:
https://www.kaggle.com/

You have to sign up, but it's free.

If you want the full data set, it's available here: https://www.kaggle.com/new-york-city/nyc-parking-tickets/version/2#


For this sample data set, the file is named: 
```
nyc_parking_tickets_extract.csv
```


What are your goals?

##### Goal 1
Create a lazy iterator that returns a named tuple of the data in each row. Each field should have an appropriate data type: for example, if a column holds dates, store dates in the named tuple; if a field is an integer, store it as an integer; and so on.

##### Goal 2

Calculate the number of violations by car make.

##### Note:
Try to use lazy evaluation as much as possible. It may not always be possible, and that's OK, as long as eager evaluation is kept to a minimum.
