# Generators

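What are generators: functions that use `yield` to produce their results one at a time, on request, instead of building and returning a complete list.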

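Advantages over lists? A generator does not hold all of its results in memory at once, which saves memory when the sequence is large.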

In [4]:
# plain function that builds and returns the complete list of squares
def square_numbers(numbers):
    """Return a new list holding the square of every item in ``numbers``.

    The items are visited in iteration order and the whole result list is
    built before it is returned.
    """
    squares = []
    for value in numbers:
        squares.append(value * value)
    return squares

# input sequence: the integers 1 through 5
my_nums = range(1, 6)
print(my_nums)

# square every element with the list-returning function
my_nums_squared = square_numbers(my_nums)
print(my_nums_squared)

[1, 2, 3, 4, 5]
[1, 4, 9, 16, 25]


In [7]:
# the same list of squares can be built in one line with a list comprehension
my_nums_squared_lc = [num * num for num in my_nums]
print(my_nums_squared_lc)

[1, 4, 9, 16, 25]


In [10]:
# generator version of square_numbers: same squares, produced one at a time
def gen_sqaure_numbers(numbers):
    """Yield the square of each item in ``numbers``, one per request.

    No result list is built; each square is computed only when the caller
    asks for the next value.
    """
    for value in numbers:
        # yield (not return): hand back one square, then pause here until the next request
        yield value * value

# calling the generator function computes no squares yet; it returns a generator object
my_num_gen = gen_sqaure_numbers(my_nums)
print(my_num_gen)  # shows the generator object itself, not the squares

<generator object gen_sqaure_numbers at 0x7f32f8c2deb0>


In [11]:
# a generator never holds the entire result in memory: it yields one result,
# then waits until we ask for the next one
print(next(my_num_gen))  # ask for the next value

1


In [12]:
# asking again resumes the same generator and returns the following square
print(next(my_num_gen))

4


In [13]:
# every next(gen) call computes and returns the next result
# calling next(gen) more times than the generator has values raises an error,
# because the generator has been exhausted
# in practice generators are usually consumed with a for loop

# create a fresh generator and loop over it
my_num_gen2 = gen_sqaure_numbers(my_nums)
for num in my_num_gen2:  # one iteration per value the generator yields
    print(num)


1
4
9
16
25


In [18]:
# the example above can also be written on one line as a list comprehension (shown earlier)
# the same syntax with parentheses instead of square brackets creates a generator:
# this is called a generator expression

my_num_gen_ge = (n * n for n in my_nums)
print(my_num_gen_ge)

<generator object <genexpr> at 0x7f32f8be2190>


In [19]:
# list(gen) converts a generator to a list: it computes every value and returns the complete list
print(list(my_num_gen_ge))
# doing so gives up the benefits of a generator, since all values are now held at once

[1, 4, 9, 16, 25]


In [26]:
# generators are better performance-wise because they don't hold all values in memory - this matters for large collections
# let's look at a better example to illustrate the performance difference
import memory_profiler
import random
import time

# pools of sample values that the random person records are drawn from
names = [
    'John', 'Corey', 'Adam',
    'Steve', 'Rick', 'Thomas',
]
majors = [
    'Math', 'Engineering', 'CompSci',
    'Arts', 'Business',
]


# build the complete list of people for a given head count
def people_list(num_people):
    """Return a list of ``num_people`` randomly generated person records.

    Each record is a dict with the keys 'id' (the 0-based position),
    'name' (random pick from the module-level ``names``) and 'major'
    (random pick from the module-level ``majors``). The whole list is
    built before it is returned.
    """
    people_so_far = []
    for person_id in xrange(num_people):
        # the name is drawn before the major, once each per person
        people_so_far.append({
            'id': person_id,
            'name': random.choice(names),
            'major': random.choice(majors),
        })
    return people_so_far
# create a generator for a certain number of people
# note the use of xrange as opposed to range - like a generator, xrange returns an xrange object
# that produces its values lazily, as you need them, instead of building a full list up front
def people_generator(num_people):
    """Lazily yield ``num_people`` randomly generated person records.

    Each record is a dict with the keys 'id' (the 0-based position),
    'name' (random pick from the module-level ``names``) and 'major'
    (random pick from the module-level ``majors``).

    No list is built: one record is created per iteration step and handed
    to the caller before the next one is made. Nothing is yielded when
    ``num_people`` is 0.
    """
    for i in xrange(num_people):
        person = {
                    'id': i,
                    'name': random.choice(names),
                    'major': random.choice(majors)
        }
        # the yield has to sit inside the loop so that every person is produced;
        # placed after the loop it would yield only the last person, once
        yield person
# memory reading taken before the list is built
print('Memory (Before List): {}MB'.format(memory_profiler.memory_usage()))
# time the list version: people_list builds all the records before returning
t1 = time.clock()
people = people_list(1000000)
t2 = time.clock()

print('Memory (AfterList): {}MB'.format(memory_profiler.memory_usage()))
print('List Took {} seconds'.format(t2-t1))

print('Memory (Before Gen): {}MB'.format(memory_profiler.memory_usage()))
# time the generator version
# note: calling people_generator only creates the generator object; no person
# record is built until the generator is iterated, so this times creation only
t1_g = time.clock()
people_gen = people_generator(1000000)
t2_g = time.clock()

print('Memory (After Gen): {}MB'.format(memory_profiler.memory_usage()))
print('Gen Took {} seconds'.format(t2_g-t1_g))

Memory (Before List): [367.40625]MB
Memory (AfterList): [374.921875]MB
List Took 1.849961 seconds
Memory (Before Gen): [374.921875]MB
Memory (After Gen): [374.921875]MB
Gen Took 8.10000000016e-05 seconds


In [None]:
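# generators therefore give performance benefits in terms of memory usage and start-up speed:
# creating the generator is near-instant because each value is only computed when it is requested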