# Lesson 34: Python Advanced - Iterators

## Iterators and iterable types - introduction

In [1]:
# Here we will see how the "for" loop works using dates:

import datetime as dt
import sys

start = dt.datetime.now()
print("Execution started at: {}".format(start))

dates = [dt.date(2000,1,1) + dt.timedelta(days = i) for i in range(250000)]
print("Size of dates is {}".format(sys.getsizeof(dates)))

for d in dates:
    pass

stop = dt.datetime.now()

print("Execution stopped at: {}".format(stop))
print("Total time: {}".format(stop - start))

Execution started at: 2023-02-01 12:01:47.915821
Size of dates is 2115944
Execution stopped at: 2023-02-01 12:01:48.179676
Total time: 0:00:00.263855


In [2]:
# Now we will try to make this process faster and smaller in size:

import datetime as dt
import sys

start = dt.datetime.now()
print("Execution started at: {}".format(start))

# We will create the class to produce a list of maxdays:

class MillionDays:
    
    def __init__(self, year, month, day, maxdays):
        self.date = dt.date(year, month, day)
        self.maxdays = maxdays
        
    def __next__(self):
        ret = self.date
        self.date += dt.timedelta(days = 1)
        return ret

# Here I use the class to create a starting date:
md = MillionDays(2000, 1, 1, 250000)

# I check if the class with method next() works:
# print(next(md))
# print(next(md))
# print(next(md))

# Because the method next() works I can make a loop to call it 250000:

for i in range(250000):
    next(md)
    
print("Size of dates is {}".format(sys.getsizeof(md)))

stop = dt.datetime.now()

print("Execution stopped at: {}".format(stop))
print("Total time: {}".format(stop - start))

# Note that now, with class, the loop process is a bit fatser and it takes much less memory than in a standard way.

Execution started at: 2023-02-01 12:01:48.197432
Size of dates is 48
Execution stopped at: 2023-02-01 12:01:48.422249
Total time: 0:00:00.224817


In [3]:
# I can do the same thing by extending the class and making a shorted version of "for" loop:

import datetime as dt
import sys

start = dt.datetime.now()
print("Execution started at: {}".format(start))

class MillionDays:
    
    def __init__(self, year, month, day, maxdays):
        self.date = dt.date(year, month, day)
        self.maxdays = maxdays
        
    # I have to modify this function to introduce when the __next__() should stop working:
    # The class is iterable if it poses __next__ method.
    
    def __next__(self):
        if self.maxdays <= 0:
            raise StopIteration()
        ret = self.date
        self.date += dt.timedelta(days = 1)
        self.maxdays -= 1
        return ret
    
    # The class is an iterator if it has __iter__ method.
    
    def __iter__(self):
        return self
    
md = MillionDays(2000, 1, 1, 250000)
print("Size of dates is {}".format(sys.getsizeof(md)))

# Sometimes, if an object is not iterable, I can add the function iter() to this object:
# for i in iter(md):
#     pass

# But in such a case, there would be an OverFlowError, because maxdays are not imposed anywhere.

# Because iter() does not work so simply, we decided to extend the class so that iter() is there and maxdays
# are imposed in the __next__ method.
# Now the loop works:

for i in md:
    pass


Execution started at: 2023-02-01 12:01:48.435068
Size of dates is 48


## __getitem__ method

In [8]:
# It appears that to make the class iterable we do not have to define the method next() in the class.
# Instead we can use __getitem__ method:

class MillionDays:
    
    def __init__(self, year, month, day, maxdays):
        self.date = dt.date(year, month, day)
        self.maxdays = maxdays
        
    # We define the method __getitem__. It is used to generate an arbitrary day in my calendar by properly setting
    # the "item"
    
    def __getitem__(self, item):
        if item <= self.maxdays:
            return self.date + dt.timedelta(days = item)
        else:
            raise StopIteration()
            
md = MillionDays(2000, 1, 1, 25)

# To show the consequtive days, we use:

print(md[0], md[1], md[3], md[10])

# Now we check if the next() function works:
# print(next(md), next(md), next(md))
# But because we do not have __next__ defined so the class is not an iterator now and we get an error.

# BUT, we can create in this case an artificial iterator, because we see that the class has all properties
# of being iterable (note that iter() is different from __iter__ as it is outside of the class)

it = iter(md)

# And now we can call the next() function:

print(next(it), next(it), next(it))

# Now the "for" loop also works, (but note to change the maxdays, otherwise it will take to long too generate all
# values):

for d in md:
    print(d)
        

2000-01-01 2000-01-02 2000-01-04 2000-01-11
2000-01-01 2000-01-02 2000-01-03
2000-01-01
2000-01-02
2000-01-03
2000-01-04
2000-01-05
2000-01-06
2000-01-07
2000-01-08
2000-01-09
2000-01-10
2000-01-11
2000-01-12
2000-01-13
2000-01-14
2000-01-15
2000-01-16
2000-01-17
2000-01-18
2000-01-19
2000-01-20
2000-01-21
2000-01-22
2000-01-23
2000-01-24
2000-01-25
2000-01-26


## Iterators for system's types

In [11]:
# A tuple is iterable but it is not an iterator:

aTuple = (2, 4, 6, 8)

for x in aTuple:
    print(x)

# This gives an error:
# print(next(aTuple))

# So we define an iterator:

it = iter(aTuple)
print(next(it))

2
4
6
8
2


In [16]:
# A list is iterable, and it is not an iterator:

aList = [1, 3, 5, 7, 9]

for x in aList:
    print(x)
    
# This does not work:
# print(next(aList))

it = iter(aList)
print(next(it))


1
3
5
7
9
1


In [17]:
# The same holds for: a set (aSet = {some elements})

In [19]:
# Now we try an lines of a file:

with open("function_log.txt") as file:
    for line in file:
        print(line)

# So the file is iterable.



----------------------------------------

Function 'ChangeSalary' started at 2023-01-12T01:26:39.996997

Following parameters were used:

Jonson 2000 True



Function returned 2000



----------------------------------------

Function 'ChangeSalary' started at 2023-01-12T01:26:40.001415

Following parameters were used:

Jonson 2000

is_bonus=True

Function returned 2000





In [2]:
# Now we do a similar thing but with while loop:

with open("function_log.txt") as file:
    while True:
        try:
            print(next(file))
        except StopIteration:
            break
            
# We had to add the try-except command to stop the loop after getting the last line of the file.

# So the object "file" is iterable and it has its own iterator.



----------------------------------------

Function 'ChangeSalary' started at 2023-01-12T01:26:39.996997

Following parameters were used:

Jonson 2000 True



Function returned 2000



----------------------------------------

Function 'ChangeSalary' started at 2023-01-12T01:26:40.001415

Following parameters were used:

Jonson 2000

is_bonus=True

Function returned 2000





## A class with external iterator

In [13]:
# Before we saw that the class with iterator has __next__ and __iter__ methods. 
# Now we will see how to build an iterator from a different class:

import datetime as dt

# We define a class
class MillionDaysIterator:
    
    def __init__(self, date, maxdays):
        self.date = date
        self.maxdays = maxdays
    
    def __next__(self):
        if self.maxdays <= 0:
            raise StopIteration()
        ret = self.date
        self.date += dt.timedelta(days = 1)
        self.maxdays -= 1
        return ret
    
    
# My old class does not have noe the __next__ method, but it calls the class with iterator:

class MillionDays:
    
    def __init__(self, year, month, day, maxdays):
        self.date = dt.date(year, month, day)
        self.maxdays = maxdays
        self.iterator = MillionDaysIterator(self.date, self.maxdays)

    # Here I return not the class itself, but the iterator:
    def __iter__(self):
        return self.iterator
    
# Note that now, my first class is an iterator and the second class is iterable. To iterate over the second
# class, I used the iterator of the first class.
    
md = MillionDays(2000, 1, 1, 3)

# To use next, I make the instance "md" of the second class an iterator by using the function iter().

it = iter(md)
print(next(it))

for d in md:
    print(d)

2000-01-01
2000-01-02
2000-01-03
