## Itertools.groupby

In [1]:
import itertools
import string
import random 

In [2]:
string.ascii_lowercase

'abcdefghijklmnopqrstuvwxyz'

In [3]:
x = random.choices(string.ascii_lowercase, k=20)
print(x)

['v', 'p', 'g', 'j', 't', 't', 'k', 'c', 'n', 'f', 'y', 'b', 'k', 'h', 'n', 'x', 'i', 'z', 'o', 'l']


In [4]:
x = sorted(x)  
print(x)

['b', 'c', 'f', 'g', 'h', 'i', 'j', 'k', 'k', 'l', 'n', 'n', 'o', 'p', 't', 't', 'v', 'x', 'y', 'z']


__NOTE:__ `groupby` only groups *consecutive* elements that share the same key, so the elements must first be sorted (by that same key) to get one group per key.

In [5]:
# No key function is given, so each element is its own key:
# consecutive equal letters end up in the same group.
for key, group in itertools.groupby(x):
    print(f'key: {key}, group:{list(group)}')

key: b, group:['b']
key: c, group:['c']
key: f, group:['f']
key: g, group:['g']
key: h, group:['h']
key: i, group:['i']
key: j, group:['j']
key: k, group:['k', 'k']
key: l, group:['l']
key: n, group:['n', 'n']
key: o, group:['o']
key: p, group:['p']
key: t, group:['t', 't']
key: v, group:['v']
key: x, group:['x']
key: y, group:['y']
key: z, group:['z']


In [6]:
data= [
    ('213123123123', 'ABC'),
    ('45345453453453', 'BCD'),
    ('999999999999', 'BCD'),
    ('4354534534534', 'EFG'),
    ('45345453453453', 'FGH'),
    ('3243242344324343', 'ABC'),
    ('5555577777777', 'DEF'),
]


for key, group in itertools.groupby(data, key=lambda x:x[1]):
    print(f'{key} ==> {list(group)} ')

ABC ==> [('213123123123', 'ABC')] 
BCD ==> [('45345453453453', 'BCD'), ('999999999999', 'BCD')] 
EFG ==> [('4354534534534', 'EFG')] 
FGH ==> [('45345453453453', 'FGH')] 
ABC ==> [('3243242344324343', 'ABC')] 
DEF ==> [('5555577777777', 'DEF')] 


__NOTE:__ Notice that the key `ABC` appears twice. This is because the data is not sorted by the grouping key: `groupby` starts a new group every time the key changes.

In [7]:
data = sorted(data, key=lambda x:x[1])
data

[('213123123123', 'ABC'),
 ('3243242344324343', 'ABC'),
 ('45345453453453', 'BCD'),
 ('999999999999', 'BCD'),
 ('5555577777777', 'DEF'),
 ('4354534534534', 'EFG'),
 ('45345453453453', 'FGH')]

In [8]:
for key, group in itertools.groupby(data, key=lambda x:x[1]):
    print(f'{key} ==> {list(group)} ')

ABC ==> [('213123123123', 'ABC'), ('3243242344324343', 'ABC')] 
BCD ==> [('45345453453453', 'BCD'), ('999999999999', 'BCD')] 
DEF ==> [('5555577777777', 'DEF')] 
EFG ==> [('4354534534534', 'EFG')] 
FGH ==> [('45345453453453', 'FGH')] 


In [9]:
# Ex: group by the even and odd 

def check_even(n):
    """Return the parity label of ``n``.

    Returns the string 'odd' when ``n % 2`` is non-zero and 'even' otherwise.
    """
    return 'odd' if n % 2 else 'even'

def grouper(iterable):
    """Print the items of ``iterable`` grouped by parity.

    The items are first ordered by their parity label (the sort is stable,
    so items keep their relative order within each parity), the ordered
    list is printed, and then one line is printed per parity group.
    """
    ordered = list(iterable)
    ordered.sort(key=check_even)
    print(ordered)

    for parity, members in itertools.groupby(ordered, key=check_even):
        print(f'{parity} ===> {list(members)}')
        

In [10]:
# Draw 5 distinct integers from 1..49. random.sample accepts any sequence,
# so the range can be passed directly without copying it into a list.
x = random.sample(range(1, 50), 5)
print(x)

[25, 49, 5, 16, 21]


In [11]:
grouper(x)

[16, 25, 49, 5, 21]
even ===> [16]
odd ===> [25, 49, 5, 21]


## Itertools.compress
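- It filters one iterable using a second, parallel iterable of selectors: only the elements whose corresponding selector is truthy are kept, which makes it easy to pick several elements at once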

In [12]:
list(itertools.compress(['a', 'b', 'c', 'd', 'e', 'f'], [True, False, False, True, True, False]))

['a', 'd', 'e']

In [13]:
# Selectors do not have to be booleans: any truthy/falsy value works
# (0 drops 'a', each 1 keeps its element). Seven selectors are given for
# six items; as the output shows, the surplus selector has no effect.
list(itertools.compress(['a', 'b', 'c', 'd', 'e', 'f'], [0, 1, 1, 1, 1, 1, 1]))

['b', 'c', 'd', 'e', 'f']

## Itertools.count
- Infinite iterator that yields evenly spaced values starting from an initial value; the step defaults to 1 and may be negative to count down

In [14]:
x = itertools.count(10)
print(x)

count(10)


In [15]:
type(x)

itertools.count

In [16]:
# The counter is infinite, so pull exactly 30 values from it by hand.
for _ in range(30):
    print(next(x), end= ',')

10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,

In [17]:
# Start a fresh counter at 3 and print its first 30 values.
x = itertools.count(3)
for _ in range(30):
    print(next(x), end= ',')

3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,

In [18]:
# Count from 3 in steps of 2 and print the first 30 values.
x = itertools.count(3, 2) # (initial_value, step)
for _ in range(30):
    print(next(x), end= ',')

3,5,7,9,11,13,15,17,19,21,23,25,27,29,31,33,35,37,39,41,43,45,47,49,51,53,55,57,59,61,

In [20]:
# Count from 3 in steps of 5 and print the first 30 values.
x = itertools.count(3, 5) # (initial_value, step)
for _ in range(30):
    print(next(x), end= ',')

3,8,13,18,23,28,33,38,43,48,53,58,63,68,73,78,83,88,93,98,103,108,113,118,123,128,133,138,143,148,

In [21]:
# count down: a negative step makes the counter decrease by 1 each time
x = itertools.count(3, -1) # (initial_value, step)
for _ in range(30):
    print(next(x), end= ',')

3,2,1,0,-1,-2,-3,-4,-5,-6,-7,-8,-9,-10,-11,-12,-13,-14,-15,-16,-17,-18,-19,-20,-21,-22,-23,-24,-25,-26,

In [23]:
for i in itertools.islice(range(10), 5):
    print(i, end=',')

0,1,2,3,4,

In [25]:
for i in itertools.islice(itertools.count(10, -1), 5):
    print(i, end=',')

10,9,8,7,6,

In [26]:
list(itertools.islice(itertools.count(10, -1), 5))

[10, 9, 8, 7, 6]

In [27]:
list(itertools.islice(itertools.count(10, -1), 5, 10)) # start, stop: skip the first 5 values, then yield up to (not including) index 10

[5, 4, 3, 2, 1]

In [28]:
list(itertools.islice(itertools.count(10, -1), 5, 10, 2)) # start, stop, step: every 2nd value from index 5 up to (not including) index 10

[5, 3, 1]

In [29]:
x = iter([1, 2, 3, 4])
y = iter((5, 6, 7, 8, 9))
z = iter({10, 11})

print(x, y, z)

<list_iterator object at 0x0000000005E3E320> <tuple_iterator object at 0x0000000005E3E2B0> <set_iterator object at 0x0000000005DD8708>


In [30]:
# Iterators cannot be concatenated with `+`. The TypeError is the point of
# this cell, so catch and print it instead of letting it halt a full re-run.
try:
    x + y
except TypeError as exc:
    print(f'{type(exc).__name__}: {exc}')

TypeError: unsupported operand type(s) for +: 'list_iterator' and 'tuple_iterator'

In [31]:
list(itertools.chain(x, y, z))

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]

In [43]:
x = iter([1, 2, 3, 4])
y = iter((5, 6, 7, 8, 9))
z = [x, y]
z

[<list_iterator at 0x643f198>, <tuple_iterator at 0x643f128>]

In [45]:
list(itertools.chain(z)) # Limitation: chain receives one iterable (the list z), so it yields the iterator objects stored in it rather than their items

[<list_iterator at 0x643f198>, <tuple_iterator at 0x643f128>]

In [44]:
# Unpacking with * passes each iterator in z as a separate argument,
# so chain walks through their items one after another.
list(itertools.chain(*z)) 

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [47]:
x = iter([1, 2, 3, 4])
y = iter((5, 6, 7, 8, 9))
z = [x, y]
list(itertools.chain.from_iterable(z)) 

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [51]:
a1 = (1, 2, 3, 4, 5, 6)
a2 = ('a', 'b')

print(list(zip(a1, a2)))
print(list(itertools.zip_longest(a1, a2)))
print(list(itertools.zip_longest(a1, a2, fillvalue=0)))

[(1, 'a'), (2, 'b')]
[(1, 'a'), (2, 'b'), (3, None), (4, None), (5, None), (6, None)]
[(1, 'a'), (2, 'b'), (3, 0), (4, 0), (5, 0), (6, 0)]


## Itertools - dropwhile and takewhile

In [52]:
evens = list(range(0, 10, 2))
print(evens)

[0, 2, 4, 6, 8]


In [53]:
list(filter(lambda x:x>2, evens))

[4, 6, 8]

In [54]:
# dropwhile only discards the leading run of items for which the predicate
# is true. The first item (0) already fails x > 2, so nothing is dropped and
# the whole list comes back, unlike filter, which tests every item.
list(itertools.dropwhile(lambda x:x>2, evens))

[0, 2, 4, 6, 8]