# Efficiently combining, counting, and iterating

In [1]:
# data: Pokemon names and their HP values, listed in the same order
names, hps = ['Bulbasaur', 'Charmander', 'Squirtle'], [45, 39, 44]

In [2]:
# combining objects

# zip() pairs the two lists element by element and returns a zip iterator
combined_zip = zip(names, hps)
print(type(combined_zip))

# a zip object has to be materialised (here into a list) before its
# (name, hp) tuples can be inspected; doing so consumes the iterator
list(combined_zip)

<class 'zip'>


[('Bulbasaur', 45), ('Charmander', 39), ('Squirtle', 44)]

In [3]:
# counting with loops
type_counts = {}
for poke_type in names:
    if poke_type not in type_counts:
        type_counts[poke_type] = 1
    else:
        type_counts[poke_type] += 1

type_counts

{'Bulbasaur': 1, 'Charmander': 1, 'Squirtle': 1}

In [4]:
# using Counter from the collections module
# more efficient approach than the manual counting loop

from collections import Counter

# Counter tallies how many times each element of names occurs
type_counts = Counter(names)
print(type_counts)

# the printed Counter lists its entries from highest to lowest count

Counter({'Bulbasaur': 1, 'Charmander': 1, 'Squirtle': 1})


In [5]:
# sample data with repeated entries so that the counts differ;
# list() splits the string into one single-character item per letter
names = list('aaabbcddffzzw')

In [8]:
%%timeit -r2 -n10
type_counts = {}
for poke_type in names:
    if poke_type not in type_counts:
        type_counts[poke_type] = 1
    else:
        type_counts[poke_type] += 1
print(type_counts)

{'a': 3, 'b': 2, 'c': 1, 'd': 2, 'f': 2, 'z': 2, 'w': 1}
{'a': 3, 'b': 2, 'c': 1, 'd': 2, 'f': 2, 'z': 2, 'w': 1}
{'a': 3, 'b': 2, 'c': 1, 'd': 2, 'f': 2, 'z': 2, 'w': 1}
{'a': 3, 'b': 2, 'c': 1, 'd': 2, 'f': 2, 'z': 2, 'w': 1}
{'a': 3, 'b': 2, 'c': 1, 'd': 2, 'f': 2, 'z': 2, 'w': 1}
{'a': 3, 'b': 2, 'c': 1, 'd': 2, 'f': 2, 'z': 2, 'w': 1}
{'a': 3, 'b': 2, 'c': 1, 'd': 2, 'f': 2, 'z': 2, 'w': 1}
{'a': 3, 'b': 2, 'c': 1, 'd': 2, 'f': 2, 'z': 2, 'w': 1}
{'a': 3, 'b': 2, 'c': 1, 'd': 2, 'f': 2, 'z': 2, 'w': 1}
{'a': 3, 'b': 2, 'c': 1, 'd': 2, 'f': 2, 'z': 2, 'w': 1}
{'a': 3, 'b': 2, 'c': 1, 'd': 2, 'f': 2, 'z': 2, 'w': 1}
{'a': 3, 'b': 2, 'c': 1, 'd': 2, 'f': 2, 'z': 2, 'w': 1}
{'a': 3, 'b': 2, 'c': 1, 'd': 2, 'f': 2, 'z': 2, 'w': 1}
{'a': 3, 'b': 2, 'c': 1, 'd': 2, 'f': 2, 'z': 2, 'w': 1}
{'a': 3, 'b': 2, 'c': 1, 'd': 2, 'f': 2, 'z': 2, 'w': 1}
{'a': 3, 'b': 2, 'c': 1, 'd': 2, 'f': 2, 'z': 2, 'w': 1}
{'a': 3, 'b': 2, 'c': 1, 'd': 2, 'f': 2, 'z': 2, 'w': 1}
{'a': 3, 'b': 2, 'c': 1, 'd': 2

In [9]:
# time only the Counter construction (2 runs of 10 loops each)
%timeit -r2 -n10 Counter(names)
# print once, outside the timed statement, to show the resulting counts
print(Counter(names))

4.67 µs ± 966 ns per loop (mean ± std. dev. of 2 runs, 10 loops each)
Counter({'a': 3, 'b': 2, 'd': 2, 'f': 2, 'z': 2, 'c': 1, 'w': 1})


# Combinations

In [10]:
# with loop: collect every unordered pair of distinct types

poke_types = ['Bug', 'Fire', 'Ghost', 'Grass', 'Water']
combos = []

for x in poke_types:
    for y in poke_types:
        if x == y:
            continue  # skip self-pairs; back to 'for y in poke_types'
        # Use the logical `and`, not the bitwise `&`: it is the operator meant
        # for combining conditions and it short-circuits, so the second
        # membership scan is skipped when the first check already fails.
        if (x, y) not in combos and (y, x) not in combos:
            combos.append((x, y))

print(combos)

[('Bug', 'Fire'), ('Bug', 'Ghost'), ('Bug', 'Grass'), ('Bug', 'Water'), ('Fire', 'Ghost'), ('Fire', 'Grass'), ('Fire', 'Water'), ('Ghost', 'Grass'), ('Ghost', 'Water'), ('Grass', 'Water')]


In [12]:
# itertools.combinations: build the same pairs without writing the loops

from itertools import combinations

# combinations() returns an iterator object rather than a list
combos_list = combinations(poke_types, 2)
print(type(combos_list))

# materialise the iterator into a list to see the pairs (this consumes it)
print(list(combos_list))

<class 'itertools.combinations'>
[('Bug', 'Fire'), ('Bug', 'Ghost'), ('Bug', 'Grass'), ('Bug', 'Water'), ('Fire', 'Ghost'), ('Fire', 'Grass'), ('Fire', 'Water'), ('Ghost', 'Grass'), ('Ghost', 'Water'), ('Grass', 'Water')]


# Set theory

In [1]:
# comparing objects

# leverage set theory, the branch of mathematics that deals with collections of distinct items

# Python provides a built-in set data type for this


In [2]:
# data: two lists that share exactly one entry ('Squirtle')
# NOTE(review): 'Bulbsaur' looks like a misspelling of 'Bulbasaur' (the spelling
# used in the first data cell); left unchanged because the recorded outputs of
# the cells below contain this spelling -- confirm before correcting.
list_a = ['Bulbsaur', 'Charmander', 'Squirtle']
list_b = ['Caterpie', 'Pidgey', 'Squirtle']

## Intersection method

In [3]:
# using nested loops (written as a comprehension)

# every item of list_a is compared with every item of list_b;
# an item is kept once for each match found
in_common = [
    pokemon_a
    for pokemon_a in list_a
    for pokemon_b in list_b
    if pokemon_a == pokemon_b
]
print(in_common)

['Squirtle']


In [6]:
# using set

# convert both lists to sets so that set operations become available
set_a, set_b = set(list_a), set(list_b)

# intersection method: the items present in both sets
print(set_a.intersection(set_b))

{'Squirtle'}


In [7]:
%%timeit
# benchmark the nested-loop intersection: every item of list_a is compared
# with every item of list_b

in_common = []

for pokemon_a in list_a:
    for pokemon_b in list_b:
        if pokemon_a == pokemon_b:
            in_common.append(pokemon_a)

591 ns ± 25.5 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [8]:
# benchmark the built-in set intersection for comparison with the nested loops
%timeit set_a.intersection(set_b)

155 ns ± 2.97 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


## Difference method (group - intersection)

In [9]:
# items that exist in set_a but not in set_b

set_a.difference(set_b)

{'Bulbsaur', 'Charmander'}

## Symmetric difference (union - intersection)

In [10]:
# items that are in exactly one of the two sets (union - intersection)

set_a.symmetric_difference(set_b)

{'Bulbsaur', 'Caterpie', 'Charmander', 'Pidgey'}

## Union

In [12]:
# union: every item that appears in either set, each listed once
set_a.union(set_b)

{'Bulbsaur', 'Caterpie', 'Charmander', 'Pidgey', 'Squirtle'}

## Membership testing

In [13]:
# the same four names stored in three container types
names_list = ['Abomasnow', 'Abra', 'Absol', 'Zubat']
names_tuple = tuple(names_list)
names_set = set(names_list)

In [16]:
# comparing runtime

list_runtime = %timeit 'Zubat' in names_list
tuple_runtime = %timeit 'Zubat' in names_tuple
set_runtime = %timeit 'Zubat' in names_set

# set runtime is the shortest.

72.3 ns ± 3.07 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)
85.3 ns ± 8.84 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)
40.1 ns ± 1.22 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


In [17]:
# building a set from a list keeps each distinct value only once
group = ['a', 'b', 'a', 'c', 'z', 'z']
group_set = {*group}
print(group_set)

{'z', 'c', 'b', 'a'}


# Eliminating loops

In [1]:
# looping is a piece-by-piece operation, so it is often inefficient

# List of HP, Attack, Defense, Speed
poke_stats = [[90, 92, 75, 60], [25, 30, 15,90], [65, 130, 60, 74]]

# sum of the stats in each row
# for-loop approach: append one row total per iteration
total = []
for row in poke_stats:
    total.append(sum(row))

In [2]:
# list comprehension: the same row totals built in a single expression
total = [sum(row) for row in poke_stats]

In [3]:
# map function: apply sum to every row, then unpack the map object into a list
total = [*map(sum, poke_stats)]

In [4]:
# combinations: the types to pair up

# split() on whitespace yields one list item per type name
poke_types = 'Bug Fire Ghost Grass Water'.split()

In [5]:
# Nested for loop approach: collect every unordered pair of distinct types
combos = []
for x in poke_types:
    for y in poke_types:
        if x == y:
            continue  # skip self-pairs
        # Use the logical `and`, not the bitwise `&`: it is the operator meant
        # for combining conditions and it short-circuits, so the second
        # membership scan is skipped when the first check already fails.
        if (x, y) not in combos and (y, x) not in combos:
            combos.append((x, y))

In [6]:
# built-in module approach: itertools produces the pairs directly
from itertools import combinations

# list() materialises the combinations iterator
combos2 = list(combinations(poke_types, 2))

In [6]:
# eliminate loops with NumPy

import numpy as np

# List of HP, Attack, Defense, Speed, built directly as a 2-D array
poke_stats = np.array([
    [90, 92, 75, 60],
    [25, 30, 15, 90],
    [65, 130, 60, 74],
])

# axis=1 averages across the columns, giving one mean per row
print(np.mean(poke_stats, axis=1))

# axis=0 averages down the rows, giving one mean per column
print(np.mean(poke_stats, axis=0))

[79.25 40.   82.25]
[60.         84.         50.         74.66666667]


In [14]:
# writing better loops

# when a loop is unavoidable, at least write it well

# rule 1. move one-time calculations outside the loop

# data: names and their attack values, listed in the same order
names = ['Absol', 'Aron', 'Jynx', 'Natu', 'Onix']
attacks = np.array([130, 70, 50, 50, 45])

In [18]:
# print every Pokemon whose attack is above the average of all attacks
for pokemon, attack in zip(names, attacks):
    # the mean is recomputed on every iteration although attacks never changes
    # inside the loop (presumably kept on purpose as the "before" case for rule 1)
    total_attack_avg = attacks.mean()
    
    if attack > total_attack_avg:
        print("{}'s attack: {} >  average: {}!".format(pokemon, attack, total_attack_avg))
    else:
        pass

Absol's attack: 130 >  average: 69.0!
Aron's attack: 70 >  average: 69.0!


In [24]:
%%timeit
# baseline timing: the mean is computed inside the loop; the message is
# formatted but not printed, so no output is produced while timing
for pokemon, attack in zip(names, attacks):
    total_attack_avg = attacks.mean()  # calculating the same value every loop
    
    if attack > total_attack_avg:
        "{}'s attack: {} >  average: {}!".format(pokemon, attack, total_attack_avg)
    else:
        pass

51.4 µs ± 1.87 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [26]:
%%timeit
# improved timing: same loop, but the mean is computed once before it starts
total_attack_avg = attacks.mean()  # moving one-time calculation outside the loop

for pokemon, attack in zip(names, attacks):
   
    if attack > total_attack_avg:
        "{}'s attack: {} >  average: {}!".format(pokemon, attack, total_attack_avg)
    else:
        pass

20.7 µs ± 286 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [28]:
# rule 2. use holistic conversions outside the loop

# data: three parallel lists, one entry per Pokemon
# NOTE(review): 'Squirtile' is spelled 'Squirtle' in the earlier cells -- confirm
names = ['Pikachu', 'Squirtile', 'Articuno']
legend_status = [False, False, True]
generations = [1, 1, 1]

In [36]:
# "before" version: each zipped tuple is converted to a list inside the loop
poke_data = []
for poke_tuple in zip(names, legend_status, generations):
    poke_list = list(poke_tuple)  # typecast every loop
    poke_data.append(poke_list)

In [37]:
# "after" version: the loop only collects the zipped tuples as they are
poke_data_tuples = []
for poke_tuple in zip(names, legend_status, generations):
    poke_data_tuples.append(poke_tuple)

# holistic conversion (all at once, not each loop):
# map applies list to every collected tuple in a single pass after the loop
poke_data = [*map(list, poke_data_tuples)]