In [1]:
#Python (Basics)

In [64]:
import numpy as np
from time import time
import multiprocessing as mp

In [65]:
def howmany_within_range(row, minimum=4, maximum=8):
    """Count the entries of `row` that fall inside the closed interval [minimum, maximum].

    Args:
        row: Iterable of numbers to inspect.
        minimum: Lower bound; a value equal to it is counted.
        maximum: Upper bound; a value equal to it is counted.

    Returns:
        The number of matching entries as an int (0 for an empty `row`).
    """
    # Emit one `1` per in-range value and add them up.
    return sum(1 for value in row if minimum <= value <= maximum)

In [66]:
# Prepare data
# `np.random.RandomState(100)` on its own only builds a generator object; it does not
# seed the global `np.random` functions. Keep the seeded generator and draw from it so
# the data (and therefore the printed counts) are the same on every run.
rng = np.random.RandomState(100)
arr = rng.randint(0, 10, size=[5000, 10000])  # integers in [0, 10), 5000 rows x 10000 columns
# Plain nested lists: each row is a Python list that can be handed to the worker function.
data = arr.tolist()
print(arr.shape, len(data))

(5000, 10000) 5000


In [67]:
# Solution without parallelization: every row is counted one after another in this process
ini = time()
results = [howmany_within_range(row, minimum=4, maximum=8) for row in data]
elapsed = time() - ini
print(elapsed)
print(results[:5])

3.261914014816284
[4885, 5038, 4934, 5071, 4963]


In [68]:
#Parallelization
"""
Parallelism consists of performing *multiple operations* at the same time. 
Multiprocessing entails spreading tasks over a computer’s central 
processing units (CPUs, or cores). Multiprocessing is well-suited for 
CPU-bound tasks: tightly bound for loops and mathematical computations usually fall into this category.
"""

'\nParallelism consists of performing *multiple operations* at the same time. \nMultiprocessing entails spreading tasks over a computer’s central \nprocessing units (CPUs, or cores). Multiprocessing is well-suited for \nCPU-bound tasks: tightly bound for loops and mathematical computations usually fall into this category.\n'

In [69]:
# Number of CPUs reported by multiprocessing for this machine
n_cpus = mp.cpu_count()
print("Num cpu's:", n_cpus)

Num cpu's: 40


In [70]:
#Pool + map
ini = time()
# The context manager shuts the worker processes down when the block exits, including
# when `map` raises, so no workers are left behind.
with mp.Pool(3) as pool:
    # `map` blocks until every row has been processed and returns the counts in input
    # order. `data` is already a list of rows, so it is passed as-is; the worker uses
    # its default bounds (minimum=4, maximum=8).
    results = pool.map(howmany_within_range, data)

print(time() - ini)
print(results[:5])

2.7608766555786133
[4885, 5038, 4934, 5071, 4963]


In [73]:
#JobLib
from joblib import Parallel, delayed

In [74]:
ini = time()
# Keep the list returned by Parallel: without this assignment the print below would show
# whatever `results` was left over from an earlier cell, not the joblib output.
results = Parallel(n_jobs=3, backend="multiprocessing")(delayed(howmany_within_range)(row) for row in data)
print(time() - ini)
print(results[:5])

3.0009326934814453
[4885, 5038, 4934, 5071, 4963]


In [23]:
#Asynchronism

In [None]:
"""
When you execute something synchronously, you wait for it to finish before moving on to another task. When you 
execute something asynchronously, you can move on to another task before it finishes.
"""

In [84]:
#Pool + apply_async
ini = time()
# The `with` block guarantees the worker processes are torn down even if submitting
# the tasks raises or is interrupted.
with mp.Pool(3) as pool:
    # Each apply_async call returns an AsyncResult handle right away; the actual
    # counting happens in the worker processes.
    results = [pool.apply_async(howmany_within_range, args=(row, 4, 8)) for row in data]
    pool.close()  # no further tasks will be submitted
    pool.join()   # wait until every submitted task has finished

print(time() - ini)
# All tasks are done at this point, so .get() hands back the stored value
# (or re-raises the exception the worker hit) without waiting.
print([results[i].get() for i in range(5)])

3.0649795532226562
[4885, 5038, 4934, 5071, 4963]
