# Testing multiprocess execution times

In [1]:
import multiprocessing as mp

In [2]:
import datetime
import os

class StopWatch:
    """Minimal wall-clock timer built on ``datetime.datetime.now()``.

    The watch starts running as soon as it is created. Call ``reset()`` to
    restart it and ``read()`` to see how much time has passed since then.
    """

    # Moment of the most recent reset; replaced on the instance by reset().
    start = None

    def __init__(self):
        self.reset()

    def reset(self):
        """Restart timing from the current moment."""
        self.start = datetime.datetime.now()

    def read(self, in_seconds=False):
        """Return the time elapsed since the last reset.

        The result is a ``datetime.timedelta`` by default, or a float number
        of seconds when ``in_seconds`` is true.
        """
        elapsed = datetime.datetime.now() - self.start
        return elapsed.total_seconds() if in_seconds else elapsed

### Prepare data and execute sequentially

In [3]:
import numpy as np
from time import time


# Seed NumPy's global generator so the "random" data is identical on every run.
# Merely constructing an `np.random.RandomState(100)` object and discarding it
# seeds nothing: `np.random.randint` draws from the global generator.
np.random.seed(100)
arr = np.random.randint(0, 20, size=[200000, 20])  # 200,000 rows x 20 ints in [0, 20)
data = arr.tolist()  # nested Python lists, one inner list per row
data[:5]  # peek at the first rows (only rendered when it is the last expression of a cell)

def howmany_within_range(row, minimum, maximum):
    """Count the entries of `row` that fall inside the closed interval [`minimum`, `maximum`]."""
    return sum(1 for value in row if minimum <= value <= maximum)

# Baseline: count the matches row by row in the current process and time the pass.
sw = StopWatch()
results = [howmany_within_range(row, minimum=4, maximum=8) for row in data]

print(sw.read())
print(results[:10])
# The counts printed here depend on the randomly generated `data`.

0:00:00.180513
[2, 7, 4, 7, 3, 6, 5, 5, 3, 3]


### Parallelizing using Pool.apply()

In [4]:
import multiprocessing as mp

# Step 1: Init multiprocessing.Pool() with one worker process per CPU
pool = mp.Pool(mp.cpu_count())

try:
    # Step 2: `pool.apply` the `howmany_within_range()`
    # `apply` blocks until each call has returned, so rows are still handled
    # one at a time -- just inside a worker process instead of this one.
    sw = StopWatch()
    results = [pool.apply(howmany_within_range, args=(row, 4, 8)) for row in data]
finally:
    # Step 3: Always release the workers, even if a task raised.
    # close() stops the pool accepting work; join() waits for the workers to exit.
    pool.close()
    pool.join()

print(sw.read())
print(results[:10])
# The counts printed here depend on the randomly generated `data`.

0:00:12.670082
[2, 7, 4, 7, 3, 6, 5, 5, 3, 3]


In [5]:
import multiprocessing as mp

# Blocking `Pool.apply` calls again, this time with the pool managed by a `with` block.
sw = StopWatch()
with mp.Pool(processes=mp.cpu_count()) as pool:
    results = [pool.apply(howmany_within_range, args=(row, 4, 8)) for row in data]

print(sw.read())
print(results[:10])
# The counts printed here depend on the randomly generated `data`.

0:00:12.372885
[2, 7, 4, 7, 3, 6, 5, 5, 3, 3]


### Parallelizing using Pool.map()

In [6]:
import multiprocessing as mp

# Redefine, with only 1 mandatory argument.
def howmany_within_range_rowonly(row, minimum=4, maximum=8):
    """Count the entries of `row` lying in [`minimum`, `maximum`], both ends included.

    The bounds default to 4 and 8, so the function can be called with just a row.
    """
    hits = [value for value in row if minimum <= value <= maximum]
    return len(hits)

# Hand the whole list to `Pool.map` in a single call instead of one `apply` per row.
sw = StopWatch()
with mp.Pool(processes=mp.cpu_count()) as pool:
    results = pool.map(func=howmany_within_range_rowonly, iterable=data)

print(sw.read())
print(results[:10])
# The counts printed here depend on the randomly generated `data`.

0:00:00.245415
[2, 7, 4, 7, 3, 6, 5, 5, 3, 3]


---

# Parallel processing with Pool.apply_async()


In [11]:
import multiprocessing as mp
# Start from an empty result list and a fresh pool with one worker per CPU.
results = []

pool = mp.Pool(processes=mp.cpu_count())

### Step 1: Redefine, to accept `i`, the iteration number

In [12]:
def howmany_within_range2(i, row, minimum, maximum):
    """Count the entries of `row` inside [`minimum`, `maximum`] and tag the count with `i`.

    Returns the pair `(i, count)`; `i` is passed through unchanged so the
    caller can tell which row a given count belongs to.
    """
    matches = sum(1 for value in row if minimum <= value <= maximum)
    return (i, matches)

### Step 2: Define callback function to collect the output in `results`

In [13]:
def collect_result(result):
    """Append `result` to the module-level `results` list."""
    # No `global` statement is needed: the list is mutated in place, never rebound.
    results.append(result)

### Step 3: Use loop to parallelize

In [14]:
# A task that raises never reaches `callback`; without an `error_callback` the
# failure would vanish silently and the row would simply be missing from `results`.
task_errors = []

sw = StopWatch()
for i, row in enumerate(data):
    pool.apply_async(
        howmany_within_range2,
        args=(i, row, 4, 8),
        callback=collect_result,
        error_callback=task_errors.append,
    )

# Step 4: Close Pool and let all the processes complete

pool.close()
pool.join()  # postpones the execution of next line of code until all processes in the queue are done.

print(sw.read())

# Surface any failure instead of carrying on with an incomplete `results` list.
if task_errors:
    raise RuntimeError(
        "{} of {} tasks failed; first error: {!r}".format(
            len(task_errors), len(data), task_errors[0]
        )
    ) from task_errors[0]

0:00:06.237361


### Step 5: Sort results [OPTIONAL]

In [15]:
# Each item of `results` is an (index, count) pair; order by index, then keep the counts.
results.sort(key=lambda pair: pair[0])
results_final = [count for _, count in results]

print(results_final[:10])

[2, 7, 4, 7, 3, 6, 5, 5, 3, 3]


# Parallel processing with Pool.apply_async() without callback function

In [16]:
import multiprocessing as mp
# Start from an empty result list and a fresh pool with one worker per CPU.
results = []

pool = mp.Pool(processes=mp.cpu_count())

### Call `apply_async()` without a callback

In [17]:
# Number of rows in `data`; the next cell submits one task per row.
len(data)

200000

In [18]:
# Submit one task per row and keep every returned handle so its value can be fetched later.
sw = StopWatch()
result_objects = []
for i, row in enumerate(data):
    handle = pool.apply_async(howmany_within_range2, args=(i, row, 4, 8))
    result_objects.append(handle)

# No further submissions; wait for the worker processes to finish.
pool.close()
pool.join()
print(sw.read())

0:00:05.670303


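### `result_objects` is a list of `multiprocessing.pool.AsyncResult` objects (shown as `ApplyResult` in their repr); call `.get()` on each to retrieve its value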

In [19]:
# Fetch each task's return value with `get()` and keep only element 1 of it.
results = []
for pending in result_objects:
    results.append(pending.get()[1])

print(results[:10])

[2, 7, 4, 7, 3, 6, 5, 5, 3, 3]
