In [1]:
import concurrent.futures
import time

In [2]:
# Work items shared by every benchmark below: the integers 1 through 10.
number_list = [*range(1, 11)]

In [13]:
# Deliberately wasteful CPU-bound function used as the benchmark workload.
def count(number, iterations=10_000_000):
    """Busy-count up to ``iterations`` and return ``iterations * number``.

    The loop exists only to burn CPU time so that the sequential, threaded
    and multi-process runs can be compared on identical work.

    Args:
        number: Value multiplied by the final counter.
        iterations: How many increments to perform. Defaults to 10 million,
            the workload the benchmarks use; pass a small value for a quick
            check. Zero or a negative value performs no increments.

    Returns:
        The final counter (equal to ``iterations``, or 0 when no increments
        ran) multiplied by ``number``.
    """
    # Explicit counter instead of relying on the loop variable leaking out of
    # the ``for`` statement, which would be unbound for an empty range.
    counter = 0
    for _ in range(iterations):
        counter += 1
    return counter * number

In [37]:
# Worker entry point: run count for one item, log the outcome, hand it back.
def evaluate(item):
    """Compute ``count(item)``, print an ``Item ..., result ...`` line and return the value."""
    outcome = count(item)
    report = 'Item %s, result %s' % (item, outcome)
    print(report)
    return outcome

#### Sequential (synchronous) execution

In [38]:
# Baseline: call evaluate on every number one after another in the main thread.
start_time = time.perf_counter()
for item in number_list:
    evaluate(item)
elapsed = time.perf_counter() - start_time

print(f'Sequential Execution in {elapsed} seconds')

# on my machine it took 4.84 seconds

Item 1, result 10000000
Item 2, result 20000000
Item 3, result 30000000
Item 4, result 40000000
Item 5, result 50000000
Item 6, result 60000000
Item 7, result 70000000
Item 8, result 80000000
Item 9, result 90000000
Item 10, result 100000000
Sequential Execution in 5.089225718999842 seconds


#### Pool class

In [46]:
from multiprocessing import Pool

# Fan the CPU-bound evaluate calls out over a multiprocessing.Pool of 5 worker
# processes. Pool.map returns the results in input order, even though the
# per-item print lines from the workers may arrive in any order.
start_time = time.perf_counter()
# The __main__ guard is the pattern the multiprocessing docs recommend so that
# worker processes do not re-run the pool creation when they import this module.
if __name__ == '__main__':
    with Pool(5) as pool:
        print(pool.map(evaluate, number_list))
# Label the timing with what was actually measured (this run is not sequential).
print(f'Pool Execution in {(time.perf_counter() - start_time)} seconds')

Item 2, result 20000000
Item 4, result 40000000
Item 1, result 10000000
Item 5, result 50000000
Item 3, result 30000000
Item 9, result 90000000
Item 10, result 100000000
Item 7, result 70000000
Item 8, result 80000000
Item 6, result 60000000
[10000000, 20000000, 30000000, 40000000, 50000000, 60000000, 70000000, 80000000, 90000000, 100000000]
Sequential Execution in 1.1557320900001287 seconds


#### Thread pool with five workers

In [21]:
# Same workload on a ThreadPoolExecutor with 5 worker threads.
start_time = time.perf_counter()
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
    futures = [executor.submit(evaluate, item) for item in number_list]
    for future in futures:
        # result() re-raises any exception evaluate raised in a worker thread;
        # discarding the futures would let such failures pass silently.
        future.result()
# Label the timing with what was actually measured (this run is not sequential).
print(f'ThreadPool Execution in {(time.perf_counter() - start_time)} seconds')

# on my machine it took 5.01 seconds

Item 1, result 10000000
Item 2, result 20000000
Item 4, result 40000000
Item 3, result 30000000
Item 5, result 50000000
Item 6, result 60000000Item 8, result 80000000

Item 7, result 70000000
Item 9, result 90000000
Item 10, result 100000000
Sequential Execution in 5.017712871000072 seconds


#### Process pool with five workers

In [27]:
# Same workload on a ProcessPoolExecutor with 5 worker processes.

start_time = time.perf_counter()
with concurrent.futures.ProcessPoolExecutor(max_workers=5) as executor:
    futures = [executor.submit(evaluate, item) for item in number_list]
    for future in futures:
        # result() re-raises worker-side failures (including a broken pool);
        # discarding the futures would let such failures pass silently and the
        # timing below would look like a successful run.
        future.result()
# Label the timing with what was actually measured (this run is not sequential).
print(f'ProcessPool Execution in {(time.perf_counter() - start_time)} seconds')

# on my machine it took 1.12 seconds

Item 5, result 50000000
Item 2, result 20000000
Item 3, result 30000000
Item 4, result 40000000
Item 1, result 10000000
Item 10, result 100000000
Item 8, result 80000000
Item 6, result 60000000
Item 7, result 70000000
Item 9, result 90000000
Sequential Execution in 1.120922174000043 seconds


#### Why such a big difference between ThreadPool and ProcessPool?  
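The main reason is CPython's Global Interpreter Lock (GIL): only one thread at a time can execute Python bytecode, so the CPU-bound `count` loop gains nothing from running in five threads. Each worker process has its own interpreter and its own GIL, so, unlike threads, a pool of processes does not have to synchronize on that lock and can use several CPU cores in parallel.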

#### Example of fetching urls with ThreadPool and ProcessPool  

In [32]:
import concurrent.futures
import urllib.request
 
# Pages to download; the last entry is expected to fail so the error path is shown.
URLS = [
    'https://www.google.com/',
    'https://www.cnn.com/',
    'https://www.foxnews.com',
    'https://www.espn.com',
    'https://www.yahoo.com',
    'http://this-domain-does-not-exist-i-think.com/',
]
 
# Download a single page and hand back its raw contents.
def load_url(url, timeout):
    """Open ``url`` with the given ``timeout`` and return the response body as bytes.

    Any exception raised by ``urllib.request.urlopen`` or by reading the
    response propagates to the caller.
    """
    with urllib.request.urlopen(url, timeout=timeout) as response:
        body = response.read()
    return body
    
start_time = time.perf_counter()
# The with statement shuts the executor down and waits for its threads on exit.
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
    # Start the load operations and remember which URL each future belongs to.
    future_to_url = {executor.submit(load_url, url, 60): url for url in URLS}
    # as_completed yields futures as they finish, not in submission order.
    for future in concurrent.futures.as_completed(future_to_url):
        url = future_to_url[future]
        try:
            # result() returns the page bytes or re-raises the worker's exception.
            data = future.result()
        except Exception as exc:
            print('%r generated an exception: %s' % (url, exc))
        else:
            print('%r page is %d bytes' % (url, len(data)))

# Label the timing with what was actually measured (this run is not sequential).
print(f'ThreadPool Execution in {(time.perf_counter() - start_time)} seconds')

'https://www.google.com/' page is 13544 bytes
'http://this-domain-does-not-exist-i-think.com/' generated an exception: <urlopen error [Errno 8] nodename nor servname provided, or not known>
'https://www.foxnews.com' page is 322984 bytes
'https://www.espn.com' page is 456840 bytes
'https://www.cnn.com/' page is 1124679 bytes
'https://www.yahoo.com' page is 409066 bytes
Sequential Execution in 0.8635887099999309 seconds


In [33]:
import concurrent.futures
import urllib.request
 
# Pages to download; the last entry is expected to fail so the error path is shown.
URLS = [
    'https://www.google.com/',
    'https://www.cnn.com/',
    'https://www.foxnews.com',
    'https://www.espn.com',
    'https://www.yahoo.com',
    'http://this-domain-does-not-exist-i-think.com/',
]
 
# Download a single page and hand back its raw contents.
def load_url(url, timeout):
    """Open ``url`` with the given ``timeout`` and return the response body as bytes.

    Any exception raised by ``urllib.request.urlopen`` or by reading the
    response propagates to the caller.
    """
    with urllib.request.urlopen(url, timeout=timeout) as response:
        body = response.read()
    return body
    
start_time = time.perf_counter()
# The with statement shuts the executor down and waits for its worker processes on exit.
with concurrent.futures.ProcessPoolExecutor(max_workers=5) as executor:
    # Start the load operations and remember which URL each future belongs to.
    future_to_url = {executor.submit(load_url, url, 60): url for url in URLS}
    # as_completed yields futures as they finish, not in submission order.
    for future in concurrent.futures.as_completed(future_to_url):
        url = future_to_url[future]
        try:
            # result() returns the page bytes or re-raises the worker's exception.
            data = future.result()
        except Exception as exc:
            print('%r generated an exception: %s' % (url, exc))
        else:
            print('%r page is %d bytes' % (url, len(data)))

# Label the timing with what was actually measured (this run is not sequential).
print(f'ProcessPool Execution in {(time.perf_counter() - start_time)} seconds')

'https://www.google.com/' page is 12824 bytes
'http://this-domain-does-not-exist-i-think.com/' generated an exception: <urlopen error [Errno 8] nodename nor servname provided, or not known>
'https://www.foxnews.com' page is 322984 bytes
'https://www.espn.com' page is 456842 bytes
'https://www.cnn.com/' page is 1124679 bytes
'https://www.yahoo.com' page is 405484 bytes
Sequential Execution in 0.8694377980000354 seconds
