## Lab 9: Concurrency 

In [4]:
import time
import threading

def do_something():
    print('Sleeping 1 second...')
    time.sleep(1)
    print('Done sleeping')

In [5]:
start = time.time()

do_something()
do_something()

finish = time.time()

print(f'Finished in {(finish - start):.1f} seconds')

Sleeping 1 second...
Done sleeping
Sleeping 1 second...
Done sleeping
Finished in 2.0 seconds


#### Sequential code

<img   src="images/im1.png" alt="Drawing" style="width: 500px;"/>


#### Multi threaded code

<img   src="images/im2.png" alt="Drawing" style="width: 500px;"/>


In [7]:
#things are not paralle here, 
#func2 only starts running when func one starts waiting

start = time.time()

t1 = threading.Thread(target = do_something)
t2 = threading.Thread(target = do_something)

t1.start()
t2.start()

#wait for the threads to finish
t1.join()
t2.join()

finish = time.time()

print(f'Finished in {(finish - start):.1f} seconds')
#most of the time is spent in waiting

Sleeping 1 second...Sleeping 1 second...

Done sleeping
Done sleeping
Finished in 1.0 seconds


In [8]:
start = time.time()

t1 = threading.Thread(target = do_something)
t2 = threading.Thread(target = do_something)

t1.start()
t2.start()

#wait for the threads to finish
t1.join()
t2.join()

finish = time.time()

print(f'Finished in {(finish - start):.1f} seconds')

Sleeping 1 second...
Sleeping 1 second...
Finished in 0.0 seconds
Done sleeping
Done sleeping


In [9]:
def do_something_specific(seconds):
    print(f'Sleeping {seconds} seconds...')
    time.sleep(seconds)
    print('Done sleeping')

In [10]:
start = time.time()

threads = []

for _ in range(10):
    t = threading.Thread(target = do_something_specific, 
                         args = [2])
    t.start()
    threads.append(t)

for thread in threads:
    thread.join()

finish = time.time()

print(f'Finished in {(finish - start):.1f} seconds')

Sleeping 2 seconds...
Sleeping 2 seconds...
Sleeping 2 seconds...
Sleeping 2 seconds...Sleeping 2 seconds...

Sleeping 2 seconds...Sleeping 2 seconds...

Sleeping 2 seconds...
Sleeping 2 seconds...
Sleeping 2 seconds...
Done sleepingDone sleeping

Done sleeping
Done sleepingDone sleepingDone sleeping


Done sleepingDone sleepingDone sleeping


Done sleeping
Finished in 2.0 seconds


### Queue

In [13]:
from queue import Queue 

my_queue = Queue(maxsize=0) #FIFO queue
my_queue.put(1)
my_queue.put(2)
my_queue.put(3)
print(my_queue.get()) #first thing in first thing out
print(my_queue.get()) #first thing in first thing out
print(my_queue)

1
2
<queue.Queue object at 0x1140ac6a0>


In [17]:
q = Queue()
num_threads = 2

In [18]:
def worker():
    while True:
        #print the thread id of the code
        print(f'Waiting for message, id = {threading.get_ident()}')
        item = q.get()
        print(f'Message received = {item}, id = {threading.get_ident()}')
        
        if item is not None:
            do_work(item)
            q.task_done() 
            #this thread is done getting the item from the queue
        else:
            q.task_done()
            break

In [22]:
def do_work(item):
    print(f'Processing message .... {threading.get_ident()} -- {item}')
    time.sleep(2)
    print(f'Message processed .... {threading.get_ident()} -- {item}')

In [23]:
threads = []

for _ in range(num_threads):
    t = threading.Thread(target = worker)
    t.start()
    threads.append(t)

Waiting for message, id = 123145546878976
Waiting for message, id = 123145552134144


In [24]:
#Add items to queue
for item in ['wuphf','dot','com']:
    q.put(item)
# id receiving it the same as id processing it

Message received = wuphf, id = 123145541623808Message received = dot, id = 123145536368640Message received = com, id = 123145546878976
Processing message .... 123145536368640 -- dot

Processing message .... 123145541623808 -- wuphf

Processing message .... 123145546878976 -- com
Message processed .... 123145536368640 -- dotMessage processed .... 123145541623808 -- wuphfMessage processed .... 123145546878976 -- com
Waiting for message, id = 123145536368640

Waiting for message, id = 123145541623808

Waiting for message, id = 123145546878976


In [25]:
#Print all running threads
threading.enumerate()

[<_MainThread(MainThread, started 4607161792)>,
 <Thread(Thread-2, started daemon 123145513201664)>,
 <Heartbeat(Thread-3, started daemon 123145518456832)>,
 <HistorySavingThread(IPythonHistorySavingThread, started 123145524785152)>,
 <ParentPollerUnix(Thread-1, started daemon 123145531113472)>,
 <Thread(Thread-18, started 123145536368640)>,
 <Thread(Thread-19, started 123145541623808)>,
 <Thread(Thread-20, started 123145546878976)>,
 <Thread(Thread-21, started 123145552134144)>]

In [26]:
#stop threads
for _ in range(num_threads):
    q.put(None)

Message received = None, id = 123145536368640
Message received = None, id = 123145552134144


### Multiprocessing

Tasks are executed on multiple processors / cpus

In [3]:
import multiprocessing as mp
print("Number of processors: ", mp.cpu_count())

Number of processors:  4


### Example: Calculate e^x of elements of an array

In [6]:
import numpy as np
arr = np.random.randint(0, 10, size=[5000])
data = arr.tolist()


In [7]:
def factorial_upto(n):
    res = [1]
    
    for i in range(1, n + 1):
        res.append(res[-1] * i)
    return res

def taylor_exp(x,n=1000):
    
    factorials = factorial_upto(n)
    
    res = 0
    for i in range(n):
        res += x**i / factorials[i]
    
    return res

### Sequential solution

In [8]:
start = time.time()
results = []
for x in data:
    results.append(taylor_exp(x))

print(f'Finished in {(time.time() - start):.4f} seconds')


Finished in 10.8182 seconds


In [13]:
results

[54.598150033144265,
 7.389056098930649,
 148.41315910257657,
 1096.6331584284578,
 403.4287934927351,
 2.7182818284590455,
 148.41315910257657,
 7.389056098930649,
 2980.957987041728,
 54.598150033144265,
 54.598150033144265,
 403.4287934927351,
 1.0,
 7.389056098930649,
 20.08553692318766,
 20.08553692318766,
 1096.6331584284578,
 2.7182818284590455,
 54.598150033144265,
 7.389056098930649,
 1.0,
 1.0,
 148.41315910257657,
 7.389056098930649,
 2.7182818284590455,
 54.598150033144265,
 403.4287934927351,
 8103.083927575384,
 2980.957987041728,
 2980.957987041728,
 1.0,
 54.598150033144265,
 2980.957987041728,
 8103.083927575384,
 8103.083927575384,
 1.0,
 1.0,
 2.7182818284590455,
 7.389056098930649,
 1.0,
 2980.957987041728,
 7.389056098930649,
 2980.957987041728,
 20.08553692318766,
 1.0,
 20.08553692318766,
 54.598150033144265,
 2.7182818284590455,
 1096.6331584284578,
 2980.957987041728,
 1096.6331584284578,
 54.598150033144265,
 54.598150033144265,
 54.598150033144265,
 1.0,
 403

### Parallelizing using Pool.map

In [9]:
start = time.time()

pool = mp.Pool(mp.cpu_count())

results_mp = pool.map(taylor_exp, [x for x in data])

pool.close() #we close the pool processes we just opened
print(f'Finished in {(time.time() - start):.4f} seconds')

assert (results_mp == results)

Finished in 5.3450 seconds


In [11]:
results_mp

[54.598150033144265,
 7.389056098930649,
 148.41315910257657,
 1096.6331584284578,
 403.4287934927351,
 2.7182818284590455,
 148.41315910257657,
 7.389056098930649,
 2980.957987041728,
 54.598150033144265,
 54.598150033144265,
 403.4287934927351,
 1.0,
 7.389056098930649,
 20.08553692318766,
 20.08553692318766,
 1096.6331584284578,
 2.7182818284590455,
 54.598150033144265,
 7.389056098930649,
 1.0,
 1.0,
 148.41315910257657,
 7.389056098930649,
 2.7182818284590455,
 54.598150033144265,
 403.4287934927351,
 8103.083927575384,
 2980.957987041728,
 2980.957987041728,
 1.0,
 54.598150033144265,
 2980.957987041728,
 8103.083927575384,
 8103.083927575384,
 1.0,
 1.0,
 2.7182818284590455,
 7.389056098930649,
 1.0,
 2980.957987041728,
 7.389056098930649,
 2980.957987041728,
 20.08553692318766,
 1.0,
 20.08553692318766,
 54.598150033144265,
 2.7182818284590455,
 1096.6331584284578,
 2980.957987041728,
 1096.6331584284578,
 54.598150033144265,
 54.598150033144265,
 54.598150033144265,
 1.0,
 403

### Parallelizing using Pool.starmap

Lets us pass multiple arguments

In [10]:
start = time.time()

pool = mp.Pool(mp.cpu_count())

#make us parse in multiple arguments in the function
results_smp = pool.starmap(taylor_exp, [(x,1000) for x in data])

pool.close()
print(f'Finished in {(time.time() - start):.4f} seconds')

#or you could do 
# with Pool(mp.cpu_count()) as p:
#     ....
#this way, we dont ahve to close the pool
 


assert (results_smp == results)

Finished in 5.0840 seconds


In [12]:
results_smp

[54.598150033144265,
 7.389056098930649,
 148.41315910257657,
 1096.6331584284578,
 403.4287934927351,
 2.7182818284590455,
 148.41315910257657,
 7.389056098930649,
 2980.957987041728,
 54.598150033144265,
 54.598150033144265,
 403.4287934927351,
 1.0,
 7.389056098930649,
 20.08553692318766,
 20.08553692318766,
 1096.6331584284578,
 2.7182818284590455,
 54.598150033144265,
 7.389056098930649,
 1.0,
 1.0,
 148.41315910257657,
 7.389056098930649,
 2.7182818284590455,
 54.598150033144265,
 403.4287934927351,
 8103.083927575384,
 2980.957987041728,
 2980.957987041728,
 1.0,
 54.598150033144265,
 2980.957987041728,
 8103.083927575384,
 8103.083927575384,
 1.0,
 1.0,
 2.7182818284590455,
 7.389056098930649,
 1.0,
 2980.957987041728,
 7.389056098930649,
 2980.957987041728,
 20.08553692318766,
 1.0,
 20.08553692318766,
 54.598150033144265,
 2.7182818284590455,
 1096.6331584284578,
 2980.957987041728,
 1096.6331584284578,
 54.598150033144265,
 54.598150033144265,
 54.598150033144265,
 1.0,
 403

### Global interpreter lock

Only one thread can access the interpreter at a time due to GIL

Python releases GIL 

- while a thread is waiting for IO
- while numpy is doing an array operation

In [33]:
import math

def f(x): #Doesnot releast GIL
    print (x)
    y = [1]*5000000
    [math.exp(i) for i in y]
    
def g(x):   #Releases GIL
    print (x)
    y = np.ones(5000000)
    np.exp(y)

def do_work(q,func):
    while True:
        item = q.get()
        
        if item is not None:
            func(item)
            q.task_done()
        else:
            q.task_done()
            break

### serial f()

In [34]:
start = time.time()

for i in range(10):
    f(i)

print(f'Finished in {(time.time() - start):.4f} seconds')


0
1
2
3
4
5
6
7
8
9
Finished in 11.1527 seconds


### threaded f()

In [35]:
start = time.time()

q = Queue()
num_threads = 4

for i in range(num_threads):
    worker = threading.Thread(target = do_work, args = (q,f)) # refer to q
    worker.setDaemon(True) # this stop the threads when the program quits  
    worker.start()         # start the threads

# now we have started 10 threads:

for i in range(10):
    q.put(i)

q.join()
print(f'Finished in {(time.time() - start):.4f} seconds')


0123



45

6
7
89

Finished in 11.6681 seconds


### parallel f()

In [36]:
start = time.time()

pool = mp.Pool(4)

results_mp = pool.map(f, [x for x in range(10)])

pool.close()
print(f'Finished in {(time.time() - start):.4f} seconds')


2
0
1
3
4
5
6
7
8
9
Finished in 7.3621 seconds


### serial g()

In [37]:
start = time.time()
# all threads do operations concurrently due to GLI is released
for i in range(100):
    g(i)

print(f'Finished in {(time.time() - start):.4f} seconds')


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
Finished in 8.8305 seconds


### threaded g()

In [None]:
start = time.time()

q = Queue()
num_threads = 4

for i in range(num_threads):
    worker = threading.Thread(target = do_work, args = (q,g)) # refer to q
    worker.setDaemon(True) # this stop the threads when the program quits  
    worker.start()         # start the threads

# now we have started 10 threads:

for i in range(100):
    q.put(i)

q.join()
print(f'Finished in {(time.time() - start):.4f} seconds')
