In [None]:
%%html
<style>
/* Headings: dark blue and bold. */
h1, h2, h3, h4, h5 {
    color: darkblue;
    font-weight: bold !important;
}
/* Second-level headings get a thick underline. */
h2 {
    border-bottom: 8px solid darkblue !important;
    padding-bottom: 8px;
}
/* Third-level headings get a thin underline. */
h3 {
    border-bottom: 2px solid darkblue !important;
    padding-bottom: 6px;
}
/* Message boxes: shared border, margin and padding. */
.info, .success, .warning, .error {
    border: 1px solid;
    margin: 10px 0px;
    padding:15px 10px;
}
/* Per-box text and background colours. */
.info {
    color: #00529b;
    background-color: #bde5f8;
}
.success {
    color: #4f8a10;
    background-color: #dff2bf;
}
.warning {
    color: #9f6000;
    background-color: #FEEFB3;
}
.error {
    color: #D8000C;
    background-color: #FFBABA;
}
/* Extra-heavy text for elements carrying the language-bash class. */
.language-bash {
    font-weight: 900;
}
/* "Example: ..." labels (the spans with class 'ex' in this notebook). */
.ex {
    font-weight: 900;
    color: rgba(27,27,255,0.87) !important;
}
/* Monospace font stack for elements carrying the mn class. */
.mn {
    font-family: Menlo, Consolas, "DejaVu Sans Mono", monospace
}
/* Remove the left margin on tables. */
table {
    margin-left: 0 !important;}
</style>

# Day 2: Up and Running with Python

## 2.6 Multithreading and Multiprocessing

### Sequential Execution

-   Threads can provide concurrency, even if they're not truly parallel.

<span class='ex'>Example: Sequential</span>

In [None]:
%%file ./sequential_1.py
import time
import requests

def cpu_task(number):
    """Simulate a small CPU-bound job and record ``number`` squared.

    The multiplication is repeated 100 times, the same amount of work the
    threaded, queue-based and multiprocessing versions of this example
    perform, so the timings printed by the scripts are comparable.

    Args:
        number: Value to square.

    Side effects:
        Appends ``number * number`` to the module-level ``squares`` list.
    """
    for _ in range(100):
        sqr = number * number
    # No ``global`` statement is needed: the list is mutated, never rebound.
    squares.append(sqr)

def io_task(url, timeout=10):
    """Download ``url`` and record the size of the response body.

    Args:
        url: Address to fetch with an HTTP GET request.
        timeout: Seconds ``requests`` waits on the server before giving up.
            Without a timeout, ``requests.get`` can block indefinitely on an
            unresponsive host and stall the whole benchmark.

    Side effects:
        Stores ``len(response.content)`` under ``url`` in the module-level
        ``length`` dict.

    Exceptions raised by ``requests.get`` (including a timeout) are not
    caught here and propagate to the caller.
    """
    response = requests.get(url, timeout=timeout)
    # No ``global`` statement is needed: the dict is mutated, never rebound.
    length[url] = len(response.content)

# ---- CPU-bound benchmark: square 1..10 one after another ----
start_time = time.time()

squares = []  # filled in by cpu_task()
for value in range(1, 11):
    cpu_task(value)

end_time = time.time()
print(f'squares={squares}.\n Total time = {end_time-start_time}')
# ---- end of CPU-bound benchmark ----


# ---- IO-bound benchmark: download every page one after another ----
start_time = time.time()

urls = [
    'http://LinuxJournal.com',
    'http://en.wikipedia.org',
    'http://Facebook.com',
    'http://NYTimes.com',
    'http://thetech.com',
    'http://news.ycombinator.com',
    'http://WashingtonPost.com',
    'http://thetech.com',
    'http://Haaretz.co.il',
    'http://lerner.co.il',
]
length = {}  # url -> size of the downloaded body, filled in by io_task()
for address in urls:
    io_task(address)

end_time = time.time()
print(f'length={length}.\n Total time = {end_time-start_time}')
# ---- end of IO-bound benchmark ----

In [None]:
!python ./sequential_1.py

### Multithreading

<span class='ex'>Example: Multithreading</span>

In [None]:
%%file ./thread_1.py
import time
import requests
import threading

def cpu_task(number):
    """Square ``number`` 100 times over, then record the result.

    Args:
        number: Value to square.

    Side effects:
        Appends ``number * number`` to the module-level ``squares`` list.
    """
    remaining = 100
    while remaining > 0:
        result = number * number
        remaining -= 1
    squares.append(result)

def io_task(url, timeout=10):
    """Download ``url`` and record the size of the response body.

    Args:
        url: Address to fetch with an HTTP GET request.
        timeout: Seconds ``requests`` waits on the server before giving up.
            Without a timeout, a single unresponsive host would keep its
            thread alive forever and the later ``join()`` would never return.

    Side effects:
        Stores ``len(response.content)`` under ``url`` in the module-level
        ``length`` dict.

    Exceptions raised by ``requests.get`` (including a timeout) are not
    caught here.
    """
    response = requests.get(url, timeout=timeout)
    # No ``global`` statement is needed: the dict is mutated, never rebound.
    length[url] = len(response.content)
    
# ---- CPU-bound benchmark: one thread per number ----
start_time = time.time()

squares = []  # results, appended to by cpu_task()
threads = []  # every Thread started below, kept so it can be joined

for value in range(1, 11):
    worker = threading.Thread(target=cpu_task, args=(value,))
    worker.start()
    threads.append(worker)

# Wait for every worker before stopping the clock.
for worker in threads:
    worker.join()

end_time = time.time()
print(f'squares={squares}.\n Total time = {end_time-start_time}')
# ---- end of CPU-bound benchmark ----


# ---- IO-bound benchmark: one thread per download ----
start_time = time.time()

urls = [
    'http://LinuxJournal.com',
    'http://en.wikipedia.org',
    'http://Facebook.com',
    'http://NYTimes.com',
    'http://thetech.com',
    'http://news.ycombinator.com',
    'http://WashingtonPost.com',
    'http://thetech.com',
    'http://Haaretz.co.il',
    'http://lerner.co.il',
]
length = {}   # url -> size of the downloaded body, filled in by io_task()
threads = []  # every Thread started below, kept so it can be joined

for address in urls:
    worker = threading.Thread(target=io_task, args=(address,))
    worker.start()
    threads.append(worker)

# Wait for every download before stopping the clock.
for worker in threads:
    worker.join()

end_time = time.time()
print(f'length={length}.\n Total time = {end_time-start_time}')
# ---- end of IO-bound benchmark ----

In [None]:
!python ./thread_1.py

#### It is noticed that
-   The multithreaded program runs faster than the sequential one when the task is IO-bound, but not when it is CPU-bound.
-   There is a potential race condition when updating the shared resources `squares` and `length`.

### Communication among Threads with Queue

<span class='ex'>Example: Communication among threads using a queue</span>

In [None]:
%%file ./thread_2.py
import time
import requests
import threading
from queue import Queue

def cpu_task(number):
    """Square ``number`` 100 times over and publish the result.

    Args:
        number: Value to square.

    Side effects:
        Puts ``number * number`` on the module-level ``queue``.
    """
    for _ in range(100):
        product = number * number
    queue.put(product)

def io_task(url, timeout=10):
    """Download ``url`` and publish the size of the response body.

    Args:
        url: Address to fetch with an HTTP GET request.
        timeout: Seconds ``requests`` waits on the server before giving up.
            Without a timeout, a single unresponsive host would keep its
            thread alive forever and the later ``join()`` would never return.

    Side effects:
        Puts the tuple ``(url, len(response.content))`` on the module-level
        ``queue``.

    Exceptions raised by ``requests.get`` (including a timeout) are not
    caught here; in that case nothing is put on the queue for ``url``.
    """
    response = requests.get(url, timeout=timeout)
    queue.put((url, len(response.content)))

# ---- CPU-bound benchmark: threads report their results through a Queue ----
start_time = time.time()

threads = []
queue = Queue()  # channel that cpu_task() puts its result on

for value in range(1, 11):
    worker = threading.Thread(target=cpu_task, args=(value,))
    worker.start()
    threads.append(worker)

for worker in threads:
    worker.join()

end_time = time.time()

# Every producer has finished, so the queue can simply be emptied.
mylist = []
while True:
    if queue.empty():
        break
    mylist.append(queue.get())

print(f'squares={mylist}.\n Total time = {end_time-start_time}')
# ---- end of CPU-bound benchmark ----


# ---- IO-bound benchmark: threads report their results through a Queue ----
start_time = time.time()

urls = ['http://LinuxJournal.com',
        'http://en.wikipedia.org',
        'http://Facebook.com',
        'http://NYTimes.com',
        'http://thetech.com',
        'http://news.ycombinator.com',
        'http://WashingtonPost.com',
        'http://thetech.com',
        'http://Haaretz.co.il',
        'http://lerner.co.il'
       ]
queue = Queue()  # fresh channel that io_task() puts (url, size) tuples on
# Start from an empty list so this benchmark only tracks (and joins) its own
# threads instead of also re-joining the finished CPU-benchmark threads.
threads = []

for url in urls:
    t = threading.Thread(target=io_task, args=(url,))
    threads.append(t)
    t.start()

# Wait for every download before stopping the clock.
for t in threads:
    t.join()

end_time = time.time()

# Every producer has finished, so the queue can simply be emptied.
mylist = []
while not queue.empty():
    mylist.append(queue.get())

print(f'length={mylist}.\n Total time = {end_time-start_time}')
# ---- end of IO-bound benchmark ----

In [None]:
!python ./thread_2.py

### Multiprocessing
-   Python's "multiprocessing" module feels like threads, but actually launches processes.

<span class="ex">Example: Multiprocessing</span>

In [None]:
%%file ./multiprocessing_1.py
import time
import requests
import multiprocessing as mp

def cpu_task(queue, number):
    """Square ``number`` 100 times over and send the result to the parent.

    Args:
        queue: Object with a ``put`` method that receives the result.
        number: Value to square.
    """
    passes_left = 100
    while passes_left:
        squared = number * number
        passes_left -= 1
    queue.put(squared)

def io_task(queue, url, timeout=10):
    """Download ``url`` and send the size of the response body to the parent.

    Args:
        queue: Object with a ``put`` method that receives the result.
        url: Address to fetch with an HTTP GET request.
        timeout: Seconds ``requests`` waits on the server before giving up.
            Without a timeout, a single unresponsive host would keep its
            worker process alive forever and the parent would never finish.

    Side effects:
        Puts the tuple ``(url, len(response.content))`` on ``queue``.

    Exceptions raised by ``requests.get`` (including a timeout) are not
    caught here; in that case nothing is put on the queue for ``url``.
    """
    response = requests.get(url, timeout=timeout)
    queue.put((url, len(response.content)))


def _run_in_processes(target, items):
    """Run ``target(queue, item)`` in one process per item and collect results.

    Results are read from the queue *while* the workers are still running and
    the workers are joined only afterwards. Joining a process that still has
    buffered queue items can deadlock, and ``Queue.empty()`` is documented as
    unreliable for multiprocessing queues, so neither is used to decide when
    collection is complete.

    Args:
        target: Callable invoked in each child as ``target(queue, item)``.
        items: Iterable of work items; one process is started per item.

    Returns:
        List of everything the workers put on the queue, in arrival order.
        A worker that dies without putting a result contributes nothing.
    """
    from queue import Empty  # raised by mp.Queue.get() when the timeout expires

    queue = mp.Queue()
    processes = []
    for item in items:
        p = mp.Process(target=target, args=(queue, item))
        processes.append(p)
        p.start()

    results = []
    while True:
        # Sample liveness BEFORE polling: a worker flushes its queued items
        # before it terminates, so "all workers were already gone and the
        # queue still yields nothing" means there is nothing left to read.
        all_exited = not any(p.is_alive() for p in processes)
        try:
            # The short timeout keeps the polling overhead well below the
            # durations being measured.
            results.append(queue.get(timeout=0.01))
        except Empty:
            if all_exited:
                break

    for p in processes:
        p.join()
    return results


if __name__ == '__main__':

    # ---- CPU-bound benchmark: one process per number ----
    start_time = time.time()

    mylist = _run_in_processes(cpu_task, range(1, 11))

    end_time = time.time()
    print(f'squares={mylist}.\n Total time = {end_time-start_time}')

    # ---- IO-bound benchmark: one process per download ----
    start_time = time.time()

    urls = ['http://LinuxJournal.com',
            'http://en.wikipedia.org',
            'http://Facebook.com',
            'http://NYTimes.com',
            'http://thetech.com',
            'http://news.ycombinator.com',
            'http://WashingtonPost.com',
            'http://thetech.com',
            'http://Haaretz.co.il',
            'http://lerner.co.il'
           ]

    mylist = _run_in_processes(io_task, urls)

    end_time = time.time()
    print(f'length={mylist}.\n Total time = {end_time-start_time}')

In [None]:
!python ./multiprocessing_1.py