# Python 멀티 쓰레딩(multi threading) vs 멀티 프로세싱(multi processing)

## 시작 전 알아야 할 사실

### 프로세스와 쓰레드

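- 프로세스: 운영체제로부터 독립된 메모리 공간을 할당받아 실행 중인 프로그램. 쓰레드: 프로세스 안에서 실행되는 흐름의 단위로, 같은 프로세스에 속한 쓰레드끼리는 메모리를 공유한다.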

### CPU Burst와 I/O Burst

- Burst: 어떤 현상이 짧은 시간 안에 집중적으로 일어나는 일. 데이터를 전송할 때 특정 부분에서 오류가 집중적으로 발생하거나, 주기억 장치의 내용을 캐시 기억 장치에 블록 단위로 한꺼번에 전송하는 것 등을 가리킨다.
- CPU Burst
  - CPU 명령 작업이 연속되는 경우
  - 이미지 프로세싱, 모델 학습
- I/O Burst
  - I/O 명령 작업이 연속되는 경우
  - 백엔드 API 서버

### CPU Bounding과 I/O Bounding

![not_push_test](schedule.png)

- CPU Bounding: CPU Burst의 비중이 높은 프로세스
- I/O Bounding: I/O Burst의 비중이 높은 프로세스

## 멀티 쓰레딩과 멀티 프로세싱

## 출처

1. "CPU Bound vs I/O Bound", 2021.06.05., https://taes-k.github.io/2021/06/05/cpu-io-bound/
2. "[운영체제]CPU burst VS I/O burst", 2019.07.19., https://jhnyang.tistory.com/25
3. "버스트", 2008.01.15., https://terms.naver.com/entry.naver?docId=1590886&cid=50373&categoryId=50373
4. "cpu bound, io bound 의미를 설명합니다! 이에 따른 스레드 개수를 정하는 팁도 알려드립니다!", 2022.01.10., https://youtu.be/qnVKEwjG_gM

In [1]:
from pycomplex import *
import threading
import multiprocessing as mp
import gc

In [2]:
# CNT_THREAD = 8
# CNT_MULTI_PROCESS = 8

# EPOCH = 10000000
# STD_PRINT = EPOCH // 10


In [3]:
def dfs(now, max_depth):
    """Build a list by recursive doubling until ``now`` passes ``max_depth``.

    Args:
        now: Current depth counter.
        max_depth: Last depth at which the recursion still branches.

    Returns:
        ``[now]`` when ``now`` is already greater than ``max_depth``; otherwise
        the concatenation of two recursive results computed for ``now + 1``.
    """
    if now <= max_depth:
        # Both branches are evaluated independently on purpose: nothing is
        # cached, so the amount of work doubles at every level.
        first_half = dfs(now + 1, max_depth)
        second_half = dfs(now + 1, max_depth)
        return first_half + second_half

    return [now]


# def worker_thread(idx, now, max_depth):
    # r = dfs(now, max_depth)
def worker_thread(idx, s, e, std_print):
    """Walk the integers in ``[s, e)`` and log progress for worker ``idx``.

    Args:
        idx: Worker number, shown zero-padded to three digits in every message.
        s: First integer of the range (inclusive).
        e: End of the range (exclusive).
        std_print: Progress interval; one line is printed for every value in
            the range that is a multiple of it. Must be non-zero, otherwise
            the modulo below raises ``ZeroDivisionError``.

    Every message is passed to ``print`` as a single string that already ends
    with its newline (``end=""``). With the default ``end`` the line break is
    written separately from the text, which lets another thread's output land
    in between and fuse two messages onto one line.
    """
    tag = f"[{idx:03d} thread]"
    print(f"{tag} start\n", end="")

    # NOTE(review): the whole range is materialised before the loop, as in the
    # original; presumably the allocation is part of the measured workload -
    # confirm before switching to iterating the range lazily.
    list_iter = list(range(s, e))
    for i in list_iter:
        if i % std_print == 0:
            print(f"{tag} {s:9d} ~ {e-1:9d}, now: {i:9d}\n", end="")

    print(f"{tag} End\n", end="")
    
    
# def worker_process(idx, s, e, std_print):
#     print(f"[{idx:03d} process] start")
    
#     list_iter = [i for i in range(s, e)]
#     for i in list_iter:
#         if i % std_print == 0:
#             print(f"[{idx:03d} process] {s:9d} ~ {e-1:9d}, now: {i:9d}")
    
#     print(f"[{idx:03d} process] End")

def exam_threading(cnt_thread, epoch, std_print):
    """Run ``worker_thread`` on ``cnt_thread`` threads and wait for all of them.

    Args:
        cnt_thread: Number of threads to create.
        epoch: Size of the integer range given to each thread; thread ``i``
            receives ``[i * epoch, (i + 1) * epoch)``.
        std_print: Progress interval forwarded unchanged to ``worker_thread``.
    """
    print(f"========== [ example {cnt_thread:3d} threads start ] ==========")

    # Create every thread first so that they are all started back to back.
    workers = []
    for idx in range(cnt_thread):
        lower = idx * epoch
        upper = (idx + 1) * epoch
        workers.append(
            threading.Thread(
                target=worker_thread,
                args=(idx, lower, upper, std_print),
            )
        )

    for worker in workers:
        worker.start()
    # Block until each thread has finished before printing the end banner.
    for worker in workers:
        worker.join()

    print(f"========== [ example {cnt_thread:3d} threads end ] ==========")

    
# def exam_multiprocessing(cnt_process, epoch, std_print):
#     print(f"========== [ example {cnt_process:3d} processes start ] ==========")
    
#     # pools = mp.Pool(cnt_process)
    
#     # pools.starmap(worker_process, [(i, i*epoch, (i+1)*epoch, std_print) for i in range(cnt_process)])
    
#     # pools.close()
#     # pools.join()
    
#     exam_process = [mp.Process(name=f"{i+1} process", target=worker_process, args=(i, i*epoch, (i+1)*epoch, std_print)) for i in range(cnt_process)]
#     print(exam_process)
#     for p in exam_process:
#         p.start()
#     # for p in exam_process:
#     #     p.join()        

#     print(f"========== [ example {cnt_process:3d} processes end ] ==========")

    


In [4]:
# NOTE(review): exam_multiprocessing exists in this notebook only as
# commented-out code, so this call raises NameError unless the name is provided
# from somewhere else (e.g. a star import) - confirm before running.
exam_multiprocessing(8, 2*10**7, 2*10**6)


NameError: name 'exam_multiprocessing' is not defined

In [5]:
# Run 8 threads; thread i walks the integers in [i * 2*10**7, (i + 1) * 2*10**7)
# and logs a progress line at every multiple of 2*10**6.
exam_threading(8, 2*10**7, 2*10**6)


[000 thread] start
[001 thread] start
[002 thread] start
[003 thread] start
[004 thread] start
[005 thread] start
[006 thread] start
[007 thread] start
[000 thread]         0 ~  19999999, now:         0
[004 thread]  80000000 ~  99999999, now:  80000000
[007 thread] 140000000 ~ 159999999, now: 140000000
[001 thread]  20000000 ~  39999999, now:  20000000
[007 thread] 140000000 ~ 159999999, now: 142000000
[004 thread]  80000000 ~  99999999, now:  82000000
[000 thread]         0 ~  19999999, now:   2000000
[002 thread]  40000000 ~  59999999, now:  40000000
[003 thread]  60000000 ~  79999999, now:  60000000
[007 thread] 140000000 ~ 159999999, now: 144000000
[000 thread]         0 ~  19999999, now:   4000000
[006 thread] 120000000 ~ 139999999, now: 120000000
[005 thread] 100000000 ~ 119999999, now: 100000000
[000 thread]         0 ~  19999999, now:   6000000
[001 thread]  20000000 ~  39999999, now:  22000000
[004 thread]  80000000 ~  99999999, now:  84000000[000 thread]         0 ~  1999999

In [6]:
# exam_threading(8, 2*10**7, 2*10**6)

# Same 8-thread run, but handed to check_time_memory_print together with its
# arguments. That helper is not defined in this notebook - presumably it comes
# from the pycomplex star import and reports time/CPU/memory figures; confirm.
check_time_memory_print(exam_threading, 8, 2*10**7, 2*10**6)



[000 thread] start
[001 thread] start
[002 thread] start
[003 thread] start
[004 thread] start
[005 thread] start
[006 thread] start
[007 thread] start
[000 thread]         0 ~  19999999, now:         0
[005 thread] 100000000 ~ 119999999, now: 100000000
[000 thread]         0 ~  19999999, now:   2000000
[005 thread] 100000000 ~ 119999999, now: 102000000
[003 thread]  60000000 ~  79999999, now:  60000000
[007 thread] 140000000 ~ 159999999, now: 140000000
[000 thread]         0 ~  19999999, now:   4000000
[001 thread]  20000000 ~  39999999, now:  20000000
[003 thread]  60000000 ~  79999999, now:  62000000
[005 thread] 100000000 ~ 119999999, now: 104000000
[001 thread]  20000000 ~  39999999, now:  22000000
[007 thread] 140000000 ~ 159999999, now: 142000000
[002 thread]  40000000 ~  59999999, now:  40000000
[000 thread]         0 ~  19999999, now:   6000000
[005 thread] 100000000 ~ 119999999, now: 106000000[006 thread] 120000000 ~ 139999999, now: 120000000
[004 thread]  80000000 ~  9999999

In [7]:
# Force a full garbage collection (generation 2 is the oldest generation)
# before the measured call.
gc.collect(generation=2)
# dfs(1, 25) builds a list of 2**25 elements through repeated concatenation.
# NOTE(review): check_time_memory_print is not defined in this notebook -
# presumably provided by the pycomplex star import; confirm.
check_time_memory_print(dfs, 1, 25)

func               :                        dfs
args               :                    (1, 25)
Start Time         : 2022-08-22 21:10:34.175443
CPU percent        :                   6.200  %
Memory percent     :                  55.400  %
Current memory KB  :                  93.145 KB
End Time           : 2022-08-22 21:10:42.197320
Running Time       :             0:00:08.021877
CPU percent        :                   7.400  %
Memory percent     :                  57.000  %
Current memory KB  :                 351.410 KB


In [8]:
def func1(size=10000000):
    """Materialise the integers ``0 .. size - 1`` and print the first ten.

    Args:
        size: How many consecutive integers to put in the list. Defaults to
            10,000,000, the value that used to be hard-coded, so calling
            ``func1()`` (or using it as a process target without arguments)
            behaves as before.
    """
    ttt = list(range(size))
    print(ttt[:10])

# Run func1 in a separate process instead of in the notebook's own process.
a = mp.Process(target=func1)

# NOTE(review): the child is started but never joined, so this cell returns
# without waiting for func1 to finish - confirm that this is intended.
a.start()


In [4]:
from not_push import *

# idx, s, e, std_print
# a= mp.Process(target=worker_process, args=(8, 10**7, 2*10**7, 10**7//10))
# a.start()
# Fan the work out over a pool of 4 processes. The original call was
# `p.starmap(,)`, which is a syntax error; the function and argument tuples are
# filled in following the (idx, s, e, std_print) shape recorded in the comments.
# NOTE(review): assumes worker_process(idx, s, e, std_print) is exported by the
# not_push star import, and that 2*10**7 integers per process with a progress
# line every 2*10**6 is the intended workload - confirm both.
# The with-block shuts the pool down once starmap has returned every result.
with mp.Pool(processes=4) as p:
    output = p.starmap(
        worker_process,
        [
            (i, i * (2 * 10**7), (i + 1) * (2 * 10**7), 2 * 10**6)
            for i in range(4)
        ],
    )