In [1]:
from dask.distributed import Client

In [2]:
client = Client(n_workers = 4, threads_per_worker=4)

In [3]:
client

0,1
Client  Scheduler: tcp://127.0.0.1:60120  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 4  Cores: 16  Memory: 51.26 GB


In [9]:
# Future instance object : status & results
# dask.distributed API 는 즉시 실행됨( delayed : lazy )

import time
def slow_pow(x, y):
    time.sleep(1)
    return x**y

res = client.submit(slow_pow, 10, 10) # pending
res

In [6]:
res.result() # rseult 반환

10000000000

In [18]:
%%time
# sync 실행
powers_of_10 = []
for i in range(1, 20):
    res = slow_pow(i, 10)
    powers_of_10.append(res)
print(powers_of_10)

[1, 1024, 59049, 1048576, 9765625, 60466176, 282475249, 1073741824, 3486784401, 10000000000, 25937424601, 61917364224, 137858491849, 289254654976, 576650390625, 1099511627776, 2015993900449, 3570467226624, 6131066257801]
Wall time: 19.2 s


In [17]:
%%time
# parallel 실행
powers_of_10 = []
for i in range(1, 20):
    future = client.submit(slow_pow, i, 10)
    powers_of_10.append(future)
print([future.result() for future in powers_of_10])

[1, 1024, 59049, 1048576, 9765625, 60466176, 282475249, 1073741824, 3486784401, 10000000000, 25937424601, 61917364224, 137858491849, 289254654976, 576650390625, 1099511627776, 2015993900449, 3570467226624, 6131066257801]
Wall time: 1.21 s


In [19]:
%%time
# map 함수 적용
futures = client.map(slow_pow, [1,2,3,4,5], [10]*5)
print([future.result() for future in futures])

[1, 1024, 59049, 1048576, 9765625]
Wall time: 1.1 s


In [21]:
%%time
#gather
futures = []
for i in range(1, 11):
    future = client.submit(slow_pow, i, 5)
    futures.append(future)
    
print(client.gather(futures))

[1, 32, 243, 1024, 3125, 7776, 16807, 32768, 59049, 100000]
Wall time: 1.07 s


In [23]:
#scatter : 여러 workers에 데이터를 scatter
#data용량이 크면 worker간에 데이터를 주고 받는데 시간이 오래 걸림

data_futures = client.scatter([1,2,3,4,5,6,7,8])
data_futures

[<Future: status: finished, type: int, key: int-c0a8a20f903a4915b94db8de3ea63195>,
 <Future: status: finished, type: int, key: int-58e78e1b34eb49a68c65b54815d1b158>,
 <Future: status: finished, type: int, key: int-d3395e15f605bc35ab1bac6341a285e2>,
 <Future: status: finished, type: int, key: int-5cd9541ea58b401f115b751e79eabbff>,
 <Future: status: finished, type: int, key: int-ce9a05dd6ec76c6a6d171b0c055f3127>,
 <Future: status: finished, type: int, key: int-7ec5d3339274cee5cb507a4e4d28e791>,
 <Future: status: finished, type: int, key: int-06e5a71c9839bd98760be56f629b24cc>,
 <Future: status: finished, type: int, key: int-ea1fa36eb048f89cc9b6b045a2a731d2>]

In [31]:
%%time
#scatter로 실행
def slow_add(x,y):
    time.sleep(1)
    return x + y
# 인자를 future 객체로 넣어줌
futures = []
for i in range(0, 8, 2):
    print(data_futures[i].result(), data_futures[i+1].result()) 
    res = client.submit(slow_add, data_futures[i], data_futures[i+1])
    futures.append(res)
print(client.gather(futures))

1 2
3 4
5 6
7 8
[3, 7, 11, 15]
Wall time: 1.11 s


In [33]:
%%time
futures = []
for i in range(1, 31):
    if i%2 == 0:
        res = client.submit(slow_pow, i, 2)
    else:
        res = client.submit(slow_pow, i,3)
    futures.append(res)
print([future.result() for future in futures][:10])

[1, 4, 27, 16, 125, 36, 343, 64, 729, 100]
Wall time: 2.24 s


In [35]:
%%time
# as_completed: 결과가 순서대로 끝나지 않음
from dask.distributed import as_completed
futures = []
for i in range(1, 31):
    if i%2 == 0:
        res = client.submit(slow_pow, i, 2)
    else:
        res = client.submit(slow_pow, i,3)
    futures.append(res)
print([future.result() for future in as_completed(futures)][:10]) 

[1, 64, 144, 729, 2197, 343, 1331, 196, 100, 16]
Wall time: 2.18 s


In [48]:
%%time
futures = []
for i in range(1,11):
    res = client.submit(slow_pow, i, 2)
    futures.append(res)
from pprint import pprint
pprint(futures)
pprint([future.result() for futures in futures])

[<Future: status: finished, type: int, key: slow_pow-a661130857a31f92e7e16ef8afbe7d30>,
 <Future: status: finished, type: int, key: slow_pow-3ee5b2a77fd5e6964c9c48c6f905a9c5>,
 <Future: status: finished, type: int, key: slow_pow-3f5b10f27fba9db2728ee51acf0e045b>,
 <Future: status: finished, type: int, key: slow_pow-074bb042944f02473a6e1fc2cd687061>,
 <Future: status: finished, type: int, key: slow_pow-616d45ea923e8fe00f302d4ce58c9abd>,
 <Future: status: finished, type: int, key: slow_pow-3ca02fe8a7181517177d5fe18d9071a1>,
 <Future: status: finished, type: int, key: slow_pow-10177bfdd748b4557d1499fa6affe240>,
 <Future: status: finished, type: int, key: slow_pow-d5e893e74be20c0a16864ef08c5574d5>,
 <Future: status: finished, type: int, key: slow_pow-828983e218ec0a9889055c5ea3f87aec>,
 <Future: status: finished, type: int, key: slow_pow-356dbb6912017a0c1055175b51c41c18>]
[100000, 100000, 100000, 100000, 100000, 100000, 100000, 100000, 100000, 100000]
Wall time: 105 ms


distributed.utils - ERROR - '<' not supported between instances of 'NoneType' and 'tuple'
Traceback (most recent call last):
  File "C:\ProgramData\Anaconda3\lib\site-packages\distributed\utils.py", line 648, in log_errors
    yield
  File "C:\ProgramData\Anaconda3\lib\site-packages\distributed\bokeh\scheduler.py", line 1139, in graph_doc
    graph = GraphPlot(scheduler, sizing_mode='stretch_both')
  File "C:\ProgramData\Anaconda3\lib\site-packages\distributed\bokeh\scheduler.py", line 615, in __init__
    self.layout = GraphLayout(scheduler)
  File "C:\ProgramData\Anaconda3\lib\site-packages\distributed\diagnostics\graph_layout.py", line 38, in __init__
    priority=priority)
  File "C:\ProgramData\Anaconda3\lib\site-packages\distributed\diagnostics\graph_layout.py", line 42, in update_graph
    stack = sorted(dependencies, key=lambda k: priority.get(k, 0), reverse=True)
TypeError: '<' not supported between instances of 'NoneType' and 'tuple'
tornado.application - ERROR - Uncaught exc

In [46]:
%%time
# wait
from dask.distributed import wait
futures = []
for i in range(1,11):
    res = client.submit(slow_pow, i, 2)
    futures.append(res)

result_dict = wait(futures, return_when ="ALL_COMPLETED")
pprint(result_dict)

DoneAndNotDoneFutures(done={<Future: status: finished, type: int, key: slow_pow-356dbb6912017a0c1055175b51c41c18>, <Future: status: finished, type: int, key: slow_pow-3ee5b2a77fd5e6964c9c48c6f905a9c5>, <Future: status: finished, type: int, key: slow_pow-828983e218ec0a9889055c5ea3f87aec>, <Future: status: finished, type: int, key: slow_pow-3ca02fe8a7181517177d5fe18d9071a1>, <Future: status: finished, type: int, key: slow_pow-3f5b10f27fba9db2728ee51acf0e045b>, <Future: status: finished, type: int, key: slow_pow-616d45ea923e8fe00f302d4ce58c9abd>, <Future: status: finished, type: int, key: slow_pow-074bb042944f02473a6e1fc2cd687061>, <Future: status: finished, type: int, key: slow_pow-d5e893e74be20c0a16864ef08c5574d5>, <Future: status: finished, type: int, key: slow_pow-10177bfdd748b4557d1499fa6affe240>, <Future: status: finished, type: int, key: slow_pow-a661130857a31f92e7e16ef8afbe7d30>}, not_done=set())
Wall time: 3.97 ms


In [47]:
[future.result() for future in result_dict.done][:10]

[100, 4, 81, 36, 9, 25, 16, 64, 49, 1]