In [1]:
import dask.array as da
import cupy

from distributed import Client
from dask_hip import LocalHIPCluster
from time import time

In [2]:
def run_multi_gpu(device_array):
    """Time one reduction over ``device_array`` and print the elapsed seconds.

    The workload adds 1 to every element, keeps every second row and column,
    sums what is left and forces evaluation with ``.compute()``.

    Parameters
    ----------
    device_array
        Object supporting ``+ 1``, two-axis strided slicing, ``.sum()`` and
        ``.compute()``; this notebook passes Dask arrays.

    Returns
    -------
    None
        The elapsed time is printed with four decimal places, not returned.
    """
    # Local import keeps this cell self-contained.
    from time import perf_counter

    # perf_counter() is monotonic; time() follows the system clock, which can
    # be adjusted while the computation is running and skew the measurement.
    started = perf_counter()
    (device_array + 1)[::2, ::2].sum().compute()
    elapsed = perf_counter() - started
    print(f'{elapsed:.4f}')

In [3]:
def _time_on_hip_cluster(message, device_array, visible_devices):
    """Print ``message``, then time ``device_array`` on a new local HIP cluster.

    A fresh ``LocalHIPCluster`` and ``Client`` are opened for the call and
    left again through their ``with`` blocks before returning.
    """
    print(message)
    with LocalHIPCluster(HIP_VISIBLE_DEVICES=visible_devices) as cluster:
        # The client is only needed while run_multi_gpu() executes, so it is
        # not bound to a name.
        with Client(cluster):
            run_multi_gpu(device_array)


def main(visible_devices="0"):
    """Time a small and a 64x larger random array, each on its own cluster.

    Parameters
    ----------
    visible_devices : str, optional
        Passed to ``LocalHIPCluster`` as ``HIP_VISIBLE_DEVICES`` for both
        runs. Defaults to ``"0"``, the value that used to be hard-coded.

    Returns
    -------
    None
        Timings are printed by ``run_multi_gpu``.
    """
    # Dask random state backed by cupy's RandomState, so chunks are generated
    # by cupy (device memory) rather than numpy.
    rs_d = da.random.RandomState(RandomState=cupy.random.RandomState)

    # Dask arrays are lazy: this only describes the array. Nothing is
    # generated until run_multi_gpu() calls compute().
    x_d = rs_d.normal(10,
                      1,
                      size=(50_000, 50_000),
                      chunks=(1_000, 1_000))      # total chunks: 50 x 50 = 2,500

    _time_on_hip_cluster('\nProcessing smaller cupy array...',
                         x_d,
                         visible_devices)

    # bigger array (also lazy)
    x_d2 = rs_d.normal(10,
                       1,
                       size=(400_000, 400_000),    # array size 64X
                       chunks=(40_000, 40_000))    # total chunks: 10 x 10 = 100

    _time_on_hip_cluster('\nProcessing larger cupy array...',
                         x_d2,
                         visible_devices)

    print('\nCompleted!')

In [4]:
# Run the small and the large benchmark, each on its own cluster limited to
# HIP_VISIBLE_DEVICES="0".
main()


Processing smaller cupy array...


2023-09-22 20:38:50,424 - distributed.preloading - INFO - Creating preload: dask_hip.initialize
2023-09-22 20:38:50,424 - distributed.preloading - INFO - Import preload module: dask_hip.initialize


19.6551
device count inside worker init = 8

Processing larger cupy array...


2023-09-22 20:39:11,672 - distributed.preloading - INFO - Creating preload: dask_hip.initialize
2023-09-22 20:39:11,672 - distributed.preloading - INFO - Import preload module: dask_hip.initialize


64.3060
device count inside worker init = 8

Completed!


In [4]:
# Start a local cluster with eight device ids (0-7) listed in
# HIP_VISIBLE_DEVICES, instead of the single device used by main().
cluster = LocalHIPCluster(HIP_VISIBLE_DEVICES="0,1,2,3,4,5,6,7")

2023-09-22 21:29:44,128 - distributed.preloading - INFO - Creating preload: dask_hip.initialize
2023-09-22 21:29:44,128 - distributed.preloading - INFO - Import preload module: dask_hip.initialize
2023-09-22 21:29:44,129 - distributed.preloading - INFO - Creating preload: dask_hip.initialize
2023-09-22 21:29:44,129 - distributed.preloading - INFO - Import preload module: dask_hip.initialize
2023-09-22 21:29:44,138 - distributed.preloading - INFO - Creating preload: dask_hip.initialize
2023-09-22 21:29:44,139 - distributed.preloading - INFO - Import preload module: dask_hip.initialize
2023-09-22 21:29:44,149 - distributed.preloading - INFO - Creating preload: dask_hip.initialize
2023-09-22 21:29:44,150 - distributed.preloading - INFO - Import preload module: dask_hip.initialize
2023-09-22 21:29:44,150 - distributed.preloading - INFO - Creating preload: dask_hip.initialize
2023-09-22 21:29:44,150 - distributed.preloading - INFO - Import preload module: dask_hip.initialize
2023-09-22 21:2

In [5]:
# Show the cluster summary (workers, threads, memory) as the cell output.
cluster

0,1
Dashboard: http://127.0.0.1:8787/status,Workers: 8
Total threads: 8,Total memory: 503.72 GiB
Status: running,Using processes: True

0,1
Comm: tcp://127.0.0.1:39639,Workers: 8
Dashboard: http://127.0.0.1:8787/status,Total threads: 8
Started: Just now,Total memory: 503.72 GiB

0,1
Comm: tcp://127.0.0.1:44195,Total threads: 1
Dashboard: http://127.0.0.1:38727/status,Memory: 62.96 GiB
Nanny: tcp://127.0.0.1:35831,
Local directory: /tmp/dask-worker-space/worker-sq4sxfgo,Local directory: /tmp/dask-worker-space/worker-sq4sxfgo
GPU: Instinct MI210,GPU memory: 63.98 GiB

0,1
Comm: tcp://127.0.0.1:46363,Total threads: 1
Dashboard: http://127.0.0.1:40117/status,Memory: 62.96 GiB
Nanny: tcp://127.0.0.1:36755,
Local directory: /tmp/dask-worker-space/worker-9tm6u_o1,Local directory: /tmp/dask-worker-space/worker-9tm6u_o1
GPU: Instinct MI210,GPU memory: 63.98 GiB

0,1
Comm: tcp://127.0.0.1:37861,Total threads: 1
Dashboard: http://127.0.0.1:38391/status,Memory: 62.96 GiB
Nanny: tcp://127.0.0.1:33417,
Local directory: /tmp/dask-worker-space/worker-6ac79wqn,Local directory: /tmp/dask-worker-space/worker-6ac79wqn
GPU: Instinct MI210,GPU memory: 63.98 GiB

0,1
Comm: tcp://127.0.0.1:41881,Total threads: 1
Dashboard: http://127.0.0.1:46491/status,Memory: 62.96 GiB
Nanny: tcp://127.0.0.1:45469,
Local directory: /tmp/dask-worker-space/worker-nujatuq0,Local directory: /tmp/dask-worker-space/worker-nujatuq0
GPU: Instinct MI210,GPU memory: 63.98 GiB

0,1
Comm: tcp://127.0.0.1:46365,Total threads: 1
Dashboard: http://127.0.0.1:42297/status,Memory: 62.96 GiB
Nanny: tcp://127.0.0.1:39565,
Local directory: /tmp/dask-worker-space/worker-zah5hp_4,Local directory: /tmp/dask-worker-space/worker-zah5hp_4
GPU: Instinct MI210,GPU memory: 63.98 GiB

0,1
Comm: tcp://127.0.0.1:42995,Total threads: 1
Dashboard: http://127.0.0.1:37819/status,Memory: 62.96 GiB
Nanny: tcp://127.0.0.1:34957,
Local directory: /tmp/dask-worker-space/worker-9zc9g4js,Local directory: /tmp/dask-worker-space/worker-9zc9g4js
GPU: Instinct MI210,GPU memory: 63.98 GiB

0,1
Comm: tcp://127.0.0.1:39293,Total threads: 1
Dashboard: http://127.0.0.1:34753/status,Memory: 62.96 GiB
Nanny: tcp://127.0.0.1:46455,
Local directory: /tmp/dask-worker-space/worker-hp36j_xu,Local directory: /tmp/dask-worker-space/worker-hp36j_xu
GPU: Instinct MI210,GPU memory: 63.98 GiB

0,1
Comm: tcp://127.0.0.1:33139,Total threads: 1
Dashboard: http://127.0.0.1:35373/status,Memory: 62.96 GiB
Nanny: tcp://127.0.0.1:41811,
Local directory: /tmp/dask-worker-space/worker-n90dpyvx,Local directory: /tmp/dask-worker-space/worker-n90dpyvx
GPU: Instinct MI210,GPU memory: 63.98 GiB


In [6]:
# Connect a client to the eight-device cluster; kept in a name so it can be
# shut down explicitly at the end of the notebook.
client = Client(cluster)

In [8]:
# Dask random state backed by cupy's RandomState; main() builds its own local
# copy, so it has to be recreated here at notebook scope.
rs_d = da.random.RandomState(RandomState=cupy.random.RandomState)

In [9]:
# Same shape and chunking as the larger array built inside main():
# 400,000 x 400,000 elements in a 10 x 10 grid of 40,000 x 40,000 chunks.
x_d2 = rs_d.normal(
    10,
    1,
    size=(400_000, 400_000),
    chunks=(40_000, 40_000),
)

In [12]:
# Time the 400,000 x 400,000 workload again, this time with the eight-device
# cluster's client active.
run_multi_gpu(x_d2)

9.0685


In [13]:
# Shut down the scheduler and workers this client is connected to.
client.shutdown()

device count inside worker init = 8
device count inside worker init = 8
device count inside worker init = 8
device count inside worker init = 8
device count inside worker init = 8
device count inside worker init = 8
device count inside worker init = 8
device count inside worker init = 8


In [14]:
# Release the cluster object itself.
# NOTE(review): client.shutdown() in the previous cell may already have
# stopped the scheduler and workers — confirm this call is still needed.
cluster.close()