In [1]:
from dask_cloudprovider.aws import EC2Cluster
from dask.distributed import Client
import configparser
import os
import contextlib
import re
import dask
from platform import python_version

def get_aws_credentials():
    """Read the ``default`` AWS profile and return it as environment variables.

    Settings are collected from ``~/.aws/config`` and then
    ``~/.aws/credentials``; when both define the same option, the value from
    the credentials file wins. Option names are upper-cased so they can be
    used as environment variable names, and ``REGION`` is renamed to
    ``AWS_REGION``.

    Either file may be missing or lack a ``[default]`` section; whatever the
    other file provides is still returned.

    Returns:
        dict: mapping of environment variable name to its string value.

    Raises:
        configparser.NoSectionError: if neither file has a ``[default]``
            section.
    """
    profile = 'default'
    all_credentials = {}
    found_profile = False

    for path in ('~/.aws/config', '~/.aws/credentials'):
        # Use a fresh parser per file so one file's options do not leak into
        # the other's; parser.read() silently skips files that do not exist.
        parser = configparser.RawConfigParser()
        parser.read(os.path.expanduser(path))
        if not parser.has_section(profile):
            continue
        found_profile = True
        all_credentials.update(
            (key.upper(), value) for key, value in parser.items(profile)
        )

    if not found_profile:
        # Same exception type the lookup raised before when the profile was absent.
        raise configparser.NoSectionError(profile)

    # The config file calls it `region`; expose it as AWS_REGION instead.
    with contextlib.suppress(KeyError):
        all_credentials["AWS_REGION"] = all_credentials.pop("REGION")

    return all_credentials

# Environment handed to the cluster: the AWS credentials plus any extra
# packages that should be installed on the cluster via `pip`.
env_vars = {**get_aws_credentials(), "EXTRA_PIP_PACKAGES": "s3fs"}

# Select the software installed on the scheduler + worker instances based on the
# client's Python + Dask versions. Versions need to match across client,
# scheduler, and workers -- slight mismatches are OK, though.
# python_version() returns 'major.minor.patchlevel'; the tag only uses 'major.minor'.
py_v = '-py' + '.'.join(python_version().split('.')[:2])
dask_docker_tag = f"daskdev/dask:{dask.__version__}{py_v}"
print('Docker Image: ', dask_docker_tag)

# Launch 5 r5.large instances in total (10 vCPUs):
#   - 1 scheduler
#   - 4 workers (2 threads + 16 GB RAM each)
# NOTE(review): security=False presumably turns off TLS between client and
# cluster -- confirm this is acceptable outside of a demo.
cluster_options = dict(
    instance_type='r5.large',
    n_workers=4,
    security=False,
    docker_image=dask_docker_tag,
    env_vars=env_vars,
)
cluster = EC2Cluster(**cluster_options)

Docker Image:  daskdev/dask:2025.4.1-py3.13
Creating scheduler instance
Created instance i-0f77ff63eb2d42b35 as dask-43b84950-scheduler
Waiting for scheduler to run at 54.208.152.98:8786


KeyboardInterrupt: 

In [None]:
# Connect a Dask client to the EC2 cluster created above; the bare `client`
# expression on the last line makes the notebook display it.
client = Client(cluster)
client # note that slight mismatches between client, scheduler, and worker software are fine