Set up an existing AzureML Workspace. To create a new Workspace, follow the instructions at: https://learn.microsoft.com/en-us/azure/machine-learning/how-to-manage-workspace?tabs=azure-portal#create-a-workspace

In [None]:
!pip install azure-ai-ml

In [None]:
import datetime, time

from azure.ai.ml import MLClient
from azure.ai.ml.entities import (
    ManagedOnlineEndpoint,
    ManagedOnlineDeployment,
    BatchDeployment,
    Model,
    Environment,
    BuildContext,
    CodeConfiguration,
    OnlineRequestSettings,
    ProbeSettings
)
from azure.core.exceptions import ResourceNotFoundError
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential

In [None]:
# Identifiers of the target Azure ML workspace. Replace every placeholder with your own value.
# These values are written to ./.azureml/config.json later in this notebook when no
# workspace configuration can be loaded automatically.
TENANT_ID = "<TENANT_ID>"
SUBSCRIPTION_ID = "<SUBSCRIPTION_ID>"
RESOURCE_GROUP = "<RESOURCE_GROUP>"
AZUREML_WORKSPACE_NAME = "<AZUREML_WORKSPACE_NAME>"

In [None]:
# Obtain an Azure credential: try DefaultAzureCredential first and fall back to an
# interactive browser login when it cannot produce a token.
try:
    credential = DefaultAzureCredential()
    # Constructing the credential does not prove it works, so request a token to verify.
    credential.get_token("https://management.azure.com/.default")
except Exception as ex:
    # The broad catch is deliberate: any failure of the default credential should lead
    # to the interactive fallback. Report the reason so the browser prompt is not a surprise.
    print(f"DefaultAzureCredential failed ({ex}); falling back to InteractiveBrowserCredential.")
    # This will open a browser page for signing in.
    credential = InteractiveBrowserCredential()

In [None]:
# Create the MLClient from an existing workspace config file if one can be found;
# otherwise generate a config file from the constants defined above and load that.
ml_client = None

try:
    ml_client = MLClient.from_config(credential=credential)
except Exception:
    import json, os

    config_path = "./.azureml/config.json"

    # NOTE: the constants referenced here must contain your tenant ID,
    #       subscription ID, resource group name, and workspace name.
    client_config = {
        "tenant_id": TENANT_ID,
        "subscription_id": SUBSCRIPTION_ID,
        "resource_group": RESOURCE_GROUP,
        "workspace_name": AZUREML_WORKSPACE_NAME,
    }

    # Persist the settings, then build the client from the file just written.
    os.makedirs(os.path.dirname(config_path), exist_ok=True)
    with open(config_path, "w") as config_file:
        json.dump(client_config, config_file)
    ml_client = MLClient.from_config(credential=credential, path=config_path)

In [None]:
# Download the Stable Diffusion v1.4 checkpoint from the official Hugging Face repository:
# https://huggingface.co/CompVis/stable-diffusion-v-1-4-original
# You can also download the file manually and save it as `model_file_path`.

import os

import requests

# Unofficial mirror, kept for reference only:
#model_download_url = "https://drive.yerf.org/wl/?id=EBfTrmcCCUAGaQBXVIj5lJmEhjoP1tgl&mode=grid&download=1"
model_download_url = "https://huggingface.co/CompVis/stable-diffusion-v-1-4-original/resolve/main/sd-v1-4.ckpt"

model_file_path = "stable-diffusion-v1.ckpt"

# Skip the download when the checkpoint is already present so that re-running the
# notebook does not fetch the file again.
if not os.path.exists(model_file_path):
    # Write to a temporary name first so an interrupted download never leaves a
    # truncated file under the final name.
    partial_path = model_file_path + ".part"
    # Stream the body in chunks instead of holding the whole checkpoint in memory.
    with requests.get(model_download_url, stream=True, timeout=60) as response:
        response.raise_for_status()
        with open(partial_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)
    os.replace(partial_path, model_file_path)

In [None]:
# Reuse version 1 of the registered model when it exists; otherwise register the
# downloaded checkpoint file under the same name.
model_name = "stable-diffusion-v1"

try:
    model = ml_client.models.get(model_name, version="1")
except ResourceNotFoundError:
    local_model = Model(name=model_name, path=model_file_path)
    model = ml_client.create_or_update(local_model)

In [None]:
!(git submodule init && git submodule update)

In [None]:
# Reuse version 7 of the environment when it exists; otherwise register a new
# environment built from the local Docker build context.
environment_name = "stable-diffusion-v1-new"

try:
    env = ml_client.environments.get(environment_name, version="7")
except ResourceNotFoundError:
    # Liveness, readiness and scoring routes all use the same port.
    api_port = 5000
    route_paths = {
        "liveness_route": "/api/health",
        "readiness_route": "/api/health",
        "scoring_route": "/api/txt2img",
    }
    inference_config = {
        route: {"port": api_port, "path": path}
        for route, path in route_paths.items()
    }

    # Alternative: instead of `build=...`, a prebuilt image can be referenced with
    #   image="docker.io/shivammg/stable-diffusion:v1"
    env_definition = Environment(
        name=environment_name,
        build=BuildContext(path="./stable-diffusion-api"),
        inference_config=inference_config,
    )
    env = ml_client.environments.create_or_update(env_definition)

In [None]:
# We would like to wait until the container build finishes, but no status tied to the environment (container) can be retrieved.
#envs = ml_client.environments.list(name=environment_name)
#for a in envs:
#    print(a)

#jobs = ml_client.jobs.list()
#for job in jobs:
    #if job.type == "command":
        #print(job.status)
        #print(job.experiment_name)
        #print(job.base_path)
        #print(job.creation_context)
        #print(job)

        #for prop in job.properties:
        #    print(prop)
        #print(job.services)
#    break

In [None]:
# Reuse the first existing online endpoint whose name starts with the prefix;
# create a new, timestamp-suffixed endpoint when there is none.
endpoint_name_prefix = "stable-diffusion-"

endpoint = next(
    (
        candidate
        for candidate in ml_client.online_endpoints.list()
        if candidate.name.startswith(endpoint_name_prefix)
    ),
    None,
)
endpoint_name = endpoint.name if endpoint is not None else None

if not endpoint:
    # Endpoint name needs to be unique within a region, hence the timestamp suffix.
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M")
    endpoint_name = endpoint_name_prefix + timestamp
    endpoint = ManagedOnlineEndpoint(name=endpoint_name, auth_mode="key")
    ml_client.online_endpoints.begin_create_or_update(endpoint)

In [None]:
# Poll until the endpoint is provisioned: up to 100 checks, 10 seconds apart.
for _ in range(100):
    endpoint = ml_client.online_endpoints.get(endpoint_name)
    if endpoint.provisioning_state == 'Succeeded':
        break
    # Stop immediately on a terminal failure instead of polling until the timeout.
    if endpoint.provisioning_state in ('Failed', 'Canceled'):
        raise RuntimeError(
            f"Endpoint '{endpoint_name}' provisioning ended in state "
            f"'{endpoint.provisioning_state}'."
        )
    time.sleep(10)
else:
    # The loop ran out without a break: do not let later cells run against an
    # endpoint that is not ready.
    raise TimeoutError(
        f"Endpoint '{endpoint_name}' was not provisioned in time "
        f"(last state: '{endpoint.provisioning_state}')."
    )

In [None]:
# Define the managed online deployment that serves the registered model in the
# custom environment, and start creating it on the endpoint.
deployment_name = "stable-diffusion-deployment"
deployment = ManagedOnlineDeployment(
    name=deployment_name,
    endpoint_name=endpoint_name,
    model=model,
    environment=env,
    environment_variables={
        # Use the version of the model that was actually registered instead of a
        # hard-coded "1", so the path stays correct if the service assigned another version.
        "MODEL_DIR": f"/var/azureml-app/azureml-models/{model_name}/{model.version}",
    },
    instance_type="Standard_NC24ads_A100_v4",
    instance_count=1,
    request_settings=OnlineRequestSettings(
        request_timeout_ms=60*1000,
        max_concurrent_requests_per_instance=2,
        max_queue_wait_ms=1*1000,
    ),
    readiness_probe=ProbeSettings(
        failure_threshold=30,
        timeout=10,
        period=20,
        initial_delay=20,
    ),
    liveness_probe=ProbeSettings(
        failure_threshold=15,
        timeout=5,
        period=10,
        initial_delay=10,
    ),
    app_insights_enabled=True
)
# Starts the deployment; completion is awaited by the polling loop that follows.
ml_client.online_deployments.begin_create_or_update(deployment)

In [None]:
# Poll until the deployment is provisioned: up to 100 checks, 10 seconds apart.
for _ in range(100):
    deployment = ml_client.online_deployments.get(deployment_name, endpoint_name=endpoint_name)
    if deployment.provisioning_state == 'Succeeded':
        break
    # Stop immediately on a terminal failure instead of polling until the timeout.
    if deployment.provisioning_state in ('Failed', 'Canceled'):
        raise RuntimeError(
            f"Deployment '{deployment_name}' provisioning ended in state "
            f"'{deployment.provisioning_state}'."
        )
    time.sleep(10)
else:
    # The loop ran out without a break: do not route traffic to a deployment
    # that is not ready.
    raise TimeoutError(
        f"Deployment '{deployment_name}' was not provisioned in time "
        f"(last state: '{deployment.provisioning_state}')."
    )

In [None]:
# Send 100% of the endpoint's traffic to the new deployment.
endpoint.traffic = {deployment_name: 100}
# Wait for the update to complete so that a failed traffic change raises here
# instead of going unnoticed.
ml_client.online_endpoints.begin_create_or_update(endpoint).result()

In [None]:
import json

# Strip the scoring route, if present, to obtain the endpoint's base URL.
endpoint_host = endpoint.scoring_uri.removesuffix('/api/txt2img')
keys = ml_client.online_endpoints.get_keys(endpoint_name)
endpoint_details = {'host': endpoint_host, 'key': keys.primary_key}

# NOTE: this file contains the endpoint's primary key in plain text.
with open('online-endpoint.json', 'w') as details_file:
    json.dump(endpoint_details, details_file)