# Deploy trained model to Endpoint

Endpoints provide a unified interface to invoke and manage model deployments across compute types. For background, see [What are endpoints and deployments?](https://learn.microsoft.com/en-us/azure/machine-learning/concept-endpoints?view=azureml-api-2)


In [None]:
import base64
import json

from azure.ai.ml import MLClient, Input
from azure.ai.ml.constants import AssetTypes
from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment, Model, CodeConfiguration, OnlineRequestSettings, Environment
#from azure.ai.ml.entities import ManagedOnlineEndpoint, ManagedOnlineDeployment, Model, Environment, JobService, Data, CodeConfiguration, OnlineRequestSettings, AmlCompute
from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential

## Create MLClient

In [4]:
# Build a credential and use it to create the workspace-scoped MLClient.
try:
    credential = DefaultAzureCredential()
    # Verify that the credential can actually obtain a management-plane token.
    credential.get_token("https://management.azure.com/.default")
except Exception:
    # Fall back to InteractiveBrowserCredential if DefaultAzureCredential does
    # not work. This opens a browser page for interactive sign-in.
    credential = InteractiveBrowserCredential()

# Workspace details are read from the local config file (see the cell output).
ml_client = MLClient.from_config(credential=credential)

Found the config file in: ./config.json


## Define global variables

In [5]:
# Names shared by the deployment cells below.
registered_model_name = "custom-model"   # model looked up in the workspace registry
inference_env_name = "inference-env"     # environment used by the scoring deployment
online_endpoint_name = "SRN-endpoint"    # managed online endpoint to create/update
deployment_name = "blue"                 # deployment created under that endpoint

## Get latest model

In [7]:
# Fetch the registered model by name, selecting the version labelled "latest".
model = ml_client.models.get(
    name=registered_model_name,
    label="latest",
)


## Create Endpoint

In [8]:
endpoint_name = online_endpoint_name

# Local definition of the managed online endpoint; nothing is created in the
# workspace until it is submitted below.
endpoint = ManagedOnlineEndpoint(
    name=endpoint_name,
    description="An online endpoint to generate predictions",
    auth_mode="key",
    tags={"foo": "bar"},
)

# begin_create_or_update starts a long-running operation. Block on .result()
# so the endpoint is fully provisioned before the deployment cell targets it;
# otherwise a Restart & Run All can race ahead and fail.
ml_client.begin_create_or_update(endpoint).result()

## Create inference environment

In [10]:

# Describe the inference environment: a base container image plus the conda
# specification stored next to this notebook.
scoring_environment = Environment(
    name=inference_env_name,
    description="Brain tumor segmentation inference environment",
    image="mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.1-cudnn8-ubuntu20.04:latest",
    conda_file="./scoring-env.yaml",
)

# Register (or update) the environment in the workspace; the returned entity is
# displayed as the cell output.
ml_client.environments.create_or_update(scoring_environment)

Environment({'is_anonymous': False, 'auto_increment_version': False, 'name': 'inference-env', 'description': 'Brain tumor segmentation inference environment', 'tags': {}, 'properties': {}, 'id': '/subscriptions/b7d41fc8-d35d-41db-92ed-1f7f1d32d4d9/resourceGroups/monai-3d-rg/providers/Microsoft.MachineLearningServices/workspaces/aml-monai-3d/environments/inference-env/versions/1', 'Resource__source_path': None, 'base_path': '/mnt/batch/tasks/shared/LS_root/mounts/clusters/nc96adsa100-gpu/code/Users/aspiridonov/monai-3D-segmentation/brats-mri-segmentation-segresnet/notebooks', 'creation_context': <azure.ai.ml.entities._system_data.SystemData object at 0x7f9a8a0a71c0>, 'serialize': <msrest.serialization.Serializer object at 0x7f9a8a0a6f80>, 'version': '1', 'latest_version': None, 'conda_file': {'channels': ['anaconda', 'pytorch', 'conda-forge'], 'dependencies': ['python==3.10.4', 'pytorch==1.11.0', 'torchvision==0.12.0', 'tqdm==4.64.0', 'mlflow==1.26.0', 'pip==21.2.4', {'pip': ['azureml-i

## Deploy the model

In [17]:
# Resolve the newest registered version of the inference environment instead of
# hard-coding ":1", which silently goes stale once the environment is
# re-registered with a new version.
latest_env = ml_client.environments.get(name=inference_env_name, label="latest")
latest_env_name = f"{latest_env.name}:{latest_env.version}"

# Deployment definition: the model, its environment, the scoring code, and the
# compute to serve it on.
blue_deployment = ManagedOnlineDeployment(
    name=deployment_name,
    endpoint_name=online_endpoint_name,
    model=model,
    environment=latest_env_name,
    code_configuration=CodeConfiguration(
        code="./",
        scoring_script="score.py"
    ),
    instance_type="Standard_NC4as_T4_v3",
    instance_count=1,
    request_settings=OnlineRequestSettings(request_timeout_ms=90000),
)

# Creating a deployment is a long-running operation. Wait for it to finish so
# that the traffic-routing cell below does not reference a deployment that does
# not exist yet.
deployment = ml_client.begin_create_or_update(blue_deployment).result()

## Set Endpoint traffic

In [None]:
# Route all traffic (100) to the deployment named by `deployment_name`, then
# push the updated endpoint and wait for the operation to complete.
traffic_split = {deployment_name: 100}
endpoint.traffic = traffic_split
traffic_update = ml_client.online_endpoints.begin_create_or_update(endpoint)
traffic_update.result()