# Deployment

In this notebook we show how to serve a model in FHE through AWS using Python.

In the first block of code we train, compile and serialize our model.

In [1]:
import io
import shutil
from pathlib import Path

import numpy
import requests
from sklearn.datasets import load_breast_cancer
from tqdm import tqdm

from concrete.ml.deployment import FHEModelClient, FHEModelDev
from concrete.ml.deployment.deploy_to_aws import AWSInstance, deploy_to_aws
from concrete.ml.sklearn import XGBClassifier

# Load the breast cancer dataset as (features, labels).
X, y = load_breast_cancer(return_X_y=True)

assert isinstance(X, numpy.ndarray) and isinstance(y, numpy.ndarray)

# Hold out the last 10 samples as test data and train on everything else.
X_train, X_test = X[:-10], X[-10:]
y_train, y_test = y[:-10], y[-10:]

# Fit the classifier on the training split.
model = XGBClassifier(n_bits=2, n_estimators=8, max_depth=3)
model.fit(X_train, y_train)

# Start from a clean output directory: remove any previously serialized model.
path_to_model = Path("./serialized_model").resolve()
if path_to_model.exists():
    shutil.rmtree(path_to_model)

# Compile the fitted model on the training data, then serialize it to disk.
model.compile(X_train)
dev = FHEModelDev(path_to_model, model)
# via_mlir=True is needed for cross-platform deployment.
dev.save(via_mlir=True)

No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda'


In Concrete ML we expose a context manager called `AWSInstance` that creates an AWS EC2 instance, to which `deploy_to_aws` then deploys the model.

Using a context manager allows for proper termination of the instance once the computation is finished.

To keep the instance/server running after the computation, consider using the `deploy_to_aws` function without the context manager instead.

In [2]:
status_ok = 200
# Timeout (in seconds) applied to every HTTP request, so that an unresponsive server raises
# an exception instead of blocking forever while the AWS instance keeps running
request_timeout = 300
client_folder = Path("./client_folder")
client_folder.mkdir(exist_ok=True)

with AWSInstance(
    instance_type="c5.large",
    verbose=True,
    instance_name="cml_notebook_demo",
) as instance_metadata:
    deploy_to_aws(
        path_to_model=path_to_model,
        instance_metadata=instance_metadata,
        verbose=True,
        wait_bar=True,
    )
    url = instance_metadata["URL"]

    # Download the client archive from the server and store it in the client folder
    zip_response = requests.get(f"{url}/get_client", timeout=request_timeout)
    assert zip_response.status_code == status_ok
    with open(client_folder / "client.zip", "wb") as file:
        file.write(zip_response.content)

    # Let's create the client using what we got from the server
    client = FHEModelClient(path_dir=client_folder, key_dir=client_folder / "keys")

    # The client first needs to create the private and evaluation keys.
    client.generate_private_and_evaluation_keys()

    # Get the serialized evaluation keys
    serialized_evaluation_keys = client.get_serialized_evaluation_keys()
    assert isinstance(serialized_evaluation_keys, bytes)

    # Evaluation keys can be quite large files but only have to be shared once with the server.

    # Check the size of the evaluation keys (in MB)
    print(f"Evaluation keys size: {len(serialized_evaluation_keys) / (10**6):.2f} MB")

    # Let's send the evaluation keys to the server (this has to be done only once).
    # The server answers with a uid that is sent along with each inference request below.
    response = requests.post(
        f"{url}/add_key",
        files={"key": io.BytesIO(serialized_evaluation_keys)},
        timeout=request_timeout,
    )
    assert response.status_code == status_ok
    uid = response.json()["uid"]

    # Now we have everything for the client to interact with the server

    # We loop over the test samples: each one is quantized, encrypted and serialized by the
    # client, sent to the server, and the (still encrypted) response is stored
    inferences = []
    for i in tqdm(range(len(X_test))):
        # Index with a list to keep a 2D array holding a single row
        clear_input = X_test[[i], :]

        assert isinstance(clear_input, numpy.ndarray)
        encrypted_input = client.quantize_encrypt_serialize(clear_input)
        assert isinstance(encrypted_input, bytes)

        inferences.append(
            requests.post(
                f"{url}/compute",
                files={
                    "model_input": io.BytesIO(encrypted_input),
                },
                data={
                    "uid": uid,
                },
                timeout=request_timeout,
            )
        )

# Unpack the results: this only needs the client and the stored responses, so it is done
# after leaving the context manager
decrypted_predictions = []
for result in inferences:
    assert result.status_code == status_ok
    encrypted_result = result.content
    decrypted_prediction = client.deserialize_decrypt_dequantize(encrypted_result)[0]
    decrypted_predictions.append(decrypted_prediction)

Security Group Created sg-07f05840eb7ade459 in vpc vpc-0677a14476c51231f.
Instance took 15.56025743484497 seconds to start running
Waiting for SSH connexion to be available...


9it [00:15,  1.78s/it]


SSH connexion available.
upload files...
upload finished.
ssh -i /home/luis/Documents/3068-can-we-call-your-deployment-script-directly-from-py-notebooks/src/concrete/ml/deployment/ssh_keys/cml_notebook_demo-2023_03_21_10_41_02-keypair.pem -o StrictHostKeyChecking=no -o IdentitiesOnly=yes -o RequestTTY=yes ubuntu@13.37.225.25 "tmux new-session -d -s cml_notebook_demo-2023_03_21_10_41_02 sudo chmod -R 777 /home/ubuntu/venv && sudo apt install -y python3.8 && sudo apt install -y python3.8-distutils && virtualenv deployment_venv --python=python3.8 && source deployment_venv/bin/activate && python -m pip install -r server_requirements.txt && python -m pip install concrete-ml==0.6.1 && python -m pip install concrete-compiler==0.24.0rc5 && PORT=5000 PATH_TO_MODEL="./serialized_model" python ./server.py"
Pseudo-terminal will not be allocated because stdin is not a terminal.






Reading package lists...

Building dependency tree...

Reading state information...

The following additional packag

Setting up python3.8-lib2to3 (3.8.16-1+jammy1) ...

Setting up python3.8-distutils (3.8.16-1+jammy1) ...



	LANGUAGE = (unset),

	LC_ALL = (unset),

	LC_TIME = "fr_FR.UTF-8",

	LC_MONETARY = "fr_FR.UTF-8",

	LC_ADDRESS = "fr_FR.UTF-8",

	LC_TELEPHONE = "fr_FR.UTF-8",

	LC_NAME = "fr_FR.UTF-8",

	LC_MEASUREMENT = "fr_FR.UTF-8",

	LC_IDENTIFICATION = "fr_FR.UTF-8",

	LC_NUMERIC = "fr_FR.UTF-8",

	LC_PAPER = "fr_FR.UTF-8",

	LANG = "C.UTF-8"

    are supported and installed on your system.




Running kernel seems to be up-to-date.



No services need to be restarted.



No containers need to be restarted.



No user sessions are running outdated binaries.



No VM guests are running outdated hypervisor (qemu) binaries on this host.

created virtual environment CPython3.8.16.final.0-64 in 1282ms

  creator CPython3Posix(dest=/home/ubuntu/deployment_venv, clear=False, no_vcs_ignore=False, global=False)

  seeder FromAppData(download=False, pip=bundle, setuptools=bundle, wheel=bundle, via=

Collecting PyYAML

  Downloading PyYAML-6.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (701 kB)

     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 701.2/701.2 KB 82.1 MB/s eta 0:00:00

Collecting fonttools>=4.22.0

  Downloading fonttools-4.39.2-py3-none-any.whl (1.0 MB)

     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.0/1.0 MB 97.2 MB/s eta 0:00:00

Collecting cycler>=0.10

  Downloading cycler-0.11.0-py3-none-any.whl (6.4 kB)

Collecting pyparsing>=2.3.1

  Downloading pyparsing-3.0.9-py3-none-any.whl (98 kB)

     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 98.3/98.3 KB 27.0 MB/s eta 0:00:00

Collecting importlib-resources>=3.2.0

  Downloading importlib_resources-5.12.0-py3-none-any.whl (36 kB)

Collecting kiwisolver>=1.0.1

  Downloading kiwisolver-1.4.4-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl (1.2 MB)

     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.2/1.2 MB 99.0 MB/s eta 0:00:00

Collecting contourpy>=1.0.1

  Downloading 

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:16<00:00,  1.63s/it]


We can now check that the predictions from the server are actually the same as those of the model in the clear.

In [3]:
# Element-wise exact comparison between the decrypted server predictions and the
# probabilities predicted by the model in the clear; True only if every entry matches
clear_predictions = model.predict_proba(X_test)
numpy.all(numpy.array(decrypted_predictions) == clear_predictions)

True