In [None]:
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# 反馈或问题？
如有任何反馈或疑问，请在 GitHub 上提交 issue：https://github.com/googleapis/python-aiplatform/issues

# Vertex SDK for Python：使用客户管理的加密密钥（CMEK）进行AutoML图像分类训练的示例

要使用此Jupyter笔记本，在Colab中创建一个副本并打开它。您可以运行每个步骤或单元，并查看其结果。要运行一个单元格，请使用Shift+Enter。Colab会自动显示每个单元格中最后一行的返回值。

这个笔记本演示了如何使用CMEK训练AutoML图像分类模型。您需要提供一个存储数据集的存储桶。

注意：在测试此SDK时，您可能会因使用训练、预测、存储或其他GCP产品而产生费用。

安装SDK后，内核将自动重新启动。

In [None]:
!pip3 uninstall -y google-cloud-aiplatform
!pip3 install --upgrade google-cloud-kms
!pip3 install google-cloud-aiplatform
import IPython

app = IPython.Application.instance()
app.kernel.do_shutdown(True)

请在下面的单元格中输入您的项目ID和GCS存储桶，然后运行该单元格，以确保Cloud SDK在此笔记本的所有命令中使用正确的项目。

In [None]:
import sys

# The google.colab package is only loaded inside a Colab runtime; use its
# presence to decide whether the interactive Google login is needed.
running_in_colab = "google.colab" in sys.modules
if running_in_colab:
    from google.colab import auth

    auth.authenticate_user()

In [None]:
# Placeholders -- replace each value with your own settings before running.
REGION = "YOUR REGION"  # e.g. us-central1; also used below as the Cloud KMS location
MY_PROJECT = "YOUR PROJECT ID"  # Google Cloud project ID
MY_STAGING_BUCKET = "gs://YOUR BUCKET"  # bucket should be in the same region as the Vertex AI resources (REGION)

设置客户管理的加密密钥

默认情况下，Google Cloud 在数据处于静止状态时会自动使用由Google管理的加密密钥对数据进行加密。如果您对保护数据的密钥有特定的合规性或监管要求，您可以在训练作业中使用客户管理的加密密钥（CMEK）。

有关在Vertex AI上使用CMEK的更多信息，请参阅：[https://cloud.google.com/vertex-ai/docs/general/cmek#before_you_begin](https://cloud.google.com/vertex-ai/docs/general/cmek#before_you_begin)

您可以按照上述指南创建一个密钥，或者执行下面的笔记本单元格。

1. 请访问以下链接，在Google Cloud Platform中为您的项目启用云密钥管理服务（Cloud KMS）API：https://console.cloud.google.com/flows/enableapi?apiid=cloudkms.googleapis.com

2. 创建一个密钥环

创建一个密钥环

In [None]:
# ID of the Cloud KMS key ring created in this cell -- replace the placeholder.
KEY_RING_ID = "your_key_ring_name"


# Reference: https://cloud.google.com/kms/docs/samples/kms-create-key-ring
def create_key_ring(project_id, location_id, id):
    """
    Create a Cloud KMS key ring and return it.

    Args:
        project_id (string): Google Cloud project ID (e.g. 'my-project').
        location_id (string): Cloud KMS location (e.g. 'us-east1').
        id (string): ID to give the new key ring (e.g. 'my-key-ring').

    Returns:
        KeyRing: The key ring object returned by the create call.

    """

    # The client library is imported inside the function, as in the
    # referenced sample.
    from google.cloud import kms

    kms_client = kms.KeyManagementServiceClient()

    # The key ring body itself is empty; the parent location and the ID are
    # what identify the ring being created.
    create_request = {
        "parent": f"projects/{project_id}/locations/{location_id}",
        "key_ring_id": id,
        "key_ring": {},
    }

    new_ring = kms_client.create_key_ring(request=create_request)
    print("Created key ring: {}".format(new_ring.name))
    return new_ring


# Create the key ring in REGION, the same location later passed to aiplatform.init.
# NOTE(review): presumably this raises if the key ring already exists, so the
# cell may not be safely re-runnable -- confirm.
create_key_ring(project_id=MY_PROJECT, location_id=REGION, id=KEY_RING_ID)

创建一个密钥

In [None]:
# ID of the Cloud KMS key created in this cell -- replace the placeholder.
KEY_ID = "your_key_id"


# Reference: https://cloud.google.com/kms/docs/samples/kms-create-key-symmetric-encrypt-decrypt
def create_key_symmetric_encrypt_decrypt(project_id, location_id, key_ring_id, id):
    """
    Create a symmetric encrypt/decrypt key under the given Cloud KMS key ring.

    Args:
        project_id (string): Google Cloud project ID (e.g. 'my-project').
        location_id (string): Cloud KMS location (e.g. 'us-east1').
        key_ring_id (string): ID of the Cloud KMS key ring (e.g. 'my-key-ring').
        id (string): ID to give the new key (e.g. 'my-symmetric-key').

    Returns:
        CryptoKey: The key object returned by the create call.

    """

    # The client library is imported inside the function, as in the
    # referenced sample.
    from google.cloud import kms

    kms_client = kms.KeyManagementServiceClient()

    # Resource name of the key ring that will own the new key.
    parent_ring = kms_client.key_ring_path(project_id, location_id, key_ring_id)

    # Key description: ENCRYPT_DECRYPT purpose with the Google symmetric
    # encryption algorithm as the version template.
    key_spec = {
        "purpose": kms.CryptoKey.CryptoKeyPurpose.ENCRYPT_DECRYPT,
        "version_template": {
            "algorithm": kms.CryptoKeyVersion.CryptoKeyVersionAlgorithm.GOOGLE_SYMMETRIC_ENCRYPTION,
        },
    }

    create_request = {
        "parent": parent_ring,
        "crypto_key_id": id,
        "crypto_key": key_spec,
    }

    new_key = kms_client.create_crypto_key(request=create_request)
    print("Created symmetric key: {}".format(new_key.name))
    return new_key


# Create the key inside the key ring named by KEY_RING_ID, in the same project and location.
# NOTE(review): presumably this raises if the key already exists, so the cell
# may not be safely re-runnable -- confirm.
create_key_symmetric_encrypt_decrypt(
    project_id=MY_PROJECT, location_id=REGION, key_ring_id=KEY_RING_ID, id=KEY_ID
)

向 Vertex AI 服务帐号授予权限。

In [None]:
# Reference: https://cloud.google.com/vertex-ai/docs/general/cmek#granting_permissions
# Get the service account
SERVICE_ACCOUNT = ! gcloud projects get-iam-policy {MY_PROJECT} \
  --flatten="bindings[].members" \
  --format="table(bindings.members)" \
  --filter="bindings.role:roles/aiplatform.serviceAgent" \
  | grep -oP "service-.+?@gcp-sa-aiplatform.iam.gserviceaccount.com"
SERVICE_ACCOUNT = SERVICE_ACCOUNT[0]

print(f"Service account is: {SERVICE_ACCOUNT}")

# Give permissions
!gcloud kms keys add-iam-policy-binding {KEY_ID} \
  --keyring={KEY_RING_ID} \
  --location={REGION} \
  --project={MY_PROJECT} \
  --member=serviceAccount:{SERVICE_ACCOUNT} \
  --role=roles/cloudkms.cryptoKeyEncrypterDecrypter

In [None]:
# Create the full resource identifier for the created key, in the form
# projects/<project>/locations/<region>/keyRings/<key ring>/cryptoKeys/<key>.
# It is passed to aiplatform.init as encryption_spec_key_name.
ENCRYPTION_SPEC_KEY_NAME = f"projects/{MY_PROJECT}/locations/{REGION}/keyRings/{KEY_RING_ID}/cryptoKeys/{KEY_ID}"

## 初始化Python的Vertex SDK

为Vertex AI初始化*client*

在此笔记本运行期间创建的所有资源都将使用上面创建的加密密钥加密。

您可以在每次函数调用时覆盖加密密钥。

In [None]:
from google.cloud import aiplatform

# Set the SDK-wide defaults once: where resources live (project, location,
# staging bucket) and the CMEK key to apply as the default encryption spec.
aiplatform.init(
    project=MY_PROJECT,
    location=REGION,
    staging_bucket=MY_STAGING_BUCKET,
    encryption_spec_key_name=ENCRYPTION_SPEC_KEY_NAME,
)

# 从CSV创建托管图像数据集

此部分将从花卉数据集创建一个托管图像数据集。有关此数据集的更多信息，请访问https://www.tensorflow.org/datasets/catalog/tf_flowers。

In [None]:
# CSV index of the flowers images; a later cell reads its first row as an
# "image URI,label" pair.
IMPORT_FILE = (
    "gs://cloud-samples-data/vision/automl_classification/flowers/all_data_v2.csv"
)

# Import schema for single-label image classification data.
single_label_schema = (
    aiplatform.schema.dataset.ioformat.image.single_label_classification
)

ds = aiplatform.ImageDataset.create(
    display_name="flowers",
    gcs_source=[IMPORT_FILE],
    import_schema_uri=single_label_schema,
)

# Display the resource name of the new dataset as the cell output.
ds.resource_name

启动一个训练任务来创建模型

训练一个AutoML图像分类模型。

In [None]:
# Configure a single-label AutoML image classification job.
# The display names refer to the flowers dataset used in this notebook
# (they previously said "iris", a leftover from a different sample).
job = aiplatform.AutoMLImageTrainingJob(
    display_name="train-flowers-automl-mbsdk-1",
    prediction_type="classification",
    multi_label=False,
    model_type="CLOUD",
    base_model=None,
)

# This will take around half an hour to run
# NOTE(review): the budget below is 8000 milli node hours (8 node hours), so
# the actual duration may differ from the estimate above -- confirm.
model = job.run(
    dataset=ds,
    model_display_name="flowers-classification-model-mbsdk",
    # 60% / 20% / 20% train / validation / test split.
    training_fraction_split=0.6,
    validation_fraction_split=0.2,
    test_fraction_split=0.2,
    budget_milli_node_hours=8000,
    disable_early_stopping=False,
)

部署您的模型，然后等待模型部署完成后再进行预测。

In [None]:
# Deploy the trained model; the returned endpoint object is used for the
# prediction and undeploy calls in the cells below.
endpoint = model.deploy()

# 使用端点进行预测
- 从导入到数据集的数据中取一个样本
- 这个样本将被编码为base64并传递到端点进行预测

In [None]:
test_item = !gsutil cat $IMPORT_FILE | head -n1
test_item, test_label = str(test_item[0]).split(",")

print(test_item, test_label)

In [None]:
import base64

import tensorflow as tf

# Read the raw bytes of the sample image selected in the previous cell.
with tf.io.gfile.GFile(test_item, "rb") as image_file:
    content = image_file.read()

# The format of each instance should conform to the deployed model's prediction input schema.
# Here that is a dict whose "content" field holds the base64-encoded image as text.
encoded_image = base64.b64encode(content).decode("utf-8")
instances_list = [{"content": encoded_image}]

prediction = endpoint.predict(instances=instances_list)

# Display the prediction response as the cell output.
prediction

从端点取消部署模型

In [None]:
# Undeploy every model currently deployed to the endpoint.
# NOTE(review): no visible cell deletes the endpoint, model, dataset or KMS
# key, so those resources presumably remain afterwards -- confirm whether
# further cleanup is intended.
endpoint.undeploy_all()