In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI模型花园 - AutoGluon

<table align="left">
  <td>
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_pytorch_autogluon.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Colab logo"> 在Colab中运行
    </a>
  </td>

  <td>
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_pytorch_autogluon.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo"> 在GitHub上查看
    </a>
  </td>
  <td>
    <a href="https://console.cloud.google.com/vertex-ai/notebooks/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/main/notebooks/community/model_garden/model_garden_pytorch_autogluon.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo"> 在Vertex AI Workbench中打开
    </a>
  </td>
</table>

注意：此笔记本已在以下环境中进行测试：

- Python版本= 3.10

## 概述

本笔记本演示了在CPU上微调基于PyTorch的[Autogluon模型用于表格数据](https://auto.gluon.ai/stable/tutorials/tabular/index.html)，并将其部署到Vertex AI以进行在线预测。

### 目标

在本教程中，您将学习如何：

- 微调 PyTorch AutoGluon 表格模型。
- 将模型上传至[模型注册表](https://cloud.google.com/vertex-ai/docs/model-registry/introduction)。
- 在[端点](https://cloud.google.com/vertex-ai/docs/predictions/using-private-endpoints)上部署该模型。
- 为表格数据运行在线预测。

本教程使用以下 Google Cloud ML 服务和资源：

- Vertex AI 训练
- Vertex AI 模型注册表
- Vertex AI 在线预测

### 数据集

您可以在[此处找到示例数据集的详细信息](https://auto.gluon.ai/stable/tutorials/tabular/tabular-quick-start.html#example-data)。

### 成本

本教程使用Google Cloud的计费组件：

* Vertex AI
* 云存储

了解[Vertex AI定价](https://cloud.google.com/vertex-ai/pricing)和[云存储定价](https://cloud.google.com/storage/pricing)，并使用[定价计算器](https://cloud.google.com/products/calculator/)根据您预期的使用情况生成成本估算。

## 安装

安装以下所需的包来执行这个笔记本。

In [None]:
# Install the packages.
! pip3 install --upgrade google-cloud-aiplatform

### 仅限 Colab

In [None]:
# Restart the kernel once the installs above have finished so that the
# environment can access the newly installed packages.
import IPython

IPython.Application.instance().kernel.do_shutdown(True)

## 开始之前

### 设置您的Google Cloud项目

**无论您使用什么笔记本环境，以下步骤都是必需的。**

1. [选择或创建一个Google Cloud项目](https://console.cloud.google.com/cloud-resource-manager)。当您第一次创建帐户时，您将获得300美元的免费信用额用于计算/存储费用。

2. [确保为您的项目启用了计费](https://cloud.google.com/billing/docs/how-to/modify-project)。

3. [启用Vertex AI API和Compute Engine API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com,compute_component)。

4. 如果您在本地运行此笔记本，您需要安装[Cloud SDK](https://cloud.google.com/sdk)。

5. [创建一个服务账户](https://cloud.google.com/iam/docs/service-accounts-create#iam-service-accounts-create-console)，赋予`Vertex AI User`和`Storage Object Admin`角色，用于部署微调模型到Vertex AI端点。

#### 设置您的项目ID

**如果您不知道您的项目ID**，请尝试以下操作：
* 运行`gcloud config list`。
* 运行`gcloud projects list`。
* 查看支持页面：[查找项目ID](https://support.google.com/googleapi/answer/7014113)

In [None]:
# Google Cloud project ID used by the gcloud/gsutil commands in this notebook
# and by `aiplatform.init()`.
PROJECT_ID = "your-project-id"  # @param {type:"string"}

# Set the project id in the gcloud configuration.
! gcloud config set project {PROJECT_ID}

#### 区域

您也可以更改 Vertex AI 使用的 `REGION` 变量。了解更多关于 [Vertex AI 区域](https://cloud.google.com/vertex-ai/docs/general/locations)。

In [None]:
# Region passed to `aiplatform.init()` and used as the bucket location.
REGION = "us-central1"  # @param {type: "string"}

### 验证您的Google Cloud账户

根据您的Jupyter环境，您可能需要手动进行验证。请按照以下相关说明。

1. Vertex AI 工作台
* 由于您已经通过身份验证，无需进行任何操作。

2. 本地JupyterLab实例，取消注释并运行：

In [None]:
# ! gcloud auth login

3. Colab，取消注释并运行：

In [None]:
# from google.colab import auth
# auth.authenticate_user()

请参阅 [gsutil iam 示例](https://cloud.google.com/storage/docs/gsutil/commands/iam#ch-examples)，了解如何为您的服务账户授予云存储权限。

In [None]:
# The service account for deploying the fine-tuned model; it is passed as
# `service_account` to `model.deploy()` inside `deploy_model`.
# The service account looks like:
# '<account_name>@<project>.iam.gserviceaccount.com'
SERVICE_ACCOUNT = "your-service-account"  # @param {type:"string"}

### 创建一个云存储桶

创建一个存储桶，用于存储中间产物，如数据集。

In [None]:
# Cloud Storage bucket URI. The SDK staging directory and the fine-tuning
# work directory are both placed under this bucket.
BUCKET_URI = f"gs://your-bucket-name-{PROJECT_ID}-unique"  # @param {type:"string"}

只有当您的存储桶尚不存在时：运行以下单元格以创建您的云存储桶。

In [None]:
# Create the bucket in REGION under PROJECT_ID.
# Only run this cell if the bucket does not exist yet.
! gsutil mb -l {REGION} -p {PROJECT_ID} {BUCKET_URI}

### 导入库

In [None]:
import os
from datetime import datetime

from google.cloud import aiplatform

### 初始化Vertex AI SDK用于Python

为您的项目初始化Vertex AI SDK用于Python。

In [None]:
# Build the staging location with an explicit "/" separator: `os.path.join`
# would insert a backslash on Windows and produce an invalid gs:// URI.
# Any trailing slash on BUCKET_URI is stripped so the result has no "//".
staging_bucket = f"{BUCKET_URI.rstrip('/')}/autogluon_staging"
aiplatform.init(project=PROJECT_ID, location=REGION, staging_bucket=staging_bucket)

### 定义常量

In [None]:
# The pre-built training docker image, used as the container of the custom
# training job.
TRAIN_DOCKER_URI = "us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-autogluon-train:20240124_0927_RC00"
# The pre-built serving docker image, used when uploading the trained model.
SERVE_DOCKER_URI = "us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-autogluon-serve:20240124_0938_RC00"
# Serving port, passed as `serving_container_ports` when uploading the model.
PORT = 8501

### 定义常用功能

本节定义了以下功能：

- 将云存储路径（例如`gs://bucket-name`）转换为 GCSFuse 路径格式（例如`/gcs/bucket-name`）。
- 部署训练好的模型到 Vertex AI 端点进行预测。

In [None]:
def gcs_fuse_path(path: str) -> str:
    """Map a `gs://` URI to its `/gcs/` mount path.

    Surrounding whitespace is always stripped. A path that does not start
    with `gs://` is returned stripped but otherwise unchanged.

    Args:
        path: A Cloud Storage URI (e.g. `gs://bucket/dir`) or any other path.

    Returns:
        `/gcs/<rest>` when the stripped path starts with `gs://`, otherwise
        the stripped path itself.
    """
    gcs_scheme = "gs://"
    cleaned = path.strip()
    if not cleaned.startswith(gcs_scheme):
        return cleaned
    # Drop the scheme prefix and re-root the remainder under /gcs/.
    return "/gcs/" + cleaned[len(gcs_scheme):]


def deploy_model(model_path):
    """Upload the trained model and deploy it to a new Vertex AI endpoint.

    Args:
        model_path: Location of the trained model. It is used as the model's
            `artifact_uri` and is also passed to the serving container
            through the `model_path` environment variable.

    Returns:
        A `(model, endpoint)` tuple: the object returned by
        `aiplatform.Model.upload` and the endpoint it was deployed to.
    """
    display_name = "autogluon"
    # The endpoint is created first; the model is then uploaded and deployed
    # onto it.
    target_endpoint = aiplatform.Endpoint.create(
        display_name=f"{display_name}-endpoint"
    )
    uploaded_model = aiplatform.Model.upload(
        display_name=display_name,
        serving_container_image_uri=SERVE_DOCKER_URI,
        serving_container_ports=[PORT],
        serving_container_predict_route="/predict",
        serving_container_health_route="/ping",
        serving_container_environment_variables={
            "model_path": model_path,
            "DEPLOY_SOURCE": "notebook",
        },
        # The model location is a GCS path, so it is handed to the serving
        # docker through artifact_uri.
        artifact_uri=model_path,
    )
    uploaded_model.deploy(
        endpoint=target_endpoint,
        machine_type="n1-highmem-16",
        deploy_request_timeout=1800,
        service_account=SERVICE_ACCOUNT,
    )
    return uploaded_model, target_endpoint

使用 Vertex AI SDK 创建并运行训练作业，该作业使用 Model Garden 提供的 PyTorch AutoGluon 训练 Docker 镜像。训练在一台仅使用 CPU 的机器（`n1-highmem-16`）上进行，作业启动后大约运行 3 分钟。

In [None]:
# Set up training docker arguments.

# The timestamp makes the job name (and therefore the work directory) unique
# for each run.
TIMESTAMP = datetime.now().strftime("%Y%m%d_%H%M%S")
JOB_NAME = "pytorch_autogluon" + TIMESTAMP

# Join with an explicit "/" separator: `os.path.join` would insert a backslash
# on Windows and produce an invalid gs:// URI.
finetuning_workdir = f"{BUCKET_URI.rstrip('/')}/{JOB_NAME}"
train_data_path = (
    "https://raw.githubusercontent.com/mli/ag-docs/main/knot_theory/train.csv"
)
# The column id to predict.
label = "signature"

# Command-line arguments passed to the training docker. The model save path
# is converted from its gs:// form to the /gcs/ form by `gcs_fuse_path`.
docker_args_list = [
    "--train_data_path",
    train_data_path,
    "--label",
    label,
    "--model_save_path",
    gcs_fuse_path(finetuning_workdir),
]
print(docker_args_list)

In [None]:
# Create and run the training job.
# Click on the generated link in the output under "View backing custom job:" to see your run in the Cloud Console.
container_uri = TRAIN_DOCKER_URI
job = aiplatform.CustomContainerTrainingJob(
    display_name=JOB_NAME,
    container_uri=container_uri,
)
# `args` carries the arguments assembled in `docker_args_list`; the job runs
# on a single `n1-highmem-16` replica.
# NOTE(review): the value returned by `job.run()` is not used afterwards;
# `model` is reassigned by the `deploy_model()` call later in the notebook.
model = job.run(
    args=docker_args_list,
    base_output_dir=f"{finetuning_workdir}",
    replica_count=1,
    machine_type="n1-highmem-16",
)

## 运行在线预测

使用训练好的模型进行在线预测。

上传经过训练的模型，并将其部署到端点以进行预测。这一步大约需要20分钟才能完成。

In [None]:
# Upload the model stored under `finetuning_workdir` and deploy it to a new
# endpoint; keep both handles for prediction and cleanup.
model, endpoint = deploy_model(model_path=finetuning_workdir)

发送查询数据的预测请求。此示例查询数据的预期“signature”标签为“-2”。您还可以发送逗号分隔的多个查询。

In [None]:
# One query row: each key is a feature name and each value is that feature's
# value. Add more dicts to the list to send several queries at once.
# NOTE(review): the keys presumably have to match the training CSV column
# names exactly (including spellings such as "meridinal") — confirm before
# editing them.
instances = [
    {
        "Unnamed: 0": 70746,
        "chern_simons": 0.0905302166938781,
        "cusp_volume": 12.226321765565215,
        "hyperbolic_adjoint_torsion_degree": 0,
        "hyperbolic_torsion_degree": 10,
        "injectivity_radius": 0.5077560544013977,
        "longitudinal_translation": 10.685555458068848,
        "meridinal_translation_imag": 1.1441915035247805,
        "meridinal_translation_real": -0.5191566348075867,
        "short_geodesic_imag_part": -2.7606005668640137,
        "short_geodesic_real_part": 1.0155121088027954,
        "Symmetry_0": 0.0,
        "Symmetry_D3": 0.0,
        "Symmetry_D4": 0.0,
        "Symmetry_D6": 0.0,
        "Symmetry_D8": 0.0,
        "Symmetry_Z/2 + Z/2": 1.0,
        "volume": 11.393224716186523,
    },
]
# Send the request to the deployed endpoint and print the returned predictions.
predictions = endpoint.predict(instances=instances).predictions
print(predictions)

## 清理

要清理此项目中使用的所有Google Cloud资源，您可以[删除用于教程的Google Cloud项目](https://cloud.google.com/resource-manager/docs/creating-managing-projects#shutting_down_projects)。

否则，您可以删除在本教程中创建的各个资源。

In [None]:
# Delete endpoint resource.
endpoint.delete(force=True)

# Delete model resource.
model.delete()

# Delete Cloud Storage objects that were created.
delete_bucket = False
if delete_bucket or os.getenv("IS_TESTING"):
    ! gsutil -m rm -r $BUCKET_URI