In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# 基于PSC的私有端点在线预测

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/prediction/get_started_with_psc_private_endpoint.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Google Colaboratory logo"><br> 在Colab中打开
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fvertex-ai-samples%2Fmain%2Fnotebooks%2Fofficial%2Fprediction%2Fget_started_with_psc_private_endpoint.ipynb">
      <img width="32px" src="https://cloud.google.com/ml-engine/images/colab-enterprise-logo-32px.png" alt="Google Cloud Colab Enterprise logo"><br> 在Colab企业版中打开
    </a>
  </td>    
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/main/notebooks/official/prediction/get_started_with_psc_private_endpoint.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo"><br> 在Workbench中打开
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/prediction/get_started_with_psc_private_endpoint.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo"><br> 在GitHub上查看
    </a>
  </td>
</table>

## 概观

与当前的PSA私有终端点相比，基于PSC的私有终端点具有以下优势：
1. 设置过程更简单：目前，用户唯一需要做的额外步骤是在他们的VPC中创建一个终端点。并且在我们的GA发布之前，PSC会自动完成这一步骤。

2. 不再有IP耗尽问题：GKE集群将托管在租户项目的VPC中，因此我们可以创建更大的集群，并且不会受到用户VPC中IP耗尽问题的影响。

3. 与公共终端点统一体验：API与公共终端点相同，因此用户可以使用我们的SDK/客户端库。我们也提供配额、IAM和监控指标，就像公共终端点一样。

开始。

### 安装Python的Vertex AI SDK以及其他必需的包

In [None]:
! pip3 install --upgrade --user --quiet google-cloud-aiplatform

### 重新启动运行时（仅限Colab）

为了使用新安装的软件包，您必须重新启动 Google Colab 上的运行时。

In [None]:
import sys

# Nothing happens outside Colab: the restart is only triggered when the
# google.colab module has been loaded into this interpreter.
if "google.colab" in sys.modules:
    import IPython

    # Passing True requests a restart rather than a plain shutdown.
    IPython.Application.instance().kernel.do_shutdown(True)

<div class="alert alert-block alert-warning">
<b>⚠️内核将重新启动。在继续下一步之前，请等待它完成。⚠️</b>
</div>

### 验证您的笔记本环境（仅限Colab）

在Google Colab上验证您的环境。

In [None]:
import sys

# The Colab auth helper is only importable on Colab, so guard on the
# presence of the google.colab module before touching it.
if "google.colab" in sys.modules:
    from google.colab import auth as colab_auth

    # Run Colab's user authentication flow for this session.
    colab_auth.authenticate_user()

### 设置谷歌云项目信息，并初始化 Python 的 Vertex AI SDK

要开始使用 Vertex AI，您必须拥有一个现有的谷歌云项目，并[启用 Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com)。了解更多关于[设置项目和开发环境](https://cloud.google.com/vertex-ai/docs/start/cloud-environment)的信息。

In [None]:
# Google Cloud project ID; replace the placeholder before running the notebook.
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}
# Region used for the bucket, the Vertex AI SDK and the gcloud commands below.
LOCATION = "us-central1"  # @param {type:"string"}

In [None]:
# Cloud Storage bucket used as the SDK staging bucket and as the destination
# for the sample models. Replace the placeholder with a unique bucket name.
BUCKET_URI = "gs://your-bucket-name-unique"  # @param {type:"string"}
# Create the bucket in LOCATION under PROJECT_ID.
! gsutil mb -l {LOCATION} -p {PROJECT_ID} {BUCKET_URI}

In [None]:
from google.cloud import aiplatform

# Initialize the SDK with the project, region and staging bucket chosen above.
aiplatform.init(project=PROJECT_ID, location=LOCATION, staging_bucket=BUCKET_URI)

### 准备测试模型

我们准备了一些测试模型，您可以自由使用您自己的模型。

In [None]:
# Recursively copy the sample models and request files from the public
# samples bucket into BUCKET_URI. The wildcard is quoted so gsutil, not the
# local shell, expands it.
! gsutil cp -r "gs://cloud-samples-data/vertex-ai/prediction/test-models-requests/*" {BUCKET_URI}

### 上传模型

In [None]:
# Choose the model to serve: keep exactly one of the sections below
# uncommented, then run this cell. Each section sets the model display name,
# the artifact folder inside BUCKET_URI, the serving container image and the
# name of the matching sample request file.

# TF Model
DISPLAY_NAME = "tensorflow model"  # @param {type:"string"}
ARTIFACT_URI = BUCKET_URI + "/tensorflow"
IMAGE_URI = "us-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-12:latest"
REQUEST_FILE = "tensorflow_request.json"


# Pytorch Model
# DISPLAY_NAME="Pytorch model"
# ARTIFACT_URI=BUCKET_URI+"/pytorch"
# IMAGE_URI="us-docker.pkg.dev/vertex-ai/prediction/pytorch-cpu.2-0:latest"
# REQUEST_FILE="pytorch_request.json"


# Sklearn Model
# DISPLAY_NAME="Sklearn model"
# ARTIFACT_URI=BUCKET_URI+"/sklearn"
# IMAGE_URI="us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-2:latest"
# REQUEST_FILE="sklearn_request.json"


# xgboost Model
# DISPLAY_NAME="xgboost model"
# ARTIFACT_URI=BUCKET_URI+"/xgboost"
# IMAGE_URI="us-docker.pkg.dev/vertex-ai/prediction/xgboost-cpu.1-7:latest"
# REQUEST_FILE="xgboost_request.json"

In [None]:
# Register the selected artifacts as a Vertex AI Model served by IMAGE_URI.
# The upload is started with sync=False, so the cell explicitly blocks on
# model.wait() below before later cells use `model`.
model = aiplatform.Model.upload(
    display_name=DISPLAY_NAME,
    artifact_uri=ARTIFACT_URI,
    serving_container_image_uri=IMAGE_URI,
    sync=False,
)

model.wait()

### 创建基于PSC的预测私有终端

In [None]:
# Create a private endpoint configured for Private Service Connect.
# The project allowlist contains only PROJECT_ID.
psc_endpoint = aiplatform.PrivateEndpoint.create(
    display_name="psc-endpoint",
    project=PROJECT_ID,
    location=LOCATION,
    private_service_connect_config=aiplatform.PrivateEndpoint.PrivateServiceConnectConfig(
        project_allowlist=[PROJECT_ID],
    ),
)

或者，发送http调用来创建终端点。您需要手动替换下面的所有变量。

In [None]:
# REST alternative to the SDK call above (left commented out on purpose).
# Every ${NAME} placeholder must be replaced by hand with the real value
# before the command is uncommented and run.
# ! curl -X POST -H "Content-Type: application/json" -H "Authorization: Bearer `gcloud auth print-access-token`" https://${LOCATION}-aiplatform.googleapis.com/v1/projects/${PROJECT_ID}/locations/${LOCATION}/endpoints -d \
# '{ \
#     displayName: "psc-endpoint", \
#     privateServiceConnectConfig: { \
#       enablePrivateServiceConnect: true, \
#       projectAllowlist: ["${PROJECT_ID}"] \
#     }, \
# }'

### 部署模型

In [None]:
# Deploy the uploaded model to the PSC endpoint and route all traffic to it.
psc_endpoint.deploy(model=model, traffic_percentage=100, machine_type="e2-standard-8")

# Display the models currently deployed on the endpoint.
psc_endpoint.list_models()

### 在消费者项目中创建转发规则

首先，找到来自终端点和部署模型的服务附件。

In [None]:
# Read the service attachment from the first deployed model reported by the
# endpoint; it is the target of the forwarding rule created next.
service_attachment = psc_endpoint.list_models()[0].private_endpoints.service_attachment
print(service_attachment)

然后，创建一个针对服务附件的地址和转发规则。在这个示例中，使用默认网络和子网，请根据您的VPC网络和子网进行替换。

In [None]:
! gcloud compute addresses create psc-prediction \
    --region={LOCATION} \
    --subnet=default

! gcloud compute forwarding-rules create op-psc-endpoint \
    --network=default \
    --address=psc-prediction \
    --target-service-attachment={service_attachment} \
    --region={LOCATION}

保存上方的IP地址。

In [None]:
IP_ADDRESS = ! gcloud compute forwarding-rules describe op-psc-endpoint --region={LOCATION} --format='value(IPAddress)'
IP_ADDRESS = IP_ADDRESS[0]
print(IP_ADDRESS)

## 进行预测

从这一点开始，下面的所有代码都必须在与您的PSC端点相同的VPC、相同的地区的GCP VM中运行。

如果您正在使用Vertex AI Workbench或Colab Enterprise，那就没问题。

如果您要创建一个GCE VM，请确保已启用Cloud Platform访问范围。

In [None]:
# Download the requests files:
! gsutil cp {BUCKET_URI}/requests/* ./

In [None]:
import os

# The prediction only runs when the IS_TESTING environment variable is unset
# or empty.
if not os.getenv("IS_TESTING"):
    import json

    import urllib3

    # Silence urllib3's InsecureRequestWarning for the request sent below.
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    # Read the sample payload first; the file does not need to stay open
    # while the request is in flight.
    with open(REQUEST_FILE) as request_fp:
        data = json.load(request_fp)

    # endpoint_override sends the request to the forwarding-rule IP address.
    response = psc_endpoint.predict(
        instances=data["instances"], endpoint_override=IP_ADDRESS
    )
    print(response)

### 预测请求

或者，您可以直接向IP地址发送HTTP请求。请确保在请求中替换所有变量。

In [None]:
# Resource name of the endpoint, interpolated into the REST URL in the curl
# command that follows.
ENDPOINT_RESOURCE_NAME = psc_endpoint.resource_name

In [None]:
import os

# The request is only sent when the IS_TESTING environment variable is unset
# or empty.
if not os.getenv("IS_TESTING"):
    # Predict: POST the sample request file to the forwarding-rule IP address.
    # --insecure turns off TLS certificate verification in curl, and the
    # bearer token comes from `gcloud auth print-access-token`.
    ! curl --insecure -H "Content-Type: application/json" -H "Authorization: Bearer `gcloud auth print-access-token`"  https://{IP_ADDRESS}/v1/{ENDPOINT_RESOURCE_NAME}:predict -d@{REQUEST_FILE}

    # RawPredict variant of the same call (verbose); uncomment to try it.
    # # RawPredict
    # ! curl -v --insecure -H "Content-Type: application/json" -H "Authorization: Bearer `gcloud auth print-access-token`" https://{IP_ADDRESS}/v1/{ENDPOINT_RESOURCE_NAME}:rawPredict -d@{REQUEST_FILE}

### 部署另一个模型并更新流量分配

部署另一个模型，并将流量分配更新为50:50，在部署完成后，您可以再次运行多次预测，您应该能够看到部署的模型ID是不同的。

In [None]:
# Deploy the same `model` a second time, giving the new deployment 50% of
# the endpoint's traffic.
psc_endpoint.deploy(model=model, traffic_percentage=50, machine_type="e2-standard-8")

In [None]:
import os

# The load loop only runs when the IS_TESTING environment variable is unset
# or empty.
if not os.getenv("IS_TESTING"):
    import json

    import urllib3

    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    # Maps deployed model ID -> number of responses served by that deployment.
    counter = {}

    # Load the payload once up front; the same instances are sent every time.
    with open(REQUEST_FILE) as request_fp:
        data = json.load(request_fp)

    # Send 1000 identical requests and tally which deployment answered each.
    for _ in range(1000):
        response = psc_endpoint.predict(
            instances=data["instances"], endpoint_override=IP_ADDRESS
        )
        served_by = response.deployed_model_id
        counter[served_by] = counter.get(served_by, 0) + 1

    print(counter)

你可以使用以下命令更新流量分配，并再次运行上面的代码。

In [None]:
import os

# The update only runs when the IS_TESTING environment variable is unset or
# empty.
if not os.getenv("IS_TESTING"):
    # Read the deployed model IDs from the endpoint itself instead of from the
    # `counter` tally: a deployment that received no traffic in the sampling
    # loop (for example after a skewed split) would be missing from the tally
    # and make the lookup of the second ID fail.
    deployed_model_ids = [deployed.id for deployed in psc_endpoint.list_models()]
    if len(deployed_model_ids) < 2:
        raise RuntimeError(
            "Expected at least two deployed models on the endpoint, found "
            f"{len(deployed_model_ids)}; deploy a second model before "
            "updating the traffic split."
        )
    deployed_model_id_0, deployed_model_id_1 = deployed_model_ids[:2]

    # Shift traffic to a 20/80 split between the two deployments.
    psc_endpoint.update(
        traffic_split={deployed_model_id_0: 20, deployed_model_id_1: 80}
    )

## 清理

In [None]:
# Tear down the Vertex AI resources created in this notebook. Order matters:
# undeploy every model from the endpoint first, then delete the endpoint,
# and finally delete the uploaded model.
psc_endpoint.undeploy_all()
psc_endpoint.delete()
model.delete()

In [None]:
! gcloud compute forwarding-rules delete op-psc-endpoint --region={LOCATION}  --quiet

! gcloud compute addresses delete psc-prediction --region={LOCATION} --quiet

如果需要的话，删除桶。

In [None]:
# Recursively remove BUCKET_URI and everything stored in it.
! gsutil rm -r {BUCKET_URI}

可以根据需要使用以下命令清理所有私有端点：对每个私有端点先取消部署其中的所有模型，然后删除该端点。

In [None]:
# WARNING: this is not limited to the endpoint created in this notebook.
# It iterates over every PrivateEndpoint returned by list(), undeploys all
# of its models and then deletes the endpoint.
for pe in aiplatform.PrivateEndpoint.list():
    pe.undeploy_all()
    pe.delete()