In [None]:
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI 自动机器学习文本情感分析模型

## 安装

安装最新（预览）版本的 Vertex SDK。

In [None]:
# Install (or upgrade to) the latest Vertex AI SDK in the user site-packages.
! pip3 install -U google-cloud-aiplatform --user

安装谷歌 *云存储* 库。

In [None]:
# Install the Google Cloud Storage client library.
! pip3 install google-cloud-storage

### 重新启动内核

一旦您安装了Vertex SDK 和 Google *cloud-storage*，您需要重新启动笔记本内核，以便它可以找到这些包。

In [None]:
import os

# Skip the restart when the AUTORUN environment variable is set; otherwise
# restart the kernel so the freshly installed packages can be imported.
autorun = os.getenv("AUTORUN")
if not autorun:
    import IPython

    IPython.Application.instance().kernel.do_shutdown(True)

## 开始之前

### GPU 运行时

*如果有此选项，请确保在 GPU 运行时中运行此笔记本。在 Colab 中，选择* **运行时 > 更改运行时类型 > GPU**

### 设置您的 GCP 项目

**无论您的笔记本环境如何，以下步骤都是必需的。**

1. [选择或创建一个 GCP 项目](https://console.cloud.google.com/cloud-resource-manager)。当您第一次创建账号时，您将获得 $300 的免费信用用于计算/存储成本。

2. [确保您的项目已启用计费。](https://cloud.google.com/billing/docs/how-to/modify-project)

3. [启用 Vertex APIs 和 Compute Engine APIs。](https://console.cloud.google.com/flows/enableapi?apiid=ml.googleapis.com,compute_component)

4. [Google Cloud SDK](https://cloud.google.com/sdk) 已安装在 Google Cloud 笔记本中。

5. 在下面的单元格中输入您的项目 ID。然后运行该单元格，以确保
Cloud SDK 对本笔记本中的所有命令使用正确的项目。

**注意**：Jupyter 运行以`!`为前缀的行作为 shell 命令，并将以`$`为前缀的 Python 变量插入这些命令中。

In [None]:
# Placeholder project ID; if left as-is, the next cell looks it up from gcloud.
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}

In [None]:
if PROJECT_ID == "" or PROJECT_ID is None or PROJECT_ID == "[your-project-id]":
    # Get your GCP project id from gcloud
    shell_output = !gcloud config list --format 'value(core.project)' 2>/dev/null
    PROJECT_ID = shell_output[0]
    print("Project ID:", PROJECT_ID)

In [None]:
# Make this project the default for subsequent gcloud commands.
! gcloud config set project $PROJECT_ID

#### 区域

您还可以更改“REGION”变量，该变量用于整个笔记本的操作。以下是 Vertex AI 支持的区域。我们建议尽可能选择距离您最近的区域。

- 美洲：`us-central1`
- 欧洲：`europe-west4`
- 亚太地区：`asia-east1`

您不能使用多区域存储桶进行 Vertex 训练。并非所有区域都支持所有 Vertex 服务。有关每个区域的最新支持，请参阅[Vertex AI 服务的区域支持](https://cloud.google.com/vertex-ai/docs/general/locations)

In [None]:
# Region used for the bucket location and the Vertex AI resource parent path.
REGION = "us-central1"  # @param {type: "string"}

#### 时间戳

如果您正在进行实时教程会话，则可能在使用共享测试帐户或项目。为了避免在创建的资源上发生用户名冲突，您可以为每个实例会话创建一个时间戳，并附加到将在本教程中创建的资源的名称上。

In [None]:
from datetime import datetime

# Second-resolution timestamp (YYYYmmddHHMMSS) appended to resource names so
# that concurrent sessions do not collide.
_TIMESTAMP_FORMAT = "%Y%m%d%H%M%S"
TIMESTAMP = datetime.now().strftime(_TIMESTAMP_FORMAT)

### 验证您的GCP账户

**如果您正在使用Google云笔记本**，您的环境已经是经过验证的。请跳过这一步。

*注意：如果您正在使用Vertex笔记本并运行单元格，则单元格会自动跳过执行身份验证步骤。*

In [None]:
import os
import sys

# If you are running this notebook in Colab, run this cell and follow the
# instructions to authenticate your Google Cloud account. This provides access
# to your Cloud Storage bucket and lets you submit training jobs and prediction
# requests.

# If on Vertex, then don't execute this code
if not os.path.exists("/opt/deeplearning/metadata/env_version"):
    if "google.colab" in sys.modules:
        from google.colab import auth as google_auth

        google_auth.authenticate_user()

    # If you are running this tutorial in a notebook locally, replace the string
    # below with the path to your service account key and run this cell to
    # authenticate your Google Cloud account.
    else:
        %env GOOGLE_APPLICATION_CREDENTIALS your_path_to_credentials.json

    # Log in to your account on Google Cloud
    ! gcloud auth login

### 创建一个云存储桶

**无论你使用的是哪种笔记本环境，以下步骤都是必须的。**

本教程旨在使用处于公共云存储桶中的训练数据以及本地云存储桶进行批量预测。您也可以使用自己存储在本地云存储桶中的训练数据。

在下方设置您的云存储桶的名称。它必须在所有云存储桶中是唯一的。

In [None]:
# Placeholder bucket name; if left as-is, the next cell derives one automatically.
BUCKET_NAME = "[your-bucket-name]"  # @param {type:"string"}

In [None]:
# When no bucket name was supplied (empty, None, or the placeholder), derive
# one from the project ID and the session timestamp.
if BUCKET_NAME in (None, "", "[your-bucket-name]"):
    BUCKET_NAME = "".join([PROJECT_ID, "aip-", TIMESTAMP])

只有在您的存储桶尚不存在时：运行以下单元格以创建您的云存储存储桶。

In [None]:
# Create the bucket in the selected region.
! gsutil mb -l $REGION gs://$BUCKET_NAME

最后，通过检查云存储桶的内容来验证访问权限。

In [None]:
# List the bucket contents to confirm that it is accessible.
! gsutil ls -al gs://$BUCKET_NAME

### 设置变量

接下来，设置一些在教程中使用的变量。
### 导入库并定义常量

导入Vertex SDK

将Vertex SDK导入到我们的Python环境中。

In [None]:
import json
import os
import sys
import time

from google.cloud.aiplatform import gapic as aip
from google.protobuf import json_format
from google.protobuf.json_format import MessageToJson
from google.protobuf.struct_pb2 import Struct, Value

设置以下的Vertex AI 常量：

- `API_ENDPOINT`：Vertex AI 的 API 服务端点，用于数据集、模型、作业、流水线和终端点服务。
- `PARENT`：Vertex AI 的位置根路径，用于数据集、模型和终端点资源。

In [None]:
# API Endpoint
# Derived from REGION so the service endpoint always matches the location used
# in PARENT; a mismatch between the two makes every request fail.
API_ENDPOINT = "{}-aiplatform.googleapis.com".format(REGION)

# Vertex AI location root path for your dataset, model and endpoint resources
PARENT = "projects/" + PROJECT_ID + "/locations/" + REGION

#### AutoML常量

接下来，设置与 AutoML 文本情感分析数据集和训练相关的常量：

- 数据集模式：告诉托管数据集服务是哪种类型的数据集。
- 数据标注（注释）架构：告诉托管数据集服务数据是如何标记（注释）的。
- 数据集训练架构：告诉Vertex AI流水线服务要为哪种任务（例如分类）训练模型。

In [None]:
# Text Dataset type
# NOTE: unlike the two URIs below, this value omits the "gs://" scheme; it is
# prepended where the dataset creation request is built.
TEXT_SCHEMA = "google-cloud-aiplatform/schema/dataset/metadata/text_1.0.0.yaml"
# Text Labeling type
# Import schema describing how the sentiment-labelled text data is formatted.
IMPORT_SCHEMA_TEXT_SENTIMENT = "gs://google-cloud-aiplatform/schema/dataset/ioformat/text_sentiment_io_format_1.0.0.yaml"
# Text Training task
# Training task definition for the AutoML text sentiment pipeline.
TRAINING_TEXT_SENTIMENT_SCHEMA = "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_text_sentiment_1.0.0.yaml"

## 客户端

Vertex SDK 作为一个客户端/服务器模型工作。在您的一侧（Python脚本），您将创建一个客户端，向服务器（Vertex）发送请求并接收响应。

在本教程中，您将使用多个客户端，请提前设置它们。

- 用于托管数据集的数据集服务。
- 用于托管模型的模型服务。
- 用于训练的管道服务。
- 用于部署的端点服务。
- 用于批处理作业和自定义训练的作业服务。
- 用于提供预测服务。

*注意*：预测服务有不同的服务端点。

In [None]:
# Every service client is created with the same API endpoint.
client_options = {"api_endpoint": API_ENDPOINT}


def create_dataset_client():
    """Return a client for the Dataset service."""
    return aip.DatasetServiceClient(client_options=client_options)


def create_model_client():
    """Return a client for the Model service."""
    return aip.ModelServiceClient(client_options=client_options)


def create_pipeline_client():
    """Return a client for the Pipeline service."""
    return aip.PipelineServiceClient(client_options=client_options)


def create_endpoint_client():
    """Return a client for the Endpoint service."""
    return aip.EndpointServiceClient(client_options=client_options)


def create_prediction_client():
    """Return a client for the Prediction service."""
    return aip.PredictionServiceClient(client_options=client_options)


def create_job_client():
    """Return a client for the Job service."""
    return aip.JobServiceClient(client_options=client_options)


# One client per service, keyed by a short service name.
clients = {
    "dataset": create_dataset_client(),
    "model": create_model_client(),
    "pipeline": create_pipeline_client(),
    "endpoint": create_endpoint_client(),
    "prediction": create_prediction_client(),
    "job": create_job_client(),
}

# Show each (name, client) pair as a quick sanity check.
for client in clients.items():
    print(client)

In [None]:
# Cloud Storage CSV file that is imported into the dataset below.
IMPORT_FILE = "gs://cloud-samples-data/language/claritin-split.csv"

In [None]:
# Preview the first 10 rows of the import file.
! gsutil cat $IMPORT_FILE | head -n 10

示例输出：
```
训练，@freewrytin上帝对Claritin太好了，2,4
训练，我需要Claritin。太糟糕了。我什么时候变得对过敏反应过敏了？，3,4
训练，感谢上帝赐予Claritin。,4,4
训练，“更糟糕的是，我昨天达到了喷鼻剂的3天使用限制，这意味着我必须依赖Claritin。”，2,4
训练，是时候服用一些Claritin或Allegra或其他什么药物了。我需要我的声音，3,4
训练，哦我的RT @imsydneycharles：我只是想在某个地方记录下我同时服用Claritin和Benadryl……以防我晕倒，2,4
训练，取一个Claritin _ÛªÛ_Ûª_ÛªÌâ FML！！，3,4
训练，Sarcelles的洛拉他汀通用A指挥官：洛拉他汀通用A指挥官 Sarcelles Claritin =Ûª_Ûª__ http://t.co/mOleL8AM，2,4
训练，“Zyrtec，Claritin，Suddafed，鼻喷雾..我觉得自己像个吸食这些过敏药的药物成瘾者。请过敏季节..消失吧！！”，1,4
训练，“Ûª_Ûª_ÛªÕ@SheLovesThatD：如果她有过敏反应，请给她Claritin D。Ûª_Ûª_Ì_å @Sweeno_thakid41 @B_Original16 @luke_CYwalker14”，3,4
```

## 创建一个数据集

### 准备数据

### [projects.locations.datasets.create](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.datasets/create)

### [projects.locations.datasets.create](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.datasets/create)

请求

In [None]:
DATA_SCHEMA = TEXT_SCHEMA

# Dataset resource: a timestamped display name plus the metadata schema URI,
# which is DATA_SCHEMA with the gs:// scheme prepended.
dataset = dict(
    display_name="claritin_" + TIMESTAMP,
    metadata_schema_uri="gs://" + DATA_SCHEMA,
)

# Build the request object only to preview it as JSON; nothing is sent here.
preview_request = aip.CreateDatasetRequest(parent=PARENT, dataset=dataset)
print(MessageToJson(preview_request.__dict__["_pb"]))

**示例输出**：
```
{
  "parent": "projects/migration-ucaip-training/locations/us-central1",
  "dataset": {
    "displayName": "claritin_20210301212135",
    "metadataSchemaUri": "gs://google-cloud-aiplatform/schema/dataset/metadata/text_1.0.0.yaml"
  }
}
```

#### 调用

In [None]:
# Send the dataset creation request; `request` holds the returned operation,
# whose result is read in the next cell.
request = clients["dataset"].create_dataset(parent=PARENT, dataset=dataset)

#### 回复

In [None]:
# Obtain the operation's result (the created dataset) and print it as JSON.
result = request.result()

print(MessageToJson(result.__dict__["_pb"]))

{
  "name": "projects/116273516712/locations/us-central1/datasets/3047617533776494592",
  "displayName": "claritin_20210301212135",
  "metadataSchemaUri": "gs://google-cloud-aiplatform/schema/dataset/metadata/text_1.0.0.yaml",
  "labels": {
    "aiplatform.googleapis.com/dataset_metadata_schema": "TEXT"
  },
  "metadata": {
    "dataItemSchemaUri": "gs://google-cloud-aiplatform/schema/dataset/dataitem/text_1.0.0.yaml"
  }
}

In [None]:
# Fully qualified resource name of the dataset.
dataset_id = result.name
# Trailing path segment of the resource name: the short numeric ID.
dataset_short_id = dataset_id.rsplit("/", 1)[-1]

print(dataset_id)

### [projects.locations.datasets.import](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.datasets/import)
### [项目.位置.数据集.导入](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.datasets/import)

请求

In [None]:
LABEL_SCHEMA = IMPORT_SCHEMA_TEXT_SENTIMENT

# Import configuration: where the data lives and which schema describes it.
import_config = dict(
    gcs_source={"uris": [IMPORT_FILE]},
    import_schema_uri=LABEL_SCHEMA,
)

# Build the request object only to preview it as JSON; nothing is sent here.
preview_request = aip.ImportDataRequest(
    name=dataset_id, import_configs=[import_config]
)
print(MessageToJson(preview_request.__dict__["_pb"]))

*示例输出*：
```
{
  "name": "projects/116273516712/locations/us-central1/datasets/3047617533776494592",
  "importConfigs": [
    {
      "gcsSource": {
        "uris": [
          "gs://cloud-samples-data/language/claritin-split.csv"
        ]
      },
      "importSchemaUri": "gs://google-cloud-aiplatform/schema/dataset/ioformat/text_sentiment_io_format_1.0.0.yaml"
    }
  ]
}
```

#### 调用

In [None]:
# Start importing the CSV into the dataset; `request` holds the returned
# operation, whose result is read in the next cell.
request = clients["dataset"].import_data(
    name=dataset_id, import_configs=[import_config]
)

#### 响应

In [None]:
# Obtain the import operation's result and print it as JSON.
result = request.result()

print(MessageToJson(result.__dict__["_pb"]))

示例输出:
```
{}
```

## 训练模型

### [projects.locations.trainingPipelines.create](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.trainingPipelines/create)

### [projects.locations.trainingPipelines.create](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.trainingPipelines/create)

#### 请求

In [None]:
TRAINING_SCHEMA = TRAINING_TEXT_SENTIMENT_SCHEMA

# Task inputs are passed as a protobuf Struct wrapped in a Value; the only
# field set here is sentiment_max = 4.
sentiment_max = Value(number_value=4)
task = Value(struct_value=Struct(fields={"sentiment_max": sentiment_max}))

# Pipeline and uploaded model share the same timestamped display name.
pipeline_display_name = "claritin_" + TIMESTAMP
training_pipeline = dict(
    display_name=pipeline_display_name,
    input_data_config={"dataset_id": dataset_short_id},
    model_to_upload={"display_name": pipeline_display_name},
    training_task_definition=TRAINING_SCHEMA,
    training_task_inputs=task,
)

# Build the request object only to preview it as JSON; nothing is sent here.
preview_request = aip.CreateTrainingPipelineRequest(
    parent=PARENT,
    training_pipeline=training_pipeline,
)
print(MessageToJson(preview_request.__dict__["_pb"]))

{
  "parent": "projects/migration-ucaip-training/locations/us-central1",
  "trainingPipeline": {
    "displayName": "claritin_20210301212135",
    "inputDataConfig": {
      "datasetId": "3047617533776494592"
    },
    "trainingTaskDefinition": "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_text_sentiment_1.0.0.yaml",
    "trainingTaskInputs": {
      "sentiment_max": 4.0
    },
    "modelToUpload": {
      "displayName": "claritin_20210301212135"
    }
  }
}

#### 调用

In [None]:
# Create the training pipeline; `request` is rebound to the returned pipeline
# resource, which the following cells print and read the name from.
request = clients["pipeline"].create_training_pipeline(
    parent=PARENT, training_pipeline=training_pipeline
)

#### 回应

In [None]:
# Print the training pipeline returned by the create call as JSON.
print(MessageToJson(request.__dict__["_pb"]))

{
  "name": "projects/116273516712/locations/us-central1/trainingPipelines/5293990158067040256",
  "displayName": "claritin_20210301212135",
  "inputDataConfig": {
    "datasetId": "3047617533776494592"
  },
  "trainingTaskDefinition": "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_text_sentiment_1.0.0.yaml",
  "trainingTaskInputs": {
    "sentimentMax": 4.0
  },
  "modelToUpload": {
    "displayName": "claritin_20210301212135"
  },
  "state": "PIPELINE_STATE_PENDING",
  "createTime": "2021-03-01T21:32:03.085444Z",
  "updateTime": "2021-03-01T21:32:03.085444Z"
}

In [None]:
# Fully qualified resource name of the training pipeline.
training_pipeline_id = request.name
# Trailing path segment of the resource name: the short numeric ID.
training_pipeline_short_id = training_pipeline_id.rsplit("/", 1)[-1]

print(training_pipeline_id)

### [projects.locations.trainingPipelines.get](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.trainingPipelines/get)

### [projects.locations.trainingPipelines.get](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.trainingPipelines/get)

#### 调用

In [None]:
# Fetch the current state of the training pipeline by its resource name.
request = clients["pipeline"].get_training_pipeline(name=training_pipeline_id)

回应

In [None]:
# Print the fetched training pipeline as JSON.
print(MessageToJson(request.__dict__["_pb"]))

{
  "name": "projects/116273516712/locations/us-central1/trainingPipelines/5293990158067040256",
  "displayName": "claritin_20210301212135",
  "inputDataConfig": {
    "datasetId": "3047617533776494592"
  },
  "trainingTaskDefinition": "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_text_sentiment_1.0.0.yaml",
  "trainingTaskInputs": {
    "sentimentMax": 4.0
  },
  "modelToUpload": {
    "displayName": "claritin_20210301212135"
  },
  "state": "PIPELINE_STATE_PENDING",
  "createTime": "2021-03-01T21:32:03.085444Z",
  "updateTime": "2021-03-01T21:32:03.085444Z"
}

In [None]:
# Poll the training pipeline every 20 seconds until it reaches a terminal state.
model_id = None

# States from which the pipeline can no longer succeed; polling must stop.
unsuccessful_states = (
    aip.PipelineState.PIPELINE_STATE_FAILED,
    aip.PipelineState.PIPELINE_STATE_CANCELLED,
)

while True:
    response = clients["pipeline"].get_training_pipeline(name=training_pipeline_id)
    if response.state == aip.PipelineState.PIPELINE_STATE_SUCCEEDED:
        model_id = response.model_to_upload.name
        print("Training Time:", response.end_time - response.start_time)
        break

    print("Training job has not completed:", response.state)
    if response.state in unsuccessful_states:
        # No model was produced, so later cells cannot run; stop with a clear
        # error instead of continuing with an undefined model_id.
        raise RuntimeError(
            f"Training pipeline {training_pipeline_id} ended in state {response.state}"
        )
    time.sleep(20)

print(model_id)

## 评估模型

### [projects.locations.models.evaluations.list](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.models.evaluations/list) 

### [projects.locations.models.evaluations.list](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.models.evaluations/list)

#### 调用

In [None]:
# List the evaluations of the trained model; the result is iterated in the
# next cell.
request = clients["model"].list_model_evaluations(parent=model_id)

回复

In [None]:
# Materialize every evaluation as a plain dict (via its JSON representation).
model_evaluations = [json.loads(MessageToJson(mel.__dict__["_pb"])) for mel in request]

# The evaluation slice
# Take the name from the materialized list rather than from the pager object,
# which has already been iterated to exhaustion above.
evaluation_slice = model_evaluations[0]["name"]

print(json.dumps(model_evaluations, indent=2))

*示例输出*：
```
[
  {
    "name": "projects/116273516712/locations/us-central1/models/5497364624833511424/evaluations/412684097299677184",
    "metricsSchemaUri": "gs://google-cloud-aiplatform/schema/modelevaluation/text_sentiment_metrics_1.0.0.yaml",
    "metrics": {
      "linearKappa": 0.4001057,
      "quadraticKappa": 0.48378703,
      "precision": 0.59030837,
      "confusionMatrix": {
        "rows": [
          [
            3.0,
            4.0,
            1.0,
            1.0,
            0.0
          ],
          [
            2.0,
            19.0,
            25.0,
            18.0,
            0.0
          ],
          [
            0.0,
            10.0,
            72.0,
            57.0,
            0.0
          ],
          [
            0.0,
            16.0,
            34.0,
            169.0,
            5.0
          ],
          [
            0.0,
            0.0,
            2.0,
            11.0,
            5.0
          ]
        ],
        "annotationSpecs": [
          {
            "displayName": "0",
            "id": "7302033741432487936"
          },
          {
            "id": "1537426218398253056",
            "displayName": "1"
          },
          {
            "displayName": "2",
            "id": "6149112236825640960"
          },
          {
            "displayName": "3",
            "id": "3843269227611947008"
          },
          {
            "displayName": "4",
            "id": "8454955246039334912"
          }
        ]
      },
      "meanAbsoluteError": 0.4955947,
      "f1Score": 0.59030837,
      "recall": 0.59030837,
      "meanSquaredError": 0.67180616
    },
    "createTime": "2021-03-02T01:41:13.130713Z",
    "sliceDimensions": [
      "annotationSpec"
    ]
  }
]
```

### [projects.locations.models.evaluations.get](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.models.evaluations/get)

### [项目位置模型评估获取](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.models.evaluations/get)

#### 调用

In [None]:
# Fetch the single evaluation identified by `evaluation_slice`.
request = clients["model"].get_model_evaluation(name=evaluation_slice)

#### 回复

In [None]:
# Print the fetched model evaluation as JSON.
print(MessageToJson(request.__dict__["_pb"]))

{
  "name": "projects/116273516712/locations/us-central1/models/5497364624833511424/evaluations/412684097299677184",
  "metricsSchemaUri": "gs://google-cloud-aiplatform/schema/modelevaluation/text_sentiment_metrics_1.0.0.yaml",
  "metrics": {
    "meanSquaredError": 0.67180616,
    "linearKappa": 0.4001057,
    "precision": 0.59030837,
    "recall": 0.59030837,
    "confusionMatrix": {
      "annotationSpecs": [
        {
          "id": "7302033741432487936",
          "displayName": "0"
        },
        {
          "id": "1537426218398253056",
          "displayName": "1"
        },
        {
          "displayName": "2",
          "id": "6149112236825640960"
        },
        {
          "id": "3843269227611947008",
          "displayName": "3"
        },
        {
          "id": "8454955246039334912",
          "displayName": "4"
        }
      ],
      "rows": [
        [
          3.0,
          4.0,
          1.0,
          1.0,
          0.0
        ],
        [
          2.0,
          19.0,
          25.0,
          18.0,
          0.0
        ],
        [
          0.0,
          10.0,
          72.0,
          57.0,
          0.0
        ],
        [
          0.0,
          16.0,
          34.0,
          169.0,
          5.0
        ],
        [
          0.0,
          0.0,
          2.0,
          11.0,
          5.0
        ]
      ]
    },
    "meanAbsoluteError": 0.4955947,
    "quadraticKappa": 0.48378703,
    "f1Score": 0.59030837
  },
  "createTime": "2021-03-02T01:41:13.130713Z",
  "sliceDimensions": [
    "annotationSpec"
  ]
}

## 进行批量预测

### 制作批量输入文件

现在让我们制作一个批量输入文件，您将把它存储在您的本地云存储桶中。批量输入文件可以是 CSV 或 JSONL 格式。在本教程中，您将使用 JSONL 文件。对于 JSONL 文件，您为每个数据项（实例）的每一行制作一个字典条目。该字典包含键/值对:

- `content`：文本文件在云存储中的路径。
- `mime_type`：内容类型。在我们的示例中，它是 `text/plain`（纯文本文件）。

In [None]:
import json

import tensorflow as tf

test_data = ! gsutil cat $IMPORT_FILE | head -n1

test_item = str(test_data[0]).split(",")[1]
test_label = str(test_data[0]).split(",")[2]

gcs_test_item = "gs://" + BUCKET_NAME + "/test.txt"
with tf.io.gfile.GFile(gcs_test_item, "w") as f:
    f.write(test_item + "\n")

gcs_input_uri = "gs://" + BUCKET_NAME + "/test.jsonl"
with tf.io.gfile.GFile(gcs_input_uri, "w") as f:
    data = {"content": gcs_test_item, "mime_type": "text/plain"}
    f.write(json.dumps(data) + "\n")

! gsutil cat $gcs_input_uri
! gsutil cat $gcs_test_item

*示例输出*：
```
{"content": "gs://migration-ucaip-trainingaip-20210301212135/test.txt", "mime_type": "text/plain"}
@freewrytin 上帝对克良丁太好了
```

### [projects.locations.batchPredictionJobs.create](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.batchPredictionJobs/create)

### [projects.locations.batchPredictionJobs.create](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.batchPredictionJobs/create)

请求

In [None]:
# Input: the JSONL file written to the bucket earlier.
batch_input_config = {
    "instances_format": "jsonl",
    "gcs_source": {"uris": [gcs_input_uri]},
}

# Output: JSONL predictions under the bucket's batch_output/ prefix.
batch_output_config = {
    "predictions_format": "jsonl",
    "gcs_destination": {"output_uri_prefix": f"gs://{BUCKET_NAME}/batch_output/"},
}

# Compute: a single n1-standard-2 replica with no accelerators.
batch_resources = {
    "machine_spec": {"machine_type": "n1-standard-2", "accelerator_count": 0},
    "starting_replica_count": 1,
    "max_replica_count": 1,
}

batch_prediction_job = dict(
    display_name="claritin_" + TIMESTAMP,
    model=model_id,
    input_config=batch_input_config,
    output_config=batch_output_config,
    dedicated_resources=batch_resources,
)

# Build the request object only to preview it as JSON; nothing is sent here.
preview_request = aip.CreateBatchPredictionJobRequest(
    parent=PARENT, batch_prediction_job=batch_prediction_job
)
print(MessageToJson(preview_request.__dict__["_pb"]))

{
  "parent": "projects/migration-ucaip-training/locations/us-central1",
  "batchPredictionJob": {
    "displayName": "claritin_20210301212135",
    "model": "projects/116273516712/locations/us-central1/models/5497364624833511424",
    "inputConfig": {
      "instancesFormat": "jsonl",
      "gcsSource": {
        "uris": [
          "gs://migration-ucaip-trainingaip-20210301212135/test.jsonl"
        ]
      }
    },
    "outputConfig": {
      "predictionsFormat": "jsonl",
      "gcsDestination": {
        "outputUriPrefix": "gs://migration-ucaip-trainingaip-20210301212135/batch_output/"
      }
    },
    "dedicatedResources": {
      "machineSpec": {
        "machineType": "n1-standard-2"
      },
      "startingReplicaCount": 1,
      "maxReplicaCount": 1
    }
  }
}

#### 调用

In [None]:
# Create the batch prediction job; `request` is rebound to the returned job
# resource, which the following cells print and read the name from.
request = clients["job"].create_batch_prediction_job(
    parent=PARENT, batch_prediction_job=batch_prediction_job
)

回复

In [None]:
# Print the batch prediction job returned by the create call as JSON.
print(MessageToJson(request.__dict__["_pb"]))

```json
{
  "name": "projects/116273516712/locations/us-central1/batchPredictionJobs/3543215802926759936",
  "displayName": "claritin_20210301212135",
  "model": "projects/116273516712/locations/us-central1/models/5497364624833511424",
  "inputConfig": {
    "instancesFormat": "jsonl",
    "gcsSource": {
      "uris": [
        "gs://migration-ucaip-trainingaip-20210301212135/test.jsonl"
      ]
    }
  },
  "outputConfig": {
    "predictionsFormat": "jsonl",
    "gcsDestination": {
      "outputUriPrefix": "gs://migration-ucaip-trainingaip-20210301212135/batch_output/"
    }
  },
  "state": "JOB_STATE_PENDING",
  "completionStats": {
    "incompleteCount": "-1"
  },
  "createTime": "2021-03-02T01:41:31.919764Z",
  "updateTime": "2021-03-02T01:41:31.919764Z"
}
```

In [None]:
# Fully qualified resource name of the batch prediction job.
batch_job_id = request.name
# Trailing path segment of the resource name: the short numeric ID.
batch_job_short_id = batch_job_id.rsplit("/", 1)[-1]

print(batch_job_id)

### [projects.locations.batchPredictionJobs.get](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.batchPredictionJobs/get) 

### [projects.locations.batchPredictionJobs.get](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.batchPredictionJobs/get)

#### 调用

In [None]:
# Fetch the current state of the batch prediction job by its resource name.
request = clients["job"].get_batch_prediction_job(name=batch_job_id)

#### 回复

In [None]:
# Print the fetched batch prediction job as JSON.
print(MessageToJson(request.__dict__["_pb"]))

*示例输出*：
```
{
  "name": "projects/116273516712/locations/us-central1/batchPredictionJobs/3543215802926759936",
  "displayName": "claritin_20210301212135",
  "model": "projects/116273516712/locations/us-central1/models/5497364624833511424",
  "inputConfig": {
    "instancesFormat": "jsonl",
    "gcsSource": {
      "uris": [
        "gs://migration-ucaip-trainingaip-20210301212135/test.jsonl"
      ]
    }
  },
  "outputConfig": {
    "predictionsFormat": "jsonl",
    "gcsDestination": {
      "outputUriPrefix": "gs://migration-ucaip-trainingaip-20210301212135/batch_output/"
    }
  },
  "state": "JOB_STATE_PENDING",
  "completionStats": {
    "incompleteCount": "-1"
  },
  "createTime": "2021-03-02T01:41:31.919764Z",
  "updateTime": "2021-03-02T01:41:31.919764Z"
}
```

In [None]:
def get_latest_predictions(gcs_out_dir):
    """ Get the latest prediction subfolder using the timestamp in the subfolder name"""
    folders = !gsutil ls $gcs_out_dir
    latest = ""
    for folder in folders:
        subfolder = folder.split("/")[-2]
        if subfolder.startswith("prediction-"):
            if subfolder > latest:
                latest = folder[:-1]
    return latest


while True:
    response = clients["job"].get_batch_prediction_job(name=batch_job_id)
    if response.state != aip.JobState.JOB_STATE_SUCCEEDED:
        print("The job has not completed:", response.state)
        if response.state == aip.JobState.JOB_STATE_FAILED:
            break
    else:
        folder = get_latest_predictions(
            response.output_config.gcs_destination.output_uri_prefix
        )
        ! gsutil ls $folder/prediction*.jsonl

        ! gsutil cat $folder/prediction*.jsonl
        break
    time.sleep(60)

示例输出：
```
gs://migration-ucaip-trainingaip-20210301212135/batch_output/prediction-claritin_20210301212135-2021-03-02T01:41:31.705301Z/predictions_00001.jsonl
{"instance":{"content":"gs://migration-ucaip-trainingaip-20210301212135/test.txt","mimeType":"text/plain"},"prediction":{"sentiment":2}}
```

## 进行在线预测

### 为在线预测准备数据项

In [None]:
test_data = ! gsutil cat $IMPORT_FILE | head -n1

test_item = str(test_data[0]).split(",")[1]
test_label = str(test_data[0]).split(",")[2]

print((test_item, test_label))

### [projects.locations.endpoints.create](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.endpoints/create) 
### [projects.locations.endpoints.create](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.endpoints/create)

请求

In [None]:
# Endpoint resource: only a timestamped display name is set.
endpoint = dict(display_name="claritin_" + TIMESTAMP)

# Build the request object only to preview it as JSON; nothing is sent here.
preview_request = aip.CreateEndpointRequest(parent=PARENT, endpoint=endpoint)
print(MessageToJson(preview_request.__dict__["_pb"]))

*示例输出*：
```
{
  "parent": "projects/migration-ucaip-training/locations/us-central1",
  "endpoint": {
    "displayName": "claritin_20210301212135"
  }
}
```

#### 调用

In [None]:
# Send the endpoint creation request; `request` holds the returned operation,
# whose result is read in the next cell.
request = clients["endpoint"].create_endpoint(parent=PARENT, endpoint=endpoint)

#### 回复

In [None]:
# Obtain the operation's result (the created endpoint) and print it as JSON.
result = request.result()

print(MessageToJson(result.__dict__["_pb"]))

*示例输出*：
```
{
  "name": "projects/116273516712/locations/us-central1/endpoints/45845236831748096"
}
```

In [None]:
# Fully qualified resource name of the endpoint.
endpoint_id = result.name
# Trailing path segment of the resource name: the short numeric ID.
endpoint_short_id = endpoint_id.rsplit("/", 1)[-1]

print(endpoint_id)

### [projects.locations.endpoints.deployModel](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.endpoints/deployModel)

### [项目.位置.端点.部署模型](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.endpoints/deployModel)

请求

In [None]:
# Deployment spec: the trained model served by exactly one replica.
deployed_model = dict(
    model=model_id,
    display_name="claritin_" + TIMESTAMP,
    automatic_resources={"min_replica_count": 1, "max_replica_count": 1},
)

# Send 100% of the traffic to key "0" (presumably the placeholder for the
# model being deployed by this request; verify against the deployModel
# reference).
traffic_split = {"0": 100}

# Build the request object only to preview it as JSON; nothing is sent here.
preview_request = aip.DeployModelRequest(
    endpoint=endpoint_id,
    deployed_model=deployed_model,
    traffic_split=traffic_split,
)
print(MessageToJson(preview_request.__dict__["_pb"]))

*示例输出*：
```
{
  "endpoint": "projects/116273516712/locations/us-central1/endpoints/45845236831748096",
  "deployedModel": {
    "model": "projects/116273516712/locations/us-central1/models/5497364624833511424",
    "displayName": "claritin_20210301212135",
    "automaticResources": {
      "minReplicaCount": 1,
      "maxReplicaCount": 1
    }
  },
  "trafficSplit": {
    "0": 100
  }
}
```

#### 调用

In [None]:
# Deploy the model to the endpoint; `request` holds the returned operation,
# whose result is read in the next cell.
request = clients["endpoint"].deploy_model(
    endpoint=endpoint_id, deployed_model=deployed_model, traffic_split=traffic_split
)

回复

In [None]:
# Obtain the deployment operation's result and print it as JSON.
result = request.result()

print(MessageToJson(result.__dict__["_pb"]))

*示例输出*：
```
{
  "deployedModel": {
    "id": "6669232913810194432"
  }
}
```

In [None]:
# The unique ID for the deployed model
# (needed later to undeploy the model from the endpoint).
deployed_model_id = result.deployed_model.id

print(deployed_model_id)

### [projects.locations.endpoints.predict](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.endpoints/predict)

### [projects.locations.endpoints.predict](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.endpoints/predict)

#### 请求

In [None]:
# Each instance is a dict carrying the text to score; convert every one to a
# protobuf Value, as required for the request's repeated `instances` field.
instances_list = [{"content": test_item}]
instances = [json_format.ParseDict(s, Value()) for s in instances_list]

request = aip.PredictRequest(endpoint=endpoint_id)
# extend(), not append(): append() would add the whole list as ONE nested
# instance, so the preview would not match the request that is actually sent.
request.instances.extend(instances)

print(MessageToJson(request.__dict__["_pb"]))

{
  "endpoint": "projects/116273516712/locations/us-central1/endpoints/45845236831748096",
  "instances": [
    [
      {
        "content": "@freewrytin 上帝对Clartin 太好了"
      }
    ]
  ]
}

#### 调用

In [None]:
# Send the online prediction request; note that `request` is rebound to the
# prediction response, which the next cell prints.
request = clients["prediction"].predict(endpoint=endpoint_id, instances=instances)

#### 回复

In [None]:
# Print the prediction response as JSON.
print(MessageToJson(request.__dict__["_pb"]))

*示例输出*：
```
{
  "predictions": [
    {
      "sentiment": 2.0
    }
  ],
  "deployedModelId": "6669232913810194432"
}
``` 

*示例输出*：
```
{
  "predictions": [
    {
      "sentiment": 2.0
    }
  ],
  "deployedModelId": "6669232913810194432"
}
```

### [projects.locations.endpoints.undeployModel](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.endpoints/undeployModel)

### [projects.locations.endpoints.undeployModel](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.endpoints/undeployModel)

#### 调用

In [None]:
# Undeploy the model from the endpoint, passing an empty traffic split;
# `request` holds the returned operation, whose result is read in the next cell.
request = clients["endpoint"].undeploy_model(
    endpoint=endpoint_id, deployed_model_id=deployed_model_id, traffic_split={}
)

#### 回复

In [None]:
# Obtain the undeploy operation's result and print it as JSON.
result = request.result()

print(MessageToJson(result.__dict__["_pb"]))

*示例输出*：
```
{}
```

## 清理工作

要清理此项目中使用的所有GCP资源，您可以删除用于本教程的GCP项目。

否则，您可以删除本教程中创建的各个资源。

In [None]:
delete_dataset = True
delete_model = True
delete_endpoint = True
delete_pipeline = True
delete_batchjob = True
delete_bucket = True

# Delete the dataset using the Vertex AI fully qualified identifier for the dataset
try:
    if delete_dataset:
        clients["dataset"].delete_dataset(name=dataset_id)
except Exception as e:
    print(e)

# Delete the model using the Vertex AI fully qualified identifier for the model
try:
    if delete_model:
        clients["model"].delete_model(name=model_id)
except Exception as e:
    print(e)

# Delete the endpoint using the Vertex AI fully qualified identifier for the endpoint
try:
    if delete_endpoint:
        clients["endpoint"].delete_endpoint(name=endpoint_id)
except Exception as e:
    print(e)

# Delete the training pipeline using the Vertex AI fully qualified identifier for the training pipeline
try:
    if delete_pipeline:
        clients["pipeline"].delete_training_pipeline(name=training_pipeline_id)
except Exception as e:
    print(e)

# Delete the batch job using the Vertex AI fully qualified identifier for the batch job
try:
    if delete_batchjob:
        clients["job"].delete_batch_prediction_job(name=batch_job_id)
except Exception as e:
    print(e)

if delete_bucket and "BUCKET_NAME" in globals():
    ! gsutil rm -r gs://$BUCKET_NAME