In [None]:
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex SDK：使用预建容器（以前称为托管运行时）训练和部署XGBoost模型。

## 安装

安装Google *云存储*库。

In [None]:
# Install the Cloud Storage client library into the notebook environment.
! pip3 install google-cloud-storage

### 重新启动内核

安装了Vertex SDK和Google *cloud-storage*之后，您需要重新启动笔记本内核，以便它可以找到这些软件包。

In [None]:
import os

# Restart the kernel so that freshly installed packages can be imported.
# The restart is skipped when the AUTORUN environment variable is set.
if not os.getenv("AUTORUN"):
    # Automatically restart kernel after installs
    import IPython

    app = IPython.Application.instance()
    # The True argument asks the kernel to restart rather than just stop.
    app.kernel.do_shutdown(True)

## 开始之前

### GPU运行时

*如果有GPU运行时选项，请确保在GPU运行时中运行此笔记本。在Colab中，选择* **Runtime > Change Runtime Type > GPU**

### 设置您的GCP项目

**无论您的笔记本环境如何，以下步骤都是必需的。**

1. [选择或创建一个GCP项目](https://console.cloud.google.com/cloud-resource-manager)。当您首次创建账号时，您将获得$300的免费信用用于计算/存储成本。

2. [确保为您的项目启用了计费。](https://cloud.google.com/billing/docs/how-to/modify-project)

3. [启用Vertex API和Compute Engine API。](https://console.cloud.google.com/flows/enableapi?apiid=ml.googleapis.com,compute_component)

4. [Google Cloud SDK](https://cloud.google.com/sdk)已经安装在Google Cloud笔记本上。

5. 在下面的单元格中输入您的项目ID。然后运行该单元格，以确保Cloud SDK为本笔记本中的所有命令使用正确的项目。

**注意**：Jupyter以`!`开头的行作为shell命令运行，它会将`$`开头的Python变量插入这些命令中。

In [None]:
# Your Google Cloud project ID. Leave the placeholder unchanged to have the
# next cell read the project from the gcloud configuration instead.
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}

In [None]:
if PROJECT_ID == "" or PROJECT_ID is None or PROJECT_ID == "[your-project-id]":
    # Get your GCP project id from gcloud
    shell_output = !gcloud config list --format 'value(core.project)' 2>/dev/null
    PROJECT_ID = shell_output[0]
    print("Project ID:", PROJECT_ID)

In [None]:
# Make PROJECT_ID the active project in the gcloud configuration.
! gcloud config set project $PROJECT_ID

#### 区域

您也可以更改 `REGION` 变量，该变量在本笔记本的其余部分中使用。以下是支持 Vertex 的区域。我们建议在可能的情况下，选择离您最近的区域。

- 美洲：`us-central1`
- 欧洲：`europe-west4`
- 亚太：`asia-east1`

您不能使用多区域存储桶进行 Vertex 训练。并非所有区域都支持所有 Vertex 服务。有关每个区域的最新支持情况，请参见[Vertex 服务的区域支持](https://cloud.google.com/vertex-ai/docs/general/locations)。

In [None]:
# Region used when creating the bucket and submitting the training job.
REGION = "us-central1"  # @param {type: "string"}

#### 时间戳

如果您正在进行在线教程会话，您可能会使用共享的测试账户或项目。为了避免资源创建时用户之间的名称冲突，您为每个实例会话创建一个时间戳，并附加在将在此教程中创建的资源的名称上。

In [None]:
from datetime import datetime

# Session-unique suffix (YYYYmmddHHMMSS) appended to the names of the
# resources created in this tutorial to avoid collisions between users.
TIMESTAMP = format(datetime.now(), "%Y%m%d%H%M%S")

### 验证您的GCP账号

**如果您正在使用Google Cloud笔记本**，您的环境已经验证通过。请跳过此步骤。

*注意：如果您正在使用Vertex笔记本并运行此单元格，则该单元格会知道跳过执行验证步骤。*

In [None]:
import os
import sys

# Authenticate to Google Cloud unless the notebook already runs on a managed
# Google Cloud notebook instance.
#
# If you are running this notebook in Colab, run this cell and follow the
# instructions to authenticate your Google Cloud account. This provides access
# to your Cloud Storage bucket and lets you submit training jobs and prediction
# requests.

# If on Vertex, then don't execute this code: the presence of the metadata
# file below is the marker used to skip authentication entirely.
if not os.path.exists("/opt/deeplearning/metadata/env_version"):
    if "google.colab" in sys.modules:
        from google.colab import auth as google_auth

        google_auth.authenticate_user()

    # If you are running this tutorial in a notebook locally, replace the string
    # below with the path to your service account key and run this cell to
    # authenticate your Google Cloud account.
    else:
        %env GOOGLE_APPLICATION_CREDENTIALS your_path_to_credentials.json

    # Log in to your account on Google Cloud
    ! gcloud auth login

### 创建一个云存储桶

**无论您使用何种笔记本环境，以下步骤都是必须的。**

本教程旨在使用公共云存储桶中的训练数据以及本地云存储桶进行批量预测。您也可以选择使用您自己存储在本地云存储桶中的训练数据。

在下面设置您的云存储桶的名称。它必须在所有云存储桶中唯一。

In [None]:
# Name of your Cloud Storage bucket. Leave the placeholder unchanged to have
# the next cell generate a name from the project ID and the timestamp.
BUCKET_NAME = "[your-bucket-name]"  # @param {type:"string"}

In [None]:
# No bucket name was entered (empty, None or the untouched placeholder):
# derive one from the project ID and the session timestamp.
if BUCKET_NAME in ("", None, "[your-bucket-name]"):
    BUCKET_NAME = "".join((PROJECT_ID, "aip-", TIMESTAMP))

只有当您的存储桶尚不存在时才运行以下单元格以创建您的云存储存储桶。

In [None]:
# Create the bucket in the selected region.
! gsutil mb -l $REGION gs://$BUCKET_NAME

最后，通过检查云存储桶的内容来验证对其的访问权限：

In [None]:
# List the bucket contents to confirm that the bucket is accessible.
! gsutil ls -al gs://$BUCKET_NAME

### 设置变量

接下来，设置一些在教程中使用的变量。
### 导入库并定义常量

#### 导入 Vertex SDK

将 Vertex SDK 导入我们的 Python 环境。

In [None]:
import json
import time

from googleapiclient import discovery

#### Vertex 常量

为 Vertex 设置以下常量：

- `PARENT`：数据集、模型和端点资源的 Vertex 位置根路径。

In [None]:
# Location root path ("projects/<project>/locations/<region>") for your
# dataset, model and endpoint resources
PARENT = "".join(("projects/", PROJECT_ID, "/locations/", REGION))

客户端

Vertex SDK 采用客户端/服务器模型。您自己（Python 脚本）将创建一个客户端，向服务器（Vertex）发送请求并从服务器接收响应。

在本教程中，您将使用多个客户端，因此请提前设置好它们。

In [None]:
# Build a discovery-based API client for the "ml" service, version "v1".
# Every request in the rest of the notebook is created from this client.
client = discovery.build("ml", "v1")

## 准备训练脚本

### 组装软件包

In [None]:
# Assemble a minimal Python source package under ./custom that holds the
# training script. Any previous copy of the folder is removed first.

# Make folder for python training script
! rm -rf custom
! mkdir custom

# Add package information
! touch custom/README.md

# Each file's content is held in a Python string and written out with echo;
# IPython expands $name to the value of the Python variable.
setup_cfg = "[egg_info]\n\
tag_build =\n\
tag_date = 0"
! echo "$setup_cfg" > custom/setup.cfg

# setup.py declares no install requirements and packages whatever
# setuptools.find_packages() discovers.
setup_py = "import setuptools\n\
setuptools.setup(\n\
    install_requires=[\n\
    ],\n\
    packages=setuptools.find_packages())"
! echo "$setup_py" > custom/setup.py

# Package metadata
pkg_info = "Metadata-Version: 1.0\n\
Name: Custom XGBoost Iris\n\
Version: 0.0.0\n\
Summary: Demonstration training script\n\
Home-page: www.google.com\n\
Author: Google\n\
Author-email: aferlitsch@google.com\n\
License: Public\n\
Description: Demo\n\
Platform: Vertex AI"
! echo "$pkg_info" > custom/PKG-INFO

# Make the training subfolder; the empty __init__.py makes it a package
! mkdir custom/trainer
! touch custom/trainer/__init__.py

### Task.py的内容

In [None]:
%%writefile custom/trainer/task.py
# Single Instance Training for Iris

import datetime
import os
import subprocess
import sys
import pandas as pd
import xgboost as xgb

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--model-dir', dest='model_dir',
                    default=os.getenv('AIP_MODEL_DIR'), type=str, help='Model dir.')
args = parser.parse_args()

# Download data
iris_data_filename = 'iris_data.csv'
iris_target_filename = 'iris_target.csv'
data_dir = 'gs://cloud-samples-data/ai-platform/iris'

# gsutil outputs everything to stderr so we need to divert it to stdout.
subprocess.check_call(['gsutil', 'cp', os.path.join(data_dir,
                                                    iris_data_filename),
                       iris_data_filename], stderr=sys.stdout)
subprocess.check_call(['gsutil', 'cp', os.path.join(data_dir,
                                                    iris_target_filename),
                       iris_target_filename], stderr=sys.stdout)


# Load data into pandas, then use `.values` to get NumPy arrays
iris_data = pd.read_csv(iris_data_filename).values
iris_target = pd.read_csv(iris_target_filename).values

# Convert one-column 2D array into 1D array for use with XGBoost
iris_target = iris_target.reshape((iris_target.size,))


# Load data into DMatrix object
dtrain = xgb.DMatrix(iris_data, label=iris_target)

# Train XGBoost model
bst = xgb.train({}, dtrain, 20)

# Export the classifier to a file
model_filename = 'model.bst'
bst.save_model(model_filename)


# Upload the saved model file to Cloud Storage
gcs_model_path = os.path.join(args.model_dir, model_filename)
subprocess.check_call(['gsutil', 'cp', model_filename, gcs_model_path],
    stderr=sys.stdout)


### 将训练脚本存储在您的云存储桶中

In [None]:
# Remove archives left over from a previous run, pack the custom folder into
# a gzipped tarball and copy it to the bucket as iris.tar.gz.
! rm -f custom.tar custom.tar.gz
! tar cvf custom.tar custom
! gzip custom.tar
! gsutil cp custom.tar.gz gs://$BUCKET_NAME/iris.tar.gz

## 训练模型

### [projects.jobs.create](https://cloud.google.com/ai-platform/training/docs/reference/rest/v1/projects.jobs/create)

调用 `projects.jobs.create` 方法提交自定义训练作业。

#### 请求

In [None]:
# Job name made unique for this session by the timestamp suffix
JOB_NAME = "custom_job_XGB" + TIMESTAMP

# Training configuration: which package and module to run, the arguments
# passed to the module, and the region and runtime to use.
training_input = {
    "scaleTier": "BASIC",
    "packageUris": ["gs://" + BUCKET_NAME + "/iris.tar.gz"],
    "pythonModule": "trainer.task",
    # task.py copies the trained model into this directory
    "args": ["--model-dir=" + "gs://{}/{}".format(BUCKET_NAME, JOB_NAME)],
    "region": REGION,
    "runtimeVersion": "2.4",
    "pythonVersion": "3.7",
}

body = {"jobId": JOB_NAME, "trainingInput": training_input}

# Preview: attach the body to a request built without one and print the
# request as JSON for inspection.
request = (
    client.projects()
    .jobs()
    .create(parent="projects/" + PROJECT_ID)
)
request.body = body

print(json.dumps(json.loads(request.to_json()), indent=2))

# Build the request that is actually executed, with the body passed directly
request = (
    client.projects()
    .jobs()
    .create(parent="projects/" + PROJECT_ID, body=body)
)

{
  "uri": "https://ml.googleapis.com/v1/projects/migration-ucaip-training/jobs?alt=json",
  "method": "POST",
  "body": {
    "jobId": "custom_job_XGB20210302154841",
    "trainingInput": {
      "scaleTier": "BASIC",
      "packageUris": [
        "gs://migration-ucaip-trainingaip-20210302154841/iris.tar.gz"
      ],
      "pythonModule": "trainer.task",
      "args": [
        "--model-dir=gs://migration-ucaip-trainingaip-20210302154841/custom_job_XGB20210302154841"
      ],
      "region": "us-central1",
      "runtimeVersion": "2.4",
      "pythonVersion": "3.7"
    }
  },
  "headers": {
    "accept": "application/json",
    "accept-encoding": "gzip, deflate",
    "user-agent": "(gzip)",
    "x-goog-api-client": "gdcl/1.12.8 gl-python/3.7.8"
  },
  "methodId": "ml.projects.jobs.create",
  "resumable": null,
  "response_callbacks": [],
  "_in_error_state": false,
  "body_size": 0,
  "resumable_uri": null,
  "resumable_progress": 0
}

#### 调用

In [None]:
# Send the job-creation request and keep the service's response.
result = request.execute()

#### 响应

In [None]:
# Pretty-print the response as indented JSON.
print(json.dumps(result, indent=2))

**示例输出**：
```
{
  "jobId": "custom_job_XGB20210302154841",
  "trainingInput": {
    "packageUris": [
      "gs://migration-ucaip-trainingaip-20210302154841/iris.tar.gz"
    ],
    "pythonModule": "trainer.task",
    "args": [
      "--model-dir=gs://migration-ucaip-trainingaip-20210302154841/custom_job_XGB20210302154841"
    ],
    "region": "us-central1",
    "runtimeVersion": "2.4",
    "pythonVersion": "3.7"
  },
  "createTime": "2021-03-02T15:50:12Z",
  "state": "QUEUED",
  "trainingOutput": {},
  "etag": "PmcK2JEDnJM="
}
```

In [None]:
# The short ID of the custom training job, as returned by the service
custom_training_short_id = result["jobId"]
# The fully qualified job name: projects/<project>/jobs/<job id>
custom_training_id = "projects/" + PROJECT_ID + "/jobs/" + custom_training_short_id

print(custom_training_id)

### [projects.jobs.get](https://cloud.google.com/ai-platform/training/docs/reference/rest/v1/projects.jobs/get)

### [projects.jobs.get](https://cloud.google.com/ai-platform/training/docs/reference/rest/v1/projects.jobs/get)

#### 调用

In [None]:
# Look up the training job by its fully qualified name.
request = client.projects().jobs().get(name=custom_training_id)

# Send the request and keep the service's response.
result = request.execute()

#### 响应

In [None]:
# Pretty-print the job description as indented JSON.
print(json.dumps(result, indent=2))

*示例输出*：
```
{
  "jobId": "custom_job_XGB20210302154841",
  "trainingInput": {
    "packageUris": [
      "gs://migration-ucaip-trainingaip-20210302154841/iris.tar.gz"
    ],
    "pythonModule": "trainer.task",
    "args": [
      "--model-dir=gs://migration-ucaip-trainingaip-20210302154841/custom_job_XGB20210302154841"
    ],
    "region": "us-central1",
    "runtimeVersion": "2.4",
    "pythonVersion": "3.7"
  },
  "createTime": "2021-03-02T15:50:12Z",
  "state": "PREPARING",
  "trainingOutput": {},
  "etag": "L+085Kgm1Wo="
}
```

In [None]:
# Poll the training job once a minute until it reaches a terminal state.
# CANCELLED is treated as terminal alongside FAILED; otherwise a cancelled
# job would keep this loop polling forever.
while True:
    response = client.projects().jobs().get(name=custom_training_id).execute()

    if response["state"] == "SUCCEEDED":
        break

    print("Training job has not completed:", response["state"])
    if response["state"] in ("FAILED", "CANCELLED"):
        # Show the reason reported by the service, when there is one
        if response.get("errorMessage"):
            print("Error message:", response["errorMessage"])
        break
    time.sleep(60)

# model artifact output directory on Google Cloud Storage, recovered from the
# "--model-dir=<uri>" argument that was passed to the training job
model_artifact_dir = response["trainingInput"]["args"][0].split("=")[-1]
print("artifact location  " + model_artifact_dir)

## 部署模型

### [projects.models.create](https://cloud.google.com/ai-platform/prediction/docs/reference/rest/v1/projects.models/create)

### [projects.models.create](https://cloud.google.com/ai-platform/prediction/docs/reference/rest/v1/projects.models/create)

#### 请求

In [None]:
# Resource body for the new model; the name carries the session timestamp
body = {"name": "custom_job_XGB" + TIMESTAMP}

# Preview: attach a JSON round-tripped copy of the body to a request built
# without one and print the request as JSON for inspection.
request = (
    client.projects()
    .models()
    .create(parent="projects/" + PROJECT_ID)
)
request.body = json.loads(json.dumps(body))

print(json.dumps(json.loads(request.to_json()), indent=2))

# Build the request that is actually executed, with the body passed directly
request = (
    client.projects()
    .models()
    .create(parent="projects/" + PROJECT_ID, body=body)
)

{
  "uri": "https://ml.googleapis.com/v1/projects/migration-ucaip-training/models?alt=json",
  "method": "POST",
  "body": {
    "name": "custom_job_XGB20210302154841"
  },
  "headers": {
    "accept": "application/json",
    "accept-encoding": "gzip, deflate",
    "user-agent": "(gzip)",
    "x-goog-api-client": "gdcl/1.12.8 gl-python/3.7.8"
  },
  "methodId": "ml.projects.models.create",
  "resumable": null,
  "response_callbacks": [],
  "_in_error_state": false,
  "body_size": 0,
  "resumable_uri": null,
  "resumable_progress": 0
}

#### 调用

In [None]:
# Send the model-creation request and keep the service's response.
result = request.execute()

#### 响应

In [None]:
# Pretty-print the created model as indented JSON.
print(json.dumps(result, indent=2))

*示例输出*：
```
{
  "name": "projects/migration-ucaip-training/models/custom_job_XGB20210302154841",
  "regions": [
    "us-central1"
  ],
  "etag": "4gQZjQgH2sc="
}
```

In [None]:
# Fully qualified model name; used below as the parent of the model version.
model_id = result["name"]

### [projects.models.versions.create](https://cloud.google.com/ai-platform/prediction/docs/reference/rest/v1/projects.models.versions/create)

### [projects.models.versions.create](https://cloud.google.com/ai-platform/prediction/docs/reference/rest/v1/projects.models.versions/create)

#### 请求

In [None]:
# Version resource: points the deployment at the directory holding the
# trained model artifact and selects the serving framework and runtime.
version = {
    "name": "custom_job_XGB" + TIMESTAMP,
    "deploymentUri": model_artifact_dir,
    "runtimeVersion": "2.1",
    "framework": "XGBOOST",
    "pythonVersion": "3.7",
    "machineType": "mls1-c1-m2",
}

# Preview: attach the body to a request built without one and print the
# request as JSON for inspection.
request = client.projects().models().versions().create(parent=model_id)
request.body = version

print(json.dumps(json.loads(request.to_json()), indent=2))

# Build the request that is actually executed, with the body passed directly
request = (
    client.projects()
    .models()
    .versions()
    .create(parent=model_id, body=version)
)

```json
{
  "uri": "https://ml.googleapis.com/v1/projects/migration-ucaip-training/models/custom_job_XGB20210302154841/versions?alt=json",
  "method": "POST",
  "body": {
    "name": "custom_job_XGB20210302154841",
    "deploymentUri": "gs://migration-ucaip-trainingaip-20210302154841/custom_job_XGB20210302154841",
    "runtimeVersion": "2.1",
    "framework": "XGBOOST",
    "pythonVersion": "3.7",
    "machineType": "mls1-c1-m2"
  },
  "headers": {
    "accept": "application/json",
    "accept-encoding": "gzip, deflate",
    "user-agent": "(gzip)",
    "x-goog-api-client": "gdcl/1.12.8 gl-python/3.7.8"
  },
  "methodId": "ml.projects.models.versions.create",
  "resumable": null,
  "response_callbacks": [],
  "_in_error_state": false,
  "body_size": 0,
  "resumable_uri": null,
  "resumable_progress": 0
}
```

#### 调用

In [None]:
# Send the version-creation request and keep the service's response.
result = request.execute()

#### 响应

In [None]:
# Pretty-print the response as indented JSON.
print(json.dumps(result, indent=2))

*示例输出*：
```
{
  "name": "projects/migration-ucaip-training/operations/create_custom_job_XGB20210302154841_custom_job_XGB20210302154841-1614701495149",
  "metadata": {
    "@type": "type.googleapis.com/google.cloud.ml.v1.OperationMetadata",
    "createTime": "2021-03-02T16:11:35Z",
    "operationType": "CREATE_VERSION",
    "modelName": "projects/migration-ucaip-training/models/custom_job_XGB20210302154841",
    "version": {
      "name": "projects/migration-ucaip-training/models/custom_job_XGB20210302154841/versions/custom_job_XGB20210302154841",
      "deploymentUri": "gs://migration-ucaip-trainingaip-20210302154841/custom_job_XGB20210302154841",
      "createTime": "2021-03-02T16:11:35Z",
      "runtimeVersion": "2.1",
      "etag": "t71tF0fa60o=",
      "framework": "XGBOOST",
      "machineType": "mls1-c1-m2",
      "pythonVersion": "3.7"
    }
  }
}
```

In [None]:
# The full unique ID for the model version, read from the version embedded
# in the response's metadata
model_version_name = result["metadata"]["version"]["name"]

print(model_version_name)

In [None]:
# Poll the model version once a minute until it is ready to serve.
# A version that ends up in the FAILED state also stops the loop; otherwise
# the cell would keep polling forever.
while True:
    response = (
        client.projects().models().versions().get(name=model_version_name).execute()
    )
    if response["state"] == "READY":
        print("Model version created.")
        break
    if response["state"] == "FAILED":
        # errorMessage is read with .get() in case the service omits it
        print("Model version creation failed:", response.get("errorMessage"))
        break
    time.sleep(60)

## 进行批量预测

批量预测仅支持Tensorflow。FRAMEWORK_SCIKIT_LEARN目前不可用。

## 进行在线预测

In [None]:
# Two instances to predict on, each a list of four feature values.
INSTANCES = [[1.4, 1.3, 5.1, 2.8], [1.5, 1.2, 4.7, 2.4]]

### 为在线预测准备文件

### [projects.predict](https://cloud.google.com/ai-platform/prediction/docs/reference/rest/v1/projects/predict)

### [projects.predict](https://cloud.google.com/ai-platform/prediction/docs/reference/rest/v1/projects/predict)

#### 请求

In [None]:
# Preview: attach a JSON round-tripped copy of the body to a request built
# without one and print the request as JSON for inspection.
request = client.projects().predict(name=model_version_name)
request.body = json.loads(json.dumps({"instances": INSTANCES}))

print(json.dumps(json.loads(request.to_json()), indent=2))

# Build the request that is actually executed, with the body passed directly
request = client.projects().predict(
    name=model_version_name,
    body={"instances": INSTANCES},
)

{
  "uri": "https://ml.googleapis.com/v1/projects/migration-ucaip-training/models/custom_job_XGB20210302154841/versions/custom_job_XGB20210302154841:predict?alt=json",
  "method": "POST",
  "body": {
    "instances": [
      [
        1.4,
        1.3,
        5.1,
        2.8
      ],
      [
        1.5,
        1.2,
        4.7,
        2.4
      ]
    ]
  },
  "headers": {
    "accept": "application/json",
    "accept-encoding": "gzip, deflate",
    "user-agent": "(gzip)",
    "x-goog-api-client": "gdcl/1.12.8 gl-python/3.7.8"
  },
  "methodId": "ml.projects.predict",
  "resumable": null,
  "response_callbacks": [],
  "_in_error_state": false,
  "body_size": 0,
  "resumable_uri": null,
  "resumable_progress": 0
}

#### 调用

In [None]:
# Send the prediction request and keep the service's response.
result = request.execute()

#### 响应

In [None]:
# Pretty-print the predictions as indented JSON.
print(json.dumps(result, indent=2))

*示例输出*：
```
{
  "predictions": [
    [],
    []
  ]
}
```

### [projects.models.versions.delete](https://cloud.google.com/ai-platform/prediction/docs/reference/rest/v1/projects.models.versions/delete)

### [projects.models.versions.delete](https://cloud.google.com/ai-platform/prediction/docs/reference/rest/v1/projects.models.versions/delete)

#### 请求

In [None]:
# Build the request that deletes the model version; it is sent in the next cell.
request = client.projects().models().versions().delete(name=model_version_name)

#### 调用

In [None]:
# Send the version-deletion request and keep the service's response.
response = request.execute()

#### 响应

In [None]:
# Pretty-print the response as indented JSON.
print(json.dumps(response, indent=2))

{
  "name": "projects/migration-ucaip-training/operations/delete_custom_job_XGB20210302154841_custom_job_XGB20210302154841-1614709380234",
  "metadata": {
    "@type": "type.googleapis.com/google.cloud.ml.v1.OperationMetadata",
    "createTime": "2021-03-02T18:23:00Z",
    "operationType": "DELETE_VERSION",
    "modelName": "projects/migration-ucaip-training/models/custom_job_XGB20210302154841",
    "version": {
      "name": "projects/migration-ucaip-training/models/custom_job_XGB20210302154841/versions/custom_job_XGB20210302154841",
      "deploymentUri": "gs://migration-ucaip-trainingaip-20210302154841/custom_job_XGB20210302154841",
      "createTime": "2021-03-02T16:11:35Z",
      "runtimeVersion": "2.1",
      "state": "READY",
      "etag": "sBx1RZUe3HQ=",
      "framework": "XGBOOST",
      "machineType": "mls1-c1-m2",
      "pythonVersion": "3.7"
    }
  }
}

## 清理

要清理此项目中使用的所有GCP资源，您可以删除用于本教程的GCP项目。

否则，您可以删除在本教程中创建的单个资源。

In [None]:
delete_model = True
delete_bucket = True

# Delete the model using the Vertex fully qualified identifier for the model
try:
    if delete_model:
        client.projects().models().delete(name=model_id)
except Exception as e:
    print(e)

if delete_bucket and "BUCKET_NAME" in globals():
    ! gsutil rm -r gs://$BUCKET_NAME