In [None]:
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# 使用Vertex AI Experiments跟踪、比较和管理实验

<table align="left">
  <td>
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/vertex_ai_experiments_classification.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Colab logo"> 在Colab中运行
    </a>
  </td>
  <td>
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/vertex_ai_experiments_classification.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo">
      在GitHub上查看
    </a>
  </td>
  <td>
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/main/notebooks/vertex_ai_experiments_classification.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo">
      在Vertex AI工作台中打开
    </a>
  </td>                                                                                               
</table>

## 概述

在为问题开发模型时的目标是识别针对特定用例的最佳模型。为此，Vertex AI实验使您能够跟踪、分析、比较和搜索不同的ML框架（例如TensorFlow、PyTorch、scikit-learn）和训练环境。

Vertex AI实验使您能够跟踪

*   实验运行的步骤，例如前处理、训练
*   输入，例如算法、参数、数据集
*   这些步骤的输出，例如模型、检查点、指标

### 目标

在本教程中，您将学习如何使用Vertex AI实验来进行模型实验。

该教程使用以下 Google Cloud ML 服务和资源：

- Cloud 存储
- Vertex AI

执行的步骤包括：

1. 创建实验
2. 创建运行
3. 记录参数
4. 记录指标
5. 跟踪工件和执行
6. 跟踪Vertex AI流水线

### 数据集

该数据集是[心脏病数据集](https://archive.ics.uci.edu/ml/datasets/heart+Disease)。该数据库包含14个属性。

“goal”字段指的是患者是否患有心脏病。在本教程使用的 CSV 文件中，对应的标签列名为 `target`。

### 费用

本教程使用 Google Cloud 的计费组件：

* Vertex AI
* 云存储

了解 [Vertex AI 价格](https://cloud.google.com/vertex-ai/pricing)，
以及 [云存储价格](https://cloud.google.com/storage/pricing)，
并使用 [价格计算器](https://cloud.google.com/products/calculator/)
根据您预计的使用量生成费用估算。

### 设置您的本地开发环境

**如果您使用的是Colab或Vertex AI Workbench笔记本**，您的环境已经满足运行此笔记本的所有要求。您可以跳过此步骤。

注意：此笔记本在以下环境中进行了测试：

* Python版本= 3.9

否则，请确保您的环境符合本笔记本的要求。
您需要以下内容：

* Google Cloud SDK

Google Cloud 指南中的[设置Python开发环境](https://cloud.google.com/python/setup)和 [Jupyter安装指南](https://jupyter.org/install)提供了满足这些要求的详细说明。以下是简要步骤：

1. [安装和初始化Cloud SDK。](https://cloud.google.com/sdk/docs/)

1. [安装Python 3。](https://cloud.google.com/python/setup#installing_python)

1. [安装virtualenv](https://cloud.google.com/python/setup#installing_and_using_virtualenv) 并创建一个使用Python 3的虚拟环境。激活虚拟环境。

1. 要安装Jupyter，在终端中运行 `pip3 install jupyter`。

1. 要启动Jupyter，在终端中运行 `jupyter notebook`。

1. 在Jupyter Notebook仪表板中打开这个笔记本。

## 安装

安装以下所需的包以执行此笔记本。

In [None]:
import os

# The Vertex AI Workbench Notebook product has specific requirements
IS_WORKBENCH_NOTEBOOK = os.getenv("DL_ANACONDA_HOME")
IS_USER_MANAGED_WORKBENCH_NOTEBOOK = os.path.exists(
    "/opt/deeplearning/metadata/env_version"
)

# Vertex AI Notebook requires dependencies to be installed with '--user'
USER_FLAG = ""
if IS_WORKBENCH_NOTEBOOK:
    USER_FLAG = "--user"

! pip3 install --upgrade google-cloud-aiplatform kfp fsspec gcsfs {USER_FLAG} -q --no-warn-conflicts

### 重新启动内核

在安装额外的包之后，您需要重新启动笔记本内核，以便它可以找到这些包。

In [None]:
# Restart the kernel after the installs so the new packages can be found;
# the restart is skipped when IS_TESTING is set
import os

if not os.getenv("IS_TESTING"):
    import IPython

    IPython.Application.instance().kernel.do_shutdown(True)

## 准备开始之前

### 设置您的谷歌云项目

**无论您使用哪种笔记本环境，都需要完成以下步骤。**

1. [选择或创建一个谷歌云项目](https://console.cloud.google.com/cloud-resource-manager)。当您第一次创建账号时，您将获得$300的免费信用额度，用于支付计算/存储成本。

1. [确保为您的项目启用了计费](https://cloud.google.com/billing/docs/how-to/modify-project)。

1. [启用 Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com)。

1. 如果您正在本地运行此笔记本，您需要安装[Cloud SDK](https://cloud.google.com/sdk)。

1. 在下面的单元格中输入您的项目ID。然后运行该单元格，确保Cloud SDK在本笔记本中的所有命令中使用正确的项目。

**注意**：Jupyter会将以`!`为前缀的行视为shell命令，并将以`$`为前缀的Python变量插入这些命令中。

#### 设置您的项目ID

**如果您不知道您的项目ID**，您可以使用`gcloud`命令来获取您的项目ID。

In [None]:
# Google Cloud project ID; if the placeholder is left unchanged, the next cell
# reads the project from the gcloud configuration instead
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}

In [None]:
if PROJECT_ID == "" or PROJECT_ID is None or PROJECT_ID == "[your-project-id]":
    # Get your GCP project id from gcloud
    shell_output = ! gcloud config list --format 'value(core.project)' 2>/dev/null
    PROJECT_ID = shell_output[0]
    print("Project ID:", PROJECT_ID)

In [None]:
# Make PROJECT_ID the active project of the gcloud CLI
! gcloud config set project $PROJECT_ID

#### 地区

您还可以更改“REGION”变量，该变量用于笔记本中的其他操作。以下是Vertex AI支持的地区。建议您选择距离您最近的地区。

- 美洲：`us-central1`
- 欧洲：`europe-west4`
- 亚太地区：`asia-east1`

您不能使用多区域存储桶在 Vertex AI 上进行训练。并非所有地区都支持所有的Vertex AI服务。

了解更多有关[Vertex AI地区](https://cloud.google.com/vertex-ai/docs/general/locations)。

In [None]:
# Region passed to `gsutil mb -l` and to `vertex_ai.init(location=...)` later on
REGION = "[your-region]"  # @param {type: "string"}

# Default used when the placeholder was left unchanged
if REGION == "[your-region]":
    REGION = "us-central1"

#### UUID

如果您正在进行现场教程，您可能正在使用共享测试帐户或项目。为了避免资源名称冲突，您可以为每个实例会话创建一个uuid，并将其附加到您在本教程中创建的资源名称上。

In [None]:
import random
import string


def generate_uuid(length: int = 8) -> str:
    """Return a random suffix made of lowercase letters and digits.

    Args:
        length: number of characters to generate (default 8).
    Returns:
        A string of ``length`` characters drawn from ``a-z`` and ``0-9``.
    """
    alphabet = string.ascii_lowercase + string.digits
    return "".join(random.choices(alphabet, k=length))


# Session suffix appended to resource names created in this tutorial
UUID = generate_uuid()

### 验证您的谷歌云帐号

**如果您正在使用Vertex AI Workbench笔记本**，您的环境已经得到了验证。

**如果您正在使用Colab**，请运行下面的单元格，并按照提示进行帐户oAuth验证。

**否则**，请按照以下步骤进行操作：

1. 在Cloud控制台中，转到[**创建服务帐号密钥**页面](https://console.cloud.google.com/apis/credentials/serviceaccountkey)。

2. 点击**创建服务帐号**。

3. 在**服务帐号名称**字段中输入一个名称，并点击**创建**。

4. 在**将此服务帐号授予对项目的访问权限**部分，点击**角色**下拉列表。在过滤框中输入“Vertex AI”，选择**Vertex AI 管理员**。在过滤框中输入“存储对象管理员”，选择**存储对象管理员**。

5. 点击**创建**。一个包含您密钥的JSON文件将下载到您的本地环境。

6. 在下面的单元格中将您的服务帐号密钥路径作为`GOOGLE_APPLICATION_CREDENTIALS`变量输入，并运行单元格。

In [None]:
# Authenticate your GCP account when running outside Vertex AI Workbench:
#   * Colab: run this cell and follow the interactive prompt.
#   * Local Jupyter: replace the placeholder path below with the path to your
#     service account key before running this cell.
# This provides access to your Cloud Storage bucket and lets you submit
# training jobs and prediction requests.

import os
import sys

IS_COLAB = "google.colab" in sys.modules

# If on Vertex AI Workbench, then don't execute this code
if not os.path.exists("/opt/deeplearning/metadata/env_version") and not os.getenv(
    "DL_ANACONDA_HOME"
):
    if IS_COLAB:
        from google.colab import auth as google_auth

        google_auth.authenticate_user()

    elif not os.getenv("IS_TESTING"):
        # Assign through os.environ instead of `%env NAME 'value'`: the %env
        # magic keeps the quote characters as part of the value, which would
        # leave an invalid key path in the environment.
        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "[your-service-account-key-path]"

#### 获取您的项目编号

现在项目ID已经设置，您可以获得相应的项目编号。

In [None]:
# Ask gcloud for the project number of the project whose ID is PROJECT_ID
shell_output = ! gcloud projects list --filter="PROJECT_ID:'{PROJECT_ID}'" --format='value(PROJECT_NUMBER)'
# The first output line is taken as the project number
PROJECT_NUMBER = shell_output[0]
print("Project Number:", PROJECT_NUMBER)

### 创建一个云存储桶

**无论您使用什么笔记本环境，以下步骤都是必需的。**

在下面设置您的云存储桶的名称。它必须在所有云存储桶中保持唯一。

In [None]:
# Cloud Storage bucket name (without the gs:// scheme) and its derived URI
BUCKET_NAME = "[your-bucket-name]"  # @param {type:"string"}
BUCKET_URI = f"gs://{BUCKET_NAME}"

In [None]:
# Derive a bucket name from the project ID and session UUID when none was supplied
if BUCKET_NAME in ("", None, "[your-bucket-name]"):
    BUCKET_NAME = f"{PROJECT_ID}-aip-{UUID}"
    BUCKET_URI = f"gs://{BUCKET_NAME}"

只有在您的存储桶尚未存在的情况下才运行以下单元格以创建您的云存储存储桶。

In [None]:
# Create the bucket in REGION under PROJECT_ID
! gsutil mb -l $REGION -p $PROJECT_ID $BUCKET_URI

最后，通过检查云存储桶中的内容来验证对其的访问。

In [None]:
# List the bucket contents to confirm that it is accessible
! gsutil ls -al $BUCKET_URI

#### 服务账户

如果你不想使用项目的计算引擎服务账户，请将`SERVICE_ACCOUNT`设置为另一个服务账户ID。

In [None]:
# Service account ID; if the placeholder is left unchanged, the next cell
# looks one up with gcloud
SERVICE_ACCOUNT = "[your-service-account]"  # @param {type:"string"}

In [None]:
if (
    SERVICE_ACCOUNT == ""
    or SERVICE_ACCOUNT is None
    or SERVICE_ACCOUNT == "[your-service-account]"
):
    # Get your service account from gcloud
    if not IS_COLAB:
        shell_output = !gcloud auth list 2>/dev/null
        SERVICE_ACCOUNT = shell_output[2].replace("*", "").strip()

    else:  # IS_COLAB:
        shell_output = ! gcloud projects describe  $PROJECT_ID
        project_number = shell_output[-1].split(":")[1].strip().replace("'", "")
        SERVICE_ACCOUNT = f"{project_number}-compute@developer.gserviceaccount.com"

    print("Service Account:", SERVICE_ACCOUNT)

### 设置服务账号访问权限

运行以下命令，授予您的服务账号对之前创建的存储桶的访问权限，以便读取和写入管道工件。您只需要为每个服务账号运行此步骤一次。

In [None]:
# Allow the service account to create objects in the bucket
! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectCreator $BUCKET_URI

# Allow the service account to read objects in the bucket
! gsutil iam ch serviceAccount:{SERVICE_ACCOUNT}:roles/storage.objectViewer $BUCKET_URI

### 导入库

In [None]:
import kfp.v2.compiler as compiler
import kfp.v2.dsl as dsl
import pandas as pd
import tensorflow as tf
from google.cloud import aiplatform as vertex_ai
from kfp.v2.dsl import Metrics, Model, Output, component
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import IntegerLookup, Normalization, StringLookup

### 定义常数

In [None]:
# Name of the Vertex AI experiment; if the placeholder is left unchanged, the
# next cell generates a name
EXPERIMENT_NAME = "[your-experiment-name]"  # @param {type:"string"}

In [None]:
# Generate an experiment name from the session UUID when none was supplied.
# An empty string is treated as "not supplied", matching the project, bucket
# and service-account cells.
if EXPERIMENT_NAME in ("", None, "[your-experiment-name]"):
    EXPERIMENT_NAME = "my-experiment-" + UUID

### 帮助函数

以下是一些帮助函数：

- `dataframe_to_dataset`用于将Pandas数据框转换为tf.data.Dataset
- `encode_numerical_feature`用于创建一个标准化器并编码一个数值特征。
- `encode_categorical_feature`用于对分类特征进行编码。

In [None]:
def dataframe_to_dataset(dataframe):
    """
    Build a tf.data.Dataset of (features, label) pairs from a Pandas dataframe.
    The "target" column is used as the label; the input dataframe is not modified.
    Args:
        dataframe: Pandas dataframe containing a "target" column
    Returns:
        tf.data.Dataset
    """
    features = dataframe.copy()
    target = features.pop("target")
    return tf.data.Dataset.from_tensor_slices((dict(features), target))


def encode_numerical_feature(feature, name, dataset):
    """
    Normalize a numerical feature with statistics learned from a dataset.
    Args:
        feature: the feature to encode
        name: the name of the feature
        dataset: tf.data.Dataset of (features, label) pairs
    Returns:
        the encoded feature
    """
    # Keep only the requested feature and add a trailing axis to it
    column_ds = dataset.map(lambda x, y: tf.expand_dims(x[name], -1))

    # Fit the Normalization layer on that feature, then apply it
    normalizer = Normalization()
    normalizer.adapt(column_ds)
    return normalizer(feature)


def encode_categorical_feature(feature, name, dataset, is_string):
    """
    Encode a categorical feature with a lookup layer adapted on a dataset.
    Args:
        feature: the feature to encode
        name: the name of the feature
        dataset: tf.data.Dataset of (features, label) pairs
        is_string: whether the feature is a string (StringLookup) or an
            integer (IntegerLookup)
    Returns:
        the encoded feature
    """
    lookup_class = StringLookup if is_string else IntegerLookup
    # "multi_hot" is the current name of the output mode that used to be
    # called "binary"; the old spelling is deprecated.
    lookup = lookup_class(output_mode="multi_hot")

    # Prepare a Dataset that only yields our feature
    feature_ds = dataset.map(lambda x, y: x[name])
    feature_ds = feature_ds.map(lambda x: tf.expand_dims(x, -1))

    # Learn the set of possible values and assign them a fixed integer index
    lookup.adapt(feature_ds)

    # Apply the adapted lookup to the input feature
    encoded_feature = lookup(feature)
    return encoded_feature

### 初始化 Python 的 Vertex AI SDK

为您的项目和相应的存储桶初始化 Python 的 Vertex AI SDK。

In [None]:
# Set the project, region and staging bucket used by the Vertex AI SDK
vertex_ai.init(project=PROJECT_ID, location=REGION, staging_bucket=BUCKET_URI)

### 使用Vertex AI TensorBoard创建TensorBoard实例

您可以通过首先创建一个Vertex AI TensorBoard实例来上传您的TensorBoard日志。

请注意，如果您还没有激活，Vertex AI TensorBoard将向每个唯一活动用户收取每月300美元的费用。

了解有关[TensorBoard概述](https://cloud.google.com/vertex-ai/docs/experiments/tensorboard-overview)的更多信息。

In [None]:
# Create the Vertex AI TensorBoard instance that backs the experiment
vertex_ai_tb = vertex_ai.Tensorboard.create()

In [None]:
# Select the experiment and attach the TensorBoard instance to it
vertex_ai.init(experiment=EXPERIMENT_NAME, experiment_tensorboard=vertex_ai_tb)

## 使用 Vertex AI 实验进行模型实验和形式化

Vertex AI 使用户能够跟踪实验运行的步骤（例如，预处理、训练）以及跟踪这些步骤的输入（例如，算法、参数、数据集）和输出（例如，模型、检查点、度量）。 

为了更好地了解参数和度量是如何存储和组织的，以下概念进行了解释：

1. **实验** 描述了将你的运行和创建的工件组合成一个逻辑会话的上下文。例如，在这本笔记本中，你创建一个实验并记录数据到该实验中。

2. **运行** 代表您在执行实验时执行的单个路径/途径。一个运行包括您用作输入或输出的工件以及您在此执行中使用的参数。一个实验可以包含多个运行。

您可以使用 Vertex AI SDK for Python，针对每个实验，在多次实验运行中跟踪本地训练模型的参数和指标。

开始一个实验并运行实验。

您定义了多个实验配置，运行实验并在Vertex AI实验中进行跟踪。

In [None]:
# Start the experiment run; `my_run` is used later to read the time series metrics
RUN_NAME = "run-1"
my_run = vertex_ai.start_run(RUN_NAME)

准备数据进行模型训练

跟踪数据集和数据配置

In [None]:
# Register the source CSV as a dataset artifact so it can be used as the input
# of the data-split execution
file_url = "http://storage.googleapis.com/download.tensorflow.org/data/heart.csv"
dataset_artifact = vertex_ai.Artifact.create(
    schema_title="system.Dataset",
    resource_id=f"{EXPERIMENT_NAME}-heart-data",
    uri=file_url,
    display_name="heart data",
)

开始实验配置

In [None]:
# Data-preparation settings, logged as parameters of the current run
params = {
    "dataset_uri": dataset_artifact.uri,
    "dataset_fraction_split": 0.2,
    "dataset_batch": 32,
    "random_state": 1337,
}

vertex_ai.log_params(params)
vertex_ai.get_experiment_df()

读取数据集

In [None]:
# Load the CSV into a dataframe and preview the first rows
dataframe = pd.read_csv(file_url)
dataframe.head()

创建训练、测试和验证数据集，并在Vertex ML Metadata中跟踪它们的数据谱系。

运行一次执行来准备训练数据，并将结果数据集作为实验谱系的一部分进行跟踪。

In [None]:
# Run the split inside an execution so that the source dataset is recorded as
# its input and the three splits as its outputs.
with vertex_ai.start_execution(
    schema_title="system.ContainerExecution", display_name=f"{RUN_NAME} data split"
) as exc:
    exc.assign_input_artifacts([dataset_artifact])

    # Train, test and validation split.
    # NOTE(review): the test rows are sampled from the validation sample, so
    # the two sets overlap — confirm this is intended.
    val_dataframe = dataframe.sample(
        frac=params["dataset_fraction_split"], random_state=params["random_state"]
    )
    test_dataframe = val_dataframe.sample(
        frac=params["dataset_fraction_split"], random_state=params["random_state"]
    )
    train_dataframe = dataframe.drop(val_dataframe.index)

    train_uri = f"{BUCKET_URI}/{EXPERIMENT_NAME}/{RUN_NAME}/data/heart_train.csv"
    test_uri = f"{BUCKET_URI}/{EXPERIMENT_NAME}/{RUN_NAME}/data/heart_test.csv"
    val_uri = f"{BUCKET_URI}/{EXPERIMENT_NAME}/{RUN_NAME}/data/heart_val.csv"

    # Materialize data: each split goes to its own URI. The test split used
    # to be written to val_uri, which overwrote the validation file and left
    # test_uri without any data.
    train_dataframe.to_csv(train_uri)
    val_dataframe.to_csv(val_uri)
    test_dataframe.to_csv(test_uri)

    # Create Vertex AI Datasets
    train_metadata = vertex_ai.Artifact.create(
        schema_title="system.Dataset", uri=train_uri, display_name="train split"
    )
    val_metadata = vertex_ai.Artifact.create(
        schema_title="system.Dataset", uri=val_uri, display_name="val split"
    )
    test_metadata = vertex_ai.Artifact.create(
        schema_title="system.Dataset", uri=test_uri, display_name="test split"
    )

    exc.assign_output_artifacts([train_metadata, val_metadata, test_metadata])

#### 特征工程

##### 将数据框转换为TF数据集

In [None]:
# Convert each split to a batched tf.data.Dataset.
# NOTE(review): shuffle() is applied after batch(), so it reorders whole
# batches rather than individual rows — confirm this is intended.
train_ds = (
    dataframe_to_dataset(train_dataframe)
    .batch(params["dataset_batch"])
    .shuffle(buffer_size=len(train_dataframe))
)
val_ds = dataframe_to_dataset(val_dataframe).batch(params["dataset_batch"])
test_ds = dataframe_to_dataset(test_dataframe).batch(params["dataset_batch"])

生成特征

此部分中执行的步骤包括分类特征和整数特征的编码。

In [None]:
# Declare one Keras input per column, encode each of them with layers adapted
# on the training dataset, and concatenate the results into `all_features`.

# Categorical features encoded as integers
sex = keras.Input(shape=(1,), name="sex", dtype="int64")
cp = keras.Input(shape=(1,), name="cp", dtype="int64")
fbs = keras.Input(shape=(1,), name="fbs", dtype="int64")
restecg = keras.Input(shape=(1,), name="restecg", dtype="int64")
exang = keras.Input(shape=(1,), name="exang", dtype="int64")
ca = keras.Input(shape=(1,), name="ca", dtype="int64")

# Categorical feature encoded as string
thal = keras.Input(shape=(1,), name="thal", dtype="string")

# Numerical features
age = keras.Input(shape=(1,), name="age")
trestbps = keras.Input(shape=(1,), name="trestbps")
chol = keras.Input(shape=(1,), name="chol")
thalach = keras.Input(shape=(1,), name="thalach")
oldpeak = keras.Input(shape=(1,), name="oldpeak")
slope = keras.Input(shape=(1,), name="slope")

# Inputs of the model, passed to keras.Model when the model is built
all_inputs = [
    sex,
    cp,
    fbs,
    restecg,
    exang,
    ca,
    thal,
    age,
    trestbps,
    chol,
    thalach,
    oldpeak,
    slope,
]

# Integer categorical features
sex_encoded = encode_categorical_feature(sex, "sex", train_ds, False)
cp_encoded = encode_categorical_feature(cp, "cp", train_ds, False)
fbs_encoded = encode_categorical_feature(fbs, "fbs", train_ds, False)
restecg_encoded = encode_categorical_feature(restecg, "restecg", train_ds, False)
exang_encoded = encode_categorical_feature(exang, "exang", train_ds, False)
ca_encoded = encode_categorical_feature(ca, "ca", train_ds, False)

# String categorical features
thal_encoded = encode_categorical_feature(thal, "thal", train_ds, True)

# Numerical features
age_encoded = encode_numerical_feature(age, "age", train_ds)
trestbps_encoded = encode_numerical_feature(trestbps, "trestbps", train_ds)
chol_encoded = encode_numerical_feature(chol, "chol", train_ds)
thalach_encoded = encode_numerical_feature(thalach, "thalach", train_ds)
oldpeak_encoded = encode_numerical_feature(oldpeak, "oldpeak", train_ds)
slope_encoded = encode_numerical_feature(slope, "slope", train_ds)

# Single feature tensor fed to the dense layers; note that slope_encoded sits
# between exang_encoded and ca_encoded here, unlike in `all_inputs`
all_features = layers.concatenate(
    [
        sex_encoded,
        cp_encoded,
        fbs_encoded,
        restecg_encoded,
        exang_encoded,
        slope_encoded,
        ca_encoded,
        thal_encoded,
        age_encoded,
        trestbps_encoded,
        chol_encoded,
        thalach_encoded,
        oldpeak_encoded,
    ]
)

#### 构建并训练模型

###### 跟踪模型配置

In [None]:
# Add the model hyperparameters to `params` and log the full dict to the run
params.update(n_units=32, activation="relu", dropout_rate=0.5)

vertex_ai.log_params(params)
vertex_ai.get_experiment_df()

建立模型

In [None]:
# One hidden Dense layer with dropout, followed by a single sigmoid output unit
x = layers.Dense(params["n_units"], activation=params["activation"])(all_features)
x = layers.Dropout(params["dropout_rate"])(x)
output = layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(all_inputs, output)
model.compile("adam", "binary_crossentropy", metrics=["accuracy"])

训练模型并跟踪指标

In [None]:
# Train inside an execution so that the data splits are recorded as its inputs
# and the saved model as its output.
with vertex_ai.start_execution(
    schema_title="system.ContainerExecution", display_name=f"{RUN_NAME} train"
) as exc:

    exc.assign_input_artifacts([train_metadata, val_metadata, test_metadata])

    # Log the epoch count together with the previously logged parameters
    params.update(epochs=10)
    vertex_ai.log_params(params)
    history = model.fit(train_ds, epochs=params["epochs"], validation_data=val_ds)
    # Log one time series point per epoch from the Keras training history
    for i in range(history.params["epochs"]):
        vertex_ai.log_time_series_metrics(
            dict(
                train_loss=history.history["loss"][i],
                train_accuracy=history.history["accuracy"][i],
                val_loss=history.history["val_loss"][i],
                val_accuracy=history.history["val_accuracy"][i],
            )
        )

    # Summary metrics from the test split.
    # NOTE(review): the key is spelled "accurancy"; the pipeline component logs
    # the same key, so rename both together if the spelling is corrected.
    metrics = model.evaluate(test_ds, return_dict=True)
    vertex_ai.log_metrics(
        dict(
            loss=metrics["loss"],
            accurancy=metrics["accuracy"],
        )
    )

    # Save the model under the run's folder in the bucket and register it
    model_uri = f"{BUCKET_URI}/{EXPERIMENT_NAME}/{RUN_NAME}/model/"
    model.save(model_uri)

    model_metadata = vertex_ai.Artifact.create(
        schema_title="system.Model", uri=model_uri, display_name="trained heart model"
    )

    exc.assign_output_artifacts([model_metadata])

可视化实验结果

绘制指标

In [None]:
# Display the experiment dataframe
vertex_ai.get_experiment_df()

每个时期绘制指标

In [None]:
# Display the time series metrics logged for this run
my_run.get_time_series_data_frame()

###### 在云控制台中可视化实验

In [None]:
# Print the Cloud console URL of the Experiments page for this project
print("Vertex AI Experiments:")
print(
    f"https://console.cloud.google.com/ai/platform/experiments/experiments?folder=&organizationId=&project={PROJECT_ID}"
)

In [None]:
# End the run started with vertex_ai.start_run
vertex_ai.end_run()

将您的实验形式化为一个 Vertex AI 流水线

##### 创建自定义组件

下面的部分创建了一个自定义的训练器组件，将实验代码包装在管道组件中。

In [None]:
# Pipeline component that wraps the experiment code: it loads the CSV, splits
# it, builds the preprocessing layers and the classifier, trains, evaluates on
# the test split and saves the model. The imports and helper functions are
# defined inside the function body.
@component(packages_to_install=["tensorflow", "pandas"])
def tabular_trainer(
    dataset_uri: str,
    dataset_fraction_split: float,
    dataset_batch: int,
    random_state: int,
    n_units: int,
    activation: str,
    dropout_rate: float,
    epochs: int,
    metrics: Output[Metrics],
    model_metadata: Output[Model],
):

    import pandas as pd
    import tensorflow as tf
    from tensorflow import keras
    from tensorflow.keras import layers
    from tensorflow.keras.layers import (IntegerLookup, Normalization,
                                         StringLookup)

    dataframe = pd.read_csv(dataset_uri)

    # Validation sample, test sample drawn from it, and the remaining rows for
    # training.
    # NOTE(review): the test rows are a subset of the validation rows —
    # confirm this is intended.
    val_dataframe = dataframe.sample(
        frac=dataset_fraction_split, random_state=random_state
    )
    test_dataframe = val_dataframe.sample(
        frac=dataset_fraction_split, random_state=random_state
    )
    train_dataframe = dataframe.drop(val_dataframe.index)

    def dataframe_to_dataset(dataframe):
        """
        Convert a Pandas dataframe to a tf.data.Dataset.
        Args:
            dataframe: Pandas dataframe
        Returns:
            tf.data.Dataset
        """
        dataframe = dataframe.copy()
        labels = dataframe.pop("target")
        ds = tf.data.Dataset.from_tensor_slices((dict(dataframe), labels))
        return ds

    train_ds = (
        dataframe_to_dataset(train_dataframe)
        .batch(dataset_batch)
        .shuffle(buffer_size=len(train_dataframe))
    )
    val_ds = dataframe_to_dataset(val_dataframe).batch(dataset_batch)
    test_ds = dataframe_to_dataset(test_dataframe).batch(dataset_batch)

    def encode_numerical_feature(feature, name, dataset):
        """
        Create a normalizer and encode a numerical feature.
        Args:
          feature: the feature to encode
          name: the name of the feature
          dataset: tf.data.Dataset
        Returns:
          the encoded feature
        """
        # Create a Normalization layer for our feature
        normalizer = Normalization()

        # Prepare a Dataset that only yields our feature
        feature_ds = dataset.map(lambda x, y: x[name])
        feature_ds = feature_ds.map(lambda x: tf.expand_dims(x, -1))

        # Learn the statistics of the data
        normalizer.adapt(feature_ds)

        # Normalize the input feature
        encoded_feature = normalizer(feature)
        return encoded_feature

    def encode_categorical_feature(feature, name, dataset, is_string):
        """
        Encode a categorical feature.
        Args:
          feature: the feature to encode
          name: the name of the feature
          dataset: tf.data.Dataset
          is_string: whether the feature is a string
        Returns:
          the encoded feature
        """
        lookup_class = StringLookup if is_string else IntegerLookup
        # "multi_hot" is the current name of the output mode that used to be
        # called "binary"; this component installs an unpinned tensorflow, so
        # the deprecated spelling is avoided.
        lookup = lookup_class(output_mode="multi_hot")

        # Prepare a Dataset that only yields our feature
        feature_ds = dataset.map(lambda x, y: x[name])
        feature_ds = feature_ds.map(lambda x: tf.expand_dims(x, -1))

        # Learn the set of possible values and assign them a fixed integer index
        lookup.adapt(feature_ds)

        # Apply the adapted lookup to the input feature
        encoded_feature = lookup(feature)
        return encoded_feature

    # Categorical features encoded as integers
    sex = keras.Input(shape=(1,), name="sex", dtype="int64")
    cp = keras.Input(shape=(1,), name="cp", dtype="int64")
    fbs = keras.Input(shape=(1,), name="fbs", dtype="int64")
    restecg = keras.Input(shape=(1,), name="restecg", dtype="int64")
    exang = keras.Input(shape=(1,), name="exang", dtype="int64")
    ca = keras.Input(shape=(1,), name="ca", dtype="int64")

    # Categorical feature encoded as string
    thal = keras.Input(shape=(1,), name="thal", dtype="string")

    # Numerical features
    age = keras.Input(shape=(1,), name="age")
    trestbps = keras.Input(shape=(1,), name="trestbps")
    chol = keras.Input(shape=(1,), name="chol")
    thalach = keras.Input(shape=(1,), name="thalach")
    oldpeak = keras.Input(shape=(1,), name="oldpeak")
    slope = keras.Input(shape=(1,), name="slope")

    all_inputs = [
        sex,
        cp,
        fbs,
        restecg,
        exang,
        ca,
        thal,
        age,
        trestbps,
        chol,
        thalach,
        oldpeak,
        slope,
    ]

    # Integer categorical features
    sex_encoded = encode_categorical_feature(sex, "sex", train_ds, False)
    cp_encoded = encode_categorical_feature(cp, "cp", train_ds, False)
    fbs_encoded = encode_categorical_feature(fbs, "fbs", train_ds, False)
    restecg_encoded = encode_categorical_feature(restecg, "restecg", train_ds, False)
    exang_encoded = encode_categorical_feature(exang, "exang", train_ds, False)
    ca_encoded = encode_categorical_feature(ca, "ca", train_ds, False)

    # String categorical features
    thal_encoded = encode_categorical_feature(thal, "thal", train_ds, True)

    # Numerical features
    age_encoded = encode_numerical_feature(age, "age", train_ds)
    trestbps_encoded = encode_numerical_feature(trestbps, "trestbps", train_ds)
    chol_encoded = encode_numerical_feature(chol, "chol", train_ds)
    thalach_encoded = encode_numerical_feature(thalach, "thalach", train_ds)
    oldpeak_encoded = encode_numerical_feature(oldpeak, "oldpeak", train_ds)
    slope_encoded = encode_numerical_feature(slope, "slope", train_ds)

    all_features = layers.concatenate(
        [
            sex_encoded,
            cp_encoded,
            fbs_encoded,
            restecg_encoded,
            exang_encoded,
            slope_encoded,
            ca_encoded,
            thal_encoded,
            age_encoded,
            trestbps_encoded,
            chol_encoded,
            thalach_encoded,
            oldpeak_encoded,
        ]
    )

    # One hidden Dense layer with dropout, followed by a sigmoid output unit
    x = layers.Dense(n_units, activation=activation)(all_features)
    x = layers.Dropout(dropout_rate)(x)
    output = layers.Dense(1, activation="sigmoid")(x)
    model = keras.Model(all_inputs, output)
    model.compile("adam", "binary_crossentropy", metrics=["accuracy"])

    model.fit(train_ds, epochs=epochs, validation_data=val_ds)

    # Store the test metrics on the Metrics output artifact. The "accurancy"
    # spelling matches the key logged by the notebook's local training cell.
    m = model.evaluate(test_ds, return_dict=True)
    metrics.metadata.update(
        dict(
            loss=m["loss"],
            accurancy=m["accuracy"],
        )
    )

    # Save the trained model at the URI of the Model output artifact
    model.save(model_metadata.uri)

构建机器学习管道

构建和编译包括训练组件在内的管道

In [None]:
# Single-step pipeline: every pipeline parameter is forwarded unchanged to the
# tabular_trainer component.
@dsl.pipeline(name="simple-tabular-pipeline")
def pipeline(
    dataset_uri: str,
    dataset_fraction_split: float,
    dataset_batch: int,
    random_state: int,
    n_units: int,
    activation: str,
    dropout_rate: float,
    epochs: int,
):

    tabular_trainer(
        dataset_uri=dataset_uri,
        dataset_fraction_split=dataset_fraction_split,
        dataset_batch=dataset_batch,
        random_state=random_state,
        n_units=n_units,
        activation=activation,
        dropout_rate=dropout_rate,
        epochs=epochs,
    )


# Compile the pipeline to the JSON file that PipelineJob reads as its template
compiler.Compiler().compile(pipeline_func=pipeline, package_path="pipeline.json")

提交实验流水线运行

In [None]:
# Submit the compiled pipeline as part of the experiment, passing the logged
# parameters as the pipeline inputs.
job = vertex_ai.PipelineJob(
    display_name="my pipeline run",
    template_path="pipeline.json",
    # The job ID has to be unique, so the session UUID is appended; a fixed
    # "pipeline-run-1" collides as soon as a second session in the same
    # (possibly shared) project submits this pipeline.
    job_id=f"pipeline-{RUN_NAME}-{UUID}",
    pipeline_root=BUCKET_URI,
    parameter_values={**params},
)

job.submit(experiment=EXPERIMENT_NAME)

检查管道实验状态

In [None]:
# Display the experiment dataframe right after the pipeline job was submitted
vertex_ai.get_experiment_df()

In [None]:
# Block until the pipeline job completes
job.wait()

In [None]:
# Display the experiment dataframe again now that the pipeline job has finished
vertex_ai.get_experiment_df()

在云控制台中进行可视化实验

In [None]:
# Print the Cloud console URL of the Experiments page for this project
print("Vertex AI Experiments:")
print(
    f"https://console.cloud.google.com/ai/platform/experiments/experiments?folder=&organizationId=&project={PROJECT_ID}"
)

## 清理工作

要清理此项目中使用的所有Google Cloud资源，您可以删除用于教程的[Google Cloud项目](https://cloud.google.com/resource-manager/docs/creating-managing-projects#shutting_down_projects)。

否则，您可以删除此教程中创建的各个资源。

In [None]:
# Delete pipeline
job.delete()

# Delete experiment
exp = vertex_ai.Experiment(EXPERIMENT_NAME)
exp.delete(delete_backing_tensorboard_runs=True)

# Delete Tensorboard
vertex_ai_tb.delete()

# Delete Artifacts
artifacts_list = vertex_ai.Artifact.list()
for artifact in artifacts_list:
    vertex_ai.Artifact.delete(artifact)

# Delete Contexts
context_list = vertex_ai.Context.list()
for context in context_list:
    vertex_ai.Context.delete(context)


# Delete Cloud Storage objects that were created
delete_bucket = False

if delete_bucket or os.getenv("IS_TESTING"):
    ! gsutil rm -rf {BUCKET_URI}