In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI自定义模型在线预测的模型监控

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/model_monitoring_v2/model_monitoring_for_custom_model_online_prediction.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Google Colaboratory logo"><br> 在Colab中打开
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fvertex-ai-samples%2Fmain%2Fnotebooks%2Fofficial%2Fmodel_monitoring_v2%2Fmodel_monitoring_for_custom_model_online_prediction.ipynb">
      <img width="32px" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" alt="Google Cloud Colab Enterprise logo"><br> 在Colab Enterprise中打开
    </a>
  </td>    
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/main/notebooks/official/model_monitoring_v2/model_monitoring_for_custom_model_online_prediction.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo"><br> 在工作台中打开
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/model_monitoring_v2/model_monitoring_for_custom_model_online_prediction.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo"><br> 在GitHub上查看
    </a>
  </td>
</table>

# 概述

本教程演示了如何使用 Python 的 Vertex AI SDK 为您的模型设置 Vertex AI Model Monitoring V2。

### 目标

在本教程中，您将完成以下步骤：

- 将自定义模型上传到 Vertex AI 模型注册表。
- 将模型部署到启用了请求-响应日志记录的 Vertex AI 端点。
- 生成一些在线预测流量。
- 创建一个模型监视器。
- 运行一项按需模型监视作业，以分析在线预测流量和训练数据集之间的数据漂移。
- 运行一项按需模型监视作业，以分析在线预测流量和GCS基线数据集之间的特征归因漂移。
- 创建一个时间表，持续运行模型监视作业，以分析在线预测流量和训练数据集之间的数据漂移。


### 成本

在公共预览期间，Vertex AI 模型监视 v2 是免费的，但您仍将收到以下 Google Cloud 服务的账单：

* [BigQuery](https://cloud.google.com/bigquery/pricing)
* [Cloud Storage](https://cloud.google.com/storage/pricing)
* [Vertex AI 在线预测](https://cloud.google.com/vertex-ai/pricing#prediction-prices)
* [Vertex AI 批量解释作业](https://cloud.google.com/vertex-ai/pricing#prediction-prices)（如果您运行特征归因漂移示例）。

## 开始使用

安装 Vertex AI SDK 和其他必需的软件包。

In [None]:
# Install or upgrade the client libraries this notebook uses: BigQuery,
# pandas (with pandas_gbq and pyarrow), TensorFlow Data Validation with its
# visualization extra, and the Vertex AI SDK (google-cloud-aiplatform).
! pip3 install --upgrade --quiet \
    google-cloud-bigquery \
    pandas \
    pandas_gbq \
    pyarrow \
    tensorflow_data_validation[visualization] \
    google-cloud-aiplatform

检查 google-cloud-aiplatform 的版本是否为 1.51.0 或更高版本。

In [None]:
from google.cloud import aiplatform

# Show the installed Vertex AI SDK version as the cell output so it can be
# compared against the minimum version this tutorial asks for.
aiplatform.__version__

重新启动运行时（仅适用于Colab）

要使用新安装的软件包，您必须在Google Colab上重新启动运行时。

In [None]:
import sys

# Only Colab needs an explicit kernel restart to pick up freshly installed
# packages; in any other environment this cell does nothing.
running_in_colab = "google.colab" in sys.modules
if running_in_colab:
    from IPython import Application

    # Ask the running kernel to shut down with restart=True.
    Application.instance().kernel.do_shutdown(True)

<div class="alert alert-block alert-warning"> <b>⚠️ 内核将重新启动。在继续下一步之前，请等待它完成。⚠️</b> </div>

### 在Google Colab上验证您的笔记本环境。

In [None]:
import sys

# Interactive user authentication is only performed when running on Colab.
running_in_colab = "google.colab" in sys.modules
if running_in_colab:
    from google.colab import auth as colab_auth

    colab_auth.authenticate_user()

### 设置Google Cloud项目信息并初始化Vertex AI SDK

要开始使用Vertex AI，您必须拥有一个现有的Google Cloud项目并[启用Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com)。了解更多关于[设置项目和开发环境](https://cloud.google.com/vertex-ai/docs/start/cloud-environment)的信息。

In [None]:
# Google Cloud project and region used by every later cell.
# Replace the placeholder project ID before running.
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}
LOCATION = "us-central1"  # @param {type:"string"}


import os

import vertexai

# Point the gcloud CLI at the same project.
! gcloud config set project $PROJECT_ID
# Also expose the project ID through the GOOGLE_CLOUD_PROJECT environment variable.
os.environ["GOOGLE_CLOUD_PROJECT"] = PROJECT_ID
# Set the gcloud default region for Vertex AI (ai/region).
! gcloud config set ai/region $LOCATION

# Initialize the Vertex AI SDK with the project and location defined above.
vertexai.init(project=PROJECT_ID, location=LOCATION)

## 开始模型监测教程

### 步骤1：创建一个云存储桶

创建一个云存储桶来存储中间产物，如数据集。

In [None]:
# URI of the Cloud Storage bucket that holds intermediate artifacts such as datasets.
# This line only defines the URI; it does not create the bucket.
BUCKET_URI = f"gs://your-bucket-name-{PROJECT_ID}-unique"  # @param {type:"string"}

只有当您的桶不存在时才运行以下单元格以创建您的云存储桶。

In [None]:
# Make the bucket BUCKET_URI in LOCATION under PROJECT_ID (`gsutil mb`).
! gsutil mb -l {LOCATION} -p {PROJECT_ID} {BUCKET_URI}

### 步骤2：在Vertex AI模型注册表中准备一个模型

您可以在Vertex AI模型注册表中注册一个模型及其相关文件，使您能够执行在线提供或批量预测。或者，您可以注册一个引用/占位模型，其中仅包含该模型的名称。
在这个笔记本中，您将注册一个带有模型文件的模型，因为您需要将它部署到端点以提供在线预测。

In [None]:
import google.cloud.aiplatform as aiplatform

# Display name, artifact location and serving container for the churn model.
MODEL_NAME = "churn"
MODEL_PATH = "gs://mco-mm/churn"
IMAGE = "us-docker.pkg.dev/cloud-aiplatform/prediction/tf2-cpu.2-5:latest"

# Register the model (artifacts + serving image) in Vertex AI Model Registry.
upload_options = {
    "display_name": MODEL_NAME,
    "artifact_uri": MODEL_PATH,
    "serving_container_image_uri": IMAGE,
    "sync": True,
}
model = aiplatform.Model.upload(**upload_options)

# The model ID is the final "/"-separated segment of the resource name.
MODEL_ID = model.resource_name.rpartition("/")[2]

### 步骤 3：部署模型到启用请求-响应日志记录的 Vertex AI 端点

创建一个启用日志记录的端点，并将模型部署到该端点。

In [None]:
# Create an endpoint with request-response logging enabled, then deploy the
# uploaded model to it.
import pandas as pd

# A UTC timestamp suffix is used in the endpoint display name and the BigQuery
# dataset name. `pd.Timestamp.utcnow()` is deprecated in recent pandas
# releases; `pd.Timestamp.now(tz="UTC")` produces the same formatted string.
TIMESTAMP = pd.Timestamp.now(tz="UTC").strftime("%Y%m%d%H%M%S")
ENDPOINT_DISPLAY_NAME = f"churn_endpoint_{TIMESTAMP}"
BQ_LOGGING_DATASET = f"churn_production_{TIMESTAMP}"
# BigQuery destination table (bq://project.dataset.table) for logged
# requests and responses.
BQ_LOGGING_TABLE = f"bq://{PROJECT_ID}.{BQ_LOGGING_DATASET}.req_resp"

# Sampling rate 1.0 is passed so that the full traffic is sampled for logging.
endpoint = aiplatform.Endpoint.create(
    display_name=ENDPOINT_DISPLAY_NAME,
    enable_request_response_logging=True,
    request_response_logging_sampling_rate=1.0,
    request_response_logging_bq_destination_table=BQ_LOGGING_TABLE,
)

# Deploy the model to this endpoint on a single n1-standard-4 replica,
# routing 100% of the endpoint's traffic to it.
endpoint.deploy(
    model=model,
    traffic_percentage=100,
    min_replica_count=1,
    max_replica_count=1,
    machine_type="n1-standard-4",
)

运行预测测试

In [None]:
# A single hand-written prediction instance used to smoke-test the endpoint.
# Keys are the model's input feature names.
DEFAULT_INPUT = dict(
    cnt_ad_reward=0,
    cnt_challenge_a_friend=0,
    cnt_completed_5_levels=1,
    cnt_level_complete_quickplay=3,
    cnt_level_end_quickplay=5,
    cnt_level_reset_quickplay=2,
    cnt_level_start_quickplay=6,
    cnt_post_score=34,
    cnt_spend_virtual_currency=0,
    cnt_use_extra_steps=0,
    cnt_user_engagement=120,
    country="Denmark",
    dayofweek=3,
    julianday=254,
    language="da-dk",
    month=9,
    operating_system="IOS",
    user_pseudo_id="104B0770BAE16E8B53DF330C95881893",
)

In [None]:
import pprint as pp

import matplotlib.pyplot as plt

# Smoke-test the deployed endpoint with one instance. Each returned prediction
# is printed and plotted inside the loop, so every prediction is reported and an
# empty prediction list no longer triggers a NameError that would be
# misreported as a failed request.
try:
    resp = endpoint.predict([DEFAULT_INPUT])
    for prediction in resp.predictions:
        vals = prediction["churned_values"]
        probs = prediction["churned_probs"]
        # Print each class value next to its probability.
        for val, prob in zip(vals, probs):
            print(val, prob)
        # One pie chart per prediction; a fresh figure avoids overlaying charts.
        plt.figure()
        plt.pie(probs, labels=vals)
    pp.pprint(resp)
except Exception as ex:
    # Best effort: report the failure without stopping the notebook.
    print("prediction request failed", ex)

发送一些在线预测请求到端点

In [None]:
import sys
import time

from google.cloud import bigquery

# A subset of the training table is replayed as online prediction traffic.
DATASET_BQ_URI = "bq://mco-mm.bqmlga4.train"
GROUND_TRUTH = "churned"

bqclient = bigquery.Client(project=PROJECT_ID)
# Strip the leading "bq://" to obtain the project.dataset.table reference.
table = bigquery.TableReference.from_string(DATASET_BQ_URI[5:])

rows = bqclient.list_rows(table, max_results=1000)
for count, row in enumerate(rows):
    # Build one instance per row: drop the label column and replace NULLs with "".
    instance = {
        key: ("" if value is None else value)
        for key, value in row.items()
        if key != GROUND_TRUTH
    }
    endpoint.predict(instances=[instance])
    # Report progress every 100 requests.
    if count % 100 == 0:
        print(f"Sent: {count} requests")
    # Brief pause between consecutive requests.
    time.sleep(0.01)

检查端点的请求-响应日志记录表。

In [None]:
import pandas as pd

# Table ID without the leading "bq://" scheme.
logging_table_id = BQ_LOGGING_TABLE[5:]

# Fetch the ten most recently logged request/response rows.
query_string = f"SELECT * FROM `{logging_table_id}` ORDER BY logging_time DESC LIMIT 10"
pd.read_gbq(query_string, project_id=PROJECT_ID)

### 步骤4：创建模型监视器

定义模型监控模式

监控模式是模型监视器的必需配置。模式包括输入特征名称、预测输出和标签（如果可用），以及它们各自的数据类型。

**注意：模式仅对AutoML表（回归/分类）是可选的，因为当可用时将会自动获取（如果模型监控无法检索到模式，则您将需要提供自己的模式）。**

In [None]:
from vertexai.resources.preview import ml_monitoring

# (name, data_type) for every monitored input feature, in registration order.
_FEATURE_FIELD_TYPES = [
    ("user_pseudo_id", "string"),
    ("country", "string"),
    ("operating_system", "string"),
    ("cnt_user_engagement", "integer"),
    ("cnt_level_start_quickplay", "integer"),
    ("cnt_level_end_quickplay", "integer"),
    ("cnt_level_complete_quickplay", "integer"),
    ("cnt_level_reset_quickplay", "integer"),
    ("cnt_post_score", "integer"),
    ("cnt_spend_virtual_currency", "integer"),
    ("cnt_ad_reward", "integer"),
    ("cnt_challenge_a_friend", "integer"),
    ("cnt_completed_5_levels", "integer"),
    ("cnt_use_extra_steps", "integer"),
    ("month", "categorical"),
    ("julianday", "integer"),
    ("dayofweek", "integer"),
]

# Monitoring schema: input features, the ground-truth label and the
# prediction output, each with its data type.
MODEL_MONITORING_SCHEMA = ml_monitoring.spec.ModelMonitoringSchema(
    feature_fields=[
        ml_monitoring.spec.FieldSchema(name=field_name, data_type=field_type)
        for field_name, field_type in _FEATURE_FIELD_TYPES
    ],
    ground_truth_fields=[
        ml_monitoring.spec.FieldSchema(name="churned", data_type="categorical")
    ],
    prediction_fields=[
        ml_monitoring.spec.FieldSchema(
            name="predicted_churned", data_type="categorical"
        )
    ],
)

#### （可选）定义训练数据集

训练数据集可以作为基线数据集，用于分析生产环境中的数据漂移。您可以在模型监控中注册训练数据集。

In [None]:
from vertexai.resources.preview import ml_monitoring

# Copy the public training CSV into your own bucket to avoid permission issues.
PUBLIC_TRAINING_DATASET = (
    "gs://cloud-samples-data/vertex-ai/model-monitoring/churn/churn_training.csv"
)
# Destination of the copy inside BUCKET_URI.
TRAINING_URI = f"{BUCKET_URI}/model-monitoring/churn/churn_training.csv"

! gsutil copy $PUBLIC_TRAINING_DATASET $TRAINING_URI

# Describe the copied CSV as a monitoring input so it can be used as the
# training (baseline) dataset.
TRAINING_DATASET = ml_monitoring.spec.MonitoringInput(
    gcs_uri=TRAINING_URI, data_format="csv"
)

创建一个模型监视器

一个模型监控器是一个顶层资源，用于管理您的指标和模型监控作业。

In [None]:
from vertexai.resources.preview import ml_monitoring

# Create the model monitor, the top-level resource that manages the metrics
# and monitoring jobs for one model version.
# The version ID is read from the uploaded model instead of being hard-coded
# to "1", so the monitor always targets the version that was actually deployed.
my_model_monitor = ml_monitoring.ModelMonitor.create(
    project=PROJECT_ID,
    location=LOCATION,
    display_name="churn_model_monitor",
    model_name=model.resource_name,
    model_version_id=model.version_id,
    training_dataset=TRAINING_DATASET,
    model_monitoring_schema=MODEL_MONITORING_SCHEMA,
)
MODEL_MONITOR_ID = my_model_monitor.name
print(f"MODEL MONITOR {MODEL_MONITOR_ID} created.")

### 步骤5：运行一个按需模型监控作业

#### 定义监控目标配置

对于表格模型，模型监控支持以下目标：

- **输入特征漂移检测**

    模型监控提供了针对分类和数值特征类型的漂移分析，支持以下指标：

    - 分类特征：`Jensen Shannon Divergence`、`L Infinity`
    - 数值特征：`Jensen Shannon Divergence`

    您可以通过在`ml_monitoring.spec.DataDriftSpec`规范的`features`字段中指定仅分析感兴趣的特征来选择分析的特征。如果未指定，则将分析模型模式中的所有输入特征。此外，您可以选择为分类或数值特征设置默认阈值，也可以为单个特征指定阈值。如果检测到的漂移超过阈值，将通过电子邮件或其他通知渠道发送警报。

- **预测输出漂移检测**

    与输入特征漂移检测类似，预测输出漂移检测识别预测输出中的数据漂移。

- **特征归因漂移检测**

    模型监控利用Vertex Explainable AI来监视特征归因。可解释的AI使您能够了解每个特征对结果预测的相对贡献。实质上，它评估了每个特征影响的大小。
    您必须使用特征归因目标配置配置`Explanation`规范。

输入特征漂移规范

In [None]:
from vertexai.resources.preview import ml_monitoring

# Per-feature alert thresholds; features not listed here use the defaults below.
FEATURE_THRESHOLDS = dict(country=0.003, cnt_user_engagement=0.004)

# Input feature drift: L-infinity for categorical features and
# Jensen-Shannon divergence for numeric features.
_feature_drift_options = {
    "categorical_metric_type": "l_infinity",
    "numeric_metric_type": "jensen_shannon_divergence",
    "default_categorical_alert_threshold": 0.2,
    "default_numeric_alert_threshold": 0.3,
    "feature_alert_thresholds": FEATURE_THRESHOLDS,
}
FEATURE_DRIFT_SPEC = ml_monitoring.spec.DataDriftSpec(**_feature_drift_options)

预测输出漂移规范

In [None]:
# Prediction output drift: same metric types as the feature drift spec, with a
# 0.1 default alert threshold for both categorical and numeric outputs.
_output_drift_options = {
    "categorical_metric_type": "l_infinity",
    "numeric_metric_type": "jensen_shannon_divergence",
    "default_categorical_alert_threshold": 0.1,
    "default_numeric_alert_threshold": 0.1,
}
PREDICTION_OUTPUT_DRIFT_SPEC = ml_monitoring.spec.DataDriftSpec(**_output_drift_options)

特征归因漂移规范

In [None]:
# Feature attribution drift: cnt_ad_reward gets its own threshold, while the
# default threshold is applied otherwise.
FEATURE_ATTRIBUTION_SPEC = ml_monitoring.spec.FeatureAttributionSpec(
    feature_alert_thresholds=dict(cnt_ad_reward=0.0001),
    default_alert_threshold=0.0003,
)

#### 定义警报通知和指标输出规范

模型监控支持以下通知方法：

*  电子邮件
*  [通知渠道](https://cloud.google.com/monitoring/support/notification-options)
*  [Cloud Logging](https://cloud.google.com/logging/docs)

本笔记本以电子邮件为例。

将生成的指标导出到您指定的 Google Cloud Storage 位置，或者，如果您没有指定位置，Vertex AI 将创建一个默认的桶来使用。

In [None]:
import os

EMAIL = "[your-email-address]"  # @param {type:"string"}
# When the IS_TESTING environment variable is set, use a placeholder address.
if os.environ.get("IS_TESTING"):
    EMAIL = "noreply@google.com"

# Alerts are emailed to the address above.
alert_recipients = [EMAIL]
NOTIFICATION_SPEC = ml_monitoring.spec.NotificationSpec(user_emails=alert_recipients)

# Generated metrics are exported under the notebook's Cloud Storage bucket.
OUTPUT_SPEC = ml_monitoring.spec.OutputSpec(gcs_base_dir=BUCKET_URI)

运行模型监控任务

示例1：特征漂移和预测输出漂移检测，将当前数据与训练数据集进行比较。

In [None]:
import pandas as pd

# UTC timestamp used to give this job a distinct display name.
# `pd.Timestamp.utcnow()` is deprecated in recent pandas releases;
# `pd.Timestamp.now(tz="UTC")` produces the same formatted string.
TIMESTAMP = pd.Timestamp.now(tz="UTC").strftime("%Y%m%d%H%M%S")
JOB_DISPLAY_NAME = f"churn_model_monitoring_job_{TIMESTAMP}"

# Target: the traffic logged by the endpoint. Baseline: the training dataset.
TARGET_DATASET = ml_monitoring.spec.MonitoringInput(endpoints=[endpoint.resource_name])

# On-demand job covering input feature drift and prediction output drift.
model_monitoring_job_1 = my_model_monitor.run(
    display_name=JOB_DISPLAY_NAME,
    baseline_dataset=TRAINING_DATASET,
    target_dataset=TARGET_DATASET,
    tabular_objective_spec=ml_monitoring.spec.TabularObjective(
        # Input feature drift spec.
        feature_drift_spec=FEATURE_DRIFT_SPEC,
        # Prediction output drift spec.
        prediction_output_drift_spec=PREDICTION_OUTPUT_DRIFT_SPEC,
    ),
    notification_spec=NOTIFICATION_SPEC,
    output_spec=OUTPUT_SPEC,
)

示例2：特征归因漂移检测，将当前数据与云存储基准数据集进行比较。

对于特征归因监控，数据集以以下方式发送到Vertex AI批量解释任务中：

* Google Cloud存储-> 直接发送为Vertex AI批量解释任务的输入。
* BigQuery表格-> 直接发送为Vertex AI批量解释任务的输入。
* BigQuery查询-> 不支持。
* Vertex AI批量预测作业-> 批量预测作业的输入用作Vertex AI批量解释任务的输入。
* Vertex AI端点日志记录-> 请求日志记录用作Vertex AI批量解释任务的输入。

检查这些数据集是否符合Vertex AI批量解释任务的要求。

生成用于 Vertex 可解释 AI 的模型元数据
您必须指定要在 Vertex AI 批量解释作业中使用的解释规范。运行以下单元格从导出模型中提取元数据，这些元数据在为预测请求生成解释时是必需的。

In [None]:
from google.cloud.aiplatform_v1beta1.types import (ExplanationMetadata,
                                                   ExplanationParameters,
                                                   ExplanationSpec)

# Modality declared for each model input. The input tensor name is the same as
# the feature name, and every input uses the IDENTITY encoding.
_INPUT_MODALITIES = {
    "cnt_ad_reward": "numeric",
    "cnt_challenge_a_friend": "numeric",
    "cnt_completed_5_levels": "numeric",
    "cnt_level_complete_quickplay": "numeric",
    "cnt_level_end_quickplay": "numeric",
    "cnt_level_reset_quickplay": "numeric",
    "cnt_level_start_quickplay": "numeric",
    "cnt_post_score": "numeric",
    "cnt_spend_virtual_currency": "numeric",
    "cnt_use_extra_steps": "numeric",
    "cnt_user_engagement": "numeric",
    "country": "categorical",
    "dayofweek": "numeric",
    "julianday": "numeric",
    "language": "categorical",
    "month": "numeric",
    "operating_system": "categorical",
    "user_pseudo_id": "categorical",
}

# Explanation spec for feature attribution monitoring: sampled Shapley
# attribution with path_count 2, explaining the "churned_probs" output.
EXPLANATION_SPEC = ExplanationSpec(
    parameters=ExplanationParameters(
        {"sampled_shapley_attribution": {"path_count": 2}}
    ),
    metadata=ExplanationMetadata(
        inputs={
            feature_name: ExplanationMetadata.InputMetadata(
                {
                    "input_tensor_name": feature_name,
                    "encoding": "IDENTITY",
                    "modality": modality,
                }
            )
            for feature_name, modality in _INPUT_MODALITIES.items()
        },
        outputs={
            "churned_probs": ExplanationMetadata.OutputMetadata(
                {"output_tensor_name": "churned_probs"}
            )
        },
    ),
)

In [None]:
# Destination (inside BUCKET_URI) of the JSONL file used as the baseline for
# feature attribution drift.
FEATURE_ATTRIBUTION_BASELINE_DATASET = (
    f"{BUCKET_URI}/model-monitoring/churn/churn_no_ground_truth.jsonl"
)
# Copy the public sample file into your own bucket.
! gsutil cp gs://cloud-samples-data/vertex-ai/model-monitoring/churn/churn_no_ground_truth.jsonl $FEATURE_ATTRIBUTION_BASELINE_DATASET

In [None]:
import pandas as pd
from vertexai.resources.preview import ml_monitoring

# UTC timestamp used to give this job a distinct display name.
# `pd.Timestamp.utcnow()` is deprecated in recent pandas releases;
# `pd.Timestamp.now(tz="UTC")` produces the same formatted string.
TIMESTAMP = pd.Timestamp.now(tz="UTC").strftime("%Y%m%d%H%M%S")
JOB_DISPLAY_NAME = f"churn_model_monitoring_job_{TIMESTAMP}"

# Baseline: the JSONL file copied to Cloud Storage. Target: endpoint traffic.
BASELINE_DATASET = ml_monitoring.spec.MonitoringInput(
    gcs_uri=FEATURE_ATTRIBUTION_BASELINE_DATASET, data_format="jsonl"
)
TARGET_DATASET = ml_monitoring.spec.MonitoringInput(endpoints=[endpoint.resource_name])

# On-demand job covering feature attribution drift only.
model_monitoring_job_2 = my_model_monitor.run(
    display_name=JOB_DISPLAY_NAME,
    baseline_dataset=BASELINE_DATASET,
    target_dataset=TARGET_DATASET,
    tabular_objective_spec=ml_monitoring.spec.TabularObjective(
        # Feature attribution spec.
        feature_attribution_spec=FEATURE_ATTRIBUTION_SPEC
    ),
    # You must have an Explanation spec for feature attribution monitoring.
    # You can specify the explanation spec in the Model, Model monitor, or the Model monitoring job.
    explanation_spec=EXPLANATION_SPEC,
    notification_spec=NOTIFICATION_SPEC,
    output_spec=OUTPUT_SPEC,
)

##### 列出模型监控任务

In [None]:
# List the monitoring jobs of this model monitor; the result is shown as the cell output.
my_model_monitor.list_jobs()

### 步骤 6: 等待模型监控任务运行并验证结果

请通过电子邮件验证结果

一旦模型监控工作开始运行，您将收到以下邮件：

<img src="https://services.google.com/fh/files/misc/create_job_email.png" />

监控工作完成后，如果检测到任何异常情况，您将收到类似以下内容的电子邮件： 

<img src="https://services.google.com/fh/files/misc/job_anomalies_email.png" />

检查监控指标：Google Cloud 控制台

要查看[Google Cloud控制台](https://console.cloud.google.com/vertex-ai/model-monitoring/model-monitors)中的模型监控指标，请转到**Vertex AI**下的**监控**选项卡。

<img src="https://storage.googleapis.com/cmm-public-data/images/endpoint_jobs_details.gif" />

检查监控指标：云存储存储桶

运行以下命令以查看存储在云存储存储桶中的模型监控指标。

In [None]:
# Show the input feature drift statistics produced by the first on-demand job.
# Any error is printed instead of raised so the notebook keeps running
# (presumably useful while the job has not finished yet).
try:
    my_model_monitor.show_feature_drift_stats(model_monitoring_job_1.name)
except Exception as err:
    print(err)

In [None]:
# Show the prediction output drift statistics produced by the first on-demand job.
# Any error is printed instead of raised so the notebook keeps running
# (presumably useful while the job has not finished yet).
try:
    my_model_monitor.show_output_drift_stats(model_monitoring_job_1.name)
except Exception as err:
    print(err)

### 步骤7：安排持续模型监控任务

要设置连续的模型监控，请按照以下示例创建一个时间表。您可以为您的模型监控创建多个时间表。

以下示例监控输入特征和预测输出的漂移。该时间表配置为每小时准点启动模型监控任务，比如在00:00、01:00等。每个任务分析前一个小时窗口内的数据。例如，如果计划在早上6:00进行任务，则会分析从早上5:00到6:00收集的数据。

向端点发送更多流量。

In [None]:
import sys
import time

from google.cloud import bigquery

# A subset of the training table is replayed as online prediction traffic.
DATASET_BQ_URI = "bq://mco-mm.bqmlga4.train"
GROUND_TRUTH = "churned"

bqclient = bigquery.Client(project=PROJECT_ID)
# Strip the leading "bq://" to obtain the project.dataset.table reference.
table = bigquery.TableReference.from_string(DATASET_BQ_URI[5:])
rows = bqclient.list_rows(table, max_results=1000)
for count, row in enumerate(rows):
    # Build one instance per row: drop the label column and replace NULLs with "".
    instance = {
        key: ("" if value is None else value)
        for key, value in row.items()
        if key != GROUND_TRUTH
    }
    endpoint.predict(instances=[instance])
    # Report progress every 100 requests.
    if count % 100 == 0:
        print(f"Sent: {count} requests")
    # Brief pause between consecutive requests.
    time.sleep(0.01)

创建一个时间表

In [None]:
# Cron expression for the schedule: minute 0 of every hour (1:00, 2:00, ...).
CRON = "0 * * * *"  # @param {type:"string"}

In [None]:
# from google.protobuf import timestamp_pb2

SCHEDULE_DISPLAY_NAME = "churn-continous-drift-detection"

# Baseline option 1 (used here): the registered training dataset.
BASELINE_DATASET = TRAINING_DATASET

# Baseline option 2: the same endpoint as the target, with a window and offset.
# BASELINE_DATASET = ml_monitoring.spec.MonitoringInput(
#     endpoints=[endpoint.resource_name],
#     window="1h",
#     offset="1h"
# )

# Baseline option 3: the same endpoint as the target, with an explicit start
# and end time (needs the timestamp_pb2 import above).
# BASELINE_DATASET = ml_monitoring.spec.MonitoringInput(
#     endpoints=[endpoint.resource_name],
#     start_time=timestamp_pb2.Timestamp(seconds=xxx),
#     end_time=timestamp_pb2.Timestamp(seconds=xxx)
# )
# More options are available, please check the `MonitoringInput`.

# Target: the endpoint's logged traffic, limited to a one-hour window.
TARGET_DATASET = ml_monitoring.spec.MonitoringInput(
    window="1h",
    endpoints=[endpoint.resource_name],
)

# Objectives for every scheduled run: input feature drift and prediction output drift.
drift_objective = ml_monitoring.spec.TabularObjective(
    feature_drift_spec=FEATURE_DRIFT_SPEC,
    prediction_output_drift_spec=PREDICTION_OUTPUT_DRIFT_SPEC,
)

# Create the schedule on the model monitor using the CRON expression.
model_monitoring_schedule = my_model_monitor.create_schedule(
    display_name=SCHEDULE_DISPLAY_NAME,
    cron=CRON,
    baseline_dataset=BASELINE_DATASET,
    target_dataset=TARGET_DATASET,
    tabular_objective_spec=drift_objective,
    notification_spec=NOTIFICATION_SPEC,
    output_spec=OUTPUT_SPEC,
)

# Keep the schedule's name for the pause / resume / update calls.
SCHEDULE_RESOURCE_NAME = model_monitoring_schedule.name
print(f"Schedule {SCHEDULE_RESOURCE_NAME} created.")

暂停计划

运行以下命令来暂停模型监控计划：

In [None]:
# Pause the monitoring schedule identified by SCHEDULE_RESOURCE_NAME.
my_model_monitor.pause_schedule(schedule_name=SCHEDULE_RESOURCE_NAME)

#### 恢复日程

运行以下命令以恢复暂停的模型监控日程:

In [None]:
# Resume the monitoring schedule identified by SCHEDULE_RESOURCE_NAME.
my_model_monitor.resume_schedule(schedule_name=SCHEDULE_RESOURCE_NAME)

更新计划

运行以下命令以更新模型监控计划：

In [None]:
# Update the schedule to run every hour at :30, for example 0:30, 1:30, 2:30...
my_model_monitor.update_schedule(
    schedule_name=SCHEDULE_RESOURCE_NAME, cron="30 * * * *"
)

请在Google Cloud控制台中检查监控计划

要在Google Cloud控制台中检查您的模型监控计划，请转到Vertex AI下的监控选项卡。

### 步骤 8: 清理

如果您不再需要您的模型监控资源，请运行以下命令来删除它们:

In [None]:
# Tear down the resources created by this notebook, in dependency order.

# Run this when no jobs are running: deletes the model monitor and, with
# force=True, its schedule and all its jobs.
my_model_monitor.delete(force=True)

# Undeploy every model from the endpoint, then delete the endpoint itself.
endpoint.undeploy_all()
endpoint.delete()

# Delete the model from Vertex AI Model Registry.
model.delete()

# NOTE(review): the Cloud Storage bucket (BUCKET_URI) and the BigQuery logging
# dataset (BQ_LOGGING_DATASET) are not removed by this cell; delete them
# separately if they are no longer needed.