In [None]:
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI 模型花园 MoViNet 视频动作识别

<table align="left">
  <td>
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_movinet_action_recognition.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Colab logo"> 在 Colab 中运行
    </a>
  </td>

  <td>
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_movinet_action_recognition.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo">
      查看 GitHub 上的内容
    </a>
  </td>
  <td>
    <a href="https://console.cloud.google.com/vertex-ai/notebooks/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/main/notebooks/community/model_garden/model_garden_movinet_action_recognition.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo">
在 Vertex AI Workbench 中打开
    </a>
  </td>
</table>

**_注意_**: 这个笔记本已在以下环境中进行测试：

- Python 版本 = 3.9

## 概观

本文档演示了如何在 Vertex AI 模型花园中使用 [MoViNet](https://github.com/tensorflow/models/tree/master/official/projects/movinet) 进行视频动作识别。

### 目标

* 训练新模型
  * 将输入数据转换为训练格式
  * 创建 [超参数调整作业](https://cloud.google.com/vertex-ai/docs/training/hyperparameter-tuning-overview) 来训练新模型
  * 找到并导出最佳模型

* 测试训练好的模型
  * 将模型上传到 [Vertex AI 模型注册表](https://cloud.google.com/vertex-ai/docs/model-registry/introduction)
  * 运行批量预测

* 清理资源

### 成本

本教程使用 Google Cloud 的可计费组件：

* Vertex AI
* Cloud Storage

了解有关 [Vertex AI 价格](https://cloud.google.com/vertex-ai/pricing) 和 [Cloud Storage 价格](https://cloud.google.com/storage/pricing)，并使用 [定价计算器](https://cloud.google.com/products/calculator/) 来根据您的预期使用情况生成成本估算。

## 在你开始之前

### 仅适用于 Colab

请在 Colab 上运行以下命令；如果您使用 Workbench，请跳过此部分。

In [None]:
import sys

# Colab-only setup: this branch runs only when the `google.colab` module is
# already loaded in the current interpreter; elsewhere the cell is a no-op.
if "google.colab" in sys.modules:
    # Upgrade the Vertex AI SDK in the running environment (IPython shell escape).
    ! pip3 install --upgrade google-cloud-aiplatform

    # Automatically restart kernel after installs
    import IPython

    app = IPython.Application.instance()
    app.kernel.do_shutdown(True)

    # NOTE(review): authentication is requested after the kernel shutdown call
    # in the same cell — confirm these lines still take effect, or whether the
    # cell has to be re-run once the kernel is back.
    from google.colab import auth as google_auth

    google_auth.authenticate_user()

### 设置您的Google Cloud项目

**无论您使用的是哪种笔记本环境，以下步骤都是必需的。**

1. [选择或创建一个Google Cloud项目](https://console.cloud.google.com/cloud-resource-manager)。当您首次创建帐户时，您将获得$300的免费信用额度用于计算/存储成本。

1. [确保为您的项目启用计费](https://cloud.google.com/billing/docs/how-to/modify-project)。

1. [启用Vertex AI API和Compute Engine API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com,compute_component)。

1. 如果您在本地运行此笔记本，您需要安装[Cloud SDK](https://cloud.google.com/sdk)。

1. 在下面的单元格中输入您的项目ID。然后运行该单元格，以确保Cloud SDK为本笔记本中的所有命令使用正确的项目。

1. [创建一个服务账号](https://cloud.google.com/iam/docs/service-accounts-create#iam-service-accounts-create-console)，该账号需具备`Vertex AI User`和`Storage Object Admin`角色，用于对训练好的模型运行批量预测。

**注意**：Jupyter 会将以`!`为前缀的行作为 shell 命令运行，并将以`$`为前缀的 Python 变量的值插入这些命令中。

In [None]:
import os

from google.cloud import aiplatform

# The GCP project ID for experiments.
PROJECT_ID = ""  # @param {type:"string"}

# Bucket URI with gs:// prefix.
BUCKET_URI = ""  # @param {type:"string"}

# You can choose a region from https://cloud.google.com/about/locations.
# Only regions prefixed by "us", "asia", or "europe" are supported.
REGION = "us-central1"  # @param {type:"string"}
# The text before the first "-" of the region (e.g. "us" for "us-central1");
# it is used below to build the "<prefix>-docker.pkg.dev" container image URIs.
REGION_PREFIX = REGION.split("-")[0]
assert REGION_PREFIX in (
    "us",
    "europe",
    "asia",
), f'{REGION} is not supported. It must be prefixed by "us", "asia", or "europe".'

# Point the Cloud SDK at the selected project (IPython shell escape; `$PROJECT_ID`
# is expanded from the Python variable above).
! gcloud config set project $PROJECT_ID

# Sub-directories of the bucket: "temporal" is passed to the SDK as the staging
# bucket, and "ckpt" is where upload_checkpoint_to_gcs copies checkpoints.
STAGING_BUCKET = os.path.join(BUCKET_URI, "temporal")
CHECKPOINT_BUCKET = os.path.join(BUCKET_URI, "ckpt")

aiplatform.init(project=PROJECT_ID, location=REGION, staging_bucket=STAGING_BUCKET)

# Cloud Storage directory that upload_config_to_gcs copies config files into.
# Nothing is downloaded here; this only defines the destination path.
CONFIG_DIR = os.path.join(BUCKET_URI, "config")

### 定义常量

In [None]:
# Objective identifier passed to the data converter (`--objective`) and to the
# serving container (`OBJECTIVE` env var).
# NOTE(review): "var" presumably stands for video action recognition — confirm.
OBJECTIVE = "var"

# Data converter constants.
DATA_CONVERTER_JOB_PREFIX = "data_converter"
DATA_CONVERTER_CONTAINER = f"{REGION_PREFIX}-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/data-converter"
DATA_CONVERTER_MACHINE_TYPE = "n1-highmem-8"
# Per-variant image size, keyed by MoViNet model id. The same value is used for
# both dimensions of the converter's `--output_shape` and for the serving
# container's IMAGE_WIDTH / IMAGE_HEIGHT.
IMAGE_SIZES = {
    "a0": 172,
    "a1": 172,
    "a2": 224,
    "a3": 256,
    "a4": 290,
    "a5": 320,
}

# Training constants.
TRAINING_JOB_PREFIX = "train"
TRAIN_CONTAINER_URI = f"{REGION_PREFIX}-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/movinet-train"
TRAIN_MACHINE_TYPE = "n1-highmem-32"
TRAIN_ACCELERATOR_TYPE = "NVIDIA_TESLA_V100"
# Used both as the accelerator count of the training machine and as the
# `runtime.num_gpus` override passed to the trainer.
TRAIN_NUM_GPU = 8

# Evaluation constants.
# Key looked up in each trial's best_ckpt/info.json when picking the best trial.
EVALUATION_METRIC = "accuracy"

# Export constants.
EXPORT_JOB_PREFIX = "export"
EXPORT_CONTAINER_URI = f"{REGION_PREFIX}-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/movinet-model-export"
EXPORT_MACHINE_TYPE = "n1-highmem-8"

# Prediction constants.
# You can adjust accelerator types and machine types to get faster predictions.
PREDICTION_CONTAINER_URI = f"{REGION_PREFIX}-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/movinet-serve"
PREDICTION_PORT = 8080
PREDICTION_ACCELERATOR_COUNT = 1
PREDICTION_ACCELERATOR_TYPE = "NVIDIA_TESLA_T4"
PREDICTION_MACHINE_TYPE = "n1-standard-4"
PREDICTION_JOB_PREFIX = "predict"

### 定义常见的辅助函数

In [None]:
import json
from datetime import datetime
from typing import Any

import tensorflow as tf
import yaml


def get_job_name_with_datetime(prefix: str) -> str:
    """Builds a job name by appending the current local time to `prefix`.

    Args:
        prefix: Leading part of the job name.

    Returns:
        `prefix` followed by `_YYYYMMDD_HHMMSS` for the moment of the call.
    """
    timestamp_suffix = datetime.now().strftime("_%Y%m%d_%H%M%S")
    return "".join((prefix, timestamp_suffix))


def get_label_map(label_map_yaml_filepath: str) -> tuple[dict[int, str], int]:
    """Loads the `label_map` entry of a YAML file and derives the class count.

    Args:
        label_map_yaml_filepath: Path of the YAML file, opened through
            `tf.io.gfile.GFile`.

    Returns:
        A `(label_map, num_classes)` tuple, where `label_map` is the value
        stored under the top-level `label_map` key and `num_classes` is the
        largest key of that mapping plus one.
    """
    with tf.io.gfile.GFile(label_map_yaml_filepath, "rb") as yaml_file:
        raw_yaml = yaml_file.read()
    label_map = yaml.safe_load(raw_yaml)["label_map"]
    highest_label_id = max(label_map.keys())
    return label_map, highest_label_id + 1


def get_best_trial(
    model_dir: str, max_trial_count: int, evaluation_metric: str
) -> tuple[str, Any]:
    """Finds the best trial directory and eval results from a hyperparameter tuning job.

    Trials are expected under `<model_dir>/trial_<n>` for n in
    1..max_trial_count, each with a `best_ckpt/info.json` file holding the
    evaluation metrics of that trial's best checkpoint.

    Args:
        model_dir: Root output directory of the hyperparameter tuning job.
        max_trial_count: Number of trials to inspect.
        evaluation_metric: Key in `info.json` whose value ranks the trials;
            larger is better.

    Returns:
        A `(best_trial_dir, best_trial_evaluation_results)` tuple. If no trial
        scores above -1 (for example when `max_trial_count` is 0), the result
        is `("", {})`.
    """
    best_trial_dir = ""
    best_trial_evaluation_results = {}
    best_performance = -1

    for current_trial in range(1, max_trial_count + 1):
        # The directory is built from the `model_dir` argument. The parameter
        # used to be misspelled `model_di`, so the body silently read a
        # notebook-level global named `model_dir` instead.
        current_trial_dir = os.path.join(model_dir, "trial_" + str(current_trial))
        evaluation_filepath = os.path.join(current_trial_dir, "best_ckpt", "info.json")
        with tf.io.gfile.GFile(evaluation_filepath, "rb") as f:
            eval_metric_results = json.load(f)
        current_performance = eval_metric_results[evaluation_metric]
        # Strict comparison: on ties the earliest trial wins.
        if current_performance > best_performance:
            best_performance = current_performance
            best_trial_dir = current_trial_dir
            best_trial_evaluation_results = eval_metric_results
    return best_trial_dir, best_trial_evaluation_results


def print_response_instance(json_str: str, label_map: dict[int, str]):
    """Prints a readable summary of one batch-prediction response line.

    Args:
        json_str: One JSON document containing an `instance` object and,
            on success, a `prediction` list.
        label_map: Mapping used to turn each predicted `label` into a name.

    If the document has no `prediction` key, the raw line is printed after
    an `Error:` marker and nothing else happens.
    """
    response = json.loads(json_str)
    if "prediction" in response:
        request = response["instance"]
        predictions = response["prediction"]
        video_uri = request["content"]
        # The segment bounds are optional in the request; fall back to the
        # whole video when they are absent.
        segment_start = request.get("timeSegmentStart", "0.0s")
        segment_end = request.get("timeSegmentEnd", "Infinity")
        print(f"---------- Predict {video_uri}, {segment_start} to {segment_end}:")
        for item in predictions:
            detected_at = item["timeSegmentStart"]
            label_name = label_map[item["label"]]
            score = item["confidence"]
            print(f"At {detected_at}, detected {label_name} with {score} confidence.")
    else:
        print("Error:", json_str)


def find_checkpoint_in_dir(checkpoint_dir: str) -> str:
    """Locates a checkpoint prefix somewhere below `checkpoint_dir`.

    The directory tree is walked with `tf.io.gfile.walk`; the first file whose
    name ends in `.index` identifies the checkpoint.

    Args:
        checkpoint_dir: Directory to search.

    Returns:
        The path of that `.index` file with its extension removed, or `None`
        when the walk finds no such file.
    """
    for parent, _, filenames in tf.io.gfile.walk(checkpoint_dir):
        index_file = next(
            (name for name in filenames if name.endswith(".index")), None
        )
        if index_file is not None:
            stem, _ = os.path.splitext(index_file)
            return os.path.join(parent, stem)
    return None


def upload_checkpoint_to_gcs(checkpoint_url: str) -> str:
    """Uploads a compressed .tar.gz checkpoint at the given URL to Cloud Storage."""
    filename = os.path.basename(checkpoint_url)
    checkpoint_name = filename.replace(".tar.gz", "")
    print("Download checkpoint from", checkpoint_url, "and store to", CHECKPOINT_BUCKET)
    ! wget $checkpoint_url -O $filename
    ! mkdir -p $checkpoint_name
    ! tar -xvzf $filename -C $checkpoint_name

    checkpoint_path = find_checkpoint_in_dir(checkpoint_name)
    checkpoint_path = os.path.relpath(checkpoint_path, checkpoint_name)

    ! gsutil cp -r $checkpoint_name $CHECKPOINT_BUCKET/
    checkpoint_uri = os.path.join(CHECKPOINT_BUCKET, checkpoint_name, checkpoint_path)
    print("Checkpoint uploaded to", checkpoint_uri)
    return checkpoint_uri


def upload_config_to_gcs(url: str) -> str:
    """Uploads a config file at the given URL to Cloud Storage."""
    filename = os.path.basename(url)
    destination = os.path.join(CONFIG_DIR, filename)
    print("Copy", url, "to", destination)
    ! wget "$url" -O "$filename"
    ! gsutil cp "$filename" "$destination"
    return destination

## 训练新模型
本节展示如何训练新模型。
1. 将输入数据转换为训练格式
2. 创建超参数调整作业以训练新模型
3. 找到并导出最佳模型

如果您已经训练过模型，请转至`测试训练好的模型`部分。

请选择一个模型：
- `model_id`：MoViNet模型变体ID，可以选择`a0`、`a1`、`a2`、`a3`、`a4`、`a5`中的一个。数字较大的模型需要更多资源来训练，并且预计具有更高的准确性和延迟。这里我们选择`a3`作为演示用途。**目前不建议使用`a0`、`a1`和`a2`，因为我们正在调查它们存在的一些推理问题。**
- `model_mode`：MoViNet模型类型，可以是`base`或`stream`。基础模型具有略高的准确性，而流模型则针对流媒体和更快的CPU推理进行了优化。有关更多信息，请参阅[官方MoViNet文档](https://github.com/tensorflow/models/tree/master/official/projects/movinet)。

**注意**：目前预测容器仅支持基础模型（非流式）。如果您训练了一个流式模型，您需要下载模型，并参考[MoViNet官方指南](https://github.com/tensorflow/models/blob/master/official/projects/movinet/movinet_streaming_model_training_and_inference.ipynb)以在本地运行预测。

In [None]:
model_id = "a3"  # @param ["a0", "a1", "a2", "a3", "a4", "a5"]
model_mode = "base"  # @param ["base", "stream"]
is_stream = model_mode == "stream"
image_size = IMAGE_SIZES[model_id]
model_name = f"movinet_{model_id}_{model_mode}"

# Architecture flags forwarded to the export job; the streaming and base
# variants use different layer types.
export_container_args = (
    {
        "conv_type": "2plus1d",
        "se_type": "2plus3d",
        "activation": "hard_swish",
        "gating_activation": "hard_sigmoid",
        # Only the larger streaming variants enable positional encoding.
        "use_positional_encoding": model_id in {"a3", "a4", "a5"},
    }
    if is_stream
    else {
        "conv_type": "3d",
        "se_type": "3d",
        "activation": "swish",
        "gating_activation": "sigmoid",
        "use_positional_encoding": False,
    }
)

### 为训练准备输入数据

按照[此处](https://cloud.google.com/vertex-ai/docs/video-data/action-recognition/prepare-data)描述的格式准备数据，然后通过运行下面的单元格将其转换为训练格式：

* `input_file_path`：准备数据的输入文件路径。
* `input_file_type`：输入文件类型，如`csv`或`jsonl`。
* `output_fps`：视频的采样率；每秒帧数。
* `num_frames`：围绕关键帧输入进行采样的帧数。
* `min_duration_sec`：围绕关键帧输入采样视频片段的最小持续时间（以秒为单位）。这是为了验证目的-如果关键帧周围没有足够的上下文，将抛出错误。
* `pos_neg_ratio`：正负段之间的采样比例。例如，pos_neg_ratio为0.5表示每2个正实例采样1个负实例。
* `split_ratio`：三个逗号分隔的浮点数，表示要分割为训练/验证/测试集的数据比例。它们必须加起来等于1。
* `num_shard`：三个逗号分隔的整数，表示用于训练/验证/测试的分片数。

**注意**：对于JSONL输入，请在`dataItemResourceLabels`中使用`aiplatform.googleapis.com/ml_use`而不是`ml_use`作为ML用途的JSON键。这是为了与其他目标保持一致。

In [None]:
# This job converts the input data into the training format, using the given
# split ratios and number of shards for train/validation/test.

data_converter_job_name = get_job_name_with_datetime(
    DATA_CONVERTER_JOB_PREFIX + "_" + OBJECTIVE
)

input_file_path = ""  # @param {type:"string"}
input_file_type = "csv"  # @param ["csv", "jsonl"]
output_fps = 10  # @param {type:"integer"}
num_frames = 32  # @param {type:"integer"}
min_duration_sec = 1.0  # @param {type:"number"}
pos_neg_ratio = 1.0  # @param {type:"number"}
# Comma-separated values, passed to the converter unchanged.
split_ratio = "0.8,0.1,0.1"
num_shard = "10,10,10"
# The converter writes into a directory named after the job, directly under
# the bucket.
data_converter_output_dir = os.path.join(BUCKET_URI, data_converter_job_name)

# Single CPU-only replica running the data converter container.
worker_pool_specs = [
    {
        "machine_spec": {
            "machine_type": DATA_CONVERTER_MACHINE_TYPE,
        },
        "replica_count": 1,
        "container_spec": {
            "image_uri": DATA_CONVERTER_CONTAINER,
            "command": [],
            "args": [
                "--input_file_path=%s" % input_file_path,
                "--input_file_type=%s" % input_file_type,
                "--objective=%s" % OBJECTIVE,
                "--num_shard=%s" % num_shard,
                "--split_ratio=%s" % split_ratio,
                "--output_dir=%s" % data_converter_output_dir,
                "--output_fps=%d" % output_fps,
                "--num_frames=%d" % num_frames,
                "--min_duration_sec=%f" % min_duration_sec,
                "--pos_neg_ratio=%f" % pos_neg_ratio,
                # Square output frames sized for the selected model variant.
                "--output_shape=%d,%d" % (image_size, image_size),
            ],
        },
    }
]

data_converter_custom_job = aiplatform.CustomJob(
    display_name=data_converter_job_name,
    project=PROJECT_ID,
    worker_pool_specs=worker_pool_specs,
    staging_bucket=STAGING_BUCKET,
)

data_converter_custom_job.run()

# Paths the later cells read from the converter's output directory.
input_train_data_path = os.path.join(data_converter_output_dir, "train.tfrecord*")
input_validation_data_path = os.path.join(data_converter_output_dir, "val.tfrecord*")
label_map_path = os.path.join(data_converter_output_dir, "label_map.yaml")
print("input_train_data_path for training: ", input_train_data_path)
print("input_validation_data_path for training: ", input_validation_data_path)
print("label_map_path for prediction: ", label_map_path)

使用Vertex AI SDK来创建和运行具有Vertex AI Model Garden训练Docker镜像的超参数调整作业。

#### 定义以下规格

* `worker_pool_specs`：指定机器类型和Docker镜像的字典列表。此示例定义了一个单节点集群，其中有一个`n1-highmem-32`机器，配备8个`NVIDIA_TESLA_V100` GPU。

  **注意**：我们建议对MoViNet-A2和更大的模型使用8个GPU。由于加载视频数据需要大量的GPU内存，建议先尝试小批量的大小进行实验。
* `parameter_spec`：指定要优化的参数的字典。字典键是分配给训练应用程序代码中每个超参数的命令行参数的字符串，字典值是参数规范。参数规范包括超参数的类型、最小/最大值和规模。
* `metric_spec`：指定要优化的度量的字典。字典键是您在训练应用程序代码中设置的`hyperparameter_metric_tag`，值是优化目标。

In [None]:
from google.cloud.aiplatform import hyperparameter_tuning as hpt

# The train/validation dataset paths and the label map path are produced by the
# data conversion cell above.
# Or, set the paths of already-prepared datasets here by uncommenting:
# input_train_data_path = ""
# input_validation_data_path = ""
# label_map_path = ""

train_job_name = get_job_name_with_datetime(f"{TRAINING_JOB_PREFIX}_{model_name}")
# Training output goes to a directory named after the job, directly under the
# bucket.
model_dir = os.path.join(BUCKET_URI, train_job_name)
label_map, num_classes = get_label_map(label_map_path)

# Uploads the pretrained checkpoint to the GCS bucket. `init_checkpoint` first
# holds the download URL and is then rebound to the uploaded GCS URI.
init_checkpoint = f"https://storage.googleapis.com/tf_model_garden/vision/movinet/{model_name}_with_backbone.tar.gz"
init_checkpoint = upload_checkpoint_to_gcs(init_checkpoint)

# Uploads config file according to model_id and streaming options.
# `config_file` is rebound twice: config name -> source URL -> GCS path.
config_file = f"{model_id}_stream" if is_stream else model_id
config_file = f"https://raw.githubusercontent.com/tensorflow/models/master/official/projects/movinet/configs/yaml/movinet_{config_file}_gpu.yaml"
config_file = upload_config_to_gcs(config_file)

# The parameters here are mainly for demonstration purpose. Please update them
# for better performance.
trainer_args = {
    "experiment": "movinet_kinetics600",
    "config_file": config_file,
    "input_train_data_path": input_train_data_path,
    "input_validation_data_path": input_validation_data_path,
    "init_checkpoint": init_checkpoint,
    "model_dir": model_dir,
    "num_classes": num_classes,
    "global_batch_size": 16,
    "prefetch_buffer_size": 16,
    "shuffle_buffer_size": 32,
    "train_steps": 2000,
}

worker_pool_specs = [
    {
        "machine_spec": {
            "machine_type": TRAIN_MACHINE_TYPE,
            "accelerator_type": TRAIN_ACCELERATOR_TYPE,
            # Each training job uses TRAIN_NUM_GPU GPUs.
            "accelerator_count": TRAIN_NUM_GPU,
        },
        "replica_count": 1,
        "container_spec": {
            "image_uri": TRAIN_CONTAINER_URI,
            "args": [
                "--mode=train_and_eval",
                "--params_override=runtime.num_gpus=%d" % TRAIN_NUM_GPU,
            ]
            # Every trainer_args entry becomes a `--key=value` flag.
            + ["--{}={}".format(k, v) for k, v in trainer_args.items()],
        },
    }
]

# Metric tag to optimize and its direction.
metric_spec = {"model_performance": "maximize"}

# These learning rates might not be optimal for your selected model type; To
# tune learning rates, try hpt.DoubleParameterSpec with more trials.
LEARNING_RATES = [1e-3, 3e-3]
# One trial per candidate learning rate.
MAX_TRIAL_COUNT = len(LEARNING_RATES)
parameter_spec = {
    "learning_rate": hpt.DiscreteParameterSpec(values=LEARNING_RATES, scale="linear"),
}

print(worker_pool_specs, metric_spec, parameter_spec)

#### 运行超参数调整任务
* `max_trial_count`: 设置服务将运行的试验次数的上限。推荐的做法是开始时使用较少的试验次数，了解您选择的超参数的影响程度，然后再逐渐增加。
* `parallel_trial_count`: 如果使用并行试验，服务会提供多个训练处理集群。在创建作业时指定的工作池规范将用于每个单独的训练集群。增加并行试验的数量会减少超参数调整任务运行的时间；但是，这可能会降低作业的总体有效性。这是因为默认调整策略使用之前试验的结果来指导后续试验中的数值分配。
* `search_algorithm`: 可用的搜索算法包括grid、random或默认（无）。默认选项将应用贝叶斯优化来搜索可能超参数值的空间，这是推荐的算法。
点击输出中生成的链接，查看在云控制台中的运行情况。

In [None]:
# Template custom job: its worker pool spec is reused for every trial.
train_custom_job = aiplatform.CustomJob(
    project=PROJECT_ID,
    display_name=train_job_name,
    staging_bucket=STAGING_BUCKET,
    worker_pool_specs=worker_pool_specs,
)

# Trials run one at a time; `search_algorithm=None` selects the service default.
train_hpt_job = aiplatform.HyperparameterTuningJob(
    project=PROJECT_ID,
    display_name=train_job_name,
    custom_job=train_custom_job,
    parameter_spec=parameter_spec,
    metric_spec=metric_spec,
    search_algorithm=None,
    max_trial_count=MAX_TRIAL_COUNT,
    parallel_trial_count=1,
)

train_hpt_job.run()

print("model_dir is:", model_dir)

### 以 TensorFlow SavedModel 格式导出模型

In [None]:
# This job exports the model from a TF checkpoint to the TF SavedModel format.
# model_dir comes from the training cell above.
best_trial_dir, best_trial_evaluation_results = get_best_trial(
    model_dir, MAX_TRIAL_COUNT, EVALUATION_METRIC
)
# NOTE(review): find_checkpoint_in_dir returns None when no `.index` file is
# found; the export job would then receive `--checkpoint_path=None`.
best_checkpoint_path = find_checkpoint_in_dir(f"{best_trial_dir}/best_ckpt/")
print("best_trial_dir: ", best_trial_dir)
print("best_trial_evaluation_results: ", best_trial_evaluation_results)
print("best_checkpoint: ", best_checkpoint_path)

# Flags for the export container: the common ones first, followed by the
# architecture flags chosen in the model selection cell.
container_args = {
    "export_path": f"{model_dir}/best_model",
    "model_id": model_id,
    "num_classes": num_classes,
    "causal": is_stream,
    "checkpoint_path": best_checkpoint_path,
    "assert_checkpoint_objects_matched": False,
    **export_container_args,
}

worker_pool_specs = [
    {
        "machine_spec": {
            "machine_type": EXPORT_MACHINE_TYPE,
        },
        "replica_count": 1,
        "container_spec": {
            "image_uri": EXPORT_CONTAINER_URI,
            # Every container_args entry becomes a `--key=value` flag.
            "args": ["--{}={}".format(k, v) for k, v in container_args.items()],
        },
    }
]

model_export_job_name = get_job_name_with_datetime(EXPORT_JOB_PREFIX + "_" + OBJECTIVE)
model_export_custom_job = aiplatform.CustomJob(
    display_name=model_export_job_name,
    project=PROJECT_ID,
    worker_pool_specs=worker_pool_specs,
    staging_bucket=STAGING_BUCKET,
)

model_export_custom_job.run()

print("best model is saved to: ", container_args["export_path"])

## 测试训练好的模型
这一部分展示了如何使用训练好的模型进行测试。
1. 上传并部署模型到[Vertex AI模型注册表](https://cloud.google.com/vertex-ai/docs/model-registry/introduction)
2. 运行批量预测

**注意：** 预测容器仅适用于基础模型。如果您训练了一个流模型，请从导出路径下载模型，并参考[MoViNet官方指南](https://github.com/tensorflow/models/blob/master/official/projects/movinet/movinet_streaming_model_training_and_inference.ipynb)进行本地预测。

### 将模型上传至 Vertex AI 模型注册表

以下单元格将训练好的模型上传至 Vertex AI 模型注册表。如果您想要在已经上传的模型上运行批量预测，请跳过此步骤。

#### 可配置的环境变量

* `MODEL_PATH`：MoViNet 模型在云存储中的 URI。
* `BATCH_SIZE`：推断的批次大小。使用更大的值可以加快 GPU 预测速度。
* `NUM_FRAMES`：使用该模型进行单次预测所需的帧数。
* `FPS`：视频抽帧率。
* `OVERLAP_FRAMES`：允许连续预测窗口之间的重叠帧数。设置较小的值可以加快推断速度，但准确性会降低。

In [None]:
# Environment variables handed to the serving container.
serving_env = {
    "MODEL_ID": "tfvision-movinet-var",
    # Reuses the export path from the export cell, so that cell must have run
    # in this session (or `container_args` must be defined manually).
    "MODEL_PATH": container_args["export_path"],
    "BATCH_SIZE": 1,
    # NOTE(review): hard-coded, while FPS below reuses `output_fps` from the
    # data conversion cell; `num_frames` there also defaults to 32 — confirm
    # whether the two values are meant to stay in sync.
    "NUM_FRAMES": 32,
    "FPS": output_fps,
    "OVERLAP_FRAMES": 24,
    "OBJECTIVE": OBJECTIVE,
    "IMAGE_WIDTH": image_size,
    "IMAGE_HEIGHT": image_size,
    "DEPLOY_SOURCE": "notebook",
}

model = aiplatform.Model.upload(
    display_name=model_name,
    serving_container_image_uri=PREDICTION_CONTAINER_URI,
    serving_container_ports=[PREDICTION_PORT],
    serving_container_predict_route="/predict",
    serving_container_health_route="/ping",
    serving_container_environment_variables=serving_env,
)

model.wait()

# Prints the display name given above, not the model's resource name.
print("The uploaded model name is: ", model_name)

或者，取消下面的单元格的注释，使用已经上传的模型。用现有模型的模型名称字符串替换。

In [None]:
# model = aiplatform.Model("projects/123456789/locations/us-central1/models/12345678901234567890")

### 运行批量预测

我们现在将使用经过训练的MoViNet动作识别模型通过[Vertex AI Batch Prediction](https://cloud.google.com/vertex-ai/docs/predictions/get-batch-predictions)来运行批量预测。

请准备一个输入JSONL文件，每行都遵循[这个格式](https://cloud.google.com/vertex-ai/docs/video-data/action-recognition/get-predictions?hl=en#input_data_requirements)，并将其存储在一个Cloud Storage存储桶中。服务账号应具有对包含训练模型和输入数据的存储桶的读取权限。请参阅[Service accounts overview](https://cloud.google.com/iam/docs/service-account-overview)获取更多信息。

[Vertex AI Batch Prediction](https://cloud.google.com/vertex-ai/docs/predictions/get-batch-predictions)的默认超时时间为10分钟。因此，请确保输入视频片段大约在5分钟长，帧率在5~10之间，否则可能会遇到超时错误。要在此笔记本演示之外更大规模地使用此模型，您可以尝试以下方法之一：

- 将serving Docker镜像拉取到虚拟机或本地机器，并直接发送预测请求。
- 为了同时处理更多的数据，请编写一个自定义[DataFlow](https://cloud.google.com/dataflow)管道，将预测请求发送到movinet服务容器。
- 将视频分割成5分钟片段，使用较小的批量大小运行批量预测。

In [None]:
# Path to the prediction input JSONL file.
test_jsonl_path = ""  # @param {type:"string"}
# Full service account name with the suffix `gserviceaccount.com`.
batch_predict_service_account = ""  # @param {type:"string"}

predict_job_name = get_job_name_with_datetime(f"{PREDICTION_JOB_PREFIX}_{model_name}")
# Prediction results are written under a job-named directory inside the
# staging bucket; the cell that prints the responses reads from this prefix.
predict_destination_prefix = os.path.join(STAGING_BUCKET, predict_job_name)

batch_prediction_job = model.batch_predict(
    job_display_name=predict_job_name,
    gcs_source=test_jsonl_path,
    gcs_destination_prefix=predict_destination_prefix,
    machine_type=PREDICTION_MACHINE_TYPE,
    accelerator_count=PREDICTION_ACCELERATOR_COUNT,
    accelerator_type=PREDICTION_ACCELERATOR_TYPE,
    max_replica_count=1,
    service_account=batch_predict_service_account,
)

batch_prediction_job.wait()

print(batch_prediction_job.display_name)
print(batch_prediction_job.resource_name)
print(batch_prediction_job.state)

您可以在输出目录中阅读预测响应的JSONL文件。

In [None]:
# `label_map` comes from the label map file written by the data conversion job.
# Each line of every file found two levels below the prediction output prefix
# is treated as one JSON response.
result_pattern = os.path.join(predict_destination_prefix, "*/*")
for result_path in tf.io.gfile.glob(result_pattern):
    with tf.io.gfile.GFile(result_path, "r") as result_file:
        for response_line in result_file:
            print_response_instance(response_line, label_map)

## 清理资源

In [None]:
# Delete the trained model.
model.delete()
# Delete custom and hpt jobs.
# Each job is deleted only if listing by its display name returns something.
# NOTE(review): every name below comes from an earlier cell; if a section was
# skipped in this session, the first undefined name raises NameError and the
# remaining deletions do not run.
if data_converter_custom_job.list(filter=f'display_name="{data_converter_job_name}"'):
    data_converter_custom_job.delete()
if train_hpt_job.list(filter=f'display_name="{train_job_name}"'):
    train_hpt_job.delete()
if model_export_custom_job.list(filter=f'display_name="{model_export_job_name}"'):
    model_export_custom_job.delete()
if batch_prediction_job.list(filter=f'display_name="{predict_job_name}"'):
    batch_prediction_job.delete()