In [None]:
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex SDK：AutoML 视频目标跟踪模型

## 安装

安装最新（预览）版本的Vertex SDK。

In [None]:
# Install (or upgrade, via -U) google-cloud-aiplatform into the user site-packages (--user).
! pip3 install -U google-cloud-aiplatform --user

安装Google的*云存储*库。

In [None]:
# Install the google-cloud-storage client library.
! pip3 install google-cloud-storage

### 重启内核

安装了Vertex SDK和Google *cloud-storage*后，您需要重新启动笔记本内核，以便它可以找到这些软件包。

In [None]:
import os

# Skip the restart when the AUTORUN environment variable is set.
if not os.getenv("AUTORUN"):
    import IPython

    # Restart the kernel (restart=True) so the freshly installed packages can be imported.
    IPython.Application.instance().kernel.do_shutdown(True)

## 在开始之前

### GPU 运行时

*如果您有这个选项，请确保在 GPU 运行时中运行此笔记本。在 Colab 中，选择* **运行时 > 更改运行时类型 > GPU**

### 设置您的 GCP 项目

**无论您的笔记本环境如何，以下步骤都是必需的。**

1. [选择或创建一个 GCP 项目](https://console.cloud.google.com/cloud-resource-manager)。当您第一次创建一个帐户时，您将获得 $300 的免费信用额度用于您的计算/存储成本。

2. [确保为您的项目启用了结算。](https://cloud.google.com/billing/docs/how-to/modify-project)

3. [启用 Vertex APIs 和 Compute Engine APIs。](https://console.cloud.google.com/flows/enableapi?apiid=ml.googleapis.com,compute_component)

4. [Google Cloud SDK](https://cloud.google.com/sdk) 已经安装在 Google Cloud Notebook 中。

5. 在下面的单元格中输入您的项目 ID。然后运行该单元格，以确保 Cloud SDK 在本笔记本中的所有命令中使用正确的项目。

**注意**：Jupyter 会将以 `!` 为前缀的行视为 shell 命令，并且它会将以 `$` 为前缀的 Python 变量插入到这些命令中。

In [None]:
# GCP project ID; leave the placeholder to have the next cell read it from gcloud.
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}

In [None]:
if PROJECT_ID == "" or PROJECT_ID is None or PROJECT_ID == "[your-project-id]":
    # Get your GCP project id from gcloud
    shell_output = !gcloud config list --format 'value(core.project)' 2>/dev/null
    PROJECT_ID = shell_output[0]
    print("Project ID:", PROJECT_ID)

In [None]:
# Make PROJECT_ID the default project for subsequent gcloud commands.
! gcloud config set project $PROJECT_ID

#### 区域

您还可以更改“REGION”变量，该变量用于此笔记本的其余部分操作。以下是 Vertex AI 支持的区域。我们建议在可能的情况下，选择距离您最近的区域。

- 美洲：`us-central1`
- 欧洲：`europe-west4`
- 亚太地区：`asia-east1`

您不能在 Vertex 上使用多区域存储桶进行训练。并非所有区域都支持所有 Vertex 服务。有关每个区域的最新支持，请参阅[Vertex AI 服务的区域支持情况](https://cloud.google.com/vertex-ai/docs/general/locations)。

In [None]:
# Region used for the API endpoint, the resource parent path and the bucket location.
REGION = "us-central1"  # @param {type: "string"}

#### 时间戳

如果您参加了一个现场教程会话，您可能正在使用一个共享的测试账户或项目。为了避免资源创建时用户之间的命名冲突，您可以为每个实例会话创建一个时间戳，并将其附加到本教程中将要创建的资源的名称上。

In [None]:
from datetime import datetime

# Session timestamp (YYYYmmddHHMMSS), appended to resource names to keep them unique.
TIMESTAMP = f"{datetime.now():%Y%m%d%H%M%S}"

### 验证您的GCP账户

**如果您正在使用Google Cloud笔记本**，您的环境已经经过验证。跳过这一步。

*注意：如果您是在Vertex笔记本上并运行该单元格，则该单元格知道跳过执行验证步骤。*

In [None]:
import os
import sys

# Authenticate the notebook session against Google Cloud.
#
# If you are running this notebook in Colab, run this cell and follow the
# instructions to authenticate your Google Cloud account. This provides access
# to your Cloud Storage bucket and lets you submit training jobs and prediction
# requests.

# If on Vertex, then don't execute this code
# (detected by the presence of the env_version metadata file checked below).
if not os.path.exists("/opt/deeplearning/metadata/env_version"):
    if "google.colab" in sys.modules:
        from google.colab import auth as google_auth

        google_auth.authenticate_user()

    # If you are running this tutorial in a notebook locally, replace the string
    # below with the path to your service account key and run this cell to
    # authenticate your Google Cloud account.
    else:
        %env GOOGLE_APPLICATION_CREDENTIALS your_path_to_credentials.json

    # Log in to your account on Google Cloud
    # NOTE: this sits at the outer `if` level, so it runs for both the Colab and local branches.
    ! gcloud auth login

### 创建一个云存储桶

**无论您所用的笔记本环境如何，以下步骤都是必需的。**

本教程旨在使用公共云存储桶中的训练数据以及本地云存储桶用于批量预测。您也可以使用您自己保存在本地云存储桶中的训练数据。

请在下方设置您的云存储桶的名称。它必须在所有云存储桶中保持唯一。

In [None]:
# Cloud Storage bucket name; leave the placeholder to have the next cell generate one.
BUCKET_NAME = "[your-bucket-name]"  # @param {type:"string"}

In [None]:
# Derive a bucket name from the project ID and session timestamp when none was supplied.
if BUCKET_NAME in ("", None, "[your-bucket-name]"):
    BUCKET_NAME = PROJECT_ID + "aip-" + TIMESTAMP

只有在您的存储桶不存在时才运行以下单元格以创建您的云存储存储桶。

In [None]:
# Create the bucket, using REGION as its location (-l).
! gsutil mb -l $REGION gs://$BUCKET_NAME

最后，通过检查云存储桶的内容来验证对其的访问权限：

In [None]:
# List the bucket contents to confirm that it is accessible.
! gsutil ls -al gs://$BUCKET_NAME

### 设置变量

接下来，设置一些在教程中使用的变量。
### 导入库并定义常量

#### 导入Vertex SDK

将Vertex SDK导入到我们的Python环境中。

In [None]:
import os
import sys
import time

from google.cloud.aiplatform import gapic as aip
from google.protobuf import json_format
from google.protobuf.json_format import MessageToJson, ParseDict
from google.protobuf.struct_pb2 import Struct, Value

设置以下常量用于Vertex AI：

- `API_ENDPOINT`：Vertex AI的数据集、模型、作业、流水线和端点服务的API服务端点。
- `PARENT`：Vertex AI的数据集、模型和端点资源的位置根路径。

In [None]:
# Regional service endpoint shared by every client created in this notebook
API_ENDPOINT = f"{REGION}-aiplatform.googleapis.com"

# Location root path under which dataset, model and endpoint resources are addressed
PARENT = "/".join(["projects", PROJECT_ID, "locations", REGION])

#### AutoML 常量

接下来，设置与AutoML视频对象跟踪数据集和训练相关的常量：

- 数据集模式：告诉托管数据集服务数据集的类型是什么。
- 数据标注（注释）模式：告诉托管数据集服务数据如何被标记（注释）。
- 数据集训练模式：告诉Vertex AI Pipelines服务为哪种任务（例如分类）训练模型。

In [None]:
# Dataset metadata schema for video data (stored without the "gs://" scheme,
# which is prepended where the dataset request is built)
VIDEO_SCHEMA = (
    "google-cloud-aiplatform/schema/dataset/metadata/"
    "video_1.0.0.yaml"
)
# Import (labeling) schema for video object tracking annotations
IMPORT_SCHEMA_VIDEO_OBJECT_TRACKING = (
    "gs://google-cloud-aiplatform/schema/dataset/ioformat/"
    "video_object_tracking_io_format_1.0.0.yaml"
)
# Training task definition for AutoML video object tracking
TRAINING_VIDEO_OBJECT_TRACKING_SCHEMA = (
    "gs://google-cloud-aiplatform/schema/trainingjob/definition/"
    "automl_video_object_tracking_1.0.0.yaml"
)

## 客户端

Vertex SDK 采用客户端/服务器模型。在您的一侧(即 Python 脚本)上，您将创建一个客户端，向服务器（Vertex）发送请求并接收响应。

在本教程中，您将使用多个客户端，因此请提前设置好它们。

- 数据集服务，用于管理数据集。
- 模型服务，用于管理模型。
- 训练管道服务。
- 部署端点服务。
- 作业服务，用于批处理作业和自定义训练。
- 预测服务。*注意*：预测具有不同的服务端点。

In [None]:
# client options same for all services
# (every client below is pointed at the regional API_ENDPOINT)
client_options = {"api_endpoint": API_ENDPOINT}


def create_dataset_client():
    """Return a DatasetServiceClient configured with the shared client options."""
    return aip.DatasetServiceClient(client_options=client_options)


def create_model_client():
    """Return a ModelServiceClient configured with the shared client options."""
    return aip.ModelServiceClient(client_options=client_options)


def create_pipeline_client():
    """Return a PipelineServiceClient configured with the shared client options."""
    return aip.PipelineServiceClient(client_options=client_options)


def create_endpoint_client():
    """Return an EndpointServiceClient configured with the shared client options."""
    return aip.EndpointServiceClient(client_options=client_options)


def create_prediction_client():
    """Return a PredictionServiceClient configured with the shared client options."""
    return aip.PredictionServiceClient(client_options=client_options)


def create_job_client():
    """Return a JobServiceClient configured with the shared client options."""
    return aip.JobServiceClient(client_options=client_options)


# Create one client per service up front, keyed by a short service name.
clients = {
    "dataset": create_dataset_client(),
    "model": create_model_client(),
    "pipeline": create_pipeline_client(),
    "endpoint": create_endpoint_client(),
    "prediction": create_prediction_client(),
    "job": create_job_client(),
}

# Show each (name, client) pair that was created.
for client in clients.items():
    print(client)

In [None]:
# CSV index file (video URI, label and bounding-box columns) used to import the dataset.
IMPORT_FILE = "gs://automl-video-demo-data/traffic_videos/traffic_videos_labels.csv"

In [None]:
# Preview the first 10 rows of the import file.
! gsutil cat $IMPORT_FILE | head -n 10

*示例输出*：
```
gs://automl-video-demo-data/traffic_videos/highway_005.mp4,sedan,1565750291672021,11.933333,0.509205,0.594283,,,0.728737,0.760959,,
gs://automl-video-demo-data/traffic_videos/highway_005.mp4,pickup_suv_van,1565750291672171,17.566666,0.761241,0.498466,,,0.948839,0.668524,,
gs://automl-video-demo-data/traffic_videos/highway_005.mp4,pickup_suv_van,1565750291672223,20.433333,0.000000,0.465235,,,0.142638,0.665644,,
gs://automl-video-demo-data/traffic_videos/highway_005.mp4,pickup_suv_van,1565750291672347,25.766666,0.486523,0.592331,,,0.720611,0.776687,,
gs://automl-video-demo-data/traffic_videos/highway_005.mp4,pickup_suv_van,1565750291672575,28.966666,0.578534,0.652778,,,0.828647,0.862967,,
gs://automl-video-demo-data/traffic_videos/highway_005.mp4,pickup_suv_van,1565750291672549,28.966666,0.000000,0.518571,,,0.148841,0.737677,,
gs://automl-video-demo-data/traffic_videos/highway_005.mp4,pickup_suv_van,1565750291672599,28.966666,0.106979,0.458078,,,0.377877,0.678937,,
gs://automl-video-demo-data/traffic_videos/highway_005.mp4,pickup_suv_van,1565715798494273,32.466666,0.333083,0.485473,,,0.542722,0.647774,,
gs://automl-video-demo-data/traffic_videos/highway_005.mp4,sedan,1565715798494439,36.433333,0.935638,0.564839,,,1.000000,0.672182,,
gs://automl-video-demo-data/traffic_videos/highway_005.mp4,pickup_suv_van,1565715798494381,36.433333,0.000000,0.455703,,,0.164878,0.660083,,
```

## 创建数据集

### [projects.locations.datasets.create](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.datasets/create)

### [projects.locations.datasets.create](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.datasets/create)

#### 请求

In [None]:
DATA_SCHEMA = VIDEO_SCHEMA

# Dataset resource to create: a display name plus the metadata schema that marks it as video data.
dataset = dict(
    display_name="traffic_" + TIMESTAMP,
    metadata_schema_uri="gs://" + DATA_SCHEMA,
)

# Preview the request as JSON before sending it.
print(
    MessageToJson(
        aip.CreateDatasetRequest(
            parent=PARENT,
            dataset=dataset,
        ).__dict__["_pb"]
    )
)

{
  "parent": "projects/migration-ucaip-training/locations/us-central1",
  "dataset": {
    "displayName": "traffic_20210310013516",
    "metadataSchemaUri": "gs://google-cloud-aiplatform/schema/dataset/metadata/video_1.0.0.yaml"
  }
}

#### 调用

In [None]:
# Send the create-dataset request; the returned object is resolved with .result() in the next cell.
request = clients["dataset"].create_dataset(parent=PARENT, dataset=dataset)

#### 响应

In [None]:
# Obtain the result of the create-dataset operation and print it as JSON.
result = request.result()

print(MessageToJson(result.__dict__["_pb"]))

{
  "name": "projects/116273516712/locations/us-central1/datasets/7534187925055995904",
  "displayName": "traffic_20210310013516",
  "metadataSchemaUri": "gs://google-cloud-aiplatform/schema/dataset/metadata/video_1.0.0.yaml",
  "labels": {
    "aiplatform.googleapis.com/dataset_metadata_schema": "VIDEO"
  },
  "metadata": {
    "dataItemSchemaUri": "gs://google-cloud-aiplatform/schema/dataset/dataitem/video_1.0.0.yaml"
  }
} 

*示例输出*：
```
{
  "name": "projects/116273516712/locations/us-central1/datasets/7534187925055995904",
  "displayName": "traffic_20210310013516",
  "metadataSchemaUri": "gs://google-cloud-aiplatform/schema/dataset/metadata/video_1.0.0.yaml",
  "labels": {
    "aiplatform.googleapis.com/dataset_metadata_schema": "VIDEO"
  },
  "metadata": {
    "dataItemSchemaUri": "gs://google-cloud-aiplatform/schema/dataset/dataitem/video_1.0.0.yaml"
  }
}
```

In [None]:
# Full resource name of the dataset, as reported by the service
dataset_id = result.name
# Short ID: the final path segment of the resource name
dataset_short_id = dataset_id.rsplit("/", 1)[-1]

print(dataset_id)

### [projects.locations.datasets.import](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.datasets/import)

### [projects.locations.datasets.import](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.datasets/import)

### 请求

In [None]:
LABEL_SCHEMA = IMPORT_SCHEMA_VIDEO_OBJECT_TRACKING

# One import source: the labels CSV, interpreted with the object-tracking import schema.
import_config = {
    "gcs_source": {"uris": [IMPORT_FILE]},
    "import_schema_uri": LABEL_SCHEMA,
}

# Preview the request as JSON. `name` must be the dataset's full resource name
# (projects/.../locations/.../datasets/...), not the short numeric ID, so the
# preview shows the request in the form the service expects.
print(
    MessageToJson(
        aip.ImportDataRequest(
            name=dataset_id, import_configs=[import_config]
        ).__dict__["_pb"]
    )
)

{
  "name": "7534187925055995904",
  "importConfigs": [
    {
      "gcsSource": {
        "uris": [
          "gs://automl-video-demo-data/traffic_videos/traffic_videos_labels.csv"
        ]
      },
      "importSchemaUri": "gs://google-cloud-aiplatform/schema/dataset/ioformat/video_object_tracking_io_format_1.0.0.yaml"
    }
  ]
}

#### 调用

In [None]:
# Start importing the labeled videos into the dataset, addressed by its full resource name.
request = clients["dataset"].import_data(
    name=dataset_id, import_configs=[import_config]
)

#### 回应

In [None]:
# Obtain the result of the import operation and print it as JSON.
result = request.result()

print(MessageToJson(result.__dict__["_pb"]))

示例输出：
```
{}
```

## 训练一个模型

### [projects.locations.trainingPipelines.create](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.trainingPipelines/create)

### [projects.locations.trainingPipelines.create](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.trainingPipelines/create)

#### 请求

In [None]:
TRAINING_SCHEMA = TRAINING_VIDEO_OBJECT_TRACKING_SCHEMA

# Task inputs: a protobuf struct with the single field model_type set to "CLOUD".
task = Value(
    struct_value=Struct(
        fields={"model_type": Value(string_value="CLOUD")},
    )
)

# Pipeline definition: train on the imported dataset with an 80/20 train/test
# split and upload the resulting model under the same display name.
training_pipeline = dict(
    display_name="traffic_" + TIMESTAMP,
    training_task_definition=TRAINING_SCHEMA,
    training_task_inputs=task,
    input_data_config=dict(
        dataset_id=dataset_short_id,
        fraction_split=dict(training_fraction=0.8, test_fraction=0.2),
    ),
    model_to_upload=dict(display_name="traffic_" + TIMESTAMP),
)

# Preview the request as JSON before sending it.
print(
    MessageToJson(
        aip.CreateTrainingPipelineRequest(
            parent=PARENT,
            training_pipeline=training_pipeline,
        ).__dict__["_pb"]
    )
)

*示例输出*：
```
{
  "parent": "projects/migration-ucaip-training/locations/us-central1",
  "trainingPipeline": {
    "displayName": "traffic_20210310013516",
    "inputDataConfig": {
      "datasetId": "7534187925055995904",
      "fractionSplit": {
        "trainingFraction": 0.8,
        "testFraction": 0.2
      }
    },
    "trainingTaskDefinition": "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_video_object_tracking_1.0.0.yaml",
    "trainingTaskInputs": {
      "model_type": "CLOUD"
    },
    "modelToUpload": {
      "displayName": "traffic_20210310013516"
    }
  }
}
```

#### 调用

In [None]:
# Submit the training pipeline; the response is printed and its name read in the following cells.
request = clients["pipeline"].create_training_pipeline(
    parent=PARENT, training_pipeline=training_pipeline
)

#### 响应

In [None]:
# Print the returned training pipeline as JSON.
print(MessageToJson(request.__dict__["_pb"]))

{
  "name": "projects/116273516712/locations/us-central1/trainingPipelines/4612961451915608064",
  "displayName": "traffic_20210310013516",
  "inputDataConfig": {
    "datasetId": "7534187925055995904",
    "fractionSplit": {
      "trainingFraction": 0.8,
      "testFraction": 0.2
    }
  },
  "trainingTaskDefinition": "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_video_object_tracking_1.0.0.yaml",
  "trainingTaskInputs": {
    "modelType": "CLOUD"
  },
  "modelToUpload": {
    "displayName": "traffic_20210310013516"
  },
  "state": "PIPELINE_STATE_PENDING",
  "createTime": "2021-03-10T13:09:36.473816Z",
  "updateTime": "2021-03-10T13:09:36.473816Z"
}

In [None]:
# Full resource name of the training pipeline, as reported by the service
training_pipeline_id = request.name
# Short ID: the final path segment of the resource name
training_pipeline_short_id = training_pipeline_id.rsplit("/", 1)[-1]

print(training_pipeline_id)

### [projects.locations.trainingPipelines.get](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.trainingPipelines/get)
### [projects.locations.trainingPipelines.get](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.trainingPipelines/get)

#### 调用

In [None]:
# Fetch the current state of the training pipeline by its full resource name.
request = clients["pipeline"].get_training_pipeline(name=training_pipeline_id)

#### 回应

In [None]:
# Print the fetched training pipeline as JSON.
print(MessageToJson(request.__dict__["_pb"]))

{
  "name": "projects/116273516712/locations/us-central1/trainingPipelines/4612961451915608064",
  "displayName": "traffic_20210310013516",
  "inputDataConfig": {
    "datasetId": "7534187925055995904",
    "fractionSplit": {
      "trainingFraction": 0.8,
      "testFraction": 0.2
    }
  },
  "trainingTaskDefinition": "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_video_object_tracking_1.0.0.yaml",
  "trainingTaskInputs": {
    "modelType": "CLOUD"
  },
  "modelToUpload": {
    "displayName": "traffic_20210310013516"
  },
  "state": "PIPELINE_STATE_PENDING",
  "createTime": "2021-03-10T13:09:36.473816Z",
  "updateTime": "2021-03-10T13:09:36.473816Z"
}

In [None]:
# Poll the training pipeline once a minute until it reaches a terminal state.
# model_id stays None unless training succeeds, so the final print (and any
# re-run of this cell) never sees an undefined or stale value.
model_id = None
while True:
    response = clients["pipeline"].get_training_pipeline(name=training_pipeline_id)
    if response.state == aip.PipelineState.PIPELINE_STATE_SUCCEEDED:
        model_id = response.model_to_upload.name
        print("Training Time:", response.end_time - response.start_time)
        break
    print("Training job has not completed:", response.state)
    model_to_deploy_name = None
    # FAILED and CANCELLED are terminal: stop polling instead of looping forever.
    if response.state in (
        aip.PipelineState.PIPELINE_STATE_FAILED,
        aip.PipelineState.PIPELINE_STATE_CANCELLED,
    ):
        print("Training did not succeed; no model is available.")
        break
    time.sleep(60)

print(model_id)

## 评估模型

### [projects.locations.models.evaluations.list](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.models.evaluations/list)

### [projects.locations.models.evaluations.list](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.models.evaluations/list)

#### 调用

In [None]:
# List the evaluations of the trained model; the result is iterated in the next cell.
request = clients["model"].list_model_evaluations(parent=model_id)

#### 回复

In [None]:
import json

# Read every evaluation from the response exactly once. Indexing the response
# object again after iterating it would only look at whichever page was fetched last.
evaluations = list(request)
if not evaluations:
    raise RuntimeError("No model evaluations were returned for this model.")

model_evaluations = [json.loads(MessageToJson(me.__dict__["_pb"])) for me in evaluations]
# The evaluation slice: resource name of the first evaluation returned
evaluation_slice = evaluations[0].name

print(json.dumps(model_evaluations, indent=2))

```
[
  {
    "name": "projects/116273516712/locations/us-central1/models/6125898247828406272/evaluations/305090287452028928",
    "metricsSchemaUri": "gs://google-cloud-aiplatform/schema/modelevaluation/video_object_tracking_metrics_1.0.0.yaml",
    "metrics": {
      "boundingBoxMetrics": [
        {
          "meanAveragePrecision": 0.34263912,
          "iouThreshold": 0.5,
          "confidenceMetrics": [
            {
              "precision": 0.36842105,
              "recall": 1.0,
              "f1Score": 0.53846157
            },
            {
              "precision": 0.088,
              "confidenceThreshold": 0.032954127,
              "recall": 0.16541353,
              "f1Score": 0.11488251
            },
            {
              "precision": 0.08835341,
              "confidenceThreshold": 0.035069585,
              "recall": 0.16541353,
              "f1Score": 0.11518325
            },
            {
              "precision": 0.088709675,
              "recall": 0.16541353,
              "confidenceThreshold": 0.036181003,
              "f1Score": 0.115485564
            },
            {
              "recall": 0.16541353,
              "f1Score": 0.11578947,
              "confidenceThreshold": 0.037186295,
              "precision": 0.08906882
            },
            {
              "recall": 0.16541353,
              "precision": 0.08943089,
              "confidenceThreshold": 0.038205147,
              "f1Score": 0.116094984
            },
            
            # 省略部分内容
            
            {
              "recall": 0.007518797,
              "precision": 1.0,
              "confidenceThreshold": 0.66486305,
              "f1Score": 0.014925373
            },
            {
              "precision": 1.0,
              "confidenceThreshold": 1.0
            }
          ]
        }
      ],
      "boundingBoxMeanAveragePrecision": 0.34263912
    },
    "createTime": "2021-03-10T14:18:31.880535Z",
    "sliceDimensions": [
      "annotationSpec"
    ]
  }
]
```

### [projects.locations.models.evaluations.get](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.models.evaluations/get)

### [projects.locations.models.evaluations.get](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.models.evaluations/get)

#### 调用

In [None]:
# Fetch the single evaluation selected in the previous step.
request = clients["model"].get_model_evaluation(name=evaluation_slice)

#### 回复

In [None]:
# Print the fetched model evaluation as JSON.
print(MessageToJson(request.__dict__["_pb"]))

{
"name": "projects/116273516712/locations/us-central1/models/6125898247828406272/evaluations/305090287452028928",
"metricsSchemaUri": "gs://google-cloud-aiplatform/schema/modelevaluation/video_object_tracking_metrics_1.0.0.yaml",
"metrics": {
    "boundingBoxMetrics": [
      {
        "confidenceMetrics": [
          {
            "recall": 1.0,
            "precision": 0.36842105,
            "f1Score": 0.53846157
          },
          {
            "recall": 0.16541353,
            "precision": 0.088,
            "f1Score": 0.11488251,
            "confidenceThreshold": 0.032954127
          },
          {
            "confidenceThreshold": 1.0,
            "precision": 1.0
          }
        ],
        "meanAveragePrecision": 0.34263912,
        "iouThreshold": 0.5
      }
    ],
    "boundingBoxMeanAveragePrecision": 0.34263912
  },
"createTime": "2021-03-10T14:18:31.880535Z",
"sliceDimensions": [
    "annotationSpec"
  ]
}

## 进行批量预测

### 准备批量预测数据

In [None]:
test_items = ! gsutil cat $IMPORT_FILE | head -n25

cols_1 = test_items[0].split(",")
cols_2 = test_items[-1].split(",")

if len(cols_1) > 12:
    test_item_1 = str(cols_1[1])
    test_item_2 = str(cols_2[1])
    test_label_1 = str(cols_1[5:])
    test_label_2 = str(cols_2[5:])
else:
    test_item_1 = str(cols_1[0])
    test_item_2 = str(cols_2[0])
    test_label_1 = str(cols_1[4:])
    test_label_2 = str(cols_2[4:])


print(test_item_1, test_label_1)
print(test_item_2, test_label_2)

*示例输出*：
```
gs://automl-video-demo-data/traffic_videos/highway_005.mp4 ['0.509205', '0.594283', '', '', '0.728737', '0.760959', '', '']
gs://automl-video-demo-data/traffic_videos/highway_006.mp4 ['0.621857', '0.561570', '', '', '0.825726', '0.699151', '', '']
```

### 制作批量输入文件

现在让我们制作一个批量输入文件，您可以将其存储在本地的云存储存储桶中。批量输入文件可以是CSV或JSONL格式。在本教程中，您将使用JSONL文件。对于JSONL文件，您需要为每个视频的每一行创建一个字典条目。字典包含以下键值对：

- `content`：视频的云存储路径。
- `mimeType`：内容类型。在我们的示例中，是一个`avi`文件。
- `timeSegmentStart`：要进行预测的视频中的起始时间戳。*注意*，时间戳必须以字符串形式指定，后跟s（秒）、m（分钟）或h（小时）。
- `timeSegmentEnd`：要进行预测的视频中的结束时间戳。

In [None]:
import json

import tensorflow as tf

gcs_input_uri = "gs://" + BUCKET_NAME + "/test.jsonl"
with tf.io.gfile.GFile(gcs_input_uri, "w") as f:
    data = {
        "content": test_item_1,
        "mimeType": "video/avi",
        "timeSegmentStart": "0.0s",
        "timeSegmentEnd": "inf",
    }
    f.write(json.dumps(data) + "\n")
    data = {
        "content": test_item_2,
        "mimeType": "video/avi",
        "timeSegmentStart": "0.0s",
        "timeSegmentEnd": "inf",
    }
    f.write(json.dumps(data) + "\n")

print(gcs_input_uri)
!gsutil cat $gcs_input_uri

*示例输出*：
```
gs://migration-ucaip-trainingaip-20210310013516/test.jsonl
{"content": "gs://automl-video-demo-data/traffic_videos/highway_005.mp4", "mimeType": "video/avi", "timeSegmentStart": "0.0s", "timeSegmentEnd": "inf"}
{"content": "gs://automl-video-demo-data/traffic_videos/highway_006.mp4", "mimeType": "video/avi", "timeSegmentStart": "0.0s", "timeSegmentEnd": "inf"}
```

### [projects.locations.batchPredictionJobs.create](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.batchPredictionJobs/create)

### [projects.locations.batchPredictionJobs.create](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.batchPredictionJobs/create)

#### 请求

In [None]:
# Batch prediction job: read the JSONL instances from the bucket, write JSONL
# predictions under batch_output/, and run on a single n1-standard-4 replica.
batch_prediction_job = {
    "display_name": f"traffic_{TIMESTAMP}",
    # Format: 'projects/{project}/locations/{location}/models/{model_id}'
    "model": model_id,
    "model_parameters": json_format.ParseDict(
        {"confidenceThreshold": 0.5, "maxPredictions": 2},
        Value(),
    ),
    "input_config": {
        "instances_format": "jsonl",
        "gcs_source": {"uris": [gcs_input_uri]},
    },
    "output_config": {
        "predictions_format": "jsonl",
        "gcs_destination": {
            "output_uri_prefix": f"gs://{BUCKET_NAME}/batch_output/",
        },
    },
    "dedicated_resources": {
        "machine_spec": {"machine_type": "n1-standard-4", "accelerator_count": 0},
        "starting_replica_count": 1,
        "max_replica_count": 1,
    },
}

# Preview the request as JSON before sending it.
print(
    MessageToJson(
        aip.CreateBatchPredictionJobRequest(
            parent=PARENT,
            batch_prediction_job=batch_prediction_job,
        ).__dict__["_pb"]
    )
)

```
{
  "parent": "projects/migration-ucaip-training/locations/us-central1",
  "batchPredictionJob": {
    "displayName": "traffic_20210310013516",
    "model": "projects/116273516712/locations/us-central1/models/6125898247828406272",
    "inputConfig": {
      "instancesFormat": "jsonl",
      "gcsSource": {
        "uris": [
          "gs://migration-ucaip-trainingaip-20210310013516/test.jsonl"
        ]
      }
    },
    "modelParameters": {
      "maxPredictions": 2.0,
      "confidenceThreshold": 0.5
    },
    "outputConfig": {
      "predictionsFormat": "jsonl",
      "gcsDestination": {
        "outputUriPrefix": "gs://migration-ucaip-trainingaip-20210310013516/batch_output/"
      }
    },
    "dedicatedResources": {
      "machineSpec": {
        "machineType": "n1-standard-4"
      },
      "startingReplicaCount": 1,
      "maxReplicaCount": 1
    }
  }
}
```

#### 调用

In [None]:
# Submit the batch prediction job; the response is printed and its name read in the following cells.
request = clients["job"].create_batch_prediction_job(
    parent=PARENT, batch_prediction_job=batch_prediction_job
)

#### 回应

In [None]:
# Print the returned batch prediction job as JSON.
print(MessageToJson(request.__dict__["_pb"]))

{
  "name": "projects/116273516712/locations/us-central1/batchPredictionJobs/6806214470445039616",
  "displayName": "traffic_20210310013516",
  "model": "projects/116273516712/locations/us-central1/models/6125898247828406272",
  "inputConfig": {
    "instancesFormat": "jsonl",
    "gcsSource": {
      "uris": [
        "gs://migration-ucaip-trainingaip-20210310013516/test.jsonl"
      ]
    }
  },
  "modelParameters": {
    "maxPredictions": 2.0,
    "confidenceThreshold": 0.5
  },
  "outputConfig": {
    "predictionsFormat": "jsonl",
    "gcsDestination": {
      "outputUriPrefix": "gs://migration-ucaip-trainingaip-20210310013516/batch_output/"
    }
  },
  "state": "JOB_STATE_PENDING",
  "completionStats": {
    "incompleteCount": "-1"
  },
  "createTime": "2021-03-10T14:23:59.862541Z",
  "updateTime": "2021-03-10T14:23:59.862541Z"
}

In [None]:
# Full resource name of the batch job, as reported by the service
batch_job_id = request.name
# Short ID: the final path segment of the resource name
batch_job_short_id = batch_job_id.rsplit("/", 1)[-1]

print(batch_job_id)

### [projects.locations.batchPredictionJobs.get](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.batchPredictionJobs/get)

### [projects.locations.batchPredictionJobs.get](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.batchPredictionJobs/get)

#### 调用

In [None]:
# Fetch the current state of the batch prediction job by its full resource name.
request = clients["job"].get_batch_prediction_job(name=batch_job_id)

#### 回复

In [None]:
# Print the fetched batch prediction job as JSON.
print(MessageToJson(request.__dict__["_pb"]))

{
  "name": "projects/116273516712/locations/us-central1/batchPredictionJobs/6806214470445039616",
  "displayName": "traffic_20210310013516",
  "model": "projects/116273516712/locations/us-central1/models/6125898247828406272",
  "inputConfig": {
    "instancesFormat": "jsonl",
    "gcsSource": {
      "uris": [
        "gs://migration-ucaip-trainingaip-20210310013516/test.jsonl"
      ]
    }
  },
  "modelParameters": {
    "maxPredictions": 2.0,
    "confidenceThreshold": 0.5
  },
  "outputConfig": {
    "predictionsFormat": "jsonl",
    "gcsDestination": {
      "outputUriPrefix": "gs://migration-ucaip-trainingaip-20210310013516/batch_output/"
    }
  },
  "state": "JOB_STATE_RUNNING",
  "completionStats": {
    "incompleteCount": "2"
  },
  "createTime": "2021-03-10T14:23:59.862541Z",
  "startTime": "2021-03-10T14:24:00.012555Z",
  "updateTime": "2021-03-10T14:24:00.520535Z"
}

In [None]:
def get_latest_predictions(gcs_out_dir):
    """ Get the latest prediction subfolder using the timestamp in the subfolder name"""
    folders = !gsutil ls $gcs_out_dir
    latest = ""
    for folder in folders:
        subfolder = folder.split("/")[-2]
        if subfolder.startswith("prediction-"):
            if subfolder > latest:
                latest = folder[:-1]
    return latest


while True:
    response = clients["job"].get_batch_prediction_job(name=batch_job_id)
    if response.state != aip.JobState.JOB_STATE_SUCCEEDED:
        print("The job has not completed:", response.state)
        if response.state == aip.JobState.JOB_STATE_FAILED:
            break
    else:
        folder = get_latest_predictions(
            response.output_config.gcs_destination.output_uri_prefix
        )
        ! gsutil ls $folder/prediction**

        ! gsutil cat $folder/prediction**
        break
    time.sleep(60)

示例输出：
```
gs://migration-ucaip-trainingaip-20210310013516/batch_output/prediction-traffic_20210310013516-2021-03-10T14:23:59.650333Z/predictions_00001.jsonl
gs://migration-ucaip-trainingaip-20210310013516/batch_output/prediction-traffic_20210310013516-2021-03-10T14:23:59.650333Z/predictions_00002.jsonl
{"instance":{"content":"gs://automl-video-demo-data/traffic_videos/highway_005.mp4","mimeType":"video/avi","timeSegmentStart":"0.0s","timeSegmentEnd":"inf"},"prediction":[{"id":"6593913068772655104","displayName":"pickup_suv_van","timeSegmentStart":"35.300s","timeSegmentEnd":"35.900s","confidence":0.5733388,"frames":[{"timeOffset":"35.300s","xMin":0.92079705,"xMax":1.0,"yMin":0.52717,"yMax":0.6845663},{"timeOffset":"35.400s","xMin":0.86325824,"xMax":0.999999,"yMin":0.5247129,"yMax":0.68186224},{"timeOffset":"35.500s","xMin":0.79357177,"xMax":0.99050075,"yMin":0.5186033,"yMax":0.68388295},{"timeOffset":"35.600s","xMin":0.7312134,"xMax":0.935794,"yMin":0.5121129,"yMax":0.68021643},{"timeOffset":"35.700s","xMin":0.6609115,"xMax":0.8773811,"yMin":0.50215065,"yMax":0.6793843},{"timeOffset":"35.800s","xMin":0.593415,"xMax":0.816827,"yMin":0.4967009,"yMax":0.677144},{"timeOffset":"35.900s","xMin":0.51087815,"xMax":0.75138736,"yMin":0.4922755,"yMax":0.6732076}]},{"id":"6593913068772655104","displayName":"pickup_suv_van","timeSegmentStart":"46.900s","timeSegmentEnd":"47.900s","confidence":0.5721089,"frames":[{"timeOffset":"46.900s","xMin":0.8938238,"xMax":1.0,"yMin":0.53446406,"yMax":0.6901284},{"timeOffset":"47s","xMin":0.81357133,"xMax":0.99999857,"yMin":0.5293013,"yMax":0.6901114},{"timeOffset":"47.100s","xMin":0.733154,"xMax":0.94289106,"yMin":0.52340066,"yMax":0.69049513},{"timeOffset":"47.200s","xMin":0.6544491,"xMax":0.87583166,"yMin":0.51654726,"yMax":0.68932515},{"timeOffset":"47.300s","xMin":0.56814355,"xMax":0.7984497,"yMin":0.50629544,"yMax":0.6880638},{"timeOffset":"47.400s","xMin":0.47772846,"xMax":0.7148553,"yMin":0.49765483,"yMax":0.68850183},{"timeOffset":"47.500s","xMin":0.3756373,"xMax":0.6187881,"yMin":0.49258503,"yMax":0.68797386},{"timeOffset":"47.600s","xMin":0.28856453,"xMax":0.5317154,"yMin":0.48884195,"yMax":0.6842308},{"timeOffset":"47.700s","xMin":0.2014918,"xMax":0.44464266,"yMin":0.48509887,"yMax":0.6804877},{"timeOffset":"47.800s","xMin":0.1144
1906,"xMax":0.35756993,"yMin":0.48135576,"yMax":0.6767446},{"timeOffset":"47.900s","xMin":0.0075410376,"xMax":0.16627955,"yMin":0.4600201,"yMax":0.67653906}]},{"id":"6593913068772655104","displayName":"pickup_suv_van","timeSegmentStart":"30.700s","timeSegmentEnd":"31.800s","confidence":0.5617838,"frames":[{"timeOffset":"30.700s","xMin":0.9150882,"xMax":1.0,"yMin":0.5151331,"yMax":0.6645079},{"timeOffset":"30.800s","xMin":0.86258525,"xMax":0.99999905,"yMin":0.5172026,"yMax":0.6603533},{"timeOffset":"30.900s","xMin":0.801704,"xMax":0.9690958,"yMin":0.5114948,"yMax":0.65620536},{"timeOffset":"31s","xMin":0.7319551,"xMax":0.90261525,"yMin":0.5059969,"yMax":0.65359175},{"timeOffset":"31.100s","xMin":0.66124815,"xMax":0.83811176,"yMin":0.49920097,"yMax":0.65197486},{"timeOffset":"31.200s","xMin":0.5840557,"xMax":0.77017546,"yMin":0.49158275,"yMax":0.6492404},{"timeOffset":"31.300s","xMin":0.49507296,"xMax":0.7009768,"yMin":0.48103508,"yMax":0.6466039},{"timeOffset":"31.400s","xMin":0.405905,"xMax":0.61765563,"yMin":0.4749309,"yMax":0.6409255},{"timeOffset":"31.500s","xMin":0.3119387,"xMax":0.5226898,"yMin":0.46739954,"yMax":0.639586},{"timeOffset":"31.600s","xMin":0.21487714,"xMax":0.4266367,"yMin":0.46247935,"yMax":0.6337805},{"timeOffset":"31.700s","xMin":0.104759425,"xMax":0.32538062,"yMin":0.4524644,"yMax":0.63389504},{"timeOffset":"31.800s","xMin":0.018637476,"xMax":0.18262094,"yMin":0.438457,"yMax":0.6318268}]},{"id":"6593913068772655104","displayName":"pickup_suv_van","timeSegmentStart":"19.200s","timeSegmentEnd":"20.400s","confidence":0.5343286,"frames":[{"timeOffset":"19.200s","xMin":0.53855574,"xMax":0.7483454,"yMin":0.7698453,"yMax":0.88802785},{"timeOffset":"19.300s","xMin":0.7066706,"xMax":0.9046804,"yMin":0.5262296,"yMax":0.68281764},{"timeOffset":"19.400s","xMin":0.6643509,"xMax":0.8667256,"yMin":0.52155364,"yMax":0.683776},{"timeOffset":"19.500s","xMin":0.60929006,"xMax":0.81281793,"yMin":0.51661617,"yMax":0.6828631},{"timeOffset":"19.600s","xMin":0.545796
7,"xMax":0.75832534,"yMin":0.51252514,"yMax":0.6830068},{"timeOffset":"19.700s","xMin":0.48583922,"xMax":0.7025621,"yMin":0.5050785,"yMax":0.6833887},{"timeOffset":"19.800s","xMin":0.42353436,"xMax":0.65267944,"yMin":0.499186,"yMax":0.68389857},{"timeOffset":"19.900s","xMin":0.36298347,"xMax":0.5871576,"yMin":0.49524042,"yMax":0.68120617},{"timeOffset":"20s","xMin":0.28549758,"xMax":0.51240987,"yMin":0.48895967,"yMax":0.6785187},{"timeOffset":"20.100s","xMin":0.20944653,"xMax":0.439814,"yMin":0.48409462,"yMax":0.6736581},{"timeOffset":"20.200s","xMin":0.12498511,"xMax":0.3544973,"yMin":0.47413808,"yMax":0.6721386},{"timeOffset":"20.300s","xMin":0.047281284,"xMax":0.26844072,"yMin":0.46798372,"yMax":0.66819036},{"timeOffset":"20.400s","xMin":-9.64848E-4,"xMax":0.1562836,"yMin":0.45929697,"yMax":0.66720545}]},{"id":"6593913068772655104","displayName":"pickup_suv_van","timeSegmentStart":"21.100s","timeSegmentEnd":"22.500s","confidence":0.5130946,"frames":[{"timeOffset":"21.100s","xMin":0.96134996,"xMax":0.9994743,"yMin":0.5534551,"yMax":0.69061935},{"timeOffset":"21.200s","xMin":0.70565313,"xMax":0.9051543,"yMin":0.52271163,"yMax":0.68131983},{"timeOffset":"21.300s","xMin":0.66035485,"xMax":0.8695388,"yMin":0.5142326,"yMax":0.68292105},{"timeOffset":"21.400s","xMin":0.60553956,"xMax":0.81872016,"yMin":0.5056492,"yMax":0.6834419},{"timeOffset":"21.500s","xMin":0.54476696,"xMax":0.7676598,"yMin":0.5011589,"yMax":0.6836144},{"timeOffset":"21.600s","xMin":0.48482427,"xMax":0.7100822,"yMin":0.49725318,"yMax":0.68293834},{"timeOffset":"21.700s","xMin":0.42285097,"xMax":0.65375054,"yMin":0.49218974,"yMax":0.6809865},{"timeOffset":"21.800s","xMin":0.35869005,"xMax":0.5910624,"yMin":0.48708224,"yMax":0.68020374},{"timeOffset":"21.900s","xMin":0.29066974,"xMax":0.52481556,"yMin":0.4808314,"yMax":0.68033195},{"timeOffset":"22s","xMin":0.22343048,"xMax":0.460608,"yMin":0.4752792,"yMax":0.68010074},{"timeOffset":"22.100s","xMin":0.15115453,"xMax":0.39462602,"yMin":0.46882644,"yMax"
:0.67740893},{"timeOffset":"22.200s","xMin":0.07956007,"xMax":0.32599604,"yMin":0.46620452,"yMax":0.6739611},{"timeOffset":"22.300s","xMin":0.019409377,"xMax":0.20294939,"yMin":0.4610349,"yMax":0.67052174},{"timeOffset":"22.400s","xMin":-0.017409453,"xMax":0.13916901,"yMin":0.45393682,"yMax":0.66754013},{"timeOffset":"22.500s","xMin":-0.02455799,"xMax":0.0710371,"yMin":0.45522687,"yMax":0.66398114}]},{"id":"6593913068772655104","displayName":"pickup_suv_van","timeSegmentStart":"7.200s","timeSegmentEnd":"7.800s","confidence":0.50423145,"frames":[{"timeOffset":"7.200s","xMin":0.77211607,"xMax":0.9202541,"yMin":0.55106527,"yMax":0.67469126},{"timeOffset":"7.300s","xMin":0.9063811,"xMax":0.9998785,"yMin":0.52542484,"yMax":0.68584275},{"timeOffset":"7.400s","xMin":0.8464132,"xMax":0.9988885,"yMin":0.5182979,"yMax":0.6811758},{"timeOffset":"7.500s","xMin":0.7691617,"xMax":0.9857696,"yMin":0.5142189,"yMax":0.6805917},{"timeOffset":"7.600s","xMin":0.7101507,"xMax":0.91866463,"yMin":0.50963223,"yMax":0.6773922},{"timeOffset":"7.700s","xMin":0.6297891,"xMax":0.85534894,"yMin":0.505215,"yMax":0.6743725},{"timeOffset":"7.800s","xMin":0.5598867,"xMax":0.7586217,"yMin":0.5045158,"yMax":0.67020833}]},{"id":"6593913068772655104","displayName":"pickup_suv_van","timeSegmentStart":"60.800s","timeSegmentEnd":"60.900s","confidence":0.5034977,"frames":[{"timeOffset":"60.800s","xMin":0.6538362,"xMax":0.88029855,"yMin":0.59181464,"yMax":0.769181},{"timeOffset":"60.900s","xMin":0.53221023,"xMax":0.7752164,"yMin":0.58824813,"yMax":0.77228796}]},{"id":"6593913068772655104","displayName":"pickup_suv_van","timeSegmentStart":"49.800s","timeSegmentEnd":"50s","confidence":0.5016138,"frames":[{"timeOffset":"49.800s","xMin":0.5714027,"xMax":0.8481284,"yMin":0.63446057,"yMax":0.88131857},{"timeOffset":"49.900s","xMin":0.3961952,"xMax":0.6958802,"yMin":0.6273483,"yMax":0.8836541},{"timeOffset":"50s","xMin":0.20160168,"xMax":0.50914085,"yMin":0.6205848,"yMax":0.89831626}]}]}
{"instance":{"content":"gs://automl-video-demo-data/traffic_videos/highway_006.mp4","mimeType":"video/avi","timeSegmentStart":"0.0s","timeSegmentEnd":"inf"},"prediction":[{"id":"6593913068772655104","displayName":"pickup_suv_van","timeSegmentStart":"33.600s","timeSegmentEnd":"34.300s","confidence":0.57430255,"frames":[{"timeOffset":"33.600s","xMin":0.7709672,"xMax":0.95710844,"yMin":0.50214607,"yMax":0.663173},{"timeOffset":"33.700s","xMin":0.6879625,"xMax":0.8816852,"yMin":0.5102545,"yMax":0.67594415},{"timeOffset":"33.800s","xMin":0.60038805,"xMax":0.7979881,"yMin":0.5179747,"yMax":0.6842574},{"timeOffset":"33.900s","xMin":0.49664575,"xMax":0.7060626,"yMin":0.5262368,"yMax":0.69601244},{"timeOffset":"34s","xMin":0.40653434,"xMax":0.62025917,"yMin":0.5326814,"yMax":0.7117975},{"timeOffset":"34.100s","xMin":0.29995015,"xMax":0.5285418,"yMin":0.53684795,"yMax":0.7238653},{"timeOffset":"34.200s","xMin":0.18591899,"xMax":0.42276734,"yMin":0.5479096,"yMax":0.7368871},{"timeOffset":"34.300s","xMin":0.06478273,"xMax":0.3098502,"yMin":0.5567089,"yMax":0.7530978}]},{"id":"8899756077986349056","displayName":"large_veh_bus","timeSegmentStart":"42.800s","timeSegmentEnd":"43.600s","confidence":0.5635853,"frames":[{"timeOffset":"42.800s","xMin":0.896148,"xMax":0.997

清理

要清理此项目中使用的所有GCP资源，您可以[删除用于教程的GCP项目](https://cloud.google.com/resource-manager/docs/creating-managing-projects#shutting_down_projects)。

否则，您可以删除在本教程中创建的各个资源。

In [None]:
delete_dataset = True
delete_model = True
delete_pipeline = True
delete_batchjob = True
delete_bucket = True

# Delete the dataset using the Vertex AI fully qualified identifier for the dataset
try:
    if delete_dataset:
        clients["dataset"].delete_dataset(name=dataset_id)
except Exception as e:
    print(e)

# Delete the model using the Vertex AI fully qualified identifier for the model
try:
    if delete_model:
        clients["model"].delete_model(name=model_id)
except Exception as e:
    print(e)

# Delete the training pipeline using the Vertex AI fully qualified identifier for the training pipeline
try:
    if delete_pipeline:
        clients["pipeline"].delete_training_pipeline(name=training_pipeline_id)
except Exception as e:
    print(e)

# Delete the batch job using the Vertex AI fully qualified identifier for the batch job
try:
    if delete_batchjob:
        clients["job"].delete_batch_prediction_job(name=batch_job_id)
except Exception as e:
    print(e)

if delete_bucket and "BUCKET_NAME" in globals():
    ! gsutil rm -r gs://$BUCKET_NAME