In [None]:
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex SDK：AutoML 视频分类模型

## 安装

安装最新（预览）版本的Vertex SDK。

In [None]:
# Install or upgrade (-U) the google-cloud-aiplatform package into the per-user site-packages (--user).
! pip3 install -U google-cloud-aiplatform --user

安装 *google-cloud-storage* 库。

In [None]:
# Install the google-cloud-storage client library.
! pip3 install google-cloud-storage

### 重新启动内核

安装 Vertex SDK 和 *google-cloud-storage* 之后，您需要重新启动笔记本内核，以便它能够找到这些软件包。

In [None]:
import os

# Restart the kernel so the packages installed above become importable.
# The restart is skipped whenever the AUTORUN environment variable is set
# to a non-empty value.
if os.environ.get("AUTORUN"):
    pass
else:
    import IPython

    app = IPython.Application.instance()
    # True asks the kernel to restart rather than just shut down.
    app.kernel.do_shutdown(True)

## 开始之前

### GPU 运行时

*请确保您在GPU运行时中运行此笔记本（如果您有这个选项）。在Colab中，选择* **Runtime > Change Runtime Type > GPU**

### 设置您的 GCP 项目

**无论您使用的是哪种笔记本环境，以下步骤都是必需的。**

1. [选择或创建一个 GCP 项目](https://console.cloud.google.com/cloud-resource-manager)。当您第一次创建账户时，会获得 $300 的免费信用额度，可用于支付计算/存储费用。

2. [确保您的项目已启用计费功能。](https://cloud.google.com/billing/docs/how-to/modify-project)

3. [启用 Vertex APIs 和 Compute Engine APIs。](https://console.cloud.google.com/flows/enableapi?apiid=ml.googleapis.com,compute_component)

4. [Google Cloud SDK](https://cloud.google.com/sdk) 已经安装在 Google Cloud Notebooks 中。

5. 在下面的单元格中输入您的项目 ID。然后运行该单元格，确保 Cloud SDK 对本笔记本中的所有命令使用正确的项目。

**注意**：Jupyter 会将以 `!` 为前缀的行作为 shell 命令运行，并将以 `$` 为前缀的 Python 变量的值插入到这些命令中。

In [None]:
# GCP project ID used by every gcloud/Vertex AI call below. Leave the
# placeholder to have the next cell look it up from the gcloud configuration.
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}

In [None]:
if PROJECT_ID == "" or PROJECT_ID is None or PROJECT_ID == "[your-project-id]":
    # Get your GCP project id from gcloud
    shell_output = !gcloud config list --format 'value(core.project)' 2>/dev/null
    PROJECT_ID = shell_output[0]
    print("Project ID:", PROJECT_ID)

In [None]:
# Make PROJECT_ID the active project for the gcloud commands that follow.
! gcloud config set project $PROJECT_ID

#### 区域

您还可以更改 “REGION” 变量，该变量用于本笔记本的其余部分操作。 以下是 Vertex AI 支持的区域。我们建议尽可能选择离您最近的区域。

- 美洲： `us-central1`
- 欧洲： `europe-west4`
- 亚太： `asia-east1`

您不能在 Vertex 训练模型时使用多区域存储桶。并非所有区域都支持所有 Vertex 服务。有关每个区域的最新支持信息，请参阅 [Vertex AI 服务的区域支持](https://cloud.google.com/vertex-ai/docs/general/locations)。

In [None]:
# Region used for the bucket location, the API endpoint and the resource parent path.
REGION = "us-central1"  # @param {type: "string"}

#### 时间戳

如果您正在进行实时教程会话，您可能会使用共享的测试账户或项目。为了避免在创建资源时发生用户名冲突，您需要为每个实例会话创建一个时间戳，并将其附加到将在本教程中创建的资源名称上。

In [None]:
from datetime import datetime

# Session-unique suffix (local time, YYYYmmddHHMMSS) appended to resource names
# so that users sharing a project do not collide.
TIMESTAMP = f"{datetime.now():%Y%m%d%H%M%S}"

### 认证您的 GCP 账户

**如果您正在使用 Google 云笔记本**，您的环境已经通过身份验证。跳过这一步。

*注意：如果您正在使用 Vertex 笔记本并运行此单元格，该单元格会自动跳过执行身份验证步骤。*

In [None]:
# Authenticate to Google Cloud unless the notebook is already running on a
# managed Vertex/Cloud Notebooks instance (detected via a marker file).
import os
import sys

# If you are running this notebook in Colab, run this cell and follow the
# instructions to authenticate your Google Cloud account. This provides access
# to your Cloud Storage bucket and lets you submit training jobs and prediction
# requests.

# If on Vertex, then don't execute this code
if not os.path.exists("/opt/deeplearning/metadata/env_version"):
    # google.colab is only present in sys.modules when running inside Colab.
    if "google.colab" in sys.modules:
        from google.colab import auth as google_auth

        google_auth.authenticate_user()

    # If you are running this tutorial in a notebook locally, replace the string
    # below with the path to your service account key and run this cell to
    # authenticate your Google Cloud account.
    else:
        %env GOOGLE_APPLICATION_CREDENTIALS your_path_to_credentials.json

    # Log in to your account on Google Cloud
    # (runs for both the Colab and the local branch above)
    ! gcloud auth login

### 创建一个云存储桶

**无论您使用哪种笔记本环境，以下步骤都是必需的。**

本教程旨在使用公共云存储桶中的训练数据，并为您的批量预测使用本地云存储桶。您也可以使用您自己存储在本地云存储桶中的训练数据。

请在下方设置您的云存储桶名称。它必须在所有云存储桶中是唯一的。

In [None]:
# Name of the Cloud Storage bucket used for batch prediction input/output.
# Leave the placeholder to have the next cell derive a name automatically.
BUCKET_NAME = "[your-bucket-name]"  # @param {type:"string"}

In [None]:
# Derive a bucket name from the project ID and session timestamp when the user
# left BUCKET_NAME empty, unset, or at its placeholder value.
if BUCKET_NAME in ("", None, "[your-bucket-name]"):
    BUCKET_NAME = "".join([PROJECT_ID, "aip-", TIMESTAMP])

只有在您的存储桶不存在时才能运行以下单元格来创建您的云存储存储桶。

In [None]:
# Create the bucket in REGION (-l sets the bucket location).
! gsutil mb -l $REGION gs://$BUCKET_NAME

最后，通过检查云存储桶的内容来验证访问权限：

In [None]:
# List the bucket contents in long format, including all object versions, to confirm access.
! gsutil ls -al gs://$BUCKET_NAME

### 设定变量

接下来，设置一些在本教程中使用的变量。
### 导入库并定义常量

#### 导入 Vertex SDK

将Vertex SDK导入到我们的Python环境中。

In [None]:
import json
import os
import sys
import time

from google.cloud.aiplatform import gapic as aip
from google.protobuf.json_format import MessageToJson, ParseDict
from google.protobuf.struct_pb2 import Value

将以下常量设置为Vertex AI常量：

- `API_ENDPOINT`：用于数据集、模型、作业、流水线和终端服务的Vertex AI API服务端点。
- `PARENT`：用于数据集、模型和终端资源的Vertex AI位置根路径。

In [None]:
# API Endpoint
API_ENDPOINT = "{}-aiplatform.googleapis.com".format(REGION)

# Vertex AI location root path for your dataset, model and endpoint resources
PARENT = "projects/" + PROJECT_ID + "/locations/" + REGION

#### 自动机器学习常量

接下来，设置适用于AutoML视频分类数据集和训练的常量：

- 数据集模式：告诉受管数据集服务数据集的类型是什么。
- 数据标记（注释）模式：告诉受管数据集服务数据是如何被标记（注释）的。
- 数据集训练模式：告诉Vertex AI流水线服务为何种任务（例如分类）训练模型。

In [None]:
# Video Dataset type
VIDEO_SCHEMA = "google-cloud-aiplatform/schema/dataset/metadata/video_1.0.0.yaml"
# Video Labeling type
IMPORT_SCHEMA_VIDEO_CLASSIFICATION = "gs://google-cloud-aiplatform/schema/dataset/ioformat/video_classification_io_format_1.0.0.yaml"
# Video Training task
TRAINING_VIDEO_CLASSIFICATION_SCHEMA = "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_video_classification_1.0.0.yaml"

## 客户端

Vertex SDK 采用客户端/服务器模型。 在您的一侧（Python 脚本），您将创建一个客户端，向服务器（Vertex）发送请求并接收响应。

您在本教程中将使用多个客户端，因此请提前全部设置好它们。

- 数据集服务 用于管理数据集。
- 模型服务 用于管理模型。
- 流水线服务 用于训练。
- 端点服务 用于部署。
- 作业服务 用于批处理作业和自定义训练。
- 预测服务 用于提供服务。 *注意*：预测拥有不同的服务端点。

In [None]:
# client options same for all services
client_options = {"api_endpoint": API_ENDPOINT}


def create_dataset_client():
    client = aip.DatasetServiceClient(client_options=client_options)
    return client


def create_model_client():
    client = aip.ModelServiceClient(client_options=client_options)
    return client


def create_pipeline_client():
    client = aip.PipelineServiceClient(client_options=client_options)
    return client


def create_endpoint_client():
    client = aip.EndpointServiceClient(client_options=client_options)
    return client


def create_prediction_client():
    client = aip.PredictionServiceClient(client_options=client_options)
    return client


def create_job_client():
    client = aip.JobServiceClient(client_options=client_options)
    return client


clients = {}
clients["dataset"] = create_dataset_client()
clients["model"] = create_model_client()
clients["pipeline"] = create_pipeline_client()
clients["endpoint"] = create_endpoint_client()
clients["prediction"] = create_prediction_client()
clients["job"] = create_job_client()

for client in clients.items():
    print(client)

In [None]:
# Cloud Storage path of the CSV index file that is imported into the dataset.
IMPORT_FILE = "gs://automl-video-demo-data/hmdb_split1_5classes_train_inf.csv"

In [None]:
# Preview the first 10 rows of the import CSV.
! gsutil cat $IMPORT_FILE | head -n 10

*示例输出*：
```
gs://automl-video-demo-data/hmdb51/_Rad_Schlag_die_Bank__cartwheel_f_cm_np1_le_med_0.avi,cartwheel,0.0,inf
gs://automl-video-demo-data/hmdb51/Acrobacias_de_un_fenomeno_cartwheel_f_cm_np1_ba_bad_8.avi,cartwheel,0.0,inf
gs://automl-video-demo-data/hmdb51/Acrobacias_de_un_fenomeno_cartwheel_f_cm_np1_fr_bad_3.avi,cartwheel,0.0,inf
gs://automl-video-demo-data/hmdb51/Acrobacias_de_un_fenomeno_cartwheel_f_cm_np1_fr_bad_4.avi,cartwheel,0.0,inf
gs://automl-video-demo-data/hmdb51/Acrobacias_de_un_fenomeno_cartwheel_f_cm_np1_fr_bad_5.avi,cartwheel,0.0,inf
gs://automl-video-demo-data/hmdb51/Bayer__Meisterin_Teresa_Stadler_cartwheel_f_cm_np1_le_med_0.avi,cartwheel,0.0,inf
gs://automl-video-demo-data/hmdb51/Bayer__Meisterin_Teresa_Stadler_cartwheel_f_cm_np1_le_med_2.avi,cartwheel,0.0,inf
gs://automl-video-demo-data/hmdb51/Boden_bung_Spoho_Eignungspr_fung_cartwheel_f_cm_np1_ri_med_2.avi,cartwheel,0.0,inf
gs://automl-video-demo-data/hmdb51/Bodenturnen_2004_cartwheel_f_cm_np1_le_med_0.avi,cartwheel,0.0,inf
gs://automl-video-demo-data/hmdb51/Bodenturnen_2004_cartwheel_f_cm_np1_le_med_4.avi,cartwheel,0.0,inf
```

## 创建数据集

### [projects.locations.datasets.create](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.datasets/create)

### [projects.locations.datasets.create](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.datasets/create)

#### 请求

In [None]:
DATA_SCHEMA = VIDEO_SCHEMA

# Dataset resource to create: a timestamped display name plus the metadata
# schema URI (VIDEO_SCHEMA carries no scheme, so "gs://" is prepended here).
dataset = dict(
    display_name="hmdb_" + TIMESTAMP,
    metadata_schema_uri="gs://" + DATA_SCHEMA,
)

# Preview, as JSON, the request that the create call will send.
create_dataset_request = aip.CreateDatasetRequest(parent=PARENT, dataset=dataset)
print(MessageToJson(create_dataset_request.__dict__["_pb"]))

*示例输出*：
```
{
  "parent": "projects/migration-ucaip-training/locations/us-central1",
  "dataset": {
    "displayName": "hmdb_20210228191029",
    "metadataSchemaUri": "gs://google-cloud-aiplatform/schema/dataset/metadata/video_1.0.0.yaml"
  }
}
```

#### 调用

In [None]:
# Submit the dataset creation; the returned object is resolved with .result()
# in the next cell.
request = clients["dataset"].create_dataset(parent=PARENT, dataset=dataset)

#### 响应

In [None]:
# Block until the create-dataset operation finishes and keep its result.
result = request.result()

# Dump the result as JSON (reads the `_pb` attribute of the returned message,
# presumably the underlying protobuf object).
print(MessageToJson(result.__dict__["_pb"]))

*示例输出*：
```
{
  "name": "projects/116273516712/locations/us-central1/datasets/7952037527982964736",
  "displayName": "hmdb_20210228191029",
  "metadataSchemaUri": "gs://google-cloud-aiplatform/schema/dataset/metadata/video_1.0.0.yaml",
  "labels": {
    "aiplatform.googleapis.com/dataset_metadata_schema": "VIDEO"
  },
  "metadata": {
    "dataItemSchemaUri": "gs://google-cloud-aiplatform/schema/dataset/dataitem/video_1.0.0.yaml"
  }
}
```

In [None]:
# The full unique ID for the dataset
dataset_id = result.name
# The short numeric ID for the dataset
dataset_short_id = dataset_id.split("/")[-1]

print(dataset_id)

# Saved for clean up
dataset = {"name": dataset_id}

### [projects.locations.datasets.import](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.datasets/import)

### [projects.locations.datasets.import](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.datasets/import)

#### 请求

In [None]:
LABEL_SCHEMA = IMPORT_SCHEMA_VIDEO_CLASSIFICATION

# Where the labeled data lives (the CSV index file) and which schema
# describes how its rows are labeled.
import_config = {
    "gcs_source": {"uris": [IMPORT_FILE]},
    "import_schema_uri": LABEL_SCHEMA,
}

# Preview the request as JSON. The fully qualified dataset_id is used so that
# the preview matches the import_data call made below, which passes
# name=dataset_id (the short numeric ID is not what is actually sent).
print(
    MessageToJson(
        aip.ImportDataRequest(
            name=dataset_id, import_configs=[import_config]
        ).__dict__["_pb"]
    )
)

*示例输出*：
```
{
  "name": "7952037527982964736",
  "importConfigs": [
    {
      "gcsSource": {
        "uris": [
          "gs://automl-video-demo-data/hmdb_split1_5classes_train_inf.csv"
        ]
      },
      "importSchemaUri": "gs://google-cloud-aiplatform/schema/dataset/ioformat/video_classification_io_format_1.0.0.yaml"
    }
  ]
}
```

#### 调用

In [None]:
# Start importing the labeled videos into the dataset, addressed by its fully
# qualified name; the returned object is resolved with .result() in the next cell.
request = clients["dataset"].import_data(
    name=dataset_id, import_configs=[import_config]
)

#### 回复

In [None]:
# Block until the import operation finishes.
result = request.result()

# Dump the operation result as JSON.
print(MessageToJson(result.__dict__["_pb"]))

*示例输出*：
```
{}
```

## 训练模型

### [projects.locations.trainingPipelines.create](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.trainingPipelines/create)

### [projects.locations.trainingPipelines.create](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.trainingPipelines/create)

#### 请求

In [None]:
TRAINING_SCHEMA = TRAINING_VIDEO_CLASSIFICATION_SCHEMA

# Training task inputs: an empty dict parsed into a protobuf Value.
task = ParseDict({}, Value())

# Pipeline definition: trains on the imported dataset (referenced by its short
# numeric ID) and uploads the resulting model under the same display name.
training_pipeline = dict(
    display_name="hmdb_" + TIMESTAMP,
    input_data_config=dict(dataset_id=dataset_short_id),
    model_to_upload=dict(display_name="hmdb_" + TIMESTAMP),
    training_task_definition=TRAINING_SCHEMA,
    training_task_inputs=task,
)

# Preview, as JSON, the request that the create call will send.
create_pipeline_request = aip.CreateTrainingPipelineRequest(
    parent=PARENT, training_pipeline=training_pipeline
)
print(MessageToJson(create_pipeline_request.__dict__["_pb"]))

*示例输出*：
```
{
  "parent": "projects/migration-ucaip-training/locations/us-central1",
  "trainingPipeline": {
    "displayName": "hmdb_20210228191029",
    "inputDataConfig": {
      "datasetId": "7952037527982964736"
    },
    "trainingTaskDefinition": "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_video_classification_1.0.0.yaml",
    "trainingTaskInputs": {},
    "modelToUpload": {
      "displayName": "hmdb_20210228191029"
    }
  }
}
```

#### 调用

In [None]:
# Create the training pipeline. The returned object is used directly below
# (printed as JSON, and its .name read) without calling .result().
request = clients["pipeline"].create_training_pipeline(
    parent=PARENT, training_pipeline=training_pipeline
)

#### 回复

In [None]:
# Dump the object returned by create_training_pipeline as JSON.
print(MessageToJson(request.__dict__["_pb"]))

*示例输出*：
```
{
  "name": "projects/116273516712/locations/us-central1/trainingPipelines/3361945917925097472",
  "displayName": "hmdb_20210228191029",
  "inputDataConfig": {
    "datasetId": "7952037527982964736"
  },
  "trainingTaskDefinition": "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_video_classification_1.0.0.yaml",
  "trainingTaskInputs": {},
  "modelToUpload": {
    "displayName": "hmdb_20210228191029"
  },
  "state": "PIPELINE_STATE_PENDING",
  "createTime": "2021-02-28T19:17:39.815377Z",
  "updateTime": "2021-02-28T19:17:39.815377Z"
}
```

In [None]:
# The full unique ID for the training pipeline
training_pipeline_id = request.name
# The short numeric ID for the training pipeline
training_pipeline_short_id = training_pipeline_id.split("/")[-1]

print(training_pipeline_id)

### [projects.locations.trainingPipelines.get](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.trainingPipelines/get)

### [projects.locations.trainingPipelines.get](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.trainingPipelines/get)

#### 调用

In [None]:
# Fetch the current state of the training pipeline by its fully qualified name.
request = clients["pipeline"].get_training_pipeline(name=training_pipeline_id)

#### 响应

In [None]:
# Dump the fetched training pipeline as JSON.
print(MessageToJson(request.__dict__["_pb"]))

*示例输出*：
```
{
  "name": "projects/116273516712/locations/us-central1/trainingPipelines/3361945917925097472",
  "displayName": "hmdb_20210228191029",
  "inputDataConfig": {
    "datasetId": "7952037527982964736"
  },
  "trainingTaskDefinition": "gs://google-cloud-aiplatform/schema/trainingjob/definition/automl_video_classification_1.0.0.yaml",
  "trainingTaskInputs": {},
  "modelToUpload": {
    "displayName": "hmdb_20210228191029"
  },
  "state": "PIPELINE_STATE_RUNNING",
  "createTime": "2021-02-28T19:17:39.815377Z",
  "startTime": "2021-02-28T19:17:40.089331Z",
  "updateTime": "2021-02-28T19:17:40.089331Z"
}
```

In [None]:
# Poll the training pipeline every 20 seconds until it reaches a terminal state.
#
# model_id is pre-bound to None so that it is always defined, even when the
# pipeline does not succeed (previously a failed pipeline left model_id
# undefined and the final print raised NameError).
model_id = None
model_to_deploy_name = None

# Terminal states from which the pipeline can no longer succeed. CANCELLED is
# included so that a cancelled pipeline does not make this loop spin forever.
unsuccessful_states = (
    aip.PipelineState.PIPELINE_STATE_FAILED,
    aip.PipelineState.PIPELINE_STATE_CANCELLED,
)

while True:
    response = clients["pipeline"].get_training_pipeline(name=training_pipeline_id)
    if response.state == aip.PipelineState.PIPELINE_STATE_SUCCEEDED:
        # Resource name of the model uploaded by the pipeline.
        model_id = response.model_to_upload.name
        print("Training Time:", response.end_time - response.start_time)
        break
    print("Training job has not completed:", response.state)
    if response.state in unsuccessful_states:
        # Stop here with an explicit error: later cells need a trained model.
        raise RuntimeError(
            "Training pipeline {} ended in state {}: {}".format(
                training_pipeline_id,
                aip.PipelineState(response.state).name,
                response.error.message,
            )
        )
    time.sleep(20)

print(model_id)

## 评估模型

### [projects.locations.models.evaluations.list](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.models.evaluations/list)

#### 调用

In [None]:
# List the evaluations of the trained model; the result is iterated in the next cell.
request = clients["model"].list_model_evaluations(parent=model_id)

#### 响应

In [None]:
# Materialize every evaluation returned by the listing as a plain dict
# (protobuf -> JSON text -> dict).
model_evaluations = [json.loads(MessageToJson(mel.__dict__["_pb"])) for mel in request]

print(json.dumps(model_evaluations, indent=2))

# Fail with a clear message rather than an IndexError when nothing was returned.
if not model_evaluations:
    raise RuntimeError("No model evaluations were returned for " + str(model_id))

# The evaluation slice
# Taken from the list built above instead of `request.model_evaluations`, which
# relies on the pager forwarding attribute access to whichever response page it
# fetched last.
evaluation_slice = model_evaluations[0]["name"]

```
[
  {
    "name": "projects/116273516712/locations/us-central1/models/5031242063400665088/evaluations/6719412425478635520",
    "metricsSchemaUri": "gs://google-cloud-aiplatform/schema/modelevaluation/classification_metrics_1.0.0.yaml",
    "metrics": {
      "confidenceMetrics": [
        {
          "confidenceThreshold": 0.0891612,
          "precision": 0.2,
          "recall": 1.0,
          "f1Score": 0.33333334
        },
        {
          "recall": 1.0,
          "confidenceThreshold": 0.09073429,
          "precision": 0.20289855,
          "f1Score": 0.33734939
        },
        {
          "recall": 1.0,
          "f1Score": 0.34146342,
          "confidenceThreshold": 0.09176466,
          "precision": 0.20588236
        },
        
        # REMOVED FOR BREVITY
        
        {
          {
            "displayName": "pullup",
            "id": "2856417959264387072"
          },
          {
            "displayName": "golf",
            "id": "5162260968478081024"
          },
          {
            "displayName": "ride_horse",
            "id": "6315182473084928000"
          },
          {
            "displayName": "cartwheel",
            "id": "7468103977691774976"
          }
        ]
      }
    },
    "createTime": "2021-02-28T20:56:43.050002Z",
    "sliceDimensions": [
      "annotationSpec"
    ]
  }
]
```

### [projects.locations.models.evaluations.get](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.models.evaluations/get)

### [projects.locations.models.evaluations.get](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.models.evaluations/get)

#### 调用

In [None]:
# Fetch the single evaluation selected above by its resource name.
request = clients["model"].get_model_evaluation(name=evaluation_slice)

#### 回复

In [None]:
# Dump the fetched model evaluation as JSON.
print(MessageToJson(request.__dict__["_pb"]))

{
  "name": "projects/116273516712/locations/us-central1/models/5031242063400665088/evaluations/6719412425478635520",
  "metricsSchemaUri": "gs://google-cloud-aiplatform/schema/modelevaluation/classification_metrics_1.0.0.yaml",
  "metrics": {
    "confusionMatrix": {
      "rows": [
        [
          14.0,
          0.0,
          0.0,
          0.0,
          0.0
        ],
        [
          0.0,
          14.0,
          0.0,
          0.0,
          0.0
        ],
        [
          0.0,
          0.0,
          14.0,
          0.0,
          0.0
        ],
        [
          0.0,
          0.0,
          0.0,
          14.0,
          0.0
        ],
        [
          0.0,
          0.0,
          0.0,
          0.0,
          14.0
        ]
      ],
      "annotationSpecs": [
        {
          "displayName": "kick_ball",
          "id": "1703496454657540096"
        },
        {
          "displayName": "pullup",
          "id": "2856417959264387072"
        },
        {
          "displayName": "golf",
          "id": "5162260968478081024"
        },
        {
          "displayName": "ride_horse",
          "id": "6315182473084928000"
        },
        {
          "displayName": "cartwheel",
          "id": "7468103977691774976"
        }
      ]
    },
    "confidenceMetrics": [
      {
        "precision": 0.2,
        "recall": 1.0,
        "confidenceThreshold": 0.0891612,
        "f1Score": 0.33333334
      },
      {
        "recall": 1.0,
        "f1Score": 0.33734939,
        "confidenceThreshold": 0.09073429,
        "precision": 0.20289855
      },
      {
        "precision": 0.20588236,
        "f1Score": 0.34146342,
        "confidenceThreshold": 0.09176466,
        "recall": 1.0
      },
      {
        "confidenceThreshold": 0.09279072,
        "f1Score": 0.34739456,
        "precision": 0.2102102,
        "recall": 1.0
      },
      
      # 省略部分
      
      {
        "recall": 0.071428575,
        "f1Score": 0.13333334,
        "precision": 1.0,
        "confidenceThreshold": 0.6023364
      },
      {
        "f1Score": 0.055555556,
        "precision": 1.0,
        "confidenceThreshold": 0.6101756,
        "recall": 0.028571429
      },
      {
        "recall": 0.014285714,
        "precision": 1.0,
        "confidenceThreshold": 0.6113689,
        "f1Score": 0.028169014
      }
    ],
    "auPrc": 1.0
  },
  "createTime": "2021-02-28T20:56:43.050002Z",
  "sliceDimensions": [
    "annotationSpec"
  ]
}

## 进行批量预测

### 生成批量预测文件

In [None]:
test_items = ! gsutil cat $IMPORT_FILE | head -n2

cols = str(test_items[0]).split(",")
test_item_1 = str(cols[0])
test_label_1 = str(cols[1])

print(test_item_1, test_label_1)

cols = str(test_items[1]).split(",")
test_item_2 = str(cols[0])
test_label_2 = str(cols[1])

print(test_item_2, test_label_2)

以下是示例输出：
```
gs://automl-video-demo-data/hmdb51/_Rad_Schlag_die_Bank__cartwheel_f_cm_np1_le_med_0.avi cartwheel
gs://automl-video-demo-data/hmdb51/Acrobacias_de_un_fenomeno_cartwheel_f_cm_np1_ba_bad_8.avi cartwheel
```

### 创建批量输入文件

现在让我们创建一个批量输入文件，您可以将其存储在本地的云存储桶中。批量输入文件可以是 CSV 或 JSONL 格式。在本教程中，您将使用 JSONL 格式。对于 JSONL 文件，您需要为每个视频的每一行创建一个字典条目。该字典包含以下键/值对：

- `content`: 视频的云存储路径。
- `mimeType`: 内容类型。在我们的示例中，它是一个 `avi` 文件。
- `timeSegmentStart`: 视频中要进行预测的开始时间戳。*注意*，时间戳必须以字符串形式指定，并且后面跟着 s (秒), m (分钟) 或 h (小时)。
- `timeSegmentEnd`: 视频中要进行预测的结束时间戳。

In [None]:
import json

import tensorflow as tf

gcs_input_uri = "gs://" + BUCKET_NAME + "/test.jsonl"
with tf.io.gfile.GFile(gcs_input_uri, "w") as f:
    data = {
        "content": test_item_1,
        "mimeType": "video/avi",
        "timeSegmentStart": "0.0s",
        "timeSegmentEnd": "inf",
    }
    f.write(json.dumps(data) + "\n")
    data = {
        "content": test_item_2,
        "mimeType": "video/avi",
        "timeSegmentStart": "0.0s",
        "timeSegmentEnd": "inf",
    }
    f.write(json.dumps(data) + "\n")

print(gcs_input_uri)

!gsutil cat $gcs_input_uri

*示例输出*：
```
gs://migration-ucaip-trainingaip-20210228191029/test.jsonl
{"content": "gs://automl-video-demo-data/hmdb51/_Rad_Schlag_die_Bank__cartwheel_f_cm_np1_le_med_0.avi", "mimeType": "video/avi", "timeSegmentStart": "0.0s", "timeSegmentEnd": "inf"}
{"content": "gs://automl-video-demo-data/hmdb51/Acrobacias_de_un_fenomeno_cartwheel_f_cm_np1_ba_bad_8.avi", "mimeType": "video/avi", "timeSegmentStart": "0.0s", "timeSegmentEnd": "inf"}
```

### [projects.locations.batchPredictionJobs.create](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.batchPredictionJobs/create)

### [projects.locations.batchPredictionJobs.create](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.batchPredictionJobs/create)

#### 请求

In [None]:
# Batch prediction job definition: which model to run, where the JSONL
# instances are read from, the prediction parameters, where results are
# written, and the compute resources requested.
batch_prediction_job = dict(
    display_name="hmdb_" + TIMESTAMP,
    model=model_id,
    input_config=dict(
        instances_format="jsonl",
        gcs_source=dict(uris=[gcs_input_uri]),
    ),
    # Free-form parameters, passed as a protobuf Value.
    model_parameters=ParseDict(
        {
            "confidenceThreshold": 0.5,
            "maxPredictions": 2,
            "segmentClassification": True,
            "shotClassification": True,
            "oneSecIntervalClassification": True,
        },
        Value(),
    ),
    output_config=dict(
        predictions_format="jsonl",
        gcs_destination=dict(output_uri_prefix=f"gs://{BUCKET_NAME}/batch_output/"),
    ),
    dedicated_resources=dict(
        machine_spec=dict(machine_type="n1-standard-2", accelerator_count=0),
        starting_replica_count=1,
        max_replica_count=1,
    ),
)

# Preview, as JSON, the request that the create call will send.
create_batch_job_request = aip.CreateBatchPredictionJobRequest(
    parent=PARENT, batch_prediction_job=batch_prediction_job
)
print(MessageToJson(create_batch_job_request.__dict__["_pb"]))

*示例输出*：
```
{
  "parent": "projects/migration-ucaip-training/locations/us-central1",
  "batchPredictionJob": {
    "displayName": "hmdb_20210228191029",
    "model": "projects/116273516712/locations/us-central1/models/5031242063400665088",
    "inputConfig": {
      "instancesFormat": "jsonl",
      "gcsSource": {
        "uris": [
          "gs://migration-ucaip-trainingaip-20210228191029/test.jsonl"
        ]
      }
    },
    "modelParameters": {
      "segmentClassification": true,
      "maxPredictions": 2.0,
      "shotClassification": true,
      "confidenceThreshold": 0.5,
      "oneSecIntervalClassification": true
    },
    "outputConfig": {
      "predictionsFormat": "jsonl",
      "gcsDestination": {
        "outputUriPrefix": "gs://migration-ucaip-trainingaip-20210228191029/batch_output/"
      }
    },
    "dedicatedResources": {
      "machineSpec": {
        "machineType": "n1-standard-2"
      },
      "startingReplicaCount": 1,
      "maxReplicaCount": 1
    }
  }
}
```

#### 调用

In [None]:
# Create the batch prediction job. The returned object is used directly below
# (printed as JSON, and its .name read) without calling .result().
request = clients["job"].create_batch_prediction_job(
    parent=PARENT, batch_prediction_job=batch_prediction_job
)

#### 响应

In [None]:
# Dump the object returned by create_batch_prediction_job as JSON.
print(MessageToJson(request.__dict__["_pb"]))

*示例输出*：
```
{
  "name": "projects/116273516712/locations/us-central1/batchPredictionJobs/5275975759557558272",
  "displayName": "hmdb_20210228191029",
  "model": "projects/116273516712/locations/us-central1/models/5031242063400665088",
  "inputConfig": {
    "instancesFormat": "jsonl",
    "gcsSource": {
      "uris": [
        "gs://migration-ucaip-trainingaip-20210228191029/test.jsonl"
      ]
    }
  },
  "modelParameters": {
    "oneSecIntervalClassification": true,
    "confidenceThreshold": 0.5,
    "maxPredictions": 2.0,
    "shotClassification": true,
    "segmentClassification": true
  },
  "outputConfig": {
    "predictionsFormat": "jsonl",
    "gcsDestination": {
      "outputUriPrefix": "gs://migration-ucaip-trainingaip-20210228191029/batch_output/"
    }
  },
  "state": "JOB_STATE_PENDING",
  "completionStats": {
    "incompleteCount": "-1"
  },
  "createTime": "2021-02-28T22:47:21.875565Z",
  "updateTime": "2021-02-28T22:47:21.875565Z"
}
```

In [None]:
# The fully qualified ID for the batch job
batch_job_id = request.name
# The short numeric ID for the batch job
batch_job_short_id = batch_job_id.split("/")[-1]

print(batch_job_id)

### [projects.locations.batchPredictionJobs.get](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.batchPredictionJobs/get)

### [projects.locations.batchPredictionJobs.get](https://cloud.google.com/vertex-ai/docs/reference/rest/v1beta1/projects.locations.batchPredictionJobs/get)

#### 调用

In [None]:
# Fetch the current state of the batch prediction job by its fully qualified name.
request = clients["job"].get_batch_prediction_job(name=batch_job_id)

#### 响应

In [None]:
# Dump the fetched batch prediction job as JSON.
print(MessageToJson(request.__dict__["_pb"]))

*示例输出*：
```
{
  "name": "projects/116273516712/locations/us-central1/batchPredictionJobs/5275975759557558272",
  "displayName": "hmdb_20210228191029",
  "model": "projects/116273516712/locations/us-central1/models/5031242063400665088",
  "inputConfig": {
    "instancesFormat": "jsonl",
    "gcsSource": {
      "uris": [
        "gs://migration-ucaip-trainingaip-20210228191029/test.jsonl"
      ]
    }
  },
  "modelParameters": {
    "oneSecIntervalClassification": true,
    "confidenceThreshold": 0.5,
    "shotClassification": true,
    "maxPredictions": 2.0,
    "segmentClassification": true
  },
  "outputConfig": {
    "predictionsFormat": "jsonl",
    "gcsDestination": {
      "outputUriPrefix": "gs://migration-ucaip-trainingaip-20210228191029/batch_output/"
    }
  },
  "state": "JOB_STATE_RUNNING",
  "completionStats": {
    "incompleteCount": "2"
  },
  "createTime": "2021-02-28T22:47:21.875565Z",
  "startTime": "2021-02-28T22:47:22.041508Z",
  "updateTime": "2021-02-28T22:47:22.486289Z"
}
```

In [None]:
def get_latest_predictions(gcs_out_dir):
    """ Get the latest prediction subfolder using the timestamp in the subfolder name"""
    folders = !gsutil ls $gcs_out_dir
    latest = ""
    for folder in folders:
        subfolder = folder.split("/")[-2]
        if subfolder.startswith("prediction-"):
            if subfolder > latest:
                latest = folder[:-1]
    return latest


while True:
    response = clients["job"].get_batch_prediction_job(name=batch_job_id)
    if response.state != aip.JobState.JOB_STATE_SUCCEEDED:
        print("The job has not completed:", response.state)
        if response.state == aip.JobState.JOB_STATE_FAILED:
            break
    else:
        folder = get_latest_predictions(
            response.output_config.gcs_destination.output_uri_prefix
        )
        ! gsutil ls $folder/prediction*.jsonl

        ! gsutil cat $folder/prediction*.jsonl
        break
    time.sleep(60)

*示例输出*：
```
gs://migration-ucaip-trainingaip-20210228191029/batch_output/prediction-hmdb_20210228191029-2021-02-28T22:47:21.701608Z/predictions_00001.jsonl
gs://migration-ucaip-trainingaip-20210228191029/batch_output/prediction-hmdb_20210228191029-2021-02-28T22:47:21.701608Z/predictions_00002.jsonl
{"instance":{"content":"gs://automl-video-demo-data/hmdb51/Acrobacias_de_un_fenomeno_cartwheel_f_cm_np1_ba_bad_8.avi","mimeType":"video/avi","timeSegmentStart":"0.0s","timeSegmentEnd":"inf"},"prediction":[]}
{"instance":{"content":"gs://automl-video-demo-data/hmdb51/_Rad_Schlag_die_Bank__cartwheel_f_cm_np1_le_med_0.avi","mimeType":"video/avi","timeSegmentStart":"0.0s","timeSegmentEnd":"inf"},"prediction":[{"id":"7468103977691774976","displayName":"cartwheel","type":"shot-classification","timeSegmentStart":"0.066666s","timeSegmentEnd":"0.226666s","confidence":0.5290586},{"id":"7468103977691774976","displayName":"cartwheel","type":"one-sec-interval-classification","timeSegmentStart":"1.346666s","timeSegmentEnd":"1.346666s","confidence":0.5290586},{"id":"7468103977691774976","displayName":"cartwheel","type":"segment-classification","timeSegmentStart":"0s","timeSegmentEnd":"2.766667s","confidence":0.52444863},{"id":"7468103977691774976","displayName":"cartwheel","type":"shot-classification","timeSegmentStart":"0.266666s","timeSegmentEnd":"2.226666s","confidence":0.51983875},{"id":"7468103977691774976","displayName":"cartwheel","type":"one-sec-interval-classification","timeSegmentStart":"1.586666s","timeSegmentEnd":"1.586666s","confidence":0.51983875}]}
```

## 清理

要清理此项目中使用的所有GCP资源，您可以删除用于本教程的GCP项目。

否则，您可以删除本教程中创建的各个资源。

In [None]:
delete_dataset = True
delete_model = True
delete_pipeline = True
delete_batchjob = True
delete_bucket = True

# Delete the dataset using the Vertex AI fully qualified identifier for the dataset
try:
    if delete_dataset:
        clients["dataset"].delete_dataset(name=dataset_id)
except Exception as e:
    print(e)

# Delete the model using the Vertex AI fully qualified identifier for the model
try:
    if delete_model:
        clients["model"].delete_model(name=model_id)
except Exception as e:
    print(e)

# Delete the training pipeline using the Vertex AI fully qualified identifier for the training pipeline
try:
    if delete_pipeline:
        clients["pipeline"].delete_training_pipeline(name=training_pipeline_id)
except Exception as e:
    print(e)

# Delete the batch job using the Vertex AI fully qualified identifier for the batch job
try:
    if delete_batchjob:
        clients["job"].delete_batch_prediction_job(name=batch_job_id)
except Exception as e:
    print(e)

if delete_bucket and "BUCKET_NAME" in globals():
    ! gsutil rm -r gs://$BUCKET_NAME