In [None]:
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI Model Garden：使用 MediaPipe 进行手势识别

<table align="left">
  <td>
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_mediapipe_gesture_recognition.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Colab logo"> 在 Colab 中运行
    </a>
  </td>

  <td>
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_mediapipe_gesture_recognition.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo">
      在 GitHub 上查看
    </a>
  </td>
  <td>
    <a href="https://console.cloud.google.com/vertex-ai/notebooks/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/main/notebooks/community/model_garden/model_garden_mediapipe_gesture_recognition.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo">
在 Vertex AI Workbench 中打开
    </a>
  </td>
</table>

注意：这个笔记本在以下环境中进行了测试：

* Python版本 = 3.9

注意：此Colab中链接的检查点和数据集不是由谷歌拥有或分发的，而是由第三方提供。在使用检查点和数据之前，请查阅第三方提供的条款和条件。

## 概述

本笔记本演示了如何使用[MediaPipe Model Maker](https://developers.google.com/mediapipe/solutions/model_maker)在Vertex AI Model Garden中训练一个设备上的手势识别模型。

### 目标

* 训练新模型
  * 将输入数据转换为训练格式
  * 创建[自定义作业](https://cloud.google.com/vertex-ai/docs/training/create-custom-job)来训练新模型
  * 导出模型

* 清理资源

### 成本

本教程使用Google Cloud的计费组件：

* Vertex AI
* 云存储

了解[Vertex AI定价](https://cloud.google.com/vertex-ai/pricing)和[云存储定价](https://cloud.google.com/storage/pricing)，并使用[Pricing Calculator](https://cloud.google.com/products/calculator/)基于您的预期使用量生成成本估算。

## 在您开始之前

### 仅限 Colab
运行以下命令来安装依赖项，并在 Colab 上运行时进行 Google Cloud 身份验证。

In [None]:
# Bring pip itself up to date before installing anything else.
! pip3 install --upgrade pip

import sys

# Everything below only applies when the notebook is hosted on Colab.
if "google.colab" in sys.modules:
    # Install or upgrade the Vertex AI SDK for Python.
    ! pip3 install --upgrade google-cloud-aiplatform

    # Automatically restart kernel after installs
    import IPython

    app = IPython.Application.instance()
    app.kernel.do_shutdown(True)

    # NOTE(review): the statements below follow a kernel restart request;
    # confirm they still execute in this session. If they do not, re-run
    # this cell after the restart so that authentication actually happens.
    from google.colab import auth as google_auth

    # Google Cloud authentication for the Colab user.
    google_auth.authenticate_user()

#### 设置您的项目ID

**如果您不知道您的项目ID**，请参考支持页面：[查找项目ID](https://support.google.com/googleapi/answer/7014113)

In [None]:
# Google Cloud project used by this notebook. Replace the placeholder with
# your own project ID before running the cell.
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}

# Set the project id for the gcloud CLI so later shell commands target it.
! gcloud config set project {PROJECT_ID}

#### 区域

您也可以更改 Vertex AI 使用的 `REGION` 变量。 了解有关 [Vertex AI 区域](https://cloud.google.com/vertex-ai/docs/general/locations) 的更多信息。

In [None]:
# Vertex AI location used for the resources created by this notebook.
REGION = "us-central1"  # @param {type: "string"}

# Text before the first hyphen of the region name; it is later used to build
# the URI of the training container image.
REGION_PREFIX = REGION.partition("-")[0]

# Stop early when the region prefix is not one of the three supported ones.
assert REGION_PREFIX in {"us", "europe", "asia"}, (
    f'{REGION} is not supported. It must be prefixed by "us", "asia", or "europe".'
)

#### 创建 Cloud Storage 存储桶

创建一个存储桶来存储中间产物，如数据集。

In [None]:
# Cloud Storage bucket URI that holds intermediate artifacts such as datasets.
# The default embeds PROJECT_ID; replace it with the name of your own bucket.
BUCKET_URI = f"gs://your-bucket-name-{PROJECT_ID}-unique"  # @param {type:"string"}

只有当您的存储桶尚不存在时：运行以下单元格以创建您的云存储存储桶。

In [None]:
# Create the bucket in REGION under PROJECT_ID. Skip this cell when the
# bucket already exists.
! gsutil mb -l {REGION} -p {PROJECT_ID} {BUCKET_URI}

### 导入库

In [None]:
import json
import os
from datetime import datetime

import tensorflow
from google.cloud import aiplatform

### 初始化 Python 的 Vertex AI SDK

为您的项目初始化 Python 的 Vertex AI SDK。

In [None]:
# Timestamp shared by this run's staging paths and (later) its job name.
now = datetime.now().strftime("%Y%m%d-%H%M%S")

# Per-run scratch location inside the bucket.
STAGING_BUCKET = os.path.join(BUCKET_URI, f"temp/{now}")

# Evaluation output: the directory is passed to the training job, and the
# JSON file inside it is read back after training.
EVALUATION_RESULT_OUTPUT_DIRECTORY = os.path.join(STAGING_BUCKET, "evaluation")
EVALUATION_RESULT_OUTPUT_FILE = os.path.join(
    STAGING_BUCKET, "evaluation", "evaluation.json"
)

# Model output: the directory is passed to the training job, and the .task
# file inside it is copied locally after training.
EXPORTED_MODEL_OUTPUT_DIRECTORY = os.path.join(STAGING_BUCKET, "model")
EXPORTED_MODEL_OUTPUT_FILE = os.path.join(
    STAGING_BUCKET, "model", "gesture_recognizer.task"
)

# Point the Vertex AI SDK at the project, region and staging location.
aiplatform.init(
    project=PROJECT_ID,
    location=REGION,
    staging_bucket=STAGING_BUCKET,
)

### 定义训练机器规格

In [None]:
# Display name of the custom job; the run timestamp keeps it distinct per run.
TRAINING_JOB_DISPLAY_NAME = f"mediapipe_gesture_recognizer_{now}"

# Training container image, hosted under the registry for the region prefix.
TRAINING_CONTAINER = (
    f"{REGION_PREFIX}-docker.pkg.dev"
    "/vertex-ai/vertex-vision-model-garden-dockers/mediapipe-train"
)

# Machine and accelerator configuration for the single training replica.
TRAINING_MACHINE_TYPE = "n1-highmem-16"
TRAINING_ACCELERATOR_TYPE = "NVIDIA_TESLA_V100"
TRAINING_ACCELERATOR_COUNT = 2

## 训练您的自定义模型

### 为训练准备输入数据

要为手势识别调整模型，需要一个遵循以下模式的目录结构数据集：`<数据集路径>/<标签名>/<图片名>.*`（例如 `my_custom_dataset/thumbs_up/img12.jpg`）。另外，其中一个标签名必须是 none。none 标签代表任何不属于其他手势分类的手势。

本例使用了一个可在云存储中找到的石头剪刀布数据集样本。

In [None]:
# Cloud Storage location of the training dataset, laid out as
# <dataset_path>/<label_name>/<img_name>.*; the default is the
# rock-paper-scissors sample dataset.
training_data_path = (
    "gs://mediapipe-tasks/gesture_recognizer/rps_data_sample"  # @param {type:"string"}
)

当 Model Maker 加载数据集时，它会运行 MediaPipe Hands 中预先打包的手部检测模型，以检测图像中的手部关键点。任何没有检测到手的图像都将从数据集中剔除。最终的数据集包含从每张图像中提取的手部关键点位置，而不是图像本身。

您可以配置一些选项来确定如何加载数据集：

In [None]:
# A boolean controlling whether to shuffle the dataset. Defaults to true.
# NOTE(review): `shuffle` is assigned again (to False) in the HParams cell
# further down, so this value is overwritten before the training job is
# built -- confirm which value is intended.
shuffle = True  # @param {type:"boolean"}
# A float between 0 and 1 controlling the confidence threshold for hand detection.
# NOTE(review): this value is not part of the training job arguments built
# later in this notebook -- confirm whether it should be passed to the job.
min_detection_confidence = 0.6  # @param {type:"number"}
# Configures how to split the dataset between training, validation and test
# data, as comma-separated fractions. Must sum up to 1.
split_ratio = "0.8,0.1,0.1"  # @param {type:"string"}

### 设置微调选项

您可以通过指定ModelOptions和HParams来自定义模型。ModelOptions包含与模型本身相关的参数，而HParams包含与训练和保存模型相关的参数。

ModelOptions包含这些可定制的参数，这些参数会影响准确性：

In [None]:
# The fraction of the input units to drop. Used in dropout layer.
dropout_rate: float = 0.05  # @param {type:"number"}
# A list of hidden layer widths for the gesture model. Each element
# in the list will create a new hidden layer with the specified width.
# The hidden layers are separated with BatchNorm, Dropout, and ReLU.
layer_widths: str = ""  # @param {type:"string"}

HParams具有以下可自定义参数列表，影响模型精度：

In [None]:
# The learning rate to use for gradient descent training.
learning_rate: float = 0.001  # @param {type:"number"}
# Batch size for training.
batch_size: int = 2  # @param {type:"number"}
# Number of training iterations over the dataset.
epochs: int = 10  # @param {type:"slider", min:0, max:100, step:1}
# An optional integer that indicates the number of training steps per
# epoch. If set to 0, the training pipeline calculates the default
# steps per epoch as the training dataset size divided by batch size.
steps_per_epoch: int = 0  # @param {type:"number"}
# Whether to shuffle the dataset before training
shuffle: bool = False  # @param {type:"boolean"}
# Learning rate decay to use for gradient descent training.
lr_decay: float = 0.99  # @param {type:"number"}
# Gamma parameter for focal loss. Defaults to 2
gamma: float = 2  # @param {type:"number"}

### 运行微调
准备好您的训练数据集和微调选项后，您就可以开始微调过程了。这个过程需要大量资源，可能需要几分钟才能完成。在Vertex AI上使用GPU处理时，下面的微调示例大约需要1-2分钟来对大约500张图片进行训练。

要开始微调过程，请使用以下代码：

In [None]:
# Output locations handed to the training container.
model_export_path = EXPORTED_MODEL_OUTPUT_DIRECTORY
evaluation_result_path = EVALUATION_RESULT_OUTPUT_DIRECTORY

# Model options: `layer_widths` is only sent when a value was supplied.
model_options = dict(dropout_rate=dropout_rate)
if layer_widths:
    model_options.update(layer_widths=layer_widths)

# Hyperparameters: `steps_per_epoch` is only sent when it is non-zero.
hparams = dict(
    learning_rate=learning_rate,
    batch_size=batch_size,
    epochs=epochs,
    shuffle=shuffle,
    lr_decay=lr_decay,
    gamma=gamma,
)
if steps_per_epoch:
    hparams.update(steps_per_epoch=steps_per_epoch)

# One worker pool with a single replica running the training container.
# Model options and hyperparameters travel as JSON-encoded flag values.
worker_pool_specs = [
    dict(
        machine_spec=dict(
            machine_type=TRAINING_MACHINE_TYPE,
            accelerator_type=TRAINING_ACCELERATOR_TYPE,
            accelerator_count=TRAINING_ACCELERATOR_COUNT,
        ),
        replica_count=1,
        container_spec=dict(
            image_uri=TRAINING_CONTAINER,
            command=[],
            args=[
                "--task_name=gesture_recognizer",
                f"--training_data_path={training_data_path}",
                f"--model_export_path={model_export_path}",
                f"--evaluation_result_path={evaluation_result_path}",
                f"--split_ratio={split_ratio}",
                f"--model_options={json.dumps(model_options)}",
                f"--hparams={json.dumps(hparams)}",
            ],
        ),
    )
]

# Describe the custom job, then run it.
training_job = aiplatform.CustomJob(
    display_name=TRAINING_JOB_DISPLAY_NAME,
    project=PROJECT_ID,
    worker_pool_specs=worker_pool_specs,
    staging_bucket=STAGING_BUCKET,
)

training_job.run()

## 评估和导出模型

### 评估性能

在微调模型之后，我们对测试数据集上的训练结果进行评估，通常这是您在训练过程中未使用的原始数据集的一部分。一般认为准确率在0.8至0.9之间是很好的，但是您的用例需求可能会有所不同。您还应该考虑模型能够产生推断的速度。更高的准确率通常会以较长的推断时间为代价。

In [None]:
def get_evaluation_result(evaluation_result_path):
    """Read the accuracy and loss stored in an evaluation result JSON file.

    Args:
        evaluation_result_path: Path of the JSON file to read. It is opened
            with `tensorflow.io.gfile.GFile`.

    Returns:
        An `(accuracy, loss)` tuple taken from the "accuracy" and "loss" keys
        of the JSON document, or `None` when the file cannot be read, cannot
        be parsed, or lacks one of those keys.
    """
    try:
        with tensorflow.io.gfile.GFile(evaluation_result_path, "r") as input_file:
            evaluation_result = json.loads(input_file.read())
        return evaluation_result["accuracy"], evaluation_result["loss"]
    # Catch `Exception` rather than using a bare `except:` so that
    # KeyboardInterrupt and SystemExit are not swallowed.
    except Exception as error:
        print(
            "Evaluation result not found. Your test dataset is likely "
            + "empty. You can adjust the size of your test dataset or adjust "
            + "how you split your dataset."
        )
        # Show the real cause as well; the hint above is only the likely one.
        print("Underlying error: %r" % (error,))
        return None


# Load the metrics written for this run.
evaluation_result = get_evaluation_result(EVALUATION_RESULT_OUTPUT_FILE)

# None means the metrics could not be loaded; there is nothing to report.
if evaluation_result is not None:
    accuracy, loss = evaluation_result
    print("Accuracy:", accuracy)
    print("Loss:", loss)

### 导出模型
在微调和评估模型之后，您可以保存Tensorflow Lite模型，可以在MediaPipe Studio的[手势识别器](https://mediapipe-studio.webapps.google.com/demo/gesture_recognizer)演示中尝试，或者按照[手势识别器任务指南](https://developers.google.com/mediapipe/solutions/vision/gesture_recognizer)将其集成到您的设备应用程序中。导出的模型包含所需的模型元数据，以及分类标签文件。

In [None]:
import sys


def copy_model(model_source, model_dest):
    """Copy a model file by running `gsutil cp` through the notebook shell.

    Args:
        model_source: Source path or URI, interpolated into the command.
        model_dest: Destination path or URI, interpolated into the command.
    """
    ! gsutil cp {model_source} {model_dest}

# Copy the exported model file into the current working directory.
copy_model(EXPORTED_MODEL_OUTPUT_FILE, "gesture_recognizer.task")

# When running on Colab, additionally trigger a download of the local copy.
if "google.colab" in sys.modules:
    from google.colab import files

    files.download("gesture_recognizer.task")

## 清理

In [None]:
# Delete training data and jobs.
if training_job.list(filter=f'display_name="{TRAINING_JOB_DISPLAY_NAME}"'):
    training_job.delete()

!gsutil rm -r {STAGING_BUCKET}