In [None]:
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI Model Garden - NLLB（No Language Left Behind，不让任何语言掉队）

<table align="left">
  <td>
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_pytorch_nllb.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Colab logo"> 在Colab中运行
    </a>
  </td>
  <td>
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_pytorch_nllb.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo">
      在GitHub上查看
    </a>
  </td>
  <td>
    <a href="https://console.cloud.google.com/vertex-ai/notebooks/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/main/notebooks/community/model_garden/model_garden_pytorch_nllb.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo">
在Vertex AI工作台中打开
    </a>
    (建议使用Python-3 CPU笔记本)
  </td>
</table>

## 概述

本笔记本演示如何在Vertex AI上部署预训练的[NLLB](https://huggingface.co/facebook/nllb-200-3.3B)模型以进行在线预测。

### 目标

- 将模型上传至[模型注册表](https://cloud.google.com/vertex-ai/docs/model-registry/introduction)。
- 在[端点](https://cloud.google.com/vertex-ai/docs/predictions/using-private-endpoints)上部署模型。
- 运行翻译的在线预测。

### 成本

本教程使用Google Cloud的计费组件：

* Vertex AI

了解[Vertex AI定价](https://cloud.google.com/vertex-ai/pricing)，并使用[Pricing Calculator](https://cloud.google.com/products/calculator/)根据您的预期使用量生成成本估算。

## 设置环境

**注意**：Jupyter运行以 `!` 为前缀的行作为shell命令，并将以 `$` 为前缀的Python变量插入这些命令中。

**仅限Colab**：下面的单元格仅在Colab环境中执行，用于安装依赖项、完成身份验证并重启内核。

In [None]:
# Colab-only setup: when the notebook is not running in Colab the condition is
# false and this cell does nothing.
if "google.colab" in str(get_ipython()):
    # `!` makes Jupyter run the line as a shell command; this upgrades the
    # package that provides `google.cloud.aiplatform`.
    ! pip3 install --upgrade google-cloud-aiplatform
    from google.colab import auth as google_auth

    # Authenticate the Colab user.
    # NOTE(review): presumably this is what lets later Google Cloud calls use
    # the user's credentials — confirm.
    google_auth.authenticate_user()

    # Restart the notebook kernel after installs.
    # NOTE(review): presumably needed so later cells import the upgraded
    # package; the restart stops the current execution, so the cells below
    # have to be run again afterwards — confirm.
    import IPython

    app = IPython.Application.instance()
    app.kernel.do_shutdown(True)

### 设置Google Cloud项目

1. [选择或创建一个Google Cloud项目](https://console.cloud.google.com/cloud-resource-manager)。首次创建帐号时，您将获得300美元的免费赠金，可用于支付计算/存储费用。

2. [确保您的项目已启用结算](https://cloud.google.com/billing/docs/how-to/modify-project)。

3. [启用Vertex AI API和Compute Engine API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com,compute_component)。

4. [创建一个Cloud Storage存储桶](https://cloud.google.com/storage/docs/creating-buckets)，用于存储实验输出。

为实验环境填写以下变量：

In [None]:
# User-supplied settings for this notebook. The trailing `# @param` markers are
# form annotations and must stay on the same line as the assignment.

# Cloud project id.
# Passed as `project` to `aiplatform.init` in the next code cell.
PROJECT_ID = ""  # @param {type:"string"}

# Google Cloud Storage bucket URI.
# Passed as `staging_bucket` to `aiplatform.init`; replace the bare "gs://"
# prefix with the URI of an existing bucket.
GCS_BUCKET = "gs://"  # @param {type:"string"}

# The region you want to launch jobs in.
# Passed as `location` to `aiplatform.init`.
REGION = "us-central1"  # @param {type:"string"}

初始化Vertex AI API:

In [None]:
from google.cloud import aiplatform

# Initialize the Vertex AI SDK with the project, region and staging bucket
# configured above. The later `aiplatform.*` calls in this notebook do not pass
# these values themselves, so they rely on this call having run first.
aiplatform.init(project=PROJECT_ID, location=REGION, staging_bucket=GCS_BUCKET)

### 定义常量

In [None]:
# The pre-built serving docker image.
# The model artifacts are embedded within the container, except for model weights which will be downloaded during deployment.
# Used by `deploy_model` as the serving container image of the uploaded model.
# NOTE(review): the tag looks like a dated release candidate (20231022) — confirm
# it is still published before relying on it.
SERVE_DOCKER_URI = "us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/pytorch-transformers-serve:20231022_0832_RC00"

### 定义通用函数

In [None]:
def deploy_model(
    model_id,
    accelerator_count,
    machine_type="n1-standard-8",
    accelerator_type="NVIDIA_TESLA_V100",
    deploy_request_timeout=3600,
):
    """Uploads and deploys the model to Vertex AI endpoint for prediction.

    A new endpoint is created, the model is registered with the pre-built
    serving container (`SERVE_DOCKER_URI`), and the model is then deployed to
    that endpoint on the requested hardware.

    Args:
        model_id: Model identifier (e.g. "facebook/nllb-200-distilled-600M").
            Used as the model display name, as the prefix of the endpoint
            display name, and passed to the serving container through the
            `MODEL_ID` environment variable.
        accelerator_count: Number of accelerators to attach to the serving
            machine.
        machine_type: Machine type used for serving. Defaults to
            "n1-standard-8", the value this notebook previously hard-coded.
        accelerator_type: Accelerator type attached to the machine. Defaults
            to "NVIDIA_TESLA_V100", the value this notebook previously
            hard-coded.
        deploy_request_timeout: Timeout passed to the deploy request.
            Defaults to 3600, the value this notebook previously hard-coded.

    Returns:
        A `(model, endpoint)` tuple: the uploaded `aiplatform.Model` and the
        `aiplatform.Endpoint` it was deployed to. Keep both so they can be
        deleted during cleanup.
    """
    endpoint = aiplatform.Endpoint.create(display_name=f"{model_id}-endpoint")

    # Environment variables handed to the serving container.
    serving_env = {
        "MODEL_ID": model_id,
        "DEPLOY_SOURCE": "notebook",
    }

    model = aiplatform.Model.upload(
        display_name=model_id,
        serving_container_image_uri=SERVE_DOCKER_URI,
        serving_container_ports=[7080],
        serving_container_predict_route="/predictions/transformers_serving",
        serving_container_health_route="/ping",
        serving_container_environment_variables=serving_env,
    )

    model.deploy(
        endpoint=endpoint,
        machine_type=machine_type,
        accelerator_type=accelerator_type,
        accelerator_count=accelerator_count,
        deploy_request_timeout=deploy_request_timeout,
    )
    return model, endpoint

## 上传和部署模型

本笔记本及所提供的服务容器镜像支持下表列出的全部4种NLLB模型变体：

| 模型名称 | 模型类型 | 参数数量 | 检查点 |
| - | - | - | - |
| NLLB-200-3.3B | 稠密 | 3.3B |[模型](https://huggingface.co/facebook/nllb-200-3.3B) |
| NLLB-200-1.3B | 稠密 | 1.3B |[模型](https://huggingface.co/facebook/nllb-200-1.3B) |
| NLLB-200-Distilled-1.3B | 稠密 | 1.3B | [模型](https://huggingface.co/facebook/nllb-200-distilled-1.3B) |
| NLLB-200-Distilled-600M | 稠密 | 600M | [模型](https://huggingface.co/facebook/nllb-200-distilled-600M) |

在这4种变体中，[NLLB-200-3.3B](https://huggingface.co/facebook/nllb-200-3.3B)需要2个V100 GPU用于在线预测。然而，其他的变体只需要一个V100 GPU。这部分演示如何将预训练模型上传到模型注册表，并在对应的机器规格上部署模型。

注意：模型部署步骤需要大约25分钟才能完成。

In [None]:
# Please select a model from the list:
model_id = "facebook/nllb-200-distilled-600M"  # @param ["facebook/nllb-200-distilled-600M", "facebook/nllb-200-distilled-1.3B", "facebook/nllb-200-1.3B", "facebook/nllb-200-3.3B"]

# Number of V100 GPUs used to serve each supported checkpoint:
# `NLLB-200-3.3B` requires 2, every other variant uses 1.
V100_COUNT_BY_MODEL = {
    "facebook/nllb-200-distilled-600M": 1,
    "facebook/nllb-200-distilled-1.3B": 1,
    "facebook/nllb-200-1.3B": 1,
    "facebook/nllb-200-3.3B": 2,
}

# Unknown model ids map to None and are rejected below.
accelerator_count = V100_COUNT_BY_MODEL.get(model_id)

if accelerator_count is None or model_id is None:
    raise ValueError("Please select a valid model from the list.")

# Upload the selected model and deploy it; keep both handles for the
# prediction and cleanup cells.
model, endpoint = deploy_model(model_id, accelerator_count)

注意：模型权重将在部署成功后下载。因此，在上述模型部署步骤成功之后，需要额外等待20分钟时间，然后再运行下面的下一个步骤。否则，当您发送请求到端点时，您可能会看到一个`ServiceUnavailable: 503 502:Bad Gateway`错误。

## 发送预测请求

In [None]:
# Sentence to translate.
input_text = "Şeful ONU spune că nu există o soluţie militară în Siria"

# Source and target language codes. The languages supported by this model
# family, with their codes, are listed at:
# https://github.com/facebookresearch/flores/blob/main/flores200/README.md#languages-in-flores-200
src_lang = "ron_Latn"
tgt_lang = "jpn_Jpan"

# One instance per text to translate; this request carries a single instance.
translation_request = dict(text=input_text, src_lang=src_lang, tgt_lang=tgt_lang)
instances = [translation_request]

# Call the deployed endpoint and show the translation of the first instance.
response = endpoint.predict(instances=instances)
preds = response.predictions
first_prediction = preds[0]
print(first_prediction.get("translation_text"))

## 清理资源

In [None]:
# Clean up the resources created by this notebook. Relies on `endpoint` and
# `model` returned by `deploy_model` in the deployment cell.

# Undeploy model and delete endpoint.
# NOTE(review): the endpoint is removed before the model, presumably because a
# model that is still deployed cannot be deleted — keep this order.
endpoint.delete(force=True)

# Delete models.
model.delete()