In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# 在 Vertex AI 上开始使用 Gemma

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_gemma_fine_tuning_batch_deployment_on_rov.ipynb">
      <img width="32px" src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Google Colaboratory logo"><br> 在 Colab 中打开
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fvertex-ai-samples%2Fmain%2Fnotebooks%2Fcommunity%2Fmodel_garden%2Fmodel_garden_gemma_fine_tuning_batch_deployment_on_rov.ipynb">
      <img width="32px" src="https://cloud.google.com/ml-engine/images/colab-enterprise-logo-32px.png" alt="Google Cloud Colab Enterprise logo"><br> 在 Colab Enterprise 中打开
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/main/notebooks/community/model_garden/model_garden_gemma_fine_tuning_batch_deployment_on_rov.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo"><br> 在 Vertex AI Workbench 中打开
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_gemma_fine_tuning_batch_deployment_on_rov.ipynb">
      <img width="32px" src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo"><br> 在 GitHub 上查看
    </a>
  </td>
</table>

## 概述

本教程演示了如何在Vertex AI上使用Ray对Gemma进行微调和提供服务。

了解更多关于[Ray on Vertex AI](https://cloud.google.com/vertex-ai/docs/open-source/ray-on-vertex-ai/overview)。

### 目标

在本教程中，您将学习如何在Vertex AI上使用Ray以分布式方式对Gemma进行监督式微调。此外，您还将学习如何使用Ray Data在Ray on Vertex AI上无缝部署训练好的模型以进行离线预测。

本教程使用以下Google Cloud ML服务和资源：

- Vertex AI上的Ray

执行的步骤包括：

- 在Vertex AI上创建一个Ray集群
- 使用Ray Train在Vertex AI上调整Gemma
- 使用Ray Data为离线预测提供服务。

### 数据集

[极端摘要（XSum）数据集](https://huggingface.co/datasets/EdinburghNLP/xsum) 是关于抽象单文档摘要系统的数据集。

### 费用

本教程使用 Google Cloud 的计费组件：

* Vertex AI
* Cloud Storage

了解 [Vertex AI 价格](https://cloud.google.com/vertex-ai/pricing)，
和 [Cloud Storage 价格](https://cloud.google.com/storage/pricing)，
并使用 [定价计算器](https://cloud.google.com/products/calculator/)
根据您的预期使用量生成费用估算。

<b>注意</b>：本教程通过公共Ray仪表板使用Ray Jobs API。公共Ray仪表板地址可以从VPC之外（包括公共互联网）访问。要了解更多关于私有与公共连接性的信息，请参阅[在Vertex AI上创建Ray集群](https://cloud.google.com/vertex-ai/docs/open-source/ray-on-vertex-ai/create-cluster) 文档中的[私有和公共连接性](https://cloud.google.com/vertex-ai/docs/open-source/ray-on-vertex-ai/create-cluster#private_and_public_connectivity) 章节。

## 在您开始之前

### 设置您的 Google Cloud 项目

**无论您使用的是哪种笔记本环境，下面的步骤都是必须的。**

1. [选择或创建一个 Google Cloud 项目](https://console.cloud.google.com/cloud-resource-manager)。

2. [确保为您的项目启用了计费](https://cloud.google.com/billing/docs/how-to/modify-project)。

3. [启用API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com,artifactregistry.googleapis.com,cloudbuild.googleapis.com)。

4. 如果您是在本地运行此笔记本，请确保安装了[Cloud SDK](https://cloud.google.com/sdk)。

### 安装

安装以下所需的软件包以执行此笔记本。

In [None]:
# Install the packages
import os

# When the IS_TESTING environment variable is unset, pass `--user` to pip;
# otherwise pass no extra flag.
if not os.getenv("IS_TESTING"):
    USER = "--user"
else:
    USER = ""

# {USER} is interpolated by IPython before each shell command is run.
# Every package is pinned to an exact version.
! pip3 install {USER} google-cloud-aiplatform[ray]==1.48.0 -q --no-warn-conflicts
! pip3 install {USER} google-cloud-aiplatform[tensorboard]==1.48.0 -q --no-warn-conflicts
! pip3 install {USER} torch==2.2.1 datasets==2.17.0 transformers==4.38.1 evaluate==0.4.1 rouge-score==0.1.2 nltk==3.8.1 bitsandbytes==0.42.0 peft==0.8.2 accelerate==0.27.1 -q --no-warn-conflicts
! pip3 install {USER} tensorflow==2.15.0 -q --no-warn-conflicts
! pip3 install {USER} etils==1.5.0 fsspec==2023.10.0 gcsfs==2023.10.0 -q --no-warn-conflicts

重新启动运行时（仅适用于Colab）

为了使用新安装的软件包，您必须重新启动Google Colab上的运行时。

In [None]:
import sys

# Colab only: shut the kernel down (argument True) so that it comes back with
# the freshly installed packages available. Other environments skip this.
if "google.colab" in sys.modules:

    import IPython

    app = IPython.Application.instance()
    app.kernel.do_shutdown(True)

<div class="alert alert-block alert-warning">
<b>⚠️内核将重新启动。在继续下一步之前，请等待它完成。⚠️</b>
</div>

### 在Colab上对您的笔记本环境进行身份验证

在Google Colab上对您的环境进行身份验证。

In [None]:
import sys

# Colab only: run the interactive Google authentication flow. The import is
# kept inside the guard because `google.colab` is only checked for, not assumed.
if "google.colab" in sys.modules:
    from google.colab import auth

    auth.authenticate_user()

设置谷歌云项目信息

#### 项目 ID

In [None]:
# Replace the placeholder with your Google Cloud project ID. The value is
# reused by the gcloud/gsutil commands and the Vertex AI SDK calls below.
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}

# Set the project id
! gcloud config set project {PROJECT_ID}

#### 区域

您还可以更改 Vertex AI 使用的 `REGION` 变量。了解有关 [Vertex AI 区域](https://cloud.google.com/vertex-ai/docs/general/locations) 的更多信息。

In [None]:
# Region used for the bucket, the Artifact Registry repository, Cloud Build and the Vertex AI SDK.
REGION = "us-central1"  # @param {type: "string"}

时间戳

您创建时间戳以使您在本教程中创建的资源变得独一无二。

In [None]:
from datetime import datetime

# Second-resolution timestamp (YYYYmmddHHMMSS), later embedded in resource
# names such as TENSORBOARD_NAME to keep them unique per run.
TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")

#### 云存储桶

创建一个存储桶来存储中间产物，如数据集。

In [None]:
# Bucket name without scheme; it embeds PROJECT_ID to reduce the chance of a
# name collision.
BUCKET_NAME = f"your-bucket-name-{PROJECT_ID}-unique"  # @param {type:"string"}

# Same bucket as a gs:// URI, the form expected by gsutil and the Vertex AI SDK.
BUCKET_URI = f"gs://{BUCKET_NAME}"

In [None]:
# Create the bucket in REGION under PROJECT_ID.
! gsutil mb -l {REGION} -p {PROJECT_ID} {BUCKET_URI}

#### 服务账号

设置服务账号并授予该服务账号访问 Vertex AI TensorBoard 的权限。

In [None]:
# Service account that receives the IAM role bindings in the next cell.
SERVICE_ACCOUNT = "[your-service-account]"  # @param {type:"string"}

In [None]:
# Grant the service account the Storage Admin role on the project.
! gcloud projects add-iam-policy-binding {PROJECT_ID} \
   --member=serviceAccount:{SERVICE_ACCOUNT} \
   --role="roles/storage.admin"

# Grant the service account the Vertex AI User role on the project.
! gcloud projects add-iam-policy-binding {PROJECT_ID} \
   --member=serviceAccount:{SERVICE_ACCOUNT} \
   --role="roles/aiplatform.user"

### 设置教程文件夹

在本教程中设置要使用的文件夹。

In [None]:
from pathlib import Path as path

# Local working tree of the tutorial, rooted in the current working directory.
root_path = path.cwd()
tutorial_path = root_path / "tutorial"

# One sub-folder per kind of artifact handled in the notebook.
(
    data_path,
    src_path,
    experiments_path,
    models_path,
    build_path,
    tests_path,
) = (
    tutorial_path / folder_name
    for folder_name in ("data", "src", "experiments", "models", "build", "tests")
)

# Create every folder (and missing parents); re-running the cell is harmless
# because existing folders are accepted.
for _tutorial_folder in (
    data_path,
    src_path,
    experiments_path,
    models_path,
    build_path,
    tests_path,
):
    _tutorial_folder.mkdir(parents=True, exist_ok=True)
del _tutorial_folder

在Vertex AI上设置一个Ray集群之前，请确保[设置](https://cloud.google.com/vertex-ai/docs/open-source/ray-on-vertex-ai/set-up)了Ray on Vertex AI。

In [None]:
import vertex_ray
from google.cloud import aiplatform as vertex_ai
from vertex_ray import NodeImages, Resources

#### 为Python初始化Vertex AI SDK

为您的项目初始化Vertex AI SDK的Python版本。

In [None]:
# First SDK initialization with project, region and staging bucket. The SDK is
# initialized again later, once the TensorBoard instance exists.
vertex_ai.init(project=PROJECT_ID, location=REGION, staging_bucket=BUCKET_URI)

建立定制的集群镜像

由于需要特定的依赖项，因此有必要利用Ray的定制集群镜像支持。

要使用定制集群镜像，第一步是构建镜像。以下是要涉及的步骤：

* 准备需求文件
* 为定制镜像创建Dockerfile
* 创建Docker镜像存储库
* 构建Ray集群定制镜像

准备需求文件

准备一个包含您的Ray应用程序运行所需依赖项的`requirements`文件。

In [None]:
# Pinned dependencies installed into the custom cluster image by the
# Dockerfile written in a later cell (`pip install -r requirements.txt`).
# NOTE(review): ray is pinned to 2.10.0 here while the default base image tag
# is `ray-gpu.2-9` — confirm the two versions are meant to differ.
requirements = """
ipython==8.22.2
torch==2.2.1
ray==2.10.0
ray[data]==2.10.0
ray[train]==2.10.0
ray[tune]==2.10.0
datasets==2.17.0
transformers==4.38.1
evaluate==0.4.1
rouge-score==0.1.2
nltk==3.8.1
accelerate==0.27.1
bitsandbytes==0.42.0
peft==0.8.2
trl==0.7.10
# flash-attn==2.5.5
pyarrow==15.0.2
fsspec==2023.10.0
gcsfs==2023.10.0
etils==1.7.0
importlib-resources==6.1.2
"""

# The `with` block closes the file on exit, so no explicit close() is needed.
with open(build_path / "requirements.txt", "w") as rfile:
    rfile.write(requirements)

创建 Dockerfile

通过利用预建的 Ray on Vertex AI 基础镜像之一，为自定义镜像创建 Dockerfile。

In [None]:
# Prebuilt Ray on Vertex AI image used as the `FROM` line of the custom Dockerfile.
CUSTOM_BASE_IMAGE = "us-docker.pkg.dev/vertex-ai/training/ray-gpu.2-9.py310:latest"  # @param ["us-docker.pkg.dev/vertex-ai/training/ray-cpu.2-4.py310:latest", "us-docker.pkg.dev/vertex-ai/training/ray-cpu.2-9.py310:latest", "us-docker.pkg.dev/vertex-ai/training/ray-gpu.2-4.py310:latest", "us-docker.pkg.dev/vertex-ai/training/ray-gpu.2-9.py310:latest", "europe-docker.pkg.dev/vertex-ai/training/ray-cpu.2-4.py310:latest", "europe-docker.pkg.dev/vertex-ai/training/ray-cpu.2-9.py310:latest", "europe-docker.pkg.dev/vertex-ai/training/ray-gpu.2-4.py310:latest", "europe-docker.pkg.dev/vertex-ai/training/ray-gpu.2-9.py310:latest", "asia-docker.pkg.dev/vertex-ai/training/ray-cpu.2-4.py310:latest", "asia-docker.pkg.dev/vertex-ai/training/ray-cpu.2-9.py310:latest", "asia-docker.pkg.dev/vertex-ai/training/ray-gpu.2-4.py310:latest", "asia-docker.pkg.dev/vertex-ai/training/ray-gpu.2-9.py310:latest"] {allow-input: true}

In [None]:
# Dockerfile for the custom cluster image: start from the selected Ray base
# image and install the pinned requirements on top of it.
dockerfile = f"""
FROM {CUSTOM_BASE_IMAGE}

# Install training libraries.
ENV PIP_ROOT_USER_ACTION=ignore
COPY requirements.txt .
RUN pip install -r requirements.txt
"""

# The `with` block closes the file on exit, so no explicit close() is needed.
with open(build_path / "Dockerfile", "w") as image_file:
    image_file.write(dockerfile)

创建Docker镜像库

为了存储自定义集群镜像，在Artifact Registry中创建一个Docker镜像库。

In [None]:
# Name of the Artifact Registry Docker repository that stores the cluster image.
REPO_NAME = f"your-repo-name-{PROJECT_ID}-unique"  # @param {type:"string"}

In [None]:
# Create a Docker-format Artifact Registry repository in REGION.
! gcloud artifacts repositories create {REPO_NAME} --repository-format=docker \
    --location={REGION} --description="Tutorial repository"

最后，使用Cloud Build构建Ray集群的自定义镜像。

In [None]:
# Full image path inside the Artifact Registry repository created above.
NODE_TRAIN_IMAGE = f"{REGION}-docker.pkg.dev/{PROJECT_ID}/{REPO_NAME}/train"
# Machine type passed to `gcloud builds submit --machine-type`.
BUILD_MACHINE_TYPE = "E2_HIGHCPU_32"

In [None]:
# Build the image from the files in `build_path` with Cloud Build and tag it as
# NODE_TRAIN_IMAGE; the build timeout is set to 3600.
! gcloud builds submit --region={REGION} --tag={NODE_TRAIN_IMAGE} \
    --machine-type={BUILD_MACHINE_TYPE} --timeout=3600 {build_path}

#### 创建 Ray 集群

使用自定义镜像，通过在 Vertex AI SDK for Python 上使用 Ray 创建 Ray 集群。

In [None]:
# Name given to the Ray cluster when it is created.
CLUSTER_NAME = f"your-cluster-name-{PROJECT_ID}-unique"  # @param {type:"string"}

设置Ray集群配置

在Vertex AI上使用Vertex AI Python SDK为Ray设置集群配置。

要了解更多关于集群配置的信息，请参阅[文档](https://cloud.google.com/vertex-ai/docs/open-source/ray-on-vertex-ai/create-cluster#ray-on-vertex-ai-sdk)。

In [None]:
# Head node: machine type and node count (no accelerator is configured for it).
HEAD_NODE_MACHINE_TYPE = "n1-standard-16"  # @param {type:"string"}
HEAD_NODE_COUNT = 1  # @param {type:"integer"}

# Worker pool: machine type, node count and the accelerator attached to each node.
WORKER_NODE_MACHINE_TYPE = "a2-highgpu-1g"  # @param {type:"string"}
WORKER_NODE_COUNT = 1  # @param {type:"integer"}
WORKER_ACCELERATION_TYPE = "NVIDIA_TESLA_A100"  # @param {type:"string"}
WORKER_ACCELERATION_COUNT = 1  # @param {type:"integer"}

In [None]:
# Resource spec of the head node.
HEAD_NODE_TYPE = Resources(
    machine_type=HEAD_NODE_MACHINE_TYPE,
    node_count=HEAD_NODE_COUNT,
)

# Resource specs of the worker pools; a single accelerator-backed pool here.
WORKER_NODE_TYPES = [
    Resources(
        machine_type=WORKER_NODE_MACHINE_TYPE,
        node_count=WORKER_NODE_COUNT,
        accelerator_type=WORKER_ACCELERATION_TYPE,
        accelerator_count=WORKER_ACCELERATION_COUNT,
    )
]

# Head and worker nodes both run the custom image built above.
CUSTOM_IMAGES = NodeImages(
    head=NODE_TRAIN_IMAGE,
    worker=NODE_TRAIN_IMAGE,
)

创建 Ray 集群

使用预定义的自定义配置创建 Ray 集群。根据配置不同，创建集群可能需要几分钟时间。

In [None]:
# Create the cluster from the head/worker/image configuration defined above and
# keep the call's return value in `ray_cluster_name`.
ray_cluster_name = vertex_ray.create_ray_cluster(
    head_node_type=HEAD_NODE_TYPE,
    worker_node_types=WORKER_NODE_TYPES,
    custom_images=CUSTOM_IMAGES,
    cluster_name=CLUSTER_NAME,
)

获取Ray集群

使用Python的Vertex AI SDK上的Ray来获取Ray集群。

In [None]:
# Look the tutorial's cluster up by name instead of taking the last listed one:
# the project may hold other clusters and the listing order is not something to
# rely on. A cluster resource name ends with ".../<cluster name>".
ray_clusters = vertex_ray.list_ray_clusters()
_tutorial_clusters = [
    cluster
    for cluster in ray_clusters
    if cluster.cluster_resource_name.split("/")[-1] == CLUSTER_NAME
]
if not _tutorial_clusters:
    raise RuntimeError(
        f"Ray cluster '{CLUSTER_NAME}' was not found in project '{PROJECT_ID}' "
        f"and region '{REGION}'. Create it before running this cell."
    )
ray_cluster_resource_name = _tutorial_clusters[0].cluster_resource_name
ray_cluster = vertex_ray.get_ray_cluster(ray_cluster_resource_name)

In [None]:
# Show which cluster the rest of the notebook talks to.
print("Ray cluster on Vertex AI:", ray_cluster_resource_name)

### 导入库

导入所需的库。

In [None]:
# General
import io
import logging
import os
import random
import shutil
import string
import time

import datasets
import evaluate
import pandas as pd
# Ray - Training
import ray
import torch
import transformers
from etils import epath
from google.cloud import storage
from huggingface_hub import login
from peft import PeftModel
from ray.job_submission import JobStatus, JobSubmissionClient
# Ray - Batch Serving
from ray.tune import ExperimentAnalysis
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

In [None]:
# Ray version installed in the notebook environment (the client side).
print("Ray version: ", ray.__version__)

### 设置变量

初始化一些教程变量。

In [None]:
# Training
# Hugging Face access token; it is used for `login(...)` and forwarded to the
# Ray jobs through the HF_TOKEN environment variable.
HF_TOKEN = "[your-hugging-face-token]"  # @param {type:"string"}
# Cloud Storage folder passed to the training job as `--logging-dir`.
EXPERIMENTS_FOLDER_URI = epath.Path(BUCKET_URI) / "experiments"
TENSORBOARD_NAME = f"rov-xsum-gemma-tb-{TIMESTAMP}"

# Serving
# Cloud Storage folders for the merged model and for the batch predictions.
MODELS_PATH = epath.Path(BUCKET_URI) / "models"
PREDICTIONS_FOLDER_URI = epath.Path(BUCKET_URI) / "predictions"

### 定义辅助函数

在您的笔记本中定义一个辅助函数，使用Ray Dashboard API 监控Ray作业的状态。

In [None]:
def monitor_job(client, job_id, poll_interval=60):
    """Polls a Ray job through the Ray Jobs API until it reaches a terminal state.

    Args:
        client: Job submission client exposing `get_job_status(job_id)`.
        job_id: Identifier of the submitted Ray job; also embedded in the
            prefix of every log line.
        poll_interval: Seconds to sleep between two status checks.

    Returns:
        The terminal job status: `JobStatus.SUCCEEDED`, `JobStatus.FAILED` or
        `JobStatus.STOPPED`.
    """

    # force=True replaces the handlers installed by a previous call, so the
    # log prefix always shows the job currently being monitored.
    logging.basicConfig(
        level=logging.INFO,
        format=f"%(asctime)s.%(msecs)03d %(levelname)s {job_id} -- %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        force=True,
    )

    while True:
        job_status = client.get_job_status(job_id)

        if job_status == JobStatus.SUCCEEDED:
            logging.info("Job succeeded!")
            break

        elif job_status == JobStatus.FAILED:
            logging.info("Job failed!")
            break

        elif job_status == JobStatus.STOPPED:
            # A stopped job never changes state again; without this branch the
            # loop would keep polling forever.
            logging.info("Job was stopped!")
            break

        else:
            logging.info("Job is running...")
            time.sleep(poll_interval)

    return job_status


def read_json_files(bucket_name, prefix=None):
    """Reads the JSON Lines files of a Cloud Storage bucket into one DataFrame.

    Args:
        bucket_name: Name of the bucket (without the `gs://` scheme).
        prefix: Optional object-name prefix restricting which blobs are listed.

    Returns:
        A DataFrame concatenating every blob whose name ends with `.json`, with
        a fresh 0..n-1 index. An empty DataFrame when no such blob exists.
    """

    # Set up storage client
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)

    dfs = []
    for blob in bucket.list_blobs(prefix=prefix):
        if not blob.name.endswith(".json"):
            continue
        file_string = blob.download_as_bytes().decode("utf-8")
        # lines=True: each file holds one JSON record per line.
        with io.StringIO(file_string) as json_file:
            dfs.append(pd.read_json(json_file, lines=True))

    # pd.concat raises ValueError on an empty list; report "no data" instead.
    if not dfs:
        return pd.DataFrame()

    return pd.concat(dfs, ignore_index=True)

库设置

初始化一些库设置。

In [None]:
# Authenticate against the Hugging Face Hub with HF_TOKEN.
login(token=HF_TOKEN)
# Silence the `datasets` progress bars in the notebook output.
datasets.disable_progress_bar()
# Fixed seed so the sampling done later in the notebook is repeatable.
transformers.set_seed(8)

### 创建一个 Vertex AI TensorBoard 实例

创建一个 Vertex AI TensorBoard 实例来跟踪和监控您的调整作业。

In [None]:
# Create the Vertex AI TensorBoard instance that later receives the training logs.
tensorboard = vertex_ai.Tensorboard.create(
    display_name=TENSORBOARD_NAME, project=PROJECT_ID, location=REGION
)

# Re-initialize the SDK so that the TensorBoard instance is attached to it.
vertex_ai.init(
    project=PROJECT_ID,
    location=REGION,
    staging_bucket=BUCKET_URI,
    experiment_tensorboard=tensorboard,
)

## 使用 Ray Train 对 Gemma 进行微调

在本教程中，您将使用 HuggingFace Transformer 和 Ray 在 Vertex AI 上对 Gemma 2B (`gemma-2b-it`) 进行微调，用于总结报纸文章。为了使这个笔记本易于复制，您编写了一个简单的 Python `trainer.py` 脚本，并通过公共 Ray 仪表板使用 Ray Jobs API 将其提交到 Vertex AI 上的 Ray 集群。

正如在开头提到的那样，**请仅将此选项用于实验目的**。

### 初始化Ray包

创建一个`__init__.py`文件。

In [None]:
# Append mode creates `__init__.py` when it is missing and leaves any existing
# content untouched; nothing is written to it.
with open(src_path / "__init__.py", "a") as init_file:
    pass

### 准备训练脚本

创建`src/train.py`文件，这是用于使用HuggingFace TRL库初始化Gemma微调的Python脚本。

In [None]:
# Source of `src/train.py`: the per-worker training function run by Ray Train.
# It tunes `google/gemma-2b-it` on XSum with a 4-bit quantized base model and a
# LoRA adapter, and reports metrics and checkpoints to Ray through
# `RayTrainReportCallback`.
train_script = '''
# training libraries
import os
import numpy as np
import torch
from huggingface_hub import login
import datasets
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, Seq2SeqTrainingArguments
from peft import LoraConfig
from trl import SFTTrainer
import evaluate
import ray
import ray.train.huggingface.transformers

def train_func(config):
    # Helpers
    def formatting_func(example):
        """Helper function for formatting data for instruction tuning according to Gemma documentation."""
        output_texts = []
        # `example` is a batch (a mapping of column name to list of values), so the
        # number of rows is the length of a column, not the length of the mapping.
        for i in range(len(example['document'])):
          messages = [
            {"role": "user",
             "content": f"Summarize the following ARTICLE in one sentence.\\n###ARTICLE: {example['document'][i]}"},
            {"role": "assistant",
             "content": f"{example['summary'][i]}<eos>"} # Make minor gemma fixes #2029
             ]
          output_texts.append(tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False))
        return output_texts

    def compute_metrics(eval_preds):
        """Helper function for computing metrics"""
        preds, labels = eval_preds
        preds = preds[0]

        preds = np.where(preds != -100, preds, tokenizer.pad_token_id)
        labels = np.where(labels != -100, labels, tokenizer.pad_token_id)

        decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
        decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

        metrics = rouge.compute(predictions=decoded_preds,
                                references=decoded_labels,
                                rouge_types=['rouge1', 'rouge2', 'rougeL', 'rougeLsum'],
                                use_aggregator=True, use_stemmer=True)
        metrics = {k: round(v * 100, 4) for k, v in metrics.items()}
        return metrics

    def preprocess_logits_for_metrics(logits, labels):
        """Helper function for logits preprocessing for metrics"""
        preds = torch.argmax(logits, dim=-1)
        return preds, labels

    # Setting training
    login(token=os.environ['HF_TOKEN'], add_to_git_credential=True)
    transformers.set_seed(8)

    # Load dataset
    dataset_id = "xsum"
    dataset = datasets.load_dataset(dataset_id, trust_remote_code=True)
    train_dataset = dataset["train"]
    eval_dataset = dataset["test"]

    # Preprocess dataset
    model_id = "google/gemma-2b-it"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    tokenizer.padding_side = 'right'

    # Prepare model
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
    model = AutoModelForCausalLM.from_pretrained(model_id,
                                                 quantization_config=bnb_config,
                                                 device_map={'': torch.cuda.current_device()},
                                                 torch_dtype=torch.bfloat16,
                                                 # attn_implementation="flash_attention_2"
                                                 )
    lora_config = LoraConfig(
        r=32,
        lora_alpha=32,
        lora_dropout=0.05,
        target_modules="all-linear",
        bias="none",
        task_type="CAUSAL_LM"
    )

    # model.gradient_checkpointing_enable()
    rouge = evaluate.load("rouge")

    training_args = Seq2SeqTrainingArguments(
        output_dir="checkpoints",
        per_device_train_batch_size=config.get("per_device_train_batch_size"),
        per_device_eval_batch_size=config.get("per_device_eval_batch_size"),
        gradient_accumulation_steps=config.get("gradient_accumulation_steps"),
        logging_strategy="steps",
        save_strategy="steps",
        evaluation_strategy="steps",
        max_steps=config.get("max_steps"),
        save_steps=config.get("save_steps"),
        logging_steps=config.get("logging_steps"),
        learning_rate=config.get("learning_rate"),
        optim="paged_adamw_8bit",
        bf16=False,
        fp16=True,
        report_to="none",
        predict_with_generate=True,
        ddp_find_unused_parameters=False,
        gradient_checkpointing=True,
        push_to_hub=False,
        disable_tqdm=False,
        load_best_model_at_end=False
    )

    max_seq_length = 512
    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        max_seq_length=max_seq_length,
        compute_metrics=compute_metrics,
        preprocess_logits_for_metrics=preprocess_logits_for_metrics,
        peft_config=lora_config,
        formatting_func=formatting_func
    )
    # model.config.use_cache = False

    callback = ray.train.huggingface.transformers.RayTrainReportCallback()
    trainer.add_callback(callback)
    trainer = ray.train.huggingface.transformers.prepare_trainer(trainer)
    trainer.train()
'''

# The `with` block closes the file on exit, so no explicit close() is needed.
with open(src_path / "train.py", "w") as f:
    f.write(train_script)

准备分布式训练脚本

创建`src/trainer.py`文件，这是用于执行Ray分布式训练任务的Python脚本。

In [None]:
# Source of `src/trainer.py`: the job entry point. It parses the command-line
# options, builds the Ray Train scaling/run configuration and runs `train_func`
# from `train.py` through a `TorchTrainer`.
trainer_script = """
# libraries
import argparse

# training libraries
from train import train_func

# ray libraries
import ray
import ray.train.huggingface.transformers
from ray.train import ScalingConfig, RunConfig, CheckpointConfig
from ray.train.torch import TorchTrainer


# helpers
def get_args():
    parser = argparse.ArgumentParser(description='Supervised tuning Gemma on Ray on Vertex AI')

    # some gemma parameters
    parser.add_argument("--train_batch_size", type=int, default=1, help="train batch size")
    parser.add_argument("--eval_batch_size", type=int, default=1, help="eval batch size")
    parser.add_argument("--gradient_accumulation_steps", type=int, default=4, help="gradient accumulation steps")
    parser.add_argument("--learning_rate", type=float, default=2e-4, help="learning rate")
    parser.add_argument("--max_steps", type=int, default=100, help="max steps")
    parser.add_argument("--save_steps", type=int, default=10, help="save steps")
    parser.add_argument("--logging_steps", type=int, default=10, help="logging steps")

    # ray parameters
    parser.add_argument('--num-workers', dest='num_workers', type=int, default=1, help='Number of workers')
    parser.add_argument('--use-gpu', dest='use_gpu', action='store_true', default=False, help='Use GPU')
    parser.add_argument('--experiment-name', dest='experiment_name', type=str, default='gemma-on-rov', help='Experiment name')
    parser.add_argument('--logging-dir', dest='logging_dir', type=str, help='Logging directory')
    args = parser.parse_args()
    return args


def main():

    args = get_args()
    config = vars(args)

    # initialize ray session
    ray.shutdown()
    ray.init()

    # training config
    train_loop_config = {
        "per_device_train_batch_size": config['train_batch_size'],
        "per_device_eval_batch_size": config['eval_batch_size'],
        "gradient_accumulation_steps": config['gradient_accumulation_steps'],
        "learning_rate": config['learning_rate'],
        "max_steps": config['max_steps'],
        "save_steps": config['save_steps'],
        "logging_steps": config['logging_steps'],
    }
    scaling_config = ScalingConfig(num_workers=config['num_workers'], use_gpu=config['use_gpu'])
    run_config = RunConfig(checkpoint_config=CheckpointConfig(num_to_keep=5,
                          checkpoint_score_attribute="loss",
                          checkpoint_score_order="min"),
                           storage_path=config['logging_dir'],
                           name=config['experiment_name'])
    trainer = TorchTrainer(
        train_loop_per_worker=train_func,
        train_loop_config=train_loop_config,
        run_config=run_config,
        scaling_config=scaling_config
    )
    # train
    result = trainer.fit()

    ray.shutdown()


if __name__ == "__main__":
    main()
"""

# The `with` block closes the file on exit, so no explicit close() is needed.
with open(src_path / "trainer.py", "w") as f:
    f.write(trainer_script)

使用Ray Jobs API提交一个Ray作业

使用Ray Jobs API将脚本提交到在Vertex AI上的Ray集群，在公共Ray仪表板地址。

初始化客户端以提交作业。

In [None]:
# Job submission client bound to the cluster's dashboard address, using the
# `vertex_ray://` address scheme.
client = JobSubmissionClient(address=f"vertex_ray://{ray_cluster.dashboard_address}")

设置一些作业配置，包括实验名称、作业编号、训练入口等。

In [None]:
# Short random suffix (3 lowercase letters/digits) that makes the experiment
# name and the submission id differ between runs.
train_id = "".join(random.choices(string.ascii_lowercase + string.digits, k=3))
train_experiment_name = f"rov-dialog-gemma-tune-{train_id}"
train_submission_id = f"ray-job-{train_id}"
# Command executed on the cluster; one Ray Train worker per worker node.
train_entrypoint = f"python3 trainer.py --experiment-name={train_experiment_name} --logging-dir={EXPERIMENTS_FOLDER_URI} --num-workers={WORKER_NODE_COUNT} --use-gpu"
# Cloud Storage location where this experiment's results are written.
train_experiment_uri = EXPERIMENTS_FOLDER_URI / train_experiment_name
# `working_dir` points the job at the local `src` folder (train.py, trainer.py);
# the Hugging Face token reaches the job through an environment variable.
train_runtime_env = {
    "working_dir": str(src_path),
    "env_vars": {"HF_TOKEN": HF_TOKEN, "TORCH_NCCL_ASYNC_ERROR_HANDLING": "3"},
}

提交作业。

In [None]:
# Submit the training job; the returned id is what `monitor_job` polls.
train_job_id = client.submit_job(
    submission_id=train_submission_id,
    entrypoint=train_entrypoint,
    runtime_env=train_runtime_env,
)

使用`monitor_job`函数在任务运行时检查任务的状态。

In [None]:
# Blocks until the training job leaves the running state and keeps its final status.
train_job_status = monitor_job(client, train_job_id)

### 检查训练工件

Ray 训练作业完成后，在云存储位置查看模型工件。

In [None]:
# List what the training job wrote under the experiment folder.
! gsutil ls -l {train_experiment_uri}

使用Vertex AI TensorBoard来记录产生的指标，验证您的训练工作。

In [None]:
# Upload the logs found under the experiment folder to the TensorBoard
# instance created earlier, under the experiment's name.
vertex_ai.upload_tb_log(
    tensorboard_id=tensorboard.name,
    tensorboard_experiment_name=train_experiment_name,
    logdir=str(train_experiment_uri),
)

使用Ray在Vertex AI上为离线预测提供调整后的Gemma模型

在Vertex AI上使用Ray开发AI/ML应用程序具有各种好处。在这种情况下，您可以使用云存储方便地存储模型检查点、指标等。这使您能够快速地为AI/ML下游任务消耗模型，包括使用Ray Data生成批处理预测。

生成预测（本地）

生成本地预测以验证调整后的模型。

#### 下载Ray训练检查点

从Ray作业中下载所有生成的检查点。

In [None]:
# Copy everything under the experiment folder into the local `experiments` folder.
! gsutil -q cp -r {train_experiment_uri}/* {experiments_path}

获取最佳检查点

使用“ExperimentAnalysis”方法根据相关指标和模式检索最佳检查点。

In [None]:
# Load the downloaded experiment results.
experiment_analysis = ExperimentAnalysis(experiments_path)
# NOTE: despite its name, `log_path` holds the value returned by
# `get_best_trial` (selected by the highest `eval_rougeLsum`).
log_path = experiment_analysis.get_best_trial(metric="eval_rougeLsum", mode="max")
# Best checkpoint of that trial according to the same metric.
best_checkpoint = experiment_analysis.get_best_checkpoint(
    log_path, metric="eval_rougeLsum", mode="max"
)

训练完成后加载模型

训练模型后，根据Hugging Face文档中的描述加载模型。

设置模型和适配器路径。还要设置存储结果调整模型的路径。

In [None]:
# Hugging Face id of the base model.
base_model_path = "google/gemma-2b-it"
# The adapter is read from the `checkpoint` sub-folder of the best checkpoint.
peft_model_path = epath.Path(best_checkpoint.path) / "checkpoint"
# Local folder where the merged (base + adapter) model is saved.
tuned_model_path = models_path / "xsum-tuned-gemma-it"

初始化相应的Gemma分词器（tokenizer）和基础模型，同时加载训练得到的适配器。

In [None]:
# Tokenizer of the base model, padding on the right as in the training script.
tokenizer = AutoTokenizer.from_pretrained(base_model_path)
tokenizer.padding_side = "right"

# Base model in half precision.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_path, device_map="auto", torch_dtype=torch.float16
)
# Attach the tuned adapter to the base model for inference only.
# NOTE(review): the base model is loaded as float16 while bfloat16 is passed
# here — confirm that this mix is intended.
peft_model = PeftModel.from_pretrained(
    base_model,
    peft_model_path,
    device_map="auto",
    torch_dtype=torch.bfloat16,
    is_trainable=False,
)

将基础模型和适配器合并，以保存调整后的模型。

In [None]:
# Merge the adapter into the base model and save the result to `tuned_model_path`.
tuned_model = peft_model.merge_and_unload()
tuned_model.save_pretrained(tuned_model_path)

#### 生成摘要

使用调整过的模型生成摘要。加载教程数据集的验证集。

In [None]:
# XSum validation split, cached in the tutorial's local `data` folder.
dataset = datasets.load_dataset(
    "xsum", split="validation", cache_dir=data_path, trust_remote_code=True
)

随机抽取一篇文章进行摘要。

In [None]:
# Pick one article at random (randint is inclusive on both ends, hence the -1).
sample = dataset.select([random.randint(0, len(dataset) - 1)])
document = sample["document"][0]
reference_summary = sample["summary"][0]

根据[Gemma文档](https://ai.google.dev/gemma/docs/formatting)准备相关的提示。

In [None]:
# Same instruction as the one used in the training and batch prediction
# scripts. Those scripts are written from string literals, where "\\n" ends up
# as "\n" in the generated file; in this cell the escape must be a plain "\n",
# otherwise the prompt contains a literal backslash followed by "n" instead of
# the newline the model was tuned with.
messages = [
    {
        "role": "user",
        "content": f"Summarize the following ARTICLE in one sentence.\n###ARTICLE: {document}",
    },
]

# Render the chat template as text and append the generation prompt so that
# the model continues with the answer.
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

启动文本生成管道以生成摘要。

In [None]:
# Text-generation pipeline over the merged model, capped at 50 new tokens.
tuned_gemma_pipeline = pipeline(
    "text-generation", model=tuned_model, tokenizer=tokenizer, max_new_tokens=50
)

生成相关摘要。

In [None]:
# The pipeline output starts with the prompt; slicing by len(prompt) keeps
# only the generated continuation.
generated_tuned_gemma_summary = tuned_gemma_pipeline(
    prompt, do_sample=True, temperature=0.1, add_special_tokens=True
)[0]["generated_text"][len(prompt) :]

打印生成的摘要。

In [None]:
# Reference summary first, then the tuned model's summary, separated by a rule.
print(f"Reference summary: {reference_summary}")
print("-" * 100)
print(f"Tuned generated summary: {generated_tuned_gemma_summary}")

评估模型

作为一个额外的步骤，您可以评估调整过的模型。要评估模型，您需要在定性和定量上比较模型。

在一个案例中，您会比较基础Gemma模型生成的响应和调整过的Gemma模型生成的响应。在另一个案例中，您会计算ROUGE指标及其改进，这会让您了解调整过的模型相对于基础模型正确地复现参考摘要的能力。

通过比较生成的摘要来评估模型。

In [None]:
# `PeftModel.from_pretrained` wraps the layers of `base_model` and
# `merge_and_unload()` folds the adapter weights into them, so after the merge
# `base_model` carries the tuned weights. Load a fresh copy of the original
# model so that the baseline really is the untuned Gemma.
untuned_gemma_model = AutoModelForCausalLM.from_pretrained(
    base_model_path, device_map="auto", torch_dtype=torch.float16
)

gemma_pipeline = pipeline(
    "text-generation",
    model=untuned_gemma_model,
    tokenizer=tokenizer,
    max_new_tokens=50,
)

# Same prompt and sampling settings as for the tuned model; the slice drops the
# echoed prompt and keeps only the generated continuation.
generated_gemma_summary = gemma_pipeline(
    prompt, do_sample=True, temperature=0.1, add_special_tokens=True
)[0]["generated_text"][len(prompt) :]

print(f"Reference summary: {reference_summary}")
print("-" * 100)
print(f"Base generated summary: {generated_gemma_summary}")
print("-" * 100)
print(f"Tuned generated summary: {generated_tuned_gemma_summary}")

通过计算ROUGE指标及其改进来评估模型。

In [None]:
# ROUGE metric from the `evaluate` library, used for both models below.
rouge = evaluate.load("rouge")

In [None]:
# ROUGE scores of the base model's summary against the reference summary.
gemma_results = rouge.compute(
    predictions=[generated_gemma_summary],
    references=[reference_summary],
    rouge_types=["rouge1", "rouge2", "rougeL", "rougeLsum"],
    use_aggregator=True,
    use_stemmer=True,
)

In [None]:
# ROUGE scores of the tuned model's summary, computed with the same settings.
tuned_gemma_results = rouge.compute(
    predictions=[generated_tuned_gemma_summary],
    references=[reference_summary],
    rouge_types=["rouge1", "rouge2", "rougeL", "rougeLsum"],
    use_aggregator=True,
    use_stemmer=True,
)

In [None]:
# Relative improvement, in percent, of the tuned model over the base model for
# each ROUGE metric. It is undefined (None) when the base score is 0.
improvements = {}
for rouge_metric, gemma_rouge in gemma_results.items():
    tuned_gemma_rouge = tuned_gemma_results[rouge_metric]
    if gemma_rouge != 0:
        improvement = ((tuned_gemma_rouge - gemma_rouge) / gemma_rouge) * 100
    else:
        improvement = None
    improvements[rouge_metric] = improvement

print("Base Gemma vs Tuned Gemma - ROUGE improvements")
for rouge_metric, improvement in improvements.items():
    # None cannot be rendered with the ".3f" format spec; report it explicitly.
    if improvement is None:
        print(f"{rouge_metric}: n/a (base score is 0)")
    else:
        print(f"{rouge_metric}: {improvement:.3f}%")

使用Ray Data进行批量预测

要使用Ray Data在Vertex AI上生成经过调整的模型的批量预测，您需要一个数据集来生成预测以及存储在云存储桶中的经过调整的模型。

接下来，您可以利用Ray Data，它提供了一个便于使用的API来进行离线批量推断。

上传调整后的模型

上传调整后的模型至云存储。

In [None]:
# Copy the local `models` folder (which holds the merged model) to MODELS_PATH.
! gsutil -q cp -r {models_path} {MODELS_PATH}

准备批量预测脚本

准备`src/batch_predictor.py`文件，这是执行Ray批量预测作业的Python脚本。

In [None]:
# Source of `src/batch_predictor.py`: loads a sample of the XSum validation
# split into Ray Data, summarizes each article with the tuned model inside a
# `Summarizer` callable class, and writes the predictions as JSON to
# `--output_dir`.
batch_predictor_script = """
# General
import argparse
import os
from huggingface_hub import login

# Serving
import datasets
import transformers
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers.pipelines import pipeline

# Ray
import ray

# Settings
datasets.disable_progress_bar()

# Variables
base_model_path = "google/gemma-2b-it"


# helpers
def get_args():
    parser = argparse.ArgumentParser(description='Batch prediction with Gemma on Ray on Vertex AI')
    parser.add_argument('--tuned_model_path', type=str, help='path of adapter model')
    parser.add_argument('--num_gpus', type=int, default=1, help='number of gpus')
    parser.add_argument('--batch_size', type=int, default=8, help='batch size')
    parser.add_argument('--sample_size', type=int, default=20, help='number of articles to summarize')
    parser.add_argument('--temperature', type=float, default=0.1, help='temperature for generating summaries')
    parser.add_argument('--max_new_tokens', type=int, default=50, help='max new token for generating summaries')
    parser.add_argument('--output_dir', type=str, help='output directory for predictions')
    args = parser.parse_args()
    return args

def main():

    # Set configuration
    args = get_args()
    config = vars(args)

    # Setting training
    login(token=os.environ['HF_TOKEN'], add_to_git_credential=True)
    transformers.set_seed(8)

    # Load dataset
    dataset_id = "xsum"
    sample_size = config["sample_size"]
    input_data = datasets.load_dataset(dataset_id, split="validation", trust_remote_code=True)
    input_data = input_data.select(range(sample_size))
    ray_input_data = ray.data.from_huggingface(input_data)

    # Generate predictions

    class Summarizer:

      def __init__(self):
          self.tokenizer = AutoTokenizer.from_pretrained(base_model_path)
          self.tokenizer.padding_side = "right"

          self.tuned_model = AutoModelForCausalLM.from_pretrained(config["tuned_model_path"],
                                                                  device_map='auto',
                                                                  torch_dtype=torch.float16)

          self.pipeline = pipeline("text-generation",
                                    model=self.tuned_model,
                                    tokenizer=self.tokenizer,
                                    max_new_tokens=config["max_new_tokens"])

      def __call__(self, batch: dict[str, np.ndarray]):

          # prepare dataset
          messages = [{"role": "user",
                      "content": f"Summarize the following ARTICLE in one sentence.\\n###ARTICLE: {document}"}
                      for document in batch["document"]]

          batch['prompt'] = [self.tokenizer.apply_chat_template([message], tokenize=False, add_generation_prompt=True)
                             for message in messages]

          # generate
          batch['generated_summary'] = [self.pipeline(prompt,
                                                    do_sample=True,
                                                    temperature=config["temperature"],
                                                    add_special_tokens=True)[0]["generated_text"][len(prompt):]
                                                    for prompt in batch['prompt']]

          return batch


    predictions_data = ray_input_data.map_batches(
        Summarizer,
        concurrency=config["num_gpus"],
        num_gpus=1,
        batch_size=config['batch_size'])

    # Store resulting predictions
    predictions_data.write_json(config["output_dir"], try_create_dir=True)


if __name__ == "__main__":
    main()
"""

# The `with` block closes the file on exit, so no explicit close() is needed.
with open(src_path / "batch_predictor.py", "w") as f:
    f.write(batch_predictor_script)

使用Ray Jobs API提交一个Ray作业

通过公共Ray仪表板地址，使用Ray Jobs API将脚本提交到Vertex AI集群上的Ray。

初始化客户端以提交作业。

In [None]:
# Job submission client bound to the cluster's dashboard address, using the
# `vertex_ray://` address scheme.
client = JobSubmissionClient(address=f"vertex_ray://{ray_cluster.dashboard_address}")

设置一些作业配置，包括模型路径、作业ID、预测入口等。

In [None]:
# Short random suffix that makes the submission id differ between runs.
batch_predict_id = "".join(random.choices(string.ascii_lowercase + string.digits, k=4))
batch_predict_submission_id = f"ray-job-{batch_predict_id}"
# The job reads the model through the `/gcs/` path instead of the `gs://` URI.
tuned_model_uri_path = str(MODELS_PATH / "xsum-tuned-gemma-it").replace(
    "gs://", "/gcs/"
)
# The script starts `--num_gpus` model replicas that each reserve one GPU, so
# the value must not exceed the GPUs of the worker pool; asking for more leaves
# the job waiting for resources the cluster does not have.
batch_predict_num_gpus = WORKER_NODE_COUNT * WORKER_ACCELERATION_COUNT
batch_predict_entrypoint = f"python3 batch_predictor.py --tuned_model_path={tuned_model_uri_path} --num_gpus={batch_predict_num_gpus} --output_dir={PREDICTIONS_FOLDER_URI}"
# `working_dir` points the job at the local `src` folder; the Hugging Face
# token reaches the job through an environment variable.
batch_predict_runtime_env = {
    "working_dir": str(src_path),
    "env_vars": {"HF_TOKEN": HF_TOKEN},
}

提交作业。

In [None]:
# Submit the batch prediction job; the returned id is what `monitor_job` polls.
batch_predict_job_id = client.submit_job(
    submission_id=batch_predict_submission_id,
    entrypoint=batch_predict_entrypoint,
    runtime_env=batch_predict_runtime_env,
)

使用`monitor_job`辅助函数检查作业的状态。

In [None]:
# Blocks until the batch prediction job leaves the running state and keeps its final status.
batch_predict_job_status = monitor_job(client, batch_predict_job_id)

获取生成的摘要

使用Pandas DataFrame 快速查看生成的摘要。

In [None]:
# Read every JSON file under the bucket's `predictions/` prefix.
predictions_df = read_json_files(prefix="predictions/", bucket_name=BUCKET_NAME)
# Keep the input columns next to the prompt and the generated summary.
predictions_df = predictions_df[
    ["id", "document", "prompt", "summary", "generated_summary"]
]
predictions_df.head()

清理

要清理本项目中使用的所有谷歌云资源，您可以删除用于教程的[谷歌云项目](https://cloud.google.com/resource-manager/docs/creating-managing-projects#shutting_down_projects)。

否则，您可以删除在本教程中创建的各个资源。

In [None]:
import os

delete_tensorboards = False
delete_experiments = False
delete_ray_clusters = False
delete_image_repo = False
delete_bucket = False
delete_tutorial = False

# Delete tensorboard
if delete_tensorboards:
    tensorboard_list = vertex_ai.Tensorboard.list()
    for tensorboard in tensorboard_list:
        tensorboard.delete()

# Delete experiments
if delete_experiments:
    experiment_list = vertex_ai.Experiment.list()
    for experiment in experiment_list:
        experiment.delete()

# Delete ray on vertex cluster
if delete_ray_clusters:
    ray_cluster_list = vertex_ray.list_ray_clusters()
    for ray_cluster in ray_cluster_list:
        vertex_ray.delete_ray_cluster(ray_cluster.cluster_resource_name)

if delete_image_repo:
    ! gcloud artifacts repositories delete {REPO_NAME}

# Delete Cloud Storage objects that were created
if delete_bucket:
    ! gsutil -q -m rm -r {BUCKET_URI}

# Delete tutorial folder
if delete_tutorial:
    shutil.rmtree(tutorial_path)