In [None]:
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI LLM和流式预测

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/prediction/llm_streaming_prediction.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Google Colaboratory logo"><br> 在Colab中打开
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fvertex-ai-samples%2Fmain%2Fnotebooks%2Fofficial%2Fprediction%2Fllm_streaming_prediction.ipynb">
      <img width="32px" src="https://cloud.google.com/ml-engine/images/colab-enterprise-logo-32px.png" alt="Google Cloud Colab Enterprise logo"><br> 在Colab Enterprise中打开
    </a>
  </td>    
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/main/notebooks/official/prediction/llm_streaming_prediction.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo"><br> 在Workbench中打开
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/prediction/llm_streaming_prediction.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo"><br> 在GitHub上查看
    </a>
  </td>
</table>

## 概述

本教程演示了如何使用Vertex AI LLM来对大型语言模型进行流式预测。

了解更多关于[Vertex AI语言模型](https://cloud.google.com/python/docs/reference/aiplatform/latest/vertexai.language_models.TextGenerationModel#vertexai_language_models_TextGenerationModel_predict_streaming)。

### 目标

在本教程中，您将学习如何使用Vertex AI LLM来下载预训练的LLM模型，进行预测并微调模型。

本教程使用以下Google Cloud ML服务：

- `Vertex AI LLM`
- `Vertex AI Prediction`

执行的步骤包括：

- 加载预训练的文本生成模型。
- 进行非流式预测。
- 加载支持流式的预训练文本生成模型。
- 进行流式预测。
- 加载预训练的聊天模型。
- 进行本地交互式聊天会话。
- 使用文本生成模型进行批量预测。
- 使用文本嵌入模型进行批量预测。

### 模型

本教程使用的预训练模型取自Vertex AI LLM存储库。使用的模型包括：

- `text-bison`
- `chat-bison`
- `textembedding-gecko`

### 成本
本教程使用谷歌云的计费组件：

- Vertex AI
- 云存储

了解[Vertex AI价格](https://cloud.google.com/vertex-ai/pricing)和[云存储价格](https://cloud.google.com/storage/pricing)，并使用[定价计算器](https://cloud.google.com/products/calculator/)根据您的预期使用情况生成成本估算。

## 开始吧

### 安装Vertex AI SDK for Python和其他必需的包

In [None]:
! pip3 install --upgrade --quiet google-cloud-aiplatform \
                                pandas

### 重新启动运行时（仅适用于Colab）

要使用新安装的软件包，您必须重新启动Google Colab上的运行时。

In [None]:
import sys

# Colab only: restart the kernel so the packages installed above are the ones
# picked up by the cells that follow. Outside Colab this cell is a no-op.
if "google.colab" in sys.modules:
    import IPython

    IPython.Application.instance().kernel.do_shutdown(True)

<div class="alert alert-block alert-warning">
<b>⚠️ 内核将重新启动。在继续下一步之前请等待完成。 ⚠️</b>
</div>

### 在Google Colab上对您的笔记本环境进行身份验证

如果您在Google Colab上运行此笔记本，请运行以下单元格对您的环境进行身份验证。

In [None]:
import sys

# Authenticate the notebook user when running inside Colab; in any other
# environment the condition is false and this cell does nothing.
if "google.colab" in sys.modules:

    from google.colab import auth

    auth.authenticate_user()

### 设置Google Cloud项目信息并初始化Python的Vertex AI SDK

要开始使用Vertex AI，您必须拥有现有的Google Cloud项目并[启用Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com)。了解更多关于[设置项目和开发环境](https://cloud.google.com/vertex-ai/docs/start/cloud-environment)的信息。

In [None]:
# Google Cloud project and region used throughout the notebook.
# Replace the placeholder project ID with your own before running.
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}
LOCATION = "us-central1"  # @param {type:"string"}

#### 创建一个云存储桶

创建一个存储桶，用于存储中间产物，如数据集。

In [None]:
# Cloud Storage bucket URI. It is passed to vertexai.init() as the staging
# bucket and is used below as the destination for batch prediction output and
# for the tuning dataset copy.
BUCKET_URI = f"gs://your-bucket-name-{PROJECT_ID}-unique"  # @param {type:"string"}

如果您的存储桶尚不存在：运行以下单元格以创建您的云存储存储桶。

In [None]:
# Create the bucket in the configured project and region.
# Only run this cell if the bucket does not exist yet.
! gsutil mb -l {LOCATION} -p {PROJECT_ID} {BUCKET_URI}

设置Google Cloud项目

In [None]:
import vertexai

# Initialize the SDK with the project, region, and staging bucket set above.
vertexai.init(
    project=PROJECT_ID,
    location=LOCATION,
    staging_bucket=BUCKET_URI,
)

导入库

In [None]:
from vertexai.preview.language_models import TextGenerationModel

首先，您从Vertex AI LLM中加载`text-bison`大型语言模型。加载完成后，您可以进行预测。

In [None]:
model = TextGenerationModel.from_pretrained("google/text-bison@001")

# Request a single, non-streaming completion and print it once it is returned.
prediction = model.predict(
    "What is the best recipe for banana bread? Recipe:",
    # Alternative prompt:
    # "Brainstorm some ideas combining VR and fitness:",
    # Optional generation parameters:
    # max_output_tokens=128,
    # temperature=0,
    # top_p=1,
    # top_k=5,
)
print(prediction)

## 流式预测

接下来，您从Vertex AI LLM中加载支持流式预测的`text-bison`模型版本。加载完成后，您进行预测。请注意，每次迭代时，该模型都会在预测响应中返回下一段连续的输出。

In [None]:
import datetime

text_generation_model = TextGenerationModel.from_pretrained("text-bison")

# Print a timestamp before each streamed chunk so the incremental arrival of
# the response is visible in the cell output.
print("Start: ", datetime.datetime.now())
response_stream = text_generation_model.predict_streaming(
    prompt="Count to 100",
    max_output_tokens=1000,
)
for response in response_stream:
    print(datetime.datetime.now())
    print(response)
print("End: ", datetime.datetime.now())

## 聊天

接下来，您从Vertex AI LLM加载一个chat-bison模型的版本。加载完成后，您可以进行互动式聊天会话。

In [None]:
from vertexai.language_models import ChatModel, InputOutputTextPair

chat_model2 = ChatModel.from_pretrained("google/chat-bison@001")

# Optional: context and example exchanges supplied when the chat is started.
chat_context = "My name is Ned. You are my personal assistant. My favorite movies are Lord of the Rings and Hobbit."
chat_examples = [
    InputOutputTextPair(
        input_text="Who do you work for?",
        output_text="I work for Ned.",
    ),
    InputOutputTextPair(
        input_text="What do I like?",
        output_text="Ned likes watching movies.",
    ),
]

chat2 = chat_model2.start_chat(context=chat_context, examples=chat_examples)

# First turn of the conversation.
print(chat2.send_message("Are my favorite movies based on a book series?"))

In [None]:
# Follow-up question sent on the same chat session as the previous message.
# The prompt previously read "When where ..."; corrected to "When were ...".
print(chat2.send_message("When were these books published?"))

## 文本嵌入

接下来，您从Vertex AI LLM加载`textembedding-gecko`模型的一个版本，然后发出嵌入请求。

In [None]:
from vertexai.language_models import TextEmbeddingModel

model = TextEmbeddingModel.from_pretrained("google/textembedding-gecko@001")

# Embed a single sentence and print the length of each returned vector.
embeddings = model.get_embeddings(["What is life?"])
for embedding in embeddings:
    print(len(embedding.values))

## 批量预测

### text-bison模型

现在，您将使用预训练的`text-bison`模型执行批量预测任务。

In [None]:
dataset = "gs://cloud-samples-data/vertex-ai/prediction/llm/test_table.jsonl"
destination_uri_prefix = f"{BUCKET_URI}/text-bison@001_"
! gsutil cp -r gs://cloud-samples-data/vertex-ai/prediction/llm/text-bison@001_/ {destination_uri_prefix}


from vertexai.language_models import TextGenerationModel

text_generation_model = TextGenerationModel.from_pretrained("text-bison")
batch_job_1 = text_generation_model.batch_predict(
    dataset=dataset,
    destination_uri_prefix=destination_uri_prefix,
    model_parameters={},
)

接下来，您可以使用预训练的`textembedding-gecko`模型执行批量预测任务。

In [None]:
from vertexai.preview.language_models import TextEmbeddingModel

# Input file (JSONL) and the bucket prefix the batch job writes to.
dataset = "gs://cloud-samples-data/vertex-ai/prediction/llm/embedding_input.jsonl"
destination_uri_prefix = f"{BUCKET_URI}/textembedding-gecko@001_"

# Start the batch embedding job; the handle is kept so the cleanup cell can
# delete the job.
text_embedding_model = TextEmbeddingModel.from_pretrained("textembedding-gecko@001")
batch_job_2 = text_embedding_model.batch_predict(
    dataset=dataset,
    destination_uri_prefix=destination_uri_prefix,
    # Optional:
    model_parameters={},
)

## 调整

现在，您可以对预训练的`text-bison`模型进行微调，并使用微调后的模型进行预测。您的输入数据以JSONL格式存储在Cloud Storage位置中。

In [None]:
# Load the base model that the tuning cells below operate on.
model3 = TextGenerationModel.from_pretrained("google/text-bison@001")

# Bare expression: the names of the tuned models are shown as the cell output.
model3.list_tuned_model_names()

In [None]:
# Tuning model
! gsutil cp gs://cloud-samples-data/vertex-ai/prediction/llm/q_a_train_with_context.jsonl {BUCKET_URI}/q_a_train_with_context.jsonl

tuning_job = model3.tune_model(
    training_data=f"{BUCKET_URI}/q_a_train_with_context.jsonl",
    # Optional:
    train_steps=1,
    tuning_job_location="europe-west4",
    tuned_model_location="us-central1",
)

In [None]:
# Make a prediction with the fine tuned model
# `tuning_job` is the job started by the preceding tune_model() call.
# NOTE(review): presumably get_tuned_model() waits for the job to finish - confirm.
tuned_model = tuning_job.get_tuned_model()

print(tuned_model.predict("Tell me some ideas combining VR and fitness:"))

In [None]:
# List tuned model names
# Same call as before tuning; the bare expression is shown as the cell output.
model3.list_tuned_model_names()

In [None]:
# Get tuned model
# Look the model up by name, using the first entry returned by the listing.
tuned_model_names = model3.list_tuned_model_names()
tuned_model4 = model3.get_tuned_model(tuned_model_name=tuned_model_names[0])

In [None]:
# Make a prediction with the fine tuned model
# Uses the model object retrieved by name in the previous cell.
print(tuned_model4.predict("Brainstorm some ideas combining VR and fitness:"))

### 使用 pandas DataFrame 进行微调

现在，您可以对预训练的`text-bison`模型进行微调，并使用微调后的模型进行预测。这一次，您的输入数据是内存中的 pandas DataFrame。

In [None]:
import pandas

# Ten placeholder training rows: "Input 1"/"Output 1" through
# "Input 10"/"Output 10", in the input_text / output_text columns.
training_data = pandas.DataFrame(
    data=[
        {"input_text": f"Input {row}", "output_text": f"Output {row}"}
        for row in range(1, 11)
    ]
)

# Bare expression: the frame is rendered as the cell output.
training_data

In [None]:
# Tune pretrained model
model4 = TextGenerationModel.from_pretrained("google/text-bison@001")

# Optional tuning arguments, grouped so they can be adjusted in one place.
tuning_options = {
    "train_steps": 10,
    "tuning_job_location": "europe-west4",
    "tuned_model_location": "us-central1",
}

# The in-memory DataFrame is passed directly as the training data.
tuning_job = model4.tune_model(training_data=training_data, **tuning_options)

In [None]:
# Make a prediction with the fine tuned model
# `tuning_job` now refers to the DataFrame-based job started in the previous
# cell, so `tuned_model` is rebound to that job's model.
tuned_model = tuning_job.get_tuned_model()

print(tuned_model.predict("Tell me some ideas combining VR and fitness:"))

## 清理

In [None]:
delete_bucket = False

# Delete job
batch_job_1.delete()
batch_job_2.delete()

if delete_bucket:
    ! gsutil -m rm -r $BUCKET_URI