In [None]:
# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI Model Garden - Keras Stable Diffusion

<table align="left">
  <td>
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_keras_stable_diffusion.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Colab logo"> 在 Colab 中运行
    </a>
  </td>

  <td>
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/model_garden/model_garden_keras_stable_diffusion.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo">
      在 GitHub 上查看
    </a>
  </td>
  <td>
    <a href="https://console.cloud.google.com/vertex-ai/notebooks/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/main/notebooks/community/model_garden/model_garden_keras_stable_diffusion.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo">
 在 Vertex AI 工作台中打开
    </a>
  </td>
</table>

**_注意_**: 该笔记本已在以下环境中进行了测试：

* Python 版本 = 3.9

您可以直接在 colab 中打开这个笔记本，或者创建 [google managed](https://cloud.google.com/vertex-ai/docs/workbench/managed/create-instance) 或 [user managed](https://cloud.google.com/vertex-ai/docs/workbench/user-managed/create-new) 的工作台实例。

## 概述

本笔记本演示了如何在Vertex AI Model Garden中使用[Keras Stable Diffusion](https://keras.io/api/keras_cv/models/stable_diffusion)。

### 目的

- 在预训练或定制模型上进行本地推理
- 在谷歌云Vertex AI上部署预训练或定制模型
- 在谷歌云Vertex AI上微调模型

### 数据集

我们使用 [Pokémon BLIP 字幕数据集](https://huggingface.co/datasets/lambdalabs/pokemon-blip-captions)来展示如何微调 Stable Diffusion 模型。不过，我们使用的是从原始数据集衍生出来的一个略有不同的版本，以便更好地配合 `tf.data` 使用。更多详细信息，请参阅[文档](https://huggingface.co/datasets/sayakpaul/pokemon-blip-original-version)。

### 成本

本教程使用Google Cloud的计费组件：

* Vertex AI
* Cloud Storage

了解[Vertex AI价格](https://cloud.google.com/vertex-ai/pricing)和 [Cloud Storage价格](https://cloud.google.com/storage/pricing)，并使用[Pricing计算器](https://cloud.google.com/products/calculator/)根据您的预期使用量生成成本估算。

## 安装

安装以下所需的软件包以执行此笔记本。

In [None]:
# Colab-only setup: upgrade the Vertex AI SDK, restart the kernel after the
# install, and authenticate the current user.
if "google.colab" in str(get_ipython()):
    # Configs for colab notebooks.
    ! pip3 install --upgrade --quiet google-cloud-aiplatform

    # Automatically restart kernel after installs
    # NOTE(review): the restart is requested before the authentication call and
    # before the keras-cv install below; presumably the rest of this cell still
    # runs (or the cell is re-run after the restart) -- TODO confirm on Colab.
    import IPython

    app = IPython.Application.instance()
    app.kernel.do_shutdown(True)

    from google.colab import auth as google_auth

    google_auth.authenticate_user()

# Configs for all notebooks.
# keras-cv is pinned to version 0.4.1.
! pip3 install --quiet keras-cv==0.4.1

## 开始之前

### 设置您的 Google Cloud 项目

**无论您使用哪种笔记本环境，都需要执行以下步骤。**

1. [选择或创建Google Cloud项目](https://console.cloud.google.com/cloud-resource-manager)。当您首次创建帐户时，您将获得$300的免费信用额度，可用于计算/存储成本。

2. [确保您的项目已启用计费](https://cloud.google.com/billing/docs/how-to/modify-project)。

3. [启用Vertex AI API和Compute Engine API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com,compute_component)。

4. 如果您在本地运行此笔记本，您需要安装[Cloud SDK](https://cloud.google.com/sdk)。

5. [创建一个服务帐号](https://cloud.google.com/iam/docs/service-accounts-create#iam-service-accounts-create-console)，并为其授予 `Vertex AI User`（Vertex AI 用户）和 `Storage Object Admin`（存储对象管理员）角色，以便将微调后的模型部署到 Vertex AI 端点。

6. 在下面的单元格中输入您的项目ID。然后运行该单元格，以确保Cloud SDK在本笔记本中的所有命令中使用正确的项目。

**注意**：Jupyter 会将以 `!` 开头的行作为 shell 命令运行，并将以 `$` 为前缀的 Python 变量的值插入到这些命令中。

### 设置您的项目、区域和存储桶

**如果您不知道您的项目ID**，请尝试以下操作：
* 运行 `gcloud config list`。
* 运行 `gcloud projects list`。
* 查看支持页面：[查找项目ID](https://support.google.com/googleapi/answer/7014113)

您可以更改 Vertex AI 使用的 `REGION` 变量。了解更多关于 [Vertex AI 区域](https://cloud.google.com/vertex-ai/docs/general/locations)。

您可以创建一个存储桶来存储中间工件，例如数据集、训练模型等。

In [None]:
# The project and bucket are for experiments below.
PROJECT_ID = ""  # @param {type:"string"}

! gcloud config set project $PROJECT_ID

# The form for BUCKET_URI is gs://<bucket-name>.
BUCKET_URI = ""  # @param {type:"string"}
REGION = "us-central1"  # @param {type: "string"}

import os

STAGING_BUCKET = os.path.join(BUCKET_URI, "temporal")
EXPERIMENT_BUCKET = os.path.join(BUCKET_URI, "keras")
DATA_BUCKET = os.path.join(EXPERIMENT_BUCKET, "data")
MODEL_BUCKET = os.path.join(EXPERIMENT_BUCKET, "model")

### 初始化 Python 的 Vertex AI SDK

为您的项目初始化 Python 的 Vertex AI SDK。

In [None]:
from google.cloud import aiplatform

# Bind the SDK to the configured project and region, and use STAGING_BUCKET
# as its staging location.
aiplatform.init(
    project=PROJECT_ID,
    location=REGION,
    staging_bucket=STAGING_BUCKET,
)

### 定义常量和常用函数

In [None]:
import base64
import os
from datetime import datetime
from io import BytesIO

import matplotlib.pyplot as plt
from google.cloud import storage
from PIL import Image

# Scheme prefix that identifies a Cloud Storage URI.
GCS_URI_PREFIX = "gs://"

# Training constants.
TRAINING_JOB_PREFIX = "train"
TRAIN_CONTAINER_URI = (
    "us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/"
    "keras-train:latest"
)
TRAIN_MACHINE_TYPE = "a2-highgpu-1g"
TRAIN_ACCELERATOR_TYPE = "NVIDIA_TESLA_A100"
TRAIN_NUM_GPU = 1
# Image resolution shared by training, serving and local inference.
RESOLUTION = 512

# Prediction constants.
DEPLOY_JOB_PREFIX = "deploy"
PREDICTION_CONTAINER_URI = (
    "us-docker.pkg.dev/vertex-ai/vertex-vision-model-garden-dockers/"
    "keras-serve:latest"
)
PREDICTION_MACHINE_TYPE = "n1-standard-8"
PREDICTION_ACCELERATOR_TYPE = "NVIDIA_TESLA_V100"


def get_job_name_with_datetime(prefix: str):
    """Return ``prefix`` followed by the current time as ``_YYYYMMDD_HHMMSS``.

    Args:
      prefix: A string placed in front of the timestamp suffix.

    Returns:
      The prefix concatenated with the formatted ``datetime.now()`` value.
    """
    timestamp_suffix = datetime.now().strftime("_%Y%m%d_%H%M%S")
    return prefix + timestamp_suffix


def download_data_to_gcs(tar_filepath, gcs_bucket):
    """Download a ``.tar.gz`` archive, extract it and copy the result to GCS.

    The archive is fetched with ``wget`` into the current working directory,
    extracted there with ``tar``, and the directory named after the archive
    (file name without ``.tar.gz``) is copied under ``gcs_bucket`` with
    ``gsutil``.

    Args:
      tar_filepath: A string URL of the ``.tar.gz`` archive to download.
      gcs_bucket: A string GCS location that receives the extracted directory.

    Returns:
      A string GCS path of the uploaded directory.

    Raises:
      subprocess.CalledProcessError: If any of the wget, mkdir, tar or gsutil
        commands exits with a non-zero status.
    """
    import subprocess

    filename_with_ext = os.path.basename(tar_filepath)
    filename_without_ext = filename_with_ext.replace(".tar.gz", "")
    print("Download files from: ", tar_filepath)

    # Arguments are passed as lists (no shell), so characters such as "&" or
    # spaces in the URL or bucket are not interpreted by a shell, and
    # check=True stops at the first failing step instead of silently
    # uploading an empty or partial directory.
    commands = [
        ["wget", tar_filepath, "-O", filename_with_ext],
        # Make sure the directory exists even if the archive does not
        # contain it, so the copy step has something to upload.
        ["mkdir", "-p", filename_without_ext],
        ["tar", "-xvf", filename_with_ext, "-C", "."],
        ["gsutil", "-m", "cp", "-r", filename_without_ext, f"{gcs_bucket}/"],
    ]
    for command in commands:
        subprocess.run(command, check=True)

    gcs_path = os.path.join(gcs_bucket, filename_without_ext)
    print("Upload files to: ", gcs_path)
    return gcs_path


def download_gcs_file_to_local(gcs_uri: str, local_path: str):
    """Download a gcs file to a local path.

    Args:
      gcs_uri: A string of file path on GCS.
      local_path: A string of local file path. Its parent directory is created
        when it does not exist yet.

    Raises:
      ValueError: If gcs_uri does not start with GCS_URI_PREFIX.
    """
    if not gcs_uri.startswith(GCS_URI_PREFIX):
        raise ValueError(f"{gcs_uri} is not a GCS path starting with {GCS_URI_PREFIX}.")
    client = storage.Client()
    # os.makedirs("") raises FileNotFoundError, so only create the parent
    # directory when local_path has a directory component (a bare file name
    # such as "saved_model.h5" is written to the current working directory).
    parent_dir = os.path.dirname(local_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(local_path, "wb") as f:
        client.download_blob_to_file(gcs_uri, f)


def deploy_model(model_path, service_account):
    """Upload the serving container as a model and deploy it to a new endpoint.

    Args:
      model_path: Value handed to the serving container through the
        ``MODEL_PATH`` environment variable.
      service_account: Service account passed to the deployment call.

    Returns:
      A ``(model, endpoint)`` tuple: the uploaded model and the endpoint it
      was deployed to.
    """
    deploy_model_name = get_job_name_with_datetime(DEPLOY_JOB_PREFIX)
    print("The deployed job name is: ", deploy_model_name)

    # Environment variables are strings, so the numeric resolution is
    # converted once and reused for both dimensions.
    resolution_text = str(RESOLUTION)
    serving_env = {
        "MODEL_ID": "keras-stable-diffusion-v1-4-001",
        "MODEL_PATH": str(model_path),
        "IMAGE_WIDTH": resolution_text,
        "IMAGE_HEIGHT": resolution_text,
        "DEPLOY_SOURCE": "notebook",
    }

    # The endpoint is created first, then the model is uploaded and deployed
    # onto it.
    endpoint = aiplatform.Endpoint.create(display_name=f"{deploy_model_name}-endpoint")

    upload_kwargs = {
        "display_name": deploy_model_name,
        "serving_container_image_uri": PREDICTION_CONTAINER_URI,
        "serving_container_ports": [8080],
        "serving_container_predict_route": "/predict",
        "serving_container_health_route": "/ping",
        "serving_container_environment_variables": serving_env,
    }
    model = aiplatform.Model.upload(**upload_kwargs)

    deploy_kwargs = {
        "endpoint": endpoint,
        "machine_type": PREDICTION_MACHINE_TYPE,
        "accelerator_type": PREDICTION_ACCELERATOR_TYPE,
        "accelerator_count": 1,
        "min_replica_count": 1,
        "max_replica_count": 1,
        "deploy_request_timeout": 1800,
        "service_account": service_account,
    }
    model.deploy(**deploy_kwargs)
    return model, endpoint


def base64_to_image(image_str):
    """Decode a base64-encoded image and open it as a PIL image.

    Args:
      image_str: The base64-encoded image data.

    Returns:
      The image returned by ``Image.open`` for the decoded bytes.
    """
    decoded_bytes = base64.b64decode(image_str)
    return Image.open(BytesIO(decoded_bytes))


def display_image(image):
    """Show ``image`` in a new matplotlib figure (figsize 20x15) without a grid.

    Args:
      image: The image data handed to ``plt.imshow``.
    """
    plt.figure(figsize=(20, 15))
    plt.grid(False)
    plt.imshow(image)


def display_image_grid(imgs, rows=2, cols=2):
    """Paste images row by row into a single ``rows`` x ``cols`` grid image.

    Every cell has the size of the first image in ``imgs``.

    Args:
      imgs: A sequence of PIL images; the first one defines the cell size.
      rows: Number of grid rows.
      cols: Number of grid columns.

    Returns:
      A new RGB image containing the pasted images.
    """
    cell_width, cell_height = imgs[0].size
    canvas = Image.new("RGB", size=(cols * cell_width, rows * cell_height))
    for position, tile in enumerate(imgs):
        # Fill the grid left to right, top to bottom.
        row_index, col_index = divmod(position, cols)
        canvas.paste(tile, box=(col_index * cell_width, row_index * cell_height))
    return canvas

## 运行推断

本节展示了如何使用Keras Stable Diffusion模型进行推断。

1. 本地运行推断
2. 使用serving dockers运行推断

您可以使用Keras团队提供的预训练模型，也可以使用自己微调过的模型进行推断。

In [None]:
# Leave model_path empty to load the pre-trained model from the Keras team.
# Set model_path to a GCS URI (gs://...) to load a fine-tuned model instead.
model_path = ""  # @param {type:"string"}

### 本地运行推断
使用GPU，本地推断可以在几秒钟内完成。

首先加载模型。

In [None]:
from keras_cv.models import StableDiffusion

# Build the pipeline at the configured square resolution.
model = StableDiffusion(img_height=RESOLUTION, img_width=RESOLUTION, jit_compile=True)

# Decide which weights file (if any) replaces the pre-trained diffusion weights.
if model_path.startswith(GCS_URI_PREFIX):
    # Weights stored on GCS are first copied to a local file.
    local_model_path = "/tmp/saved_model.h5"
    download_gcs_file_to_local(model_path, local_model_path)
    weights_to_load = local_model_path
else:
    # A non-empty value is used as a local path; an empty one keeps the
    # pre-trained weights.
    weights_to_load = model_path

if weights_to_load:
    model.diffusion_model.load_weights(weights_to_load)

然后进行推理。

In [None]:
batch_size = 1
generation_options = {
    "prompt": "a squirrel in Picasso style",
    "batch_size": batch_size,  # How many images to generate at once
    "num_steps": 25,  # Number of iterations (controls image quality)
    "seed": 123,  # A fixed seed guarantees the same prompt always generates the same image
}
img = model.text_to_image(**generation_options)

# Show every generated image in its own figure.
for image_index in range(batch_size):
    display_image(img[image_index])

### 使用docker部署模型
当使用docker部署模型时，我们将在Google Cloud Vertex AI中部署模型。默认设置将使用1个V100 GPU进行部署。

如果您还没有服务帐号，请先创建一个用于 docker 部署的服务帐号。

模型部署大约需要 10 分钟才能完成。

In [None]:
# Service account used for the deployment. It looks like:
# '<account_name>@<project>.iam.gserviceaccount.com'
# Create one with the `Vertex AI User` and `Storage Object Admin` roles at
# https://cloud.google.com/iam/docs/service-accounts-create#iam-service-accounts-create-console
SERVICE_ACCOUNT = ""  # @param {type:"string"}

# NOTE: `model` is rebound here; after this cell it refers to the uploaded
# Vertex AI model rather than the local StableDiffusion instance.
model, endpoint = deploy_model(model_path, SERVICE_ACCOUNT)

endpoint_id = endpoint.name
print("endpoint id is: ", endpoint_id)

一旦部署完成，您可以发送一批文本提示到端点以生成图片。

请注意，新部署完成后的第一个请求需要更长的处理时间：在 1 个 V100 GPU 上大约需要 45 秒。对于后续请求，在 1 个 V100 GPU 上每张图片的推断时间大约为 12 秒。

In [None]:
# # Loads an existing endpoint as below.
# endpoint_id = <An Existing Endpoint ID>
# aip_endpoint_name = (
#     f"projects/{PROJECT_ID}/locations/{REGION}/endpoints/{endpoint_id}"
# )
# endpoint = aiplatform.Endpoint(aip_endpoint_name)

# One request instance per text prompt.
prompts = [
    "a squirrel in Picasso style",
    "a dog in Picasso style",
    "a cat in Picasso style",
    "a deer in Picasso style",
]
instances = [{"prompt": prompt} for prompt in prompts]

parameters = {
    "batch_size": 1,  # How many images to generate at once
    "num_steps": 25,  # Number of iterations (controls image quality)
    "seed": 123,  # A fixed seed guarantees the same prompt always generates the same image
}
response = endpoint.predict(instances=instances, parameters=parameters)

# prediction["predicted_image"] contains the predicted images of one batch.
# The batch size in this example is 1, so the visualization only decodes the
# first predicted image of each prediction.
images = []
for prediction in response.predictions:
    first_image = prediction["predicted_image"][0]
    images.append(base64_to_image(first_image))
display_image_grid(images, rows=2, cols=2)

### 清理

In [None]:
# Tear down the serving resources created by the deployment cell.
# `model` must still refer to the uploaded Vertex AI model returned by
# deploy_model, not to the local StableDiffusion instance.
# Undeploys models and deletes endpoints.
endpoint.delete(force=True)
# Deletes models.
model.delete()

## 微调模型
本节展示了如何使用训练 docker 来微调 Keras Stable Diffusion 模型。

如果您想使用微调后的模型，请转到“运行推断”部分。

### 下载数据
我们将数据下载到 GCS 存储中，用于训练 docker 实验。

In [None]:
# Skip this step if you have already downloaded the dataset.
pokemon_dataset_url = "https://huggingface.co/datasets/sayakpaul/pokemon-blip-original-version/resolve/main/pokemon_dataset.tar.gz"
# The returned GCS path is the cell output.
download_data_to_gcs(pokemon_dataset_url, DATA_BUCKET)

### 开始训练工作
我们使用 1 个 A100 GPU 对 512*512 的 Stable Diffusion 模型微调 1 个 epoch，使用默认设置大约需要 15 分钟完成。

In [None]:
# Training inputs.
epochs = 1
data_csv = os.path.join(DATA_BUCKET, "pokemon_dataset/data.csv")

# Each run gets a timestamped name and its own output directory.
train_job_name = get_job_name_with_datetime(TRAINING_JOB_PREFIX)
model_dir = os.path.join(MODEL_BUCKET, train_job_name)

# Hardware for the single training replica.
machine_spec = {
    "machine_type": TRAIN_MACHINE_TYPE,
    "accelerator_type": TRAIN_ACCELERATOR_TYPE,
    "accelerator_count": TRAIN_NUM_GPU,
}
disk_spec = {
    "boot_disk_type": "pd-ssd",
    "boot_disk_size_gb": 500,
}
# Training container, its environment and its command-line arguments.
container_spec = {
    "image_uri": TRAIN_CONTAINER_URI,
    "command": [],
    "env": [
        {
            "name": "RESOLUTION",
            "value": str(RESOLUTION),
        },
    ],
    "args": [
        f"--epochs={epochs}",
        f"--input_csv_path={data_csv}",
        f"--output_model_dir={model_dir}",
    ],
}
worker_pool_specs = [
    {
        "machine_spec": machine_spec,
        "replica_count": 1,
        "disk_spec": disk_spec,
        "container_spec": container_spec,
    }
]

train_job = aiplatform.CustomJob(
    display_name=train_job_name,
    project=PROJECT_ID,
    worker_pool_specs=worker_pool_specs,
    staging_bucket=STAGING_BUCKET,
)

train_job.run()

# Location where the fine-tuned weights are expected (presumably written by
# the training container under --output_model_dir; verify against the image).
model_path = os.path.join(model_dir, "saved_model.h5")
print("The trained model is saved as: ", model_path)

训练结束后，您可以使用 `model_path`，然后转到上面的 `运行推断` 部分来进行预测。

### 清理

In [None]:
# Delete the custom training job created by the training cell.
train_job.delete()

## 参考资料

- [微调稳定扩散](https://keras.io/examples/generative/finetune_stable_diffusion/)
- [StableDiffusion图像生成模型](https://keras.io/api/keras_cv/models/stable_diffusion/)
- [在KerasCV中使用稳定扩散进行高性能图像生成](https://keras.io/guides/keras_cv/generate_images_with_stable_diffusion/)