In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex AI 特征存储 特征视图 服务代理 教程

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/feature_store/vertex_ai_feature_store_feature_view_service_agents.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/colab-logo-32px.png" alt="Google Colaboratory logo"><br> 在 Colab 中打开
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fvertex-ai-samples%2Fmain%2Fnotebooks%2Fofficial%2Ffeature_store%2Fvertex_ai_feature_store_feature_view_service_agents.ipynb">
      <img width="32px" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" alt="Google Cloud Colab Enterprise logo"><br> 在 Colab Enterprise 中打开
    </a>
  </td>    
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://raw.githubusercontent.com/GoogleCloudPlatform/vertex-ai-samples/main/notebooks/official/feature_store/vertex_ai_feature_store_feature_view_service_agents.ipynb">
      <img src="https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32" alt="Vertex AI logo"><br> 在 Workbench 中打开
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/feature_store/vertex_ai_feature_store_feature_view_service_agents.ipynb">
      <img src="https://cloud.google.com/ml-engine/images/github-logo-32px.png" alt="GitHub logo"><br> 在 GitHub 上查看
    </a>
  </td>
</table>

## 概述
在本教程中，您将了解如何启用特征视图服务代理，并为每个特征视图授予其所使用的特定源数据的访问权限。

了解有关[Vertex AI特征存储](https://cloud.google.com/vertex-ai/docs/featurestore/latest/overview)的更多信息。

### 目标
在本教程中，您将学习如何在Vertex AI特征存储中为特征视图使用专用服务代理。通过为特征视图使用专用服务代理，您可以授予从BigQuery提取特征数据的访问权限，从而实现端对端的特征提供工作流程。

本教程使用以下谷歌云服务和资源：
* Vertex AI特征存储

您将执行以下步骤：
* 创建配置为使用专用服务账号的特征视图。
* 为每个特征视图创建一个服务账号。该服务账号用于从BigQuery同步数据。
* Get/List特征视图API返回自动创建的服务账号。用户需要调用`bq add-iam-policy-binding`命令，为服务账号授予`roles/bigquery.dataViewer`角色。

## 注意
这是预览版功能。使用该功能即表示您确认已了解其可能存在的问题，并且该预览版功能依据 Pre-GA 服务条款按“原样”提供。

### 成本
本教程使用谷歌云的计费组件：
* Vertex AI
* BigQuery

了解关于[Vertex AI定价](https://cloud.google.com/vertex-ai/pricing)和[BigQuery定价](https://cloud.google.com/bigquery/pricing)，并使用[定价计算器](https://cloud.google.com/products/calculator/)，根据您的预计使用情况生成成本估算。

## 入门

### 安装Vertex AI SDK和其他必需的软件包

In [None]:
# Install the packages
! pip3 install --upgrade --quiet google-cloud-aiplatform\
                                 google-cloud-bigquery\
                                 db-dtypes

### 重新启动运行时（仅限Colab）

为了使用新安装的包，您必须重新启动Google Colab上的运行时。

In [None]:
import sys

# Only Colab needs the restart; the check looks for the already-loaded
# `google.colab` module.
if "google.colab" in sys.modules:
    import IPython

    # Shut the kernel down with restart=True so it restarts.
    IPython.Application.instance().kernel.do_shutdown(True)

<div class="alert alert-block alert-warning">
<b>⚠️内核将重新启动。请等待直到完成才继续执行下一步。⚠️</b>
</div>

### 在 Google Colab 上对您的笔记本环境进行身份验证

In [None]:
import sys

# Authentication through `google.colab.auth` is only attempted when the
# notebook is running inside Colab.
running_in_colab = "google.colab" in sys.modules
if running_in_colab:
    from google.colab import auth

    auth.authenticate_user()

### 设置谷歌云项目信息并初始化 Vertex AI SDK

要开始使用 Vertex AI，您必须拥有现有的谷歌云项目并[启用 Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com)。了解更多关于[设置项目和开发环境](https://cloud.google.com/vertex-ai/docs/start/cloud-environment)的信息。

In [None]:
from google.cloud import aiplatform

# Project and region used by the cells below; edit these before running.
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}
LOCATION = "us-central1"  # @param {type:"string"}

# Initialize the Vertex AI SDK with the project and location chosen above.
aiplatform.init(project=PROJECT_ID, location=LOCATION)

### 导入库

In [None]:
from google.cloud.aiplatform_v1beta1 import (
    FeatureOnlineStoreAdminServiceClient, FeatureOnlineStoreServiceClient,
    FeatureRegistryServiceClient)
from google.cloud.aiplatform_v1beta1.types import feature as feature_pb2
from google.cloud.aiplatform_v1beta1.types import \
    feature_group as feature_group_pb2
from google.cloud.aiplatform_v1beta1.types import \
    feature_online_store as feature_online_store_pb2
from google.cloud.aiplatform_v1beta1.types import \
    feature_online_store_admin_service as \
    feature_online_store_admin_service_pb2
from google.cloud.aiplatform_v1beta1.types import \
    feature_online_store_service as feature_online_store_service_pb2
from google.cloud.aiplatform_v1beta1.types import \
    feature_registry_service as feature_registry_service_pb2
from google.cloud.aiplatform_v1beta1.types import \
    feature_view as feature_view_pb2
from google.cloud.aiplatform_v1beta1.types import \
    featurestore_service as featurestore_service_pb2
from google.cloud.aiplatform_v1beta1.types import io as io_pb2

初始化AI平台特征存储客户端

为您的项目初始化AI平台特征存储客户端。

In [None]:
# Regional API endpoint derived from LOCATION.
API_ENDPOINT = f"{LOCATION}-aiplatform.googleapis.com"

# All three clients are pointed at the same endpoint.
CLIENT_OPTIONS = {"api_endpoint": API_ENDPOINT}

admin_client = FeatureOnlineStoreAdminServiceClient(client_options=CLIENT_OPTIONS)
registry_client = FeatureRegistryServiceClient(client_options=CLIENT_OPTIONS)
data_client = FeatureOnlineStoreServiceClient(client_options=CLIENT_OPTIONS)

为这个colab生成样本数据。

In [None]:
# Dataset and table that hold the sample feature data. The dataset name embeds
# LOCATION with "-" replaced by "_".
DATASET_ID = "test_data"+"_"+LOCATION.replace('-', '_')  # @param {type:"string"}
TABLE_ID = "tableA"  # @param {type:"string"}

# Create the BigQuery dataset, passing LOCATION as its location.
!bq mk --dataset_id={DATASET_ID} --location={LOCATION}
# Create the table from three literal rows that all use entity_id 'test', with
# timestamps on three consecutive days and two INT64 feature columns.
!bq query --nouse_legacy_sql \
"CREATE TABLE {DATASET_ID}.{TABLE_ID} AS (" \
"SELECT * FROM UNNEST(ARRAY<STRUCT<entity_id STRING, feature_timestamp TIMESTAMP, feature1 INT64, feature2 INT64>>[" \
"('test', TIMESTAMP('2024-02-26 08:00:00 UTC'), 10, 20)," \
"('test', TIMESTAMP('2024-02-27 08:00:00 UTC'), 30, 40)," \
"('test', TIMESTAMP('2024-02-28 08:00:00 UTC'), 50, 60)]))"

创建一个特征组。

In [None]:
# ID of the feature group to create.
FEATURE_GROUP_ID = "product_features_unique"  # @param {type: "string"}

# IDs of the features to create under the feature group; they match the
# feature1/feature2 column names of the sample table.
FEATURE_IDS = ["feature1", "feature2"]  # @param

# BigQuery URI of the sample table, used as the feature group's source.
DATA_SOURCE = f"bq://{PROJECT_ID}.{DATASET_ID}.{TABLE_ID}"

In [None]:
# Create a feature group whose source is the BigQuery table in DATA_SOURCE.
feature_group_config = feature_group_pb2.FeatureGroup(
    big_query=feature_group_pb2.FeatureGroup.BigQuery(
        big_query_source=io_pb2.BigQuerySource(input_uri=DATA_SOURCE),
        entity_id_columns=["entity_id"],
    ),
    description="This is a feature group for testing",
)

create_group_lro = registry_client.create_feature_group(
    feature_registry_service_pb2.CreateFeatureGroupRequest(
        parent=f"projects/{PROJECT_ID}/locations/{LOCATION}",
        feature_group_id=FEATURE_GROUP_ID,
        feature_group=feature_group_config,
    )
)
# Wait for the feature group LRO to finish before creating features under it.
print(create_group_lro.result())

# Create features under the feature group.
# The loop variable is named `feature_id` rather than `id`: cell-level names
# are kernel globals, so `id` would keep shadowing the built-in `id()` in
# every later cell.
create_feature_lros = []
for feature_id in FEATURE_IDS:
    create_feature_lros.append(
        registry_client.create_feature(
            featurestore_service_pb2.CreateFeatureRequest(
                parent=f"projects/{PROJECT_ID}/locations/{LOCATION}/featureGroups/{FEATURE_GROUP_ID}",
                feature_id=feature_id,
                feature=feature_pb2.Feature(),
            )
        )
    )
# All create requests are issued first; the results are collected afterwards.
for lro in create_feature_lros:
    print(lro.result())

验证已创建的特征组。

In [None]:
# Fetch the feature group by its resource name to confirm that it exists.
feature_group_name = f"projects/{PROJECT_ID}/locations/{LOCATION}/featureGroups/{FEATURE_GROUP_ID}"
registry_client.get_feature_group(name=feature_group_name)

验证已创建的特征。

In [None]:
# List the features under the feature group to confirm they were created.
feature_group_parent = f"projects/{PROJECT_ID}/locations/{LOCATION}/featureGroups/{FEATURE_GROUP_ID}"
registry_client.list_features(parent=feature_group_parent)

创建特征在线存储

接下来，创建一个标准的特征在线存储。

In [None]:
# ID of the feature online store to create.
FEATURE_ONLINE_STORE_ID = "test_fos_unique"  # @param {type:"string"}

In [None]:
# Bigtable autoscaling with min and max node count both set to 1.
bigtable_autoscaling = feature_online_store_pb2.FeatureOnlineStore.Bigtable.AutoScaling(
    min_node_count=1,
    max_node_count=1,
    cpu_utilization_target=50,
)
bigtable_config = feature_online_store_pb2.FeatureOnlineStore.Bigtable(
    auto_scaling=bigtable_autoscaling
)
online_store_config = feature_online_store_pb2.FeatureOnlineStore(
    bigtable=bigtable_config
)

create_store_request = (
    feature_online_store_admin_service_pb2.CreateFeatureOnlineStoreRequest(
        parent=f"projects/{PROJECT_ID}/locations/{LOCATION}",
        feature_online_store_id=FEATURE_ONLINE_STORE_ID,
        feature_online_store=online_store_config,
    )
)
create_store_lro = admin_client.create_feature_online_store(create_store_request)

# Wait for the long-running operation to finish, then print its result.
# This operation might take up to 10 minutes to complete.
print(create_store_lro.result())

验证已创建的特征在线存储。

In [None]:
# Fetch the online store by its resource name to confirm that it was created.
online_store_name = f"projects/{PROJECT_ID}/locations/{LOCATION}/featureOnlineStores/{FEATURE_ONLINE_STORE_ID}"
admin_client.get_feature_online_store(name=online_store_name)

创建特征视图

In [None]:
# ID of the feature view; later cells use it to build the feature view's
# resource name.
FEATURE_VIEW_ID = "test_fv"  # @param {type: "string"}

# A schedule will be created based on this cron setting.
# The value below reads as 12:00 every day in the America/Los_Angeles time zone.
CRON_SCHEDULE = "TZ=America/Los_Angeles 0 12 * * *"  # @param {type: "string"}

In [None]:
# Create a feature view that reads its features from the feature registry.
feature_registry_source = feature_view_pb2.FeatureView.FeatureRegistrySource(
    feature_groups=[
        feature_view_pb2.FeatureView.FeatureRegistrySource.FeatureGroup(
            feature_group_id=FEATURE_GROUP_ID, feature_ids=FEATURE_IDS
        )
    ]
)

# Set cron schedule.
sync_config = feature_view_pb2.FeatureView.SyncConfig(cron=CRON_SCHEDULE)

create_view_lro = admin_client.create_feature_view(
    parent=f"projects/{PROJECT_ID}/locations/{LOCATION}/featureOnlineStores/{FEATURE_ONLINE_STORE_ID}",
    # Use the configurable ID rather than a literal so that the later cells,
    # which build the resource name from FEATURE_VIEW_ID, find this view.
    feature_view_id=FEATURE_VIEW_ID,
    feature_view=feature_view_pb2.FeatureView(
        feature_registry_source=feature_registry_source,
        sync_config=sync_config,
        # Request a service agent dedicated to this feature view.
        service_agent_type=feature_view_pb2.FeatureView.ServiceAgentType.SERVICE_AGENT_TYPE_FEATURE_VIEW,
    ),
)

In [None]:
# Wait for the create-feature-view LRO to finish and print its result.
print(create_view_lro.result())

验证所创建的特征视图。

In [None]:
# Fetch the feature view by its resource name to confirm that it was created.
feature_view_name = f"projects/{PROJECT_ID}/locations/{LOCATION}/featureOnlineStores/{FEATURE_ONLINE_STORE_ID}/featureViews/{FEATURE_VIEW_ID}"
admin_client.get_feature_view(name=feature_view_name)

### 授予Feature View Service Agent对BigQuery的访问权限

注意：确保您完成本节中描述的步骤。否则，特征视图的同步将失败。

接下来，将BigQuery数据查看器角色授予创建的特征视图Service Agent。这需要两个步骤：
1. 检索特征视图的 `service_account_email`。
2. 更新BigQuery源端的IAM策略。

In [None]:
# Step 1: Retrieve the service account email for the feature view.

# Read the feature view back and take the email from its
# `service_account_email` field.
feature_view_name = f"projects/{PROJECT_ID}/locations/{LOCATION}/featureOnlineStores/{FEATURE_ONLINE_STORE_ID}/featureViews/{FEATURE_VIEW_ID}"
feature_view = admin_client.get_feature_view(name=feature_view_name)
SERVICE_ACCOUNT = feature_view.service_account_email

In [None]:
# Step 2: Update the IAM policy on the BigQuery Source.

!bq add-iam-policy-binding --member=serviceAccount:$SERVICE_ACCOUNT --role=roles/bigquery.dataViewer {DATASET_ID}.{TABLE_ID}

请等一段时间，让新添加的IAM策略绑定生效。
* 注意：策略更改通常在 2 分钟左右生效，因此您可以酌情缩短下面的等待时间。有关更多详细信息，请参阅[IAM文档](https://cloud.google.com/iam/docs/access-change-propagation)。

In [None]:
from time import sleep

# Pause for 10 minutes so the newly added IAM policy binding can take effect
# before the sync is started.
IAM_PROPAGATION_WAIT_SECONDS = 600
sleep(IAM_PROPAGATION_WAIT_SECONDS)

按需批量同步运行。

In [None]:
# Imported here as well so this cell still runs when the IAM wait cell above
# (the only other place `sleep` is imported) has been skipped.
from time import sleep

# Start an on-demand sync of the feature view.
sync_response = admin_client.sync_feature_view(
    feature_view=f"projects/{PROJECT_ID}/locations/{LOCATION}/featureOnlineStores/{FEATURE_ONLINE_STORE_ID}/featureViews/{FEATURE_VIEW_ID}"
)

# Wait for sync completion: poll the sync record every 30 seconds until it
# reports an end time, then print whether the final status code was 0.
while True:
    feature_view_sync = admin_client.get_feature_view_sync(
        name=sync_response.feature_view_sync
    )
    if feature_view_sync.run_time.end_time.seconds > 0:
        status = "Succeed" if feature_view_sync.final_status.code == 0 else "Failed"
        print(f"Sync {status} for {feature_view_sync.name}.")
        break
    print("Sync ongoing, waiting for 30 seconds.")
    sleep(30)

确认批量同步的状态。

In [None]:
# Fetch the sync record again so the notebook displays it as the cell output.
admin_client.get_feature_view_sync(name=sync_response.feature_view_sync)

开始在线服务

数据同步完成后，使用`FetchFeatureValues` API来检索数据。

In [None]:
# Client for the online serving API, pointed at the regional endpoint.
serving_client_options = {"api_endpoint": API_ENDPOINT}
data_client = FeatureOnlineStoreServiceClient(client_options=serving_client_options)

从特征在线存储读取已同步的数据。

In [None]:
# Read the feature values stored in the feature view for the key "test".
fetch_request = feature_online_store_service_pb2.FetchFeatureValuesRequest(
    feature_view=f"projects/{PROJECT_ID}/locations/{LOCATION}/featureOnlineStores/{FEATURE_ONLINE_STORE_ID}/featureViews/{FEATURE_VIEW_ID}",
    data_key=feature_online_store_service_pb2.FeatureViewDataKey(key="test"),
)
data_client.fetch_feature_values(request=fetch_request)

清理工作

要清理此项目中使用的所有Google Cloud资源，您可以[删除您用于教程的Google Cloud项目](https://cloud.google.com/resource-manager/docs/creating-managing-projects#shutting_down_projects)。

否则，您可以删除您在本教程中创建的各个资源。

In [None]:
# Delete feature view
admin_client.delete_feature_view(
    name=f"projects/{PROJECT_ID}/locations/{LOCATION}/featureOnlineStores/{FEATURE_ONLINE_STORE_ID}/featureViews/{FEATURE_VIEW_ID}"
)

# Delete online store
admin_client.delete_feature_online_store(
    name=f"projects/{PROJECT_ID}/locations/{LOCATION}/featureOnlineStores/{FEATURE_ONLINE_STORE_ID}"
)

# Delete features
for feature_id in FEATURE_IDS:
    registry_client.delete_feature(
        name=f"projects/{PROJECT_ID}/locations/{LOCATION}/featureGroups/{FEATURE_GROUP_ID}/features/{feature_id}"
    )

# Delete feature group
registry_client.delete_feature_group(
    name=f"projects/{PROJECT_ID}/locations/{LOCATION}/featureGroups/{FEATURE_GROUP_ID}"
)

# Delete test data
!bq rm -f {DATASET_ID}.{TABLE_ID}