In [None]:
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# 自动机器学习文本实体提取模型

## 安装

安装最新版本的AutoML SDK。

In [None]:
! pip3 install google-cloud-automl

安装Google云存储库。

In [None]:
! pip3 install google-cloud-storage

### 重新启动内核

一旦您安装了AutoML SDK和Google *cloud-storage*，您需要重新启动笔记本内核，以便它可以找到这些软件包。

In [None]:
import os

# The restart is skipped whenever the AUTORUN environment variable is set to a
# non-empty value.
if not os.environ.get("AUTORUN"):
    import IPython

    # Restart the kernel so the freshly installed packages become importable.
    IPython.Application.instance().kernel.do_shutdown(True)

## 在开始之前

### GPU运行时

*确保在GPU运行时下运行此笔记本。在Colab中，选择* **运行时 > 更改运行时类型 > GPU**

### 设置您的GCP项目

**无论您的笔记本环境如何，以下步骤都是必需的。**

1. [选择或创建一个GCP项目](https://console.cloud.google.com/cloud-resource-manager)。当您首次创建帐户时，您可以获得$300的免费信用用于计算/存储成本。

2. [确保为您的项目启用了计费。](https://cloud.google.com/billing/docs/how-to/modify-project)

3. [启用AutoML API和Compute Engine API。](https://console.cloud.google.com/flows/enableapi?apiid=ml.googleapis.com,compute_component)

4. [Google Cloud SDK](https://cloud.google.com/sdk) 已经安装在AutoML笔记本中。

5. 在下面的单元格中输入您的项目ID。然后运行这个单元格，以确保
Cloud SDK 在此笔记本中所有命令中使用正确的项目。

**注意**: Jupyter 会将以 `!` 为前缀的行作为 shell 命令运行，并将以 `$` 为前缀的 Python 变量的值插入到这些命令中。

In [None]:
# Google Cloud project ID used throughout the notebook. If the placeholder is
# left unchanged, the following cell looks the ID up from the gcloud config.
PROJECT_ID = "[your-project-id]"  # @param {type:"string"}

In [None]:
if PROJECT_ID == "" or PROJECT_ID is None or PROJECT_ID == "[your-project-id]":
    # Get your GCP project id from gcloud
    shell_output = !gcloud config list --format 'value(core.project)' 2>/dev/null
    PROJECT_ID = shell_output[0]
    print("Project ID:", PROJECT_ID)

In [None]:
# Store the project in the gcloud configuration; $PROJECT_ID is interpolated
# from the Python variable before the shell command runs.
! gcloud config set project $PROJECT_ID

#### 区域

您还可以更改“REGION”变量，该变量用于整个笔记本的操作。以下是AutoML支持的区域。我们建议尽可能选择距离您最近的区域。

- 美洲：`us-central1`
- 欧洲：`europe-west4`
- 亚太地区：`asia-east1`

您不能使用多区域存储桶来进行AutoML训练。并非所有区域都支持所有AutoML服务。有关每个区域的最新支持情况，请参见AutoML服务的区域支持文档。

In [None]:
# Region used for the AutoML resource path (PARENT) and for the location of
# the Cloud Storage bucket created later in this notebook.
REGION = "us-central1"  # @param {type: "string"}

#### 时间戳

如果您正在参加直播教程会话，您可能正在使用共享测试账户或项目。为了避免用户之间资源名称的冲突，您为每个实例会话创建一个时间戳，并将其附加到将在本教程中创建的资源的名称上。

In [None]:
from datetime import datetime

# Second-resolution stamp (YYYYmmddHHMMSS) of the current local time; it is
# appended to resource names created in this tutorial.
TIMESTAMP = f"{datetime.now():%Y%m%d%H%M%S}"

### 验证您的GCP帐户

**如果您正在使用AutoML笔记本**，您的环境已经验证。请跳过此步骤。

*注意：如果您正在使用AutoML笔记本并运行单元格，则该单元格会跳过执行身份验证步骤。*

In [None]:
import os
import sys

# If you are running this notebook in Colab, run this cell and follow the
# instructions to authenticate your Google Cloud account. This provides access
# to your Cloud Storage bucket and lets you submit training jobs and prediction
# requests.

# If on Vertex, then don't execute this code
# (presumably this metadata file only exists on the hosted notebook image, so
# its presence is used as the "already authenticated" signal — TODO confirm).
if not os.path.exists("/opt/deeplearning/metadata/env_version"):
    # Colab is detected by its module already being loaded in the interpreter.
    if "google.colab" in sys.modules:
        from google.colab import auth as google_auth

        google_auth.authenticate_user()

    # If you are running this tutorial in a notebook locally, replace the string
    # below with the path to your service account key and run this cell to
    # authenticate your Google Cloud account.
    else:
        %env GOOGLE_APPLICATION_CREDENTIALS your_path_to_credentials.json

    # Log in to your account on Google Cloud
    # NOTE(review): this runs for both the Colab and the local branch — confirm
    # that the additional interactive login is intended after Colab auth.
    ! gcloud auth login

### 创建一个云存储桶

**无论您的笔记本环境如何，都需要执行以下步骤。**

本教程旨在使用位于公共云存储桶中的训练数据以及用于批量预测的本地云存储桶。您也可以使用自己存储在本地云存储桶中的训练数据。

在下面设置您的云存储桶的名称。它在所有云存储桶中必须是唯一的。

In [None]:
# Name of the Cloud Storage bucket used for batch-prediction input and output.
# If the placeholder is left unchanged, the following cell derives a name.
BUCKET_NAME = "[your-bucket-name]"  # @param {type:"string"}

In [None]:
# No bucket chosen: derive a name from the project ID and the session timestamp.
if BUCKET_NAME in ("", None, "[your-bucket-name]"):
    BUCKET_NAME = f"{PROJECT_ID}aip-{TIMESTAMP}"

只有在您的存储桶还不存在的情况下才运行以下单元格来创建您的云存储存储桶。

In [None]:
# Create the bucket in the selected region ($REGION and $BUCKET_NAME are
# interpolated from the Python variables).
! gsutil mb -l $REGION gs://$BUCKET_NAME

最后，通过检查Cloud Storage存储桶的内容来验证访问权限。

In [None]:
# List the bucket contents; a successful listing confirms that this session
# can access the bucket.
! gsutil ls -al gs://$BUCKET_NAME

### 设置变量

接下来，设置一些在教程中使用的变量。
### 导入库并定义常量

#### 导入 AutoML SDK

将 AutoML SDK 导入我们的 Python 环境。

In [None]:
import json
import os
import sys
import time

from google.cloud import automl
from google.protobuf.json_format import MessageToJson
from google.protobuf.struct_pb2 import Value

### AutoML 常量

为 AutoML 设置以下常量：

- `PARENT`: 数据集、模型和端点资源的 AutoML 位置根路径。

In [None]:
# AutoML location root path for your dataset, model and endpoint resources
PARENT = f"projects/{PROJECT_ID}/locations/{REGION}"

## 客户端

AutoML SDK 采用客户端/服务器模型。在你的一端（Python 脚本）上，你将创建一个客户端，向服务器（AutoML）发送请求并接收响应。

在本教程中，你将使用多个客户端，所以请提前设置好它们。

In [None]:
def automl_client():
    """Return a new AutoML service client."""
    return automl.AutoMlClient()


def prediction_client():
    """Return a new AutoML prediction service client."""
    return automl.PredictionServiceClient()


def operations_client():
    """Return the operations client held by a new AutoMlClient's transport."""
    return automl.AutoMlClient()._transport.operations_client


# One shared client per service, looked up by name in the rest of the notebook.
clients = {
    "automl": automl_client(),
    "prediction": prediction_client(),
    "operations": operations_client(),
}

# Show each (name, client) pair that was created.
for client in clients.items():
    print(client)

In [None]:
# Cloud Storage URI of the CSV file that is passed to the dataset import call.
IMPORT_FILE = "gs://cloud-ml-data/NL-entity/dataset.csv"

In [None]:
# Print the first 10 lines of the import file.
! gsutil cat $IMPORT_FILE | head -n 10

*示例输出*：
```
TRAIN,gs://cloud-ml-data/NL-entity/train.jsonl
TEST,gs://cloud-ml-data/NL-entity/test.jsonl
VALIDATION,gs://cloud-ml-data/NL-entity/validation.jsonl
```

## 创建数据集

### 准备数据

### [projects.locations.datasets.create](https://cloud.google.com/automl/docs/reference/rest/v1beta1/projects.locations.datasets/create)

#### 请求

In [None]:
# Dataset specification: a session-specific display name plus empty
# text-extraction dataset metadata.
dataset = dict(
    display_name=f"entity_{TIMESTAMP}",
    text_extraction_dataset_metadata={},
)

# Build the request object locally only to print the JSON form of the request.
create_dataset_request = automl.CreateDatasetRequest(parent=PARENT, dataset=dataset)
print(MessageToJson(create_dataset_request.__dict__["_pb"]))

*示例输出*：
```
{
  "parent": "projects/migration-ucaip-training/locations/us-central1",
  "dataset": {
    "displayName": "entity_20210303201139",
    "textExtractionDatasetMetadata": {}
  }
}
```

#### 调用

In [None]:
# Submit the create-dataset call; the returned object is resolved with
# .result() in the response cell.
request = clients["automl"].create_dataset(parent=PARENT, dataset=dataset)

#### 响应

In [None]:
# Obtain the result of the create-dataset call and print it as JSON.
result = request.result()

print(MessageToJson(result.__dict__["_pb"]))

*示例输出*：
```
{
  "name": "projects/116273516712/locations/us-central1/datasets/TEN4244124229064196096"
}
```

In [None]:
# Fully qualified resource name of the dataset, as returned by the service
dataset_id = result.name
# Last path component of the resource name (the bare dataset ID)
dataset_short_id = dataset_id.rsplit("/", 1)[-1]

print(dataset_id)

### [projects.locations.datasets.importData](https://cloud.google.com/automl/docs/reference/rest/v1beta1/projects.locations.datasets/importData)

#### 请求

In [None]:
# The import reads from the CSV index file stored in Cloud Storage.
input_config = dict(gcs_source=dict(input_uris=[IMPORT_FILE]))

# Build the request object locally only to print the JSON form of the request.
import_data_request = automl.ImportDataRequest(
    name=dataset_id, input_config=input_config
)
print(MessageToJson(import_data_request.__dict__["_pb"]))

*示例输出*：
```
{
  "name": "projects/116273516712/locations/us-central1/datasets/TEN4244124229064196096",
  "inputConfig": {
    "gcsSource": {
      "inputUris": [
        "gs://cloud-ml-data/NL-entity/dataset.csv"
      ]
    }
  }
}
```

#### 呼叫

In [None]:
# Submit the import-data call for the dataset; the returned object is resolved
# with .result() in the response cell.
request = clients["automl"].import_data(name=dataset_id, input_config=input_config)

#### 回应

In [None]:
# Obtain the result of the import call and print it as JSON.
# NOTE(review): unlike the other response cells, `result` is passed to
# MessageToJson directly (no `_pb` unwrapping) — presumably because this call
# yields a plain protobuf message; confirm against the client library.
result = request.result()

print(MessageToJson(result))

*示例输出*：
```
{}
```

## 训练一个模型

### [projects.locations.models.create](https://cloud.google.com/automl/docs/reference/rest/v1beta1/projects.locations.models/create)

#### 请求

In [None]:
# Model specification: a session-specific display name, the dataset to train
# on (short ID), and empty text-extraction model metadata.
model = dict(
    display_name=f"entity_{TIMESTAMP}",
    dataset_id=dataset_short_id,
    text_extraction_model_metadata={},
)

# Build the request object locally only to print the JSON form of the request.
create_model_request = automl.CreateModelRequest(parent=PARENT, model=model)
print(MessageToJson(create_model_request.__dict__["_pb"]))

*示例输出*：
```
{
  "parent": "projects/migration-ucaip-training/locations/us-central1",
  "model": {
    "displayName": "entity_20210303201139",
    "datasetId": "TEN4244124229064196096",
    "textExtractionModelMetadata": {}
  }
}
```

#### 调用

In [None]:
# Submit the create-model call; the returned object is resolved with
# .result() in the response cell.
request = clients["automl"].create_model(parent=PARENT, model=model)

#### 响应

In [None]:
# Obtain the result of the create-model call and print it as JSON.
result = request.result()

print(MessageToJson(result.__dict__["_pb"]))

*示例输出*：
```
{
  "name": "projects/116273516712/locations/us-central1/models/TEN7821373765161320448"
}
```

In [None]:
# Fully qualified resource name of the trained model, as returned by the service
model_id = result.name
# Last path component of the resource name (the bare model ID)
model_short_id = model_id.rsplit("/", 1)[-1]

print(model_short_id)

## 评估模型

### [projects.locations.models.modelEvaluations.list](https://cloud.google.com/automl/docs/reference/rest/v1beta1/projects.locations.models.modelEvaluations/list)

#### 调用

In [None]:
# List the evaluations of the trained model, with an empty filter string.
request = clients["automl"].list_model_evaluations(parent=model_id, filter="")

#### 响应

In [None]:
import json

# Iterate the listing once and keep every evaluation as a plain dict.
model_evaluations = [json.loads(MessageToJson(me.__dict__["_pb"])) for me in request]

# Fail with a clear message instead of an opaque IndexError when the service
# returned no evaluations.
if not model_evaluations:
    raise ValueError("No model evaluations were returned for " + model_id)

# The evaluation slice: take the name from the collected list rather than from
# the listing object, whose attributes reflect only its current page once the
# iteration above has consumed it.
evaluation_slice = model_evaluations[0]["name"]

print(json.dumps(model_evaluations, indent=2))

*示例输出*：
```
[
  {
    "name": "projects/116273516712/locations/us-central1/models/TEN7821373765161320448/modelEvaluations/132746642406774043",
    "createTime": "2021-03-03T22:30:27.832506Z",
    "evaluatedExampleCount": 60,
    "textExtractionEvaluationMetrics": {
      "confidenceMetricsEntries": [
        {
          "confidenceThreshold": 0.04,
          "recall": 0.79928315,
          "precision": 0.7950089,
          "f1Score": 0.7971403
        },
        {
          "confidenceThreshold": 0.96,
          "recall": 0.75089604,
          "precision": 0.8603696,
          "f1Score": 0.80191386
        },
        
        # 为简洁起见已删除部分内容
        
        {
          "confidenceThreshold": 0.43,
          "recall": 0.5913978,
          "precision": 0.57894737,
          "f1Score": 0.5851064
        },
        {
          "confidenceThreshold": 0.44,
          "recall": 0.5913978,
          "precision": 0.57894737,
          "f1Score": 0.5851064
        }
      ]
    },
    "displayName": "DiseaseClass"
  }
]
```

### [projects.locations.models.modelEvaluations.get](https://cloud.google.com/automl/docs/reference/rest/v1beta1/projects.locations.models.modelEvaluations/get)  

#### 调用

In [None]:
# Fetch a single evaluation by the resource name captured from the listing.
request = clients["automl"].get_model_evaluation(name=evaluation_slice)

#### 回应

In [None]:
# `request` holds the value returned by get_model_evaluation(); print it as JSON.
print(MessageToJson(request.__dict__["_pb"]))

*示例输出*：
```
{
  "name": "projects/116273516712/locations/us-central1/models/TEN7821373765161320448/modelEvaluations/132746642406774043",
  "createTime": "2021-03-03T22:30:27.832506Z",
  "evaluatedExampleCount": 60,
  "textExtractionEvaluationMetrics": {
    "confidenceMetricsEntries": [
      {
        "confidenceThreshold": 0.04,
        "recall": 0.79928315,
        "precision": 0.7950089,
        "f1Score": 0.7971403
      },
      {
        "confidenceThreshold": 0.96,
        "recall": 0.75089604,
        "precision": 0.8603696,
        "f1Score": 0.80191386
      },
      
      #为简洁起见已删除
      
      {
        "confidenceThreshold": 0.43,
        "recall": 0.7921147,
        "precision": 0.7935368,
        "f1Score": 0.7928251
      },
      {
        "confidenceThreshold": 0.44,
        "recall": 0.7921147,
        "precision": 0.7935368,
        "f1Score": 0.7928251
      }
    ]
  }
}
```

## 进行批量预测

### 为批量预测准备文件

In [None]:
import tensorflow as tf

test_item = 'Molecular basis of hexosaminidase A deficiency and pseudodeficiency in the Berks County Pennsylvania Dutch.\\tFollowing the birth of two infants with Tay-Sachs disease ( TSD ) , a non-Jewish , Pennsylvania Dutch kindred was screened for TSD carriers using the biochemical assay . A high frequency of individuals who appeared to be TSD heterozygotes was detected ( Kelly et al . , 1975 ) . Clinical and biochemical evidence suggested that the increased carrier frequency was due to at least two altered alleles for the hexosaminidase A alpha-subunit . We now report two mutant alleles in this Pennsylvania Dutch kindred , and one polymorphism . One allele , reported originally in a French TSD patient ( Akli et al . , 1991 ) , is a GT-- > AT transition at the donor splice-site of intron 9 . The second , a C-- > T transition at nucleotide 739 ( Arg247Trp ) , has been shown by Triggs-Raine et al . ( 1992 ) to be a clinically benign " pseudodeficient " allele associated with reduced enzyme activity against artificial substrate . Finally , a polymorphism [ G-- > A ( 759 ) ] , which leaves valine at codon 253 unchanged , is described'

gcs_input_uri = "gs://" + BUCKET_NAME + "/test.jsonl"
with tf.io.gfile.GFile(gcs_input_uri, "w") as f:
    data = {"id": 0, "text_snippet": {"content": test_item}}
    f.write(json.dumps(data) + "\n")

! gsutil cat $gcs_input_uri

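*示例输出*：
```
{"id": 0, "text_snippet": {"content": "Molecular basis of hexosaminidase A deficiency and pseudodeficiency in the Berks County Pennsylvania Dutch.\\tFollowing the birth of two infants with Tay-Sachs disease ( TSD ) , a non-Jewish , Pennsylvania Dutch kindred was screened for TSD carriers using the biochemical assay . A high frequency of individuals who appeared to be TSD heterozygotes was detected ( Kelly et al . , 1975 ) . Clinical and biochemical evidence suggested that the increased carrier frequency was due to at least two altered alleles for the hexosaminidase A alpha-subunit . We now report two mutant alleles in this Pennsylvania Dutch kindred , and one polymorphism . One allele , reported originally in a French TSD patient ( Akli et al . , 1991 ) , is a GT-- > AT transition at the donor splice-site of intron 9 . The second , a C-- > T transition at nucleotide 739 ( Arg247Trp ) , has been shown by Triggs-Raine et al . ( 1992 ) to be a clinically benign \" pseudodeficient \" allele associated with reduced enzyme activity against artificial substrate . Finally , a polymorphism [ G-- > A ( 759 ) ] , which leaves valine at codon 253 unchanged , is described"}}
```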

### [projects.locations.models.batchPredict](https://cloud.google.com/automl/docs/reference/rest/v1beta1/projects.locations.models/batchPredict)

#### 请求

In [None]:
# Input: the JSONL file uploaded to the bucket.
input_config = dict(gcs_source=dict(input_uris=[gcs_input_uri]))

# Output: a folder prefix inside the same bucket.
output_config = dict(
    gcs_destination=dict(output_uri_prefix=f"gs://{BUCKET_NAME}/batch_output/")
)

# Build the request object locally only to print the JSON form of the request.
batch_predict_request = automl.BatchPredictRequest(
    name=model_id, input_config=input_config, output_config=output_config
)
print(MessageToJson(batch_predict_request.__dict__["_pb"]))

*示例输出*：
```
{
  "name": "projects/116273516712/locations/us-central1/models/TEN7821373765161320448",
  "inputConfig": {
    "gcsSource": {
      "inputUris": [
        "gs://migration-ucaip-trainingaip-20210303201139/test.jsonl"
      ]
    }
  },
  "outputConfig": {
    "gcsDestination": {
      "outputUriPrefix": "gs://migration-ucaip-trainingaip-20210303201139/batch_output/"
    }
  }
}
```

#### 呼叫

In [None]:
# Submit the batch prediction call through the prediction client; the returned
# object is resolved with .result() in the response cell.
request = clients["prediction"].batch_predict(
    name=model_id, input_config=input_config, output_config=output_config
)

#### 回复

In [None]:
# Obtain the result of the batch prediction call and print it as JSON.
result = request.result()

print(MessageToJson(result.__dict__["_pb"]))

*示例输出*：
```
{}
```

In [None]:
# Drop the last character (the trailing "/") of the output prefix so that the
# shell paths below can append their own separators.
destination_uri = output_config["gcs_destination"]["output_uri_prefix"][:-1]

# List what was written under the prefix, then print the prediction files.
! gsutil ls $destination_uri/*
! gsutil cat $destination_uri/prediction*/*.jsonl

*示例输出*：
```
gs://migration-ucaip-trainingaip-20210303201139/batch_output/prediction-entity_20210303201139-2021-03-03T22:30:36.292153Z/text_extraction_1.jsonl
{"textSnippet":{"content":"宾夕法尼亚州德国宾夕法尼亚人的六氨基葡糖苷酶A缺乏和伪缺乏的分子基础。\t在两名患有Tay-Sachs病（TSD）的婴儿出生后，对一个非犹太、宾夕法尼亚德国家族进行了TSD携带者的生化测定筛查。 检测到了看似TSD杂合子的人高频率（Kelly等，1975年）。临床和生化证据表明，增加的携带者频率是由于至少有两个六氨基葡糖苷酶Aα亚单位的改变等位基因。我们现在报告了这个宾夕法尼亚德国家庭中的两个突变等位基因，以及一个多态性。一种等位基因，最初在一个法国TSD患者中报道（Akli等，1991年），是在第9内含子供体位点处的GT-- \u003e AT过渡。第二，即核苷酸739处的C-- \u003e T过渡（Arg247Trp），已被Triggs-Raine等（1992年）证明是一种临床良性的“伪缺乏”等位基因，与对人工底物的酶活性降低有关。最后，描述了一种多态性[G-- \u003e A（759）]，使第253密码子处的缬氨酸保持不变"},"annotations":[{"displayName":"SpecificDisease","textExtraction":{"score":0.99955064,"textSegment":{"startOffset":"19","endOffset":"46","content":"hexosaminidase A deficiency"}}},{"displayName":"SpecificDisease","textExtraction":{"score":0.9995449,"textSegment":{"startOffset":"149","endOffset":"166","content":"Tay-Sachs disease"}}},{"displayName":"SpecificDisease","textExtraction":{"score":0.99939877,"textSegment":{"startOffset":"169","endOffset":"172","content":"TSD"}}},{"displayName":"Modifier","textExtraction":{"score":0.9993252,"textSegment":{"startOffset":"236","endOffset":"239","content":"TSD"}}},{"displayName":"Modifier","textExtraction":{"score":0.9993484,"textSegment":{"startOffset":"330","endOffset":"333","content":"TSD"}}},{"displayName":"Modifier","textExtraction":{"score":0.9993844,"textSegment":{"startOffset":"688","endOffset":"691","content":"TSD"}}}],"id":"0"}
```

## 进行在线预测

### [projects.locations.models.deploy](https://cloud.google.com/automl/docs/reference/rest/v1beta1/projects.locations.models/deploy)

#### 调用

In [None]:
# Submit the deploy-model call; the returned object is resolved with
# .result() in the response cell.
request = clients["automl"].deploy_model(name=model_id)

#### 回复

In [None]:
# Obtain the result of the deploy call and print it as JSON.
# NOTE(review): `result` is passed to MessageToJson directly (no `_pb`
# unwrapping) — presumably because this call yields a plain protobuf message;
# confirm against the client library.
result = request.result()

print(MessageToJson(result))

*示例输出*：
``` 
{} 
```

### [projects.locations.models.predict](https://cloud.google.com/automl/docs/reference/rest/v1beta1/projects.locations.models/predict)

#### 为在线预测准备数据项

In [None]:
# Sample text sent to the deployed model; the same literal is also assigned to
# test_item in the batch prediction section of this notebook.
test_item = 'Molecular basis of hexosaminidase A deficiency and pseudodeficiency in the Berks County Pennsylvania Dutch.\\tFollowing the birth of two infants with Tay-Sachs disease ( TSD ) , a non-Jewish , Pennsylvania Dutch kindred was screened for TSD carriers using the biochemical assay . A high frequency of individuals who appeared to be TSD heterozygotes was detected ( Kelly et al . , 1975 ) . Clinical and biochemical evidence suggested that the increased carrier frequency was due to at least two altered alleles for the hexosaminidase A alpha-subunit . We now report two mutant alleles in this Pennsylvania Dutch kindred , and one polymorphism . One allele , reported originally in a French TSD patient ( Akli et al . , 1991 ) , is a GT-- > AT transition at the donor splice-site of intron 9 . The second , a C-- > T transition at nucleotide 739 ( Arg247Trp ) , has been shown by Triggs-Raine et al . ( 1992 ) to be a clinically benign " pseudodeficient " allele associated with reduced enzyme activity against artificial substrate . Finally , a polymorphism [ G-- > A ( 759 ) ] , which leaves valine at codon 253 unchanged , is described'

#### 请求

In [None]:
# Plain-text snippet wrapping the sample item.
payload = dict(text_snippet=dict(content=test_item, mime_type="text/plain"))

# Request addressed to the deployed model; printed as JSON before it is sent.
request = automl.PredictRequest(name=model_id, payload=payload)

print(MessageToJson(request.__dict__["_pb"]))

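*示例输出*：
```
{
  "name": "projects/116273516712/locations/us-central1/models/TEN7821373765161320448",
  "payload": {
    "textSnippet": {
      "content": "Molecular basis of hexosaminidase A deficiency and pseudodeficiency in the Berks County Pennsylvania Dutch.\\tFollowing the birth of two infants with Tay-Sachs disease ( TSD ) , a non-Jewish , Pennsylvania Dutch kindred was screened for TSD carriers using the biochemical assay . A high frequency of individuals who appeared to be TSD heterozygotes was detected ( Kelly et al . , 1975 ) . Clinical and biochemical evidence suggested that the increased carrier frequency was due to at least two altered alleles for the hexosaminidase A alpha-subunit . We now report two mutant alleles in this Pennsylvania Dutch kindred , and one polymorphism . One allele , reported originally in a French TSD patient ( Akli et al . , 1991 ) , is a GT-- > AT transition at the donor splice-site of intron 9 . The second , a C-- > T transition at nucleotide 739 ( Arg247Trp ) , has been shown by Triggs-Raine et al . ( 1992 ) to be a clinically benign \" pseudodeficient \" allele associated with reduced enzyme activity against artificial substrate . Finally , a polymorphism [ G-- > A ( 759 ) ] , which leaves valine at codon 253 unchanged , is described",
      "mimeType": "text/plain"
    }
  }
}
```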

#### 调用

In [None]:
# Send the online prediction request through the prediction client.
# NOTE(review): the returned value is stored back into `request`, so running
# this cell a second time would pass the previous response as the request;
# re-run the request cell first.
request = clients["prediction"].predict(request=request)

#### 响应

In [None]:
# `request` now holds the value returned by predict(); print it as JSON.
print(MessageToJson(request.__dict__["_pb"]))

*示例输出*：
```
{
  "payload": [
    {
      "annotationSpecId": "8605379431835369472",
      "displayName": "SpecificDisease",
      "textExtraction": {
        "score": 0.99955064,
        "textSegment": {
          "startOffset": "19",
          "endOffset": "46",
          "content": "hexosaminidase A deficiency"
        }
      }
    },
    {
      "annotationSpecId": "8605379431835369472",
      "displayName": "SpecificDisease",
      "textExtraction": {
        "score": 0.9995449,
        "textSegment": {
          "startOffset": "149",
          "endOffset": "166",
          "content": "Tay-Sachs disease"
        }
      }
    },
    {
      "annotationSpecId": "8605379431835369472",
      "displayName": "SpecificDisease",
      "textExtraction": {
        "score": 0.99939877,
        "textSegment": {
          "startOffset": "169",
          "endOffset": "172",
          "content": "TSD"
        }
      }
    },
    {
      "annotationSpecId": "3417232661104558080",
      "displayName": "Modifier",
      "textExtraction": {
        "score": 0.9993252,
        "textSegment": {
          "startOffset": "236",
          "endOffset": "239",
          "content": "TSD"
        }
      }
    },
    {
      "annotationSpecId": "3417232661104558080",
      "displayName": "Modifier",
      "textExtraction": {
        "score": 0.9993484,
        "textSegment": {
          "startOffset": "330",
          "endOffset": "333",
          "content": "TSD"
        }
      }
    },
    {
      "annotationSpecId": "3417232661104558080",
      "displayName": "Modifier",
      "textExtraction": {
        "score": 0.9993844,
        "textSegment": {
          "startOffset": "688",
          "endOffset": "691",
          "content": "TSD"
        }
      }
    }
  ]
}
```

## 清理

要清理此项目中使用的所有 GCP 资源，您可以删除用于本教程的 GCP 项目。

否则，您可以删除此教程中创建的各个资源。

In [None]:
delete_dataset = True
delete_model = True
delete_bucket = True

# Delete the dataset using the AutoML fully qualified identifier for the dataset
try:
    if delete_dataset:
        clients["automl"].delete_dataset(name=dataset_id)
except Exception as e:
    print(e)

# Delete the model using the AutoML fully qualified identifier for the model
try:
    if delete_model:
        clients["automl"].delete_model(name=model_id)
except Exception as e:
    print(e)


if delete_bucket and "BUCKET_NAME" in globals():
    ! gsutil rm -r gs://$BUCKET_NAME