In [None]:
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# AutoML SDK：AutoML 视频分类模型

## 安装

安装最新（预览）版本的AutoML SDK。

In [None]:
# Install (or upgrade to) the latest google-cloud-automl client library in the
# user site-packages.
! pip3 install -U google-cloud-automl --user


安装Google *云存储* 库。

In [None]:
# Install the google-cloud-storage client library.
! pip3 install google-cloud-storage


### 重新启动内核

安装完AutoML SDK和Google *cloud-storage*之后，您需要重新启动笔记本内核，以便它可以找到这些包。

In [None]:
import os

# Restart the kernel so freshly installed packages become importable.
# The restart is skipped when the AUTORUN environment variable is set to a
# non-empty value (e.g. for automated runs).
if not os.environ.get("AUTORUN"):
    import IPython

    # do_shutdown(True) requests a shutdown with restart.
    IPython.Application.instance().kernel.do_shutdown(True)


## 开始之前

### GPU 运行时

*如果您有这个选项，请确保在 GPU 运行时中运行此笔记本。在 Colab 中，选择* **运行时 > 更改运行时类型 > GPU**

### 设置您的 GCP 项目

**无论您的笔记本环境如何，以下步骤都是必需的。**

1. [选择或创建一个 GCP 项目](https://console.cloud.google.com/cloud-resource-manager)。当您第一次创建帐号时，您将获得 $300 的免费信用额度，可用于计算/存储成本。

2. [确保为您的项目启用了计费。](https://cloud.google.com/billing/docs/how-to/modify-project)

3. [启用 AutoML API 和 Compute Engine API。](https://console.cloud.google.com/flows/enableapi?apiid=ml.googleapis.com,compute_component)

4. [Google Cloud SDK 已经安装在 AutoML 笔记本中。](https://cloud.google.com/sdk)

5. 在下面的单元格中输入您的项目 ID。然后运行该单元格，以确保 Cloud SDK 在此笔记本中的所有命令中使用正确的项目。

**注意**：Jupyter 会将以 `!` 为前缀的行作为 shell 命令运行，并将以 `$` 为前缀的 Python 变量的值插入到这些命令中。

In [None]:
# Replace the placeholder with your GCP project ID. If it is left unchanged,
# the following cell falls back to the project configured in gcloud.
PROJECT_ID = "[your-project-id]" #@param {type:"string"}


In [None]:
if PROJECT_ID == "" or PROJECT_ID is None or PROJECT_ID == "[your-project-id]":
    # Get your GCP project id from gcloud
    shell_output = !gcloud config list --format 'value(core.project)' 2>/dev/null
    PROJECT_ID = shell_output[0]
    print("Project ID:", PROJECT_ID)


In [None]:
# Make PROJECT_ID the active project for subsequent Cloud SDK commands.
! gcloud config set project $PROJECT_ID


#### 区域

您还可以更改“REGION”变量，该变量用于整个笔记本的操作。以下是AutoML支持的区域。我们建议尽可能选择距离您最近的区域。

- 美洲：`us-central1`
- 欧洲：`europe-west4`
- 亚太地区：`asia-east1`

您不能使用多区域存储桶来训练AutoML。并非所有地区都支持所有AutoML服务。有关每个地区的最新支持情况，请参阅 AutoML 文档中的“区域支持”页面。

In [None]:
# Region used throughout the notebook: it is part of the AutoML resource parent
# path and is the location of the Cloud Storage bucket created below.
REGION = 'us-central1' #@param {type: "string"}


#### 时间戳

如果您正在参加实时教程会话，您可能正在使用共享测试帐户或项目。为了避免用户在创建的资源之间发生名称冲突，您需要为每个实例会话创建一个时间戳，并附加在将在本教程中创建的资源名称上。

In [None]:
from datetime import datetime

# Second-resolution timestamp (YYYYMMDDHHMMSS) of the current local time,
# appended to resource names to keep them unique per session.
TIMESTAMP = datetime.strftime(datetime.now(), "%Y%m%d%H%M%S")


### 认证您的GCP账户

**如果您使用的是AutoML笔记本**，您的环境已经被认证。请跳过此步骤。

*注意：如果您在AutoML笔记本上运行该单元格，该单元格会知道跳过执行认证步骤。*

In [None]:
import os
import sys

# If you are running this notebook in Colab, run this cell and follow the
# instructions to authenticate your Google Cloud account. This provides access
# to your Cloud Storage bucket and lets you submit training jobs and prediction
# requests.

# If on AutoML, then don't execute this code
if not os.path.exists('/opt/deeplearning/metadata/env_version'):
    if 'google.colab' in sys.modules:
        from google.colab import auth as google_auth
        google_auth.authenticate_user()

    # If you are running this tutorial in a notebook locally, replace the string
    # below with the path to your service account key and run this cell to
    # authenticate your Google Cloud account.
    else:
        %env GOOGLE_APPLICATION_CREDENTIALS your_path_to_credentials.json

    # Log in to your account on Google Cloud
    ! gcloud auth login


### 创建云存储桶

**无论您的笔记本环境如何，都需要执行以下步骤。**

本教程旨在使用位于公共云存储桶和本地云存储桶中的训练数据，并为批量预测使用本地云存储桶。您也可以使用您存储在本地云存储桶中的自己的训练数据。

在下面设置您的云存储桶的名称。它必须在所有云存储桶中是唯一的。

In [None]:
# Replace the placeholder with your Cloud Storage bucket name. If it is left
# unchanged, the following cell derives one from PROJECT_ID and TIMESTAMP.
BUCKET_NAME = "[your-bucket-name]" #@param {type:"string"}


In [None]:
# No bucket name supplied (empty, None, or the untouched placeholder): derive
# one from the project ID and the session timestamp.
if BUCKET_NAME in ("", None, "[your-bucket-name]"):
    BUCKET_NAME = "".join([PROJECT_ID, "aip-", TIMESTAMP])


仅当您的存储桶尚不存在时，才需要运行以下单元格来创建云存储桶。

In [None]:
# Create the bucket in the selected region.
! gsutil mb -l $REGION gs://$BUCKET_NAME


最后，通过检查云存储桶的内容来验证对其的访问权限。

In [None]:
# List the bucket contents to confirm that it is accessible.
! gsutil ls -al gs://$BUCKET_NAME


### 设置变量

接下来，设置一些在教程中使用的变量。
### 导入库并定义常量

#### 导入自动机器学习 SDK

将自动机器学习 SDK 导入我们的 Python 环境中。

In [None]:
import json
import os
import sys
import time


from google.cloud import automl_v1beta1 as automl


from google.protobuf.json_format import MessageToJson
from google.protobuf.json_format import ParseDict


#### AutoML 常量

为 AutoML 设置以下常量：

- `PARENT`：数据集、模型和端点资源的 AutoML 位置根路径。

In [None]:
# Resource path prefix (project + region) passed as `parent` when creating the
# dataset and the model.
PARENT = "".join(["projects/", PROJECT_ID, "/locations/", REGION])


#### 客户端

AutoML SDK工作模式为客户端/服务器模式。在您的一侧（Python脚本）中，您将创建一个客户端，该客户端向服务器（AutoML）发送请求并接收响应。

在本教程中，您将使用多个客户端，因此请提前设置它们。

In [None]:
def automl_client():
    """Create the AutoML client used for dataset and model calls."""
    return automl.AutoMlClient()


def prediction_client():
    """Create the AutoML prediction service client used for batch prediction."""
    return automl.PredictionServiceClient()


def operations_client():
    """Return the operations client held by a new AutoMlClient's transport."""
    return automl.AutoMlClient()._transport.operations_client


# Instantiate every client up front so later cells can look them up by name.
clients = {
    "automl": automl_client(),
    "prediction": prediction_client(),
    "operations": operations_client(),
}

# Each entry is printed as a (name, client) tuple.
for client in clients.items():
    print(client)


In [None]:
# Cloud Storage CSV used as the input URI when importing data into the dataset.
IMPORT_FILE = 'gs://automl-video-demo-data/hmdb_split1.csv'


In [None]:
# Preview the import file (at most its first 10 lines).
! gsutil cat $IMPORT_FILE | head -n 10 


示例输出：
```
TRAIN,gs://automl-video-demo-data/hmdb_split1_5classes_train_inf.csv
TEST,gs://automl-video-demo-data/hmdb_split1_5classes_test_inf.csv
```

## 创建一个数据集

### [projects.locations.datasets.create](https://cloud.google.com/automl/docs/reference/rest/v1beta1/projects.locations.datasets/create)

### [projects.locations.datasets.create](https://cloud.google.com/automl/docs/reference/rest/v1beta1/projects.locations.datasets/create)

#### 请求

In [None]:
# Dataset specification: a timestamped display name and empty video
# classification metadata.
dataset = dict(
    display_name="hmdb_" + TIMESTAMP,
    video_classification_dataset_metadata={},
)

# Build the request message only to display its JSON form; nothing is sent here.
print(MessageToJson(vars(automl.CreateDatasetRequest(parent=PARENT, dataset=dataset))["_pb"]))


```
{
  "parent": "projects/migration-ucaip-training/locations/us-central1",
  "dataset": {
    "displayName": "hmdb_20210228225744",
    "videoClassificationDatasetMetadata": {}
  }
}
```

*示例输出*:
```
{
  "parent": "projects/migration-ucaip-training/locations/us-central1",
  "dataset": {
    "displayName": "hmdb_20210228225744",
    "videoClassificationDatasetMetadata": {}
  }
}
```

#### 调用

In [None]:
# Create the dataset under PARENT; the returned value is used directly as the
# result in the following cell.
request = clients["automl"].create_dataset(parent=PARENT, dataset=dataset)


#### 回应

In [None]:
# The create_dataset return value is used as-is (no .result() call here).
result = request

# Print the underlying protobuf message as JSON.
print(MessageToJson(vars(result)["_pb"]))


*示例输出*：
```
{
  "name": "projects/116273516712/locations/us-central1/datasets/VCN6574174086275006464",
  "displayName": "hmdb_20210228225744",
  "createTime": "2021-02-28T23:06:43.197904Z",
  "etag": "AB3BwFrtf0Yl4fgnXW4leoEEANTAGQdOngyIqdQSJBT9pKEChgeXom-0OyH7dKtfvA4=",
  "videoClassificationDatasetMetadata": {}
}
```

In [None]:
# Full resource name of the dataset, as returned by the service.
dataset_id = result.name
# Short ID: the last path component of the resource name.
dataset_short_id = dataset_id.rsplit('/', 1)[-1]

print(dataset_id)


### [projects.locations.datasets.importData](https://cloud.google.com/automl/docs/reference/rest/v1beta1/projects.locations.datasets/importData)

### [项目.位置.数据集.导入数据](https://cloud.google.com/automl/docs/reference/rest/v1beta1/projects.locations.datasets/importData)

#### 请求

In [None]:
# Import configuration: read the CSV index file from Cloud Storage.
input_config = {
    "gcs_source": {
        "input_uris": [IMPORT_FILE]
    }
}

# Preview of the request message (not sent here). `name` is the full dataset
# resource name -- the same value the actual import_data call passes -- rather
# than the short ID, so the preview matches the request that is really issued.
print(MessageToJson(
    automl.ImportDataRequest(
        name=dataset_id,
        input_config=input_config
    ).__dict__["_pb"])
)


*示例输出*：
```
{
  "name": "VCN6574174086275006464",
  "inputConfig": {
    "gcsSource": {
      "inputUris": [
        "gs://automl-video-demo-data/hmdb_split1.csv"
      ]
    }
  }
}
```

```
{
  "name": "VCN6574174086275006464",
  "inputConfig": {
    "gcsSource": {
      "inputUris": [
        "gs://automl-video-demo-data/hmdb_split1.csv"
      ]
    }
  }
}
```

#### 调用

In [None]:
# Start importing the data into the dataset; the returned object is resolved
# with .result() in the following cell.
request = clients["automl"].import_data(name=dataset_id, input_config=input_config)


#### 回应

In [None]:
# Block until the import_data request completes and keep its result.
result = request.result()

# The result is passed to MessageToJson directly here (unlike other cells,
# which unwrap `_pb` first).
print(MessageToJson(result))


*示例输出*：
```
{}
```

## 训练模型

### [projects.locations.models.create](https://cloud.google.com/automl/docs/reference/rest/v1beta1/projects.locations.models/create)

### [projects.locations.models.create](https://cloud.google.com/automl/docs/reference/rest/v1beta1/projects.locations.models/create)

#### 请求

In [None]:
# Model specification: a timestamped display name, the dataset to train on
# (short ID), and empty video classification model metadata.
model = dict(
    display_name="hmdb_" + TIMESTAMP,
    dataset_id=dataset_short_id,
    video_classification_model_metadata={},
)

# Build the request message only to display its JSON form; nothing is sent here.
print(MessageToJson(vars(automl.CreateModelRequest(parent=PARENT, model=model))["_pb"]))


*示例输出*:
```
{
  "parent": "projects/migration-ucaip-training/locations/us-central1",
  "model": {
    "displayName": "hmdb_20210228225744",
    "datasetId": "VCN6574174086275006464",
    "videoClassificationModelMetadata": {}
  }
}
```

#### 调用

In [None]:
# Submit the model creation request; the returned object is resolved with
# .result() in the following cell.
request = clients["automl"].create_model(parent=PARENT, model=model)


#### 回应

In [None]:
# Block until the create_model request completes and keep its result.
result = request.result()

# Print the underlying protobuf message as JSON.
print(MessageToJson(vars(result)["_pb"]))


*示例输出*：
```
{
  "name": "projects/116273516712/locations/us-central1/models/VCN6188818900239515648"
}
``` 

中文翻译：
```
{
  "name": "projects/116273516712/locations/us-central1/models/VCN6188818900239515648"
}
```

In [None]:
# Full resource name of the trained model, as returned by the service.
model_id = result.name
# Short ID: the last path component of the resource name.
model_short_id = model_id.rsplit('/', 1)[-1]

print(model_short_id)


## 评估模型

### [projects.locations.models.modelEvaluations.list](https://cloud.google.com/automl/docs/reference/rest/v1beta1/projects.locations.models.modelEvaluations/list)

### [项目.位置.模型.模型评估.列表](https://cloud.google.com/automl/docs/reference/rest/v1beta1/projects.locations.models.modelEvaluations/list)

#### 调用

In [None]:
# List the evaluations of the model; the returned object is iterated in the
# following cell.
request = clients["automl"].list_model_evaluations(parent=model_id)


#### 回应

In [None]:
import json


# Materialize the listing exactly once. Both the JSON dump and the slice
# lookup below then work from the same in-memory list, instead of reading an
# attribute off the listing object after it has already been iterated.
evaluations = list(request)
if not evaluations:
    # Fail with a clear message rather than an opaque IndexError below.
    raise ValueError("No model evaluations were returned for " + model_id)

# JSON-friendly (dict) form of every evaluation, for display.
model_evaluations = [
    json.loads(MessageToJson(me.__dict__["_pb"])) for me in evaluations
]
# The evaluation slice: resource name of the first evaluation returned.
evaluation_slice = evaluations[0].name

print(json.dumps(model_evaluations, indent=2))


```
[
  {
    "name": "projects/116273516712/locations/us-central1/models/VCN6188818900239515648/modelEvaluations/1998146574672720266",
    "createTime": "2021-03-01T01:02:02.452298Z",
    "evaluatedExampleCount": 150,
    "classificationEvaluationMetrics": {
      "auPrc": 1.0,
      "confidenceMetricsEntry": [
        {
          "confidenceThreshold": 0.016075565,
          "recall": 1.0,
          "precision": 0.2,
          "f1Score": 0.33333334
        },
        {
          "confidenceThreshold": 0.017114623,
          "recall": 1.0,
          "precision": 0.202977,
          "f1Score": 0.3374578
        },
        
        # REMOVED FOR BREVITY
        
        {
          "confidenceThreshold": 0.9299338,
          "recall": 0.033333335,
          "precision": 1.0,
          "f1Score": 0.06451613
        }
      ]
    },
    "displayName": "golf"
  }
]
```

### [projects.locations.models.modelEvaluations.get](https://cloud.google.com/automl/docs/reference/rest/v1beta1/projects.locations.models.modelEvaluations/get)###

### [projects.locations.models.modelEvaluations.get](https://cloud.google.com/automl/docs/reference/rest/v1beta1/projects.locations.models.modelEvaluations/get)###

#### 调用

In [None]:
# Fetch the single evaluation identified by evaluation_slice.
request = clients["automl"].get_model_evaluation(name=evaluation_slice)


#### 回复

In [None]:
# Print the underlying protobuf message of the evaluation as JSON.
print(MessageToJson(vars(request)["_pb"]))


```
{
  "name": "projects/116273516712/locations/us-central1/models/VCN6188818900239515648/modelEvaluations/1998146574672720266",
  "createTime": "2021-03-01T01:02:02.452298Z",
  "evaluatedExampleCount": 150,
  "classificationEvaluationMetrics": {
    "auPrc": 1.0,
    "confidenceMetricsEntry": [
      {
        "confidenceThreshold": 0.016075565,
        "recall": 1.0,
        "precision": 0.2,
        "f1Score": 0.33333334
      },
      {
        "confidenceThreshold": 0.017114623,
        "recall": 1.0,
        "precision": 0.202977,
        "f1Score": 0.3374578
      },
      
      # 略
      
      {
        "confidenceThreshold": 0.9299338,
        "recall": 0.006666667,
        "precision": 1.0,
        "f1Score": 0.013245033
      }
    ],
    "confusionMatrix": {
      "annotationSpecId": [
        "175274248095399936",
        "2048771693081526272",
        "4354614702295220224",
        "6660457711508914176",
        "8966300720722608128"
      ],
      "row": [
        {
          "exampleCount": [
            30,
            0,
            0,
            0,
            0
          ]
        },
        {
          "exampleCount": [
            0,
            30,
            0,
            0,
            0
          ]
        },
        {
          "exampleCount": [
            0,
            0,
            30,
            0,
            0
          ]
        },
        {
          "exampleCount": [
            0,
            0,
            0,
            30,
            0
          ]
        },
        {
          "exampleCount": [
            0,
            0,
            0,
            0,
            30
          ]
        }
      ],
      "displayName": [
        "ride_horse",
        "golf",
        "cartwheel",
        "pullup",
        "kick_ball"
      ]
    }
  }
}
```

## 进行批量预测

### 生成批量输入文件

要从AutoML Video请求一批预测，需要创建一个CSV文件，列出要进行注释的视频在Cloud Storage中的路径。您还可以指定一个开始时间和结束时间，告诉AutoML Video只对视频的一个段（段级别）进行注释。开始时间必须为零或更大，且必须在结束时间之前。结束时间必须大于开始时间且小于或等于视频的持续时间。您还可以使用inf来表示视频的结束。

例如：  
    `gs://my-videos-vcm/short_video_1.avi,0.0,5.566667`  
    `gs://my-videos-vcm/car_chase.avi,0.0,3.933333`

In [None]:
TRAIN_FILES = "gs://automl-video-demo-data/hmdb_split1_5classes_train_inf.csv"

test_items = ! gsutil cat $TRAIN_FILES | head -n2

cols = str(test_items[0]).split(',')
test_item_1, test_label_1, test_start_1, test_end_1  = str(cols[0]), str(cols[1]), str(cols[2]), str(cols[3])
print(test_item_1, test_label_1)

cols = str(test_items[1]).split(',')
test_item_2, test_label_2, test_start_2, test_end_2  = str(cols[0]), str(cols[1]), str(cols[2]), str(cols[3])
print(test_item_2, test_label_2)


*示例输出*:
```
gs://automl-video-demo-data/hmdb51/_Rad_Schlag_die_Bank__cartwheel_f_cm_np1_le_med_0.avi cartwheel
gs://automl-video-demo-data/hmdb51/Acrobacias_de_un_fenomeno_cartwheel_f_cm_np1_ba_bad_8.avi cartwheel
```

In [None]:
import tensorflow as tf
import json

gcs_input_uri = "gs://" + BUCKET_NAME + '/test.csv'
with tf.io.gfile.GFile(gcs_input_uri, 'w') as f:
    data = f"{test_item_1}, {test_start_1}, {test_end_1}"
    f.write(data + '\n')
    data = f"{test_item_2}, {test_start_2}, {test_end_2}"
    f.write(data + '\n')
    
print(gcs_input_uri)
! gsutil cat $gcs_input_uri


以下是示例输出：
```
gs://migration-ucaip-trainingaip-20210228225744/test.csv
gs://automl-video-demo-data/hmdb51/_Rad_Schlag_die_Bank__cartwheel_f_cm_np1_le_med_0.avi, 0.0, inf
gs://automl-video-demo-data/hmdb51/Acrobacias_de_un_fenomeno_cartwheel_f_cm_np1_ba_bad_8.avi, 0.0, inf
```

### [projects.locations.models.batchPredict](https://cloud.google.com/automl/docs/reference/rest/v1beta1/projects.locations.models/batchPredict)
### [项目.位置.模型.批量预测](https://cloud.google.com/automl/docs/reference/rest/v1beta1/projects.locations.models/batchPredict)

#### 请求

In [None]:
# Input: the CSV written above, listing the videos to annotate.
input_config = dict(gcs_source=dict(input_uris=[gcs_input_uri]))

# Output: a prefix inside the tutorial bucket.
output_config = dict(
    gcs_destination=dict(output_uri_prefix="gs://" + f"{BUCKET_NAME}/batch_output/")
)

# Request message for the trained model; it is sent in the following cell.
batch_prediction = automl.BatchPredictRequest(
    name=model_id, input_config=input_config, output_config=output_config
)

print(MessageToJson(vars(batch_prediction)["_pb"]))


```
{
  "name": "projects/116273516712/locations/us-central1/models/VCN6188818900239515648",
  "inputConfig": {
    "gcsSource": {
      "inputUris": [
        "gs://migration-ucaip-trainingaip-20210228225744/test.csv"
      ]
    }
  },
  "outputConfig": {
    "gcsDestination": {
      "outputUriPrefix": "gs://migration-ucaip-trainingaip-20210228225744/batch_output/"
    }
  }
}
```

#### 调用

In [None]:
# Submit the batch prediction request; the returned object is resolved with
# .result() in the following cell.
request = clients["prediction"].batch_predict(request=batch_prediction)


### 回应

In [None]:
# Block until the batch_predict request completes and keep its result.
result = request.result()

# Print the underlying protobuf message as JSON.
print(MessageToJson(vars(result)["_pb"]))


*示例输出*：

```
{}
```

## 清理

要清理此项目中使用的所有GCP资源，您可以删除用于本教程的[GCP项目](https://cloud.google.com/resource-manager/docs/creating-managing-projects#shutting_down_projects)。

否则，您可以删除在本教程中创建的单个资源。

In [None]:
delete_dataset = True
delete_model = True
delete_bucket = True

# Delete the dataset using the AutoML fully qualified identifier for the dataset
try:
    if delete_dataset:
        clients['automl'].delete_dataset(name=dataset_id)
except Exception as e:
    print(e)

# Delete the model using the AutoML fully qualified identifier for the model
try:
    if delete_model:
        clients['automl'].delete_model(name=model_id)
except Exception as e:
    print(e)

if delete_bucket and 'BUCKET_NAME' in globals():
    ! gsutil rm -r gs://$BUCKET_NAME
