In [None]:
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


# AI Platform Custom Training Custom Container TF Keras

## Installation


Install the Google *cloud-storage* library as well.

In [None]:
! pip3 install google-cloud-storage


### Restart the Kernel

Once you've installed the AI Platform (Unified) SDK and Google *cloud-storage*, you need to restart the notebook kernel so it can find the packages.

In [None]:
import os


if not os.getenv("AUTORUN"):
    # Automatically restart kernel after installs
    import IPython
    app = IPython.Application.instance()
    app.kernel.do_shutdown(True)
    

#### Project ID

**If you don't know your project ID**, try to get your project ID using `gcloud` command by executing the second cell below.

In [None]:
PROJECT_ID = "[your-project-id]" #@param {type:"string"}


In [None]:
if PROJECT_ID == "" or PROJECT_ID is None or PROJECT_ID == "[your-project-id]":
    # Get your GCP project id from gcloud
    shell_output = !gcloud config list --format 'value(core.project)' 2>/dev/null
    PROJECT_ID = shell_output[0]
    print("Project ID:", PROJECT_ID)
    

In [None]:
! gcloud config set project $PROJECT_ID


#### Region

You can also change the `REGION` variable, which is used for operations
throughout the rest of this notebook.  Below are regions supported forAutoML. We recommend when possible, to choose the region closest to you. 

Currently project resources must be in the `us-central1` region to use this API.

In [None]:
REGION = 'us-central1' #@param {type: "string"}


#### Timestamp

If you are in a live tutorial session, you might be using a shared test account or project. To avoid name collisions between users on resources created, you create a timestamp for each instance session, and append onto the name of resources which will be created in this tutorial.

In [None]:
from datetime import datetime


TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")


### Authenticate your GCP account

**If you are using AutoML Notebooks**, your environment is already
authenticated. Skip this step.

*Note: If you are on an AutoML notebook and run the cell, the cell knows to skip executing the authentication steps.*



In [None]:
import os
import sys

# If you are running this notebook in Colab, run this cell and follow the
# instructions to authenticate your Google Cloud account. This provides access
# to your Cloud Storage bucket and lets you submit training jobs and prediction
# requests.

# If on AutoML, then don't execute this code
if not os.path.exists('/opt/deeplearning/metadata/env_version'):
    if 'google.colab' in sys.modules:
        from google.colab import auth as google_auth
        google_auth.authenticate_user()

    # If you are running this tutorial in a notebook locally, replace the string
    # below with the path to your service account key and run this cell to
    # authenticate your Google Cloud account.
    else:
        %env GOOGLE_APPLICATION_CREDENTIALS your_path_to_credentials.json

    # Log in to your account on Google Cloud
    ! gcloud auth login


### Create a Cloud Storage bucket

**The following steps are required, regardless of your notebook environment.**

This tutorial is designed to use training data that is in a public Cloud Storage bucket and a local Cloud Storage bucket for your batch predictions. You may alternatively use your own training data that you have stored in a local Cloud Storage bucket.

Set the name of your Cloud Storage bucket below. It must be unique across all Cloud Storage buckets. 

In [None]:
BUCKET_NAME = "[your-bucket-name]" #@param {type:"string"}


In [None]:
if BUCKET_NAME == "" or BUCKET_NAME is None or BUCKET_NAME == "[your-bucket-name]":
    BUCKET_NAME = PROJECT_ID + "aip-" + TIMESTAMP


**Only if your bucket doesn't already exist**: Run the following cell to create your Cloud Storage bucket.

In [None]:
! gsutil mb -l $REGION gs://$BUCKET_NAME
    

Finally, validate access to your Cloud Storage bucket by examining its contents:

In [None]:
! gsutil ls -al gs://$BUCKET_NAME
    

### Set up variables

Next, set up some variables used throughout the tutorial.
### Import libraries and define constants

In [None]:
import json
import time


from googleapiclient import discovery


#### AutoML constants

Setup up the following constants for AutoML:

- `PARENT`: The AutoM location root path for dataset, model and endpoint resources.

In [None]:
# AutoM location root path for your dataset, model and endpoint resources
PARENT = "projects/" + PROJECT_ID 


## AI Platform API Client

We use the Google APIs Client Library for Python to call the AI Platform Training and Prediction API without manually constructing HTTP requests.

In [None]:
cloudml = discovery.build("ml", "v1")


## Prepare trainer script and custom container

In [None]:
%%writefile cifar/Dockerfile

FROM gcr.io/deeplearning-platform-release/tf2-cpu.2-1
WORKDIR /root

WORKDIR /

# Copies the trainer code to the docker image.
COPY trainer /trainer

# Sets up the entry point to invoke the trainer.
ENTRYPOINT ["python", "-m", "trainer.task"]


In [None]:
# Add package information
! touch cifar/README.md

setup_cfg = "[egg_info]\n\
tag_build =\n\
tag_date = 0"
! echo "$setup_cfg" > cifar/setup.cfg

setup_py = "import setuptools\n\
# Requires TensorFlow Datasets\n\
setuptools.setup(\n\
    install_requires=[\n\
        'tensorflow_datasets==1.3.0',\n\
    ],\n\
    packages=setuptools.find_packages())" 
! echo "$setup_py" > cifar/setup.py

pkg_info = "Metadata-Version: 1.0\n\
Name: Custom Training CIFAR-10\n\
Version: 0.0.0\n\
Summary: Demonstration training script\n\
Home-page: www.google.com\n\
Author: Google\n\
Author-email: aferlitsch@google.com\n\
License: Public\n\
Description: Demo\n\
Platform: AI Platform (Unified)"
! echo "$pkg_info" > cifar/PKG-INFO

# Make the training subfolder
! mkdir cifar/trainer
! touch cifar/trainer/__init__.py


In [None]:
%%writefile cifar/trainer/task.py
import tensorflow_datasets as tfds
import tensorflow as tf
from tensorflow.python.client import device_lib
import argparse
import os
import sys

tfds.disable_progress_bar()

parser = argparse.ArgumentParser()
parser.add_argument('--model-dir', dest='model_dir',
                    default='/tmp/saved_model', type=str, help='Model dir.')
parser.add_argument('--lr', dest='lr',
                    default=0.01, type=float,
                    help='Learning rate.')
parser.add_argument('--epochs', dest='epochs',
                    default=10, type=int,
                    help='Number of epochs.')
parser.add_argument('--steps', dest='steps',
                    default=200, type=int,
                    help='Number of steps per epoch.')
parser.add_argument('--distribute', dest='distribute', type=str, default='single',
                    help='distributed training strategy')
args = parser.parse_args()

print('Python Version = {}'.format(sys.version))
print('TensorFlow Version = {}'.format(tf.__version__))
print('TF_CONFIG = {}'.format(os.environ.get('TF_CONFIG', 'Not found')))
print('DEVICES', device_lib.list_local_devices())

# Single Machine, single compute device
if args.distribute == 'single':
    if tf.test.is_gpu_available():
        strategy = tf.distribute.OneDeviceStrategy(device="/gpu:0")
    else:
        strategy = tf.distribute.OneDeviceStrategy(device="/cpu:0")
# Single Machine, multiple compute device
elif args.distribute == 'mirror':
    strategy = tf.distribute.MirroredStrategy()
# Multiple Machine, multiple compute device
elif args.distribute == 'multi':
    strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()

# Multi-worker configuration
print('num_replicas_in_sync = {}'.format(strategy.num_replicas_in_sync))

# Preparing dataset
BUFFER_SIZE = 10000
BATCH_SIZE = 64

def make_datasets_unbatched():
  # Scaling CIFAR10 data from (0, 255] to (0., 1.]
  def scale(image, label):
    image = tf.cast(image, tf.float32)
    image /= 255.0
    return image, label

  datasets, info = tfds.load(name='cifar10',
                            with_info=True,
                            as_supervised=True)
  return datasets['train'].map(scale).cache().shuffle(BUFFER_SIZE).repeat()


# Build the Keras model
def build_and_compile_cnn_model():
  model = tf.keras.Sequential([
      tf.keras.layers.Conv2D(32, 3, activation='relu', input_shape=(32, 32, 3)),
      tf.keras.layers.MaxPooling2D(),
      tf.keras.layers.Conv2D(32, 3, activation='relu'),
      tf.keras.layers.MaxPooling2D(),
      tf.keras.layers.Flatten(),
      tf.keras.layers.Dense(10, activation='softmax')
  ])
  model.compile(
      loss=tf.keras.losses.sparse_categorical_crossentropy,
      optimizer=tf.keras.optimizers.SGD(learning_rate=args.lr),
      metrics=['accuracy'])
  return model

# Train the model
NUM_WORKERS = strategy.num_replicas_in_sync
# Here the batch size scales up by number of workers since
# `tf.data.Dataset.batch` expects the global batch size.
GLOBAL_BATCH_SIZE = BATCH_SIZE * NUM_WORKERS
train_dataset = make_datasets_unbatched().batch(GLOBAL_BATCH_SIZE)

with strategy.scope():
  # Creation of dataset, and model building/compiling need to be within
  # `strategy.scope()`.
  model = build_and_compile_cnn_model()

model.fit(x=train_dataset, epochs=args.epochs, steps_per_epoch=args.steps)
model.save(args.model_dir)


In [None]:
TRAIN_IMAGE = f"gcr.io/{PROJECT_ID}/cifar_migration:v1"

! docker build cifar -t $TRAIN_IMAGE
! docker push $TRAIN_IMAGE


## Train a model

### [projects.jobs.create](https://cloud.google.com/ai-platform/training/docs/reference/rest/v1/projects.jobs/create)

#### Request

In [None]:
JOB_NAME = "custom_container_" + TIMESTAMP

TRAINING_INPUTS = {
    "scaleTier": "CUSTOM",
    "masterType": "n1-standard-4",
    "masterConfig": {
        "imageUri": TRAIN_IMAGE
    },
    "args": [
        "--model-dir=" + 'gs://{}/{}'.format(BUCKET_NAME, JOB_NAME),
        "--epochs=" + str(20),
        "--steps=" + str(100)
    ],
    "region": REGION
}

body = {"jobId": JOB_NAME, "trainingInput": TRAINING_INPUTS}

request = cloudml.projects().jobs().create(
    parent=PARENT
)
request.body = json.loads(json.dumps(TRAINING_INPUTS, indent=2))

print(json.dumps(json.loads(request.to_json()), indent=2))

request = cloudml.projects().jobs().create(
    parent=PARENT,
    body=body
)


*Example output*:
```
{
  "uri": "https://ml.googleapis.com/v1/projects/migration-ucaip-training/jobs?alt=json",
  "method": "POST",
  "body": {
    "scaleTier": "CUSTOM",
    "masterType": "n1-standard-4",
    "masterConfig": {
      "imageUri": "gcr.io/migration-ucaip-training/cifar_migration:v1"
    },
    "args": [
      "--model-dir=gs://migration-ucaip-trainingaip-20210325215916/custom_container_20210325215916",
      "--epochs=20",
      "--steps=100"
    ],
    "region": "us-central1"
  },
  "headers": {
    "accept": "application/json",
    "accept-encoding": "gzip, deflate",
    "user-agent": "(gzip)",
    "x-goog-api-client": "gdcl/1.12.8 gl-python/3.7.8"
  },
  "methodId": "ml.projects.jobs.create",
  "resumable": null,
  "response_callbacks": [],
  "_in_error_state": false,
  "body_size": 0,
  "resumable_uri": null,
  "resumable_progress": 0
}
```



#### Call

In [None]:
response = request.execute()


#### Response

In [None]:
print(json.dumps(response, indent=2))


*Example output*:
```
{
  "jobId": "custom_container_20210325215916",
  "trainingInput": {
    "scaleTier": "CUSTOM",
    "masterType": "n1-standard-4",
    "args": [
      "--model-dir=gs://migration-ucaip-trainingaip-20210325215916/custom_container_20210325215916",
      "--epochs=20",
      "--steps=100"
    ],
    "region": "us-central1",
    "masterConfig": {
      "imageUri": "gcr.io/migration-ucaip-training/cifar_migration:v1"
    }
  },
  "createTime": "2021-03-25T21:59:28Z",
  "state": "QUEUED",
  "trainingOutput": {},
  "etag": "DZ8rE8+ASE4="
}
```



In [None]:
# The full unique ID for the custom training job
custom_training_id = f'{PARENT}/jobs/{response["jobId"]}'
# The short numeric ID for the custom training job
custom_training_short_id = response["jobId"]

print(custom_training_id)


### [projects.jobs.get](https://cloud.google.com/ai-platform/training/docs/reference/rest/v1/projects.jobs/get)

#### Response

In [None]:
request = cloudml.projects().jobs().get(
    name=custom_training_id
)

response = request.execute()


#### Response

In [None]:
print(json.dumps(response, indent=2))


*Example output*:
```
{
  "jobId": "custom_container_20210325215916",
  "trainingInput": {
    "scaleTier": "CUSTOM",
    "masterType": "n1-standard-4",
    "args": [
      "--model-dir=gs://migration-ucaip-trainingaip-20210325215916/custom_container_20210325215916",
      "--epochs=20",
      "--steps=100"
    ],
    "region": "us-central1",
    "masterConfig": {
      "imageUri": "gcr.io/migration-ucaip-training/cifar_migration:v1"
    }
  },
  "createTime": "2021-03-25T21:59:28Z",
  "state": "PREPARING",
  "trainingOutput": {},
  "etag": "Nn3P/Dd/c9A="
}
```



In [None]:
while True:
    response = cloudml.projects().jobs().get(name=custom_training_id).execute()
    if response["state"] != "SUCCEEDED":
        print("Training job has not completed:", response["state"])
        if response["state"] == "FAILED":
            break
    else:
        break
    time.sleep(20)

# model artifact output directory on Google Cloud Storage  
model_artifact_dir =response["trainingInput"]["args"][0].split("=")[-1]
print("artifact location  " + model_artifact_dir)


### Serving function for trained model (image data)

In [None]:
import tensorflow as tf


model = tf.keras.models.load_model(model_artifact_dir)


In [None]:
CONCRETE_INPUT = "numpy_inputs"

def _preprocess(bytes_input):
    decoded = tf.io.decode_jpeg(bytes_input, channels=3)
    decoded = tf.image.convert_image_dtype(decoded, tf.float32)
    resized = tf.image.resize(decoded, size=(32, 32))
    rescale = tf.cast(resized / 255.0, tf.float32)
    return rescale

@tf.function(input_signature=[tf.TensorSpec([None], tf.string)])
def preprocess_fn(bytes_inputs):
    decoded_images = tf.map_fn(_preprocess, bytes_inputs, dtype=tf.float32, back_prop=False)
    return {CONCRETE_INPUT: decoded_images}  # User needs to make sure the key matches model's input

m_call = tf.function(model.call).get_concrete_function(
    [tf.TensorSpec(shape=[None, 32, 32, 3], dtype=tf.float32, name=CONCRETE_INPUT)]
)

@tf.function(input_signature=[tf.TensorSpec([None], tf.string), tf.TensorSpec([None], tf.string)])
def serving_fn(bytes_inputs, key):
    images = preprocess_fn(bytes_inputs)
    prob = m_call(**images)
    return {"prediction": prob, "key":key}

tf.saved_model.save(model, model_artifact_dir, signatures={'serving_default': serving_fn})


In [None]:
loaded = tf.saved_model.load(model_artifact_dir)

tensors_specs = list(loaded.signatures['serving_default'].structured_input_signature)
print('Tensors specs:', tensors_specs)

input_name = [ v for k, v in tensors_specs[1].items() if k != "key"][0].name
print('Bytes input tensor name:', input_name)


*Example output*:
```
Tensors specs: [(), {'bytes_inputs': TensorSpec(shape=(None,), dtype=tf.string, name='bytes_inputs'), 'key': TensorSpec(shape=(None,), dtype=tf.string, name='key')}]
Bytes input tensor name: bytes_inputs
```



## Make batch predictions

### Prepare files for batch prediction

In [None]:
import base64
import cv2
import json
import numpy as np
import tensorflow as tf


(_, _), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

test_image_1, test_label_1 = x_test[0], y_test[0]
test_image_2, test_label_2 = x_test[1], y_test[1]

cv2.imwrite('tmp1.jpg', (test_image_1 * 255).astype(np.uint8))
cv2.imwrite('tmp2.jpg', (test_image_2 * 255).astype(np.uint8))

gcs_input_uri = "gs://" + BUCKET_NAME + "/" + "test.json"
with tf.io.gfile.GFile(gcs_input_uri, 'w') as f:
    for img in ["tmp1.jpg", "tmp2.jpg"]:
        bytes = tf.io.read_file(img)
        b64str = base64.b64encode(bytes.numpy()).decode('utf-8')
        f.write(json.dumps({"key": img, input_name: {"b64":  b64str}}) + '\n')
    
! gsutil cat $gcs_input_uri


*Example output*:
```
{"key": "tmp1.jpg", "bytes_inputs": {"b64": "/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQECAgICAgQDAgICAgUEBAMEBgUGBgYFBgYGBwkIBgcJBwYGCAsICQoKCgoKBggLDAsKDAkKCgr/2wBDAQICAgICAgUDAwUKBwYHCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgr/wAARCAAgACADASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwD6E1zw/qemaZY669mkdtqsTPZTpMH85Y3KMcKeOR36444NZGj2/ibWPHaaPeHSLXRbq3jSw1O7u3V3u9zb0ZAh+QIFO4EkliCBjnwv9lfxtrviTxBbW974le/0nQ/h5ohms7m4b92bhVlkEfPIDuwJ6gyADgCuWh1fxP8As6/tGad8H5PiRrHjW6tNd1O/iXUr5Z7mx0uSZlinHODiRQCqrgGTGPmwPyqfClGlnM6Em3TSi/N3Wtnto015H6y+MK08kp14QSqScle6tFxel0+6aZ9d6/rvhXwH4407wWtq+uSXth9pa5jcwKUBIbyxzkL0Ock8nHQV2x0NtN0Gw8a6PDOunXc3liO5GZIGxwG6YBxx1x0zkV4L8Xfij4k8X/Gr4V+HdJtDpdgui3GoajJBAXlkuGvNoUEDcD5MYyuN3zEnpX0B4Q+Iunafdap8OPFCG/sL+PzLkGNgbQB1O7Jxh1JOCOvHXNfUYrh/LPqMo0oKDgvdl10117nzGD4izR5hGdWcp8zs4+umisflx8DNXi/Z/wDHviPTfiP4g+x2WieFtV03U5r9miLw2ilonTIySWijZCB6Yr2X4R/tQT/tC/s56f8AGn4C/AvxTrXiq7jksW1G78NxRlNiRxIrzO5EwiVHAePAfeoO1lIrqv2pf2Xz+1t+z3feC9E1GLSvE2paQtraa1cISXiEqu9tKVydrbMZ5Kkg8jIr234a/Bq7+EngjQPAng3wzB/ZOl6ZFa2tpp/yeWiqFB2Hq2ASeuTz15r9ixHBa+vSp1JXpxXuy6vyfpbXuz8jocUyWCVSirTb1j09V95e+E3hnwXr8dn8QPjLaSWZBguP+EcudKSW6gnSMfLHOrcQh2djCSAxY5BxkzfEDx1H4n8ZyvpEC2WnMAwighMe8hvl3gZyQCB15K5xWNq3iKbVNVk8MW91NZzxLllkt9jL2z0I/DrXCeG47T4seNL3wN4c1nULKPTY2GoX8YYNcSkfKisxwis2ASMnk9AK7f8AiHuQ47CulWlKzfM7S5W+vRfgZQ47zvA4qNako3irK8eZLpfVn//Z"}}
{"key": "tmp2.jpg", "bytes_inputs": {"b64": "/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQECAgICAgQDAgICAgUEBAMEBgUGBgYFBgYGBwkIBgcJBwYGCAsICQoKCgoKBggLDAsKDAkKCgr/2wBDAQICAgICAgUDAwUKBwYHCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgr/wAARCAAgACADASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwD8V5UubRQlxlSvDAtyD6dadbW91fK8lrFI6o6KzrnALHCj8cH8jX3J+1V+wR8adOjsrDxR8EPhzohsg13qKfD+zddRWBF2u0sR42AnIzjJAAzzXmnwh/Yk+D3jX4Q6h478cftgaX4Al/tR4f8AhHdf0eRruVI+IpdkbFiWLsAqgnrXZLBVFWcI6/gc0MVSlSU2eZaX+zdr954Nv/EEt7FNeWyrJHZ2moRn93tYsTuwcg7OBz19q8sa7AUEMf8AvqvoHwX+yz8Vb74gXtn4M+Euq/EbSYpV+y6vf2txptrMOAz+XIysR0xu9M4qf9pn9mf4jJoNprJ+BGgeCn0mHZfQ2OqRl793fAZUDkkAbcd8k1pUw1OUE6e/bf8AEVOs1JqT3P19/aT/AOCMf7RH7Qfx5134zeNf2z7S18Q+PkSWWDSb6406BrSMFYrWNCCAsakDbnOSSeTXg+sf8G3viHwt49ez1jxdY6zqds1veTwT+MzBdqJWnWCYb0DhXe3n2sOGMD4J2HH7IfD3xnc/EPwl4Y8R6t458M28y+EL1NRh1nS3vGXV3a1+w3S4mjCwxxpdCaFSjTNLGRImwk+A6f8AAL9oH4gaX4+tf+Ckn7Vfw4+I2k3fiW6m+HOneFNPn0WDw9piTLLbuUiYGWZsCNYp/tMtqiSbL+b7RMrqvWxVDKamZ89BOg03Q9+deupOpBRotU1CM4OMak/aSUIxkouTbUjmllc0qic60XrGNldX/dtNr/n2+aS5r3XI3ytKz+Jof+CN2r6LYHU/ibqOo2iQzFmmn8eXLfugMbDhwMcdeprg/iV+zX+zx8O9Mu9f8NaRplw9oSr6g0sl0BgdBNMzZ+i9K+svi9P+yv8ADAnRfhl4MfxNdhSDe63fzS2sJHdYpHbfjtu/KvhL9ub4tarruhy2JvJMsdjJFGFj28gKqrgKo9B6VhlvEGMzfDxm8M6N+kpRlJeT5dE/mwoZDiMO+evVb8j/2Q=="}}
```



### [projects.jobs.create](https://cloud.google.com/ai-platform/prediction/docs/reference/rest/v1/projects.jobs/create)

#### Request

In [None]:
body = {
    'jobId': "custom_container_pred_" + TIMESTAMP,
    'predictionInput': {
        'dataFormat': 'JSON',
        'inputPaths': gcs_input_uri,
        'outputPath': "gs://" + f"{BUCKET_NAME}/batch_output/",
        "runtime_version": "2.1",
        "uri": model_artifact_dir,
        'region': REGION
    }
}

request = cloudml.projects().jobs().create(
        parent=PARENT
)
request.body = json.loads(json.dumps(body, indent=2))

print(json.dumps(json.loads(request.to_json()), indent=2))

request = cloudml.projects().jobs().create(
    parent=PARENT,
    body=body
)


*Example output*:
```
{
  "uri": "https://ml.googleapis.com/v1/projects/migration-ucaip-training/jobs?alt=json",
  "method": "POST",
  "body": {
    "jobId": "custom_container_pred_20210325215916",
    "predictionInput": {
      "dataFormat": "JSON",
      "inputPaths": "gs://migration-ucaip-trainingaip-20210325215916/test.json",
      "outputPath": "gs://migration-ucaip-trainingaip-20210325215916/batch_output/",
      "runtime_version": "2.1",
      "uri": "gs://migration-ucaip-trainingaip-20210325215916/custom_container_20210325215916",
      "region": "us-central1"
    }
  },
  "headers": {
    "accept": "application/json",
    "accept-encoding": "gzip, deflate",
    "user-agent": "(gzip)",
    "x-goog-api-client": "gdcl/1.12.8 gl-python/3.7.8"
  },
  "methodId": "ml.projects.jobs.create",
  "resumable": null,
  "response_callbacks": [],
  "_in_error_state": false,
  "body_size": 0,
  "resumable_uri": null,
  "resumable_progress": 0
}
```



#### Call

In [None]:
response = request.execute()


#### Response

In [None]:
print(json.dumps(response, indent=2))


*Example output*:
```
{
  "jobId": "custom_container_pred_20210325215916",
  "predictionInput": {
    "dataFormat": "JSON",
    "inputPaths": [
      "gs://migration-ucaip-trainingaip-20210325215916/test.json"
    ],
    "outputPath": "gs://migration-ucaip-trainingaip-20210325215916/batch_output/",
    "region": "us-central1",
    "runtimeVersion": "2.1",
    "uri": "gs://migration-ucaip-trainingaip-20210325215916/custom_container_20210325215916",
    "framework": "TENSORFLOW"
  },
  "createTime": "2021-03-25T22:15:15Z",
  "state": "QUEUED",
  "predictionOutput": {
    "outputPath": "gs://migration-ucaip-trainingaip-20210325215916/batch_output/"
  },
  "etag": "GNq2pYok7CI="
}
```



In [None]:
# The full unique ID for the batch prediction job
batch_job_id = PARENT + "/jobs/" + response["jobId"]

print(batch_job_id)


### [projects.jobs.get](https://cloud.google.com/ai-platform/prediction/docs/reference/rest/v1/projects.jobs/get)

#### Call

In [None]:
request = cloudml.projects().jobs().get(
        name=batch_job_id
)

response = request.execute()


#### Response

In [None]:
print(json.dumps(response, indent=2))


*Example output*:
```
{
  "jobId": "custom_container_pred_20210325215916",
  "predictionInput": {
    "dataFormat": "JSON",
    "inputPaths": [
      "gs://migration-ucaip-trainingaip-20210325215916/test.json"
    ],
    "outputPath": "gs://migration-ucaip-trainingaip-20210325215916/batch_output/",
    "region": "us-central1",
    "runtimeVersion": "2.1",
    "uri": "gs://migration-ucaip-trainingaip-20210325215916/custom_container_20210325215916",
    "framework": "TENSORFLOW"
  },
  "createTime": "2021-03-25T22:15:15Z",
  "state": "QUEUED",
  "predictionOutput": {
    "outputPath": "gs://migration-ucaip-trainingaip-20210325215916/batch_output/"
  },
  "etag": "Sxnlx4MEtTo="
}
```



In [None]:
while True:
    response = request = cloudml.projects().jobs().get(name=batch_job_id).execute()
    if response["state"] != "SUCCEEDED":
        print("The job has not completed:", response["state"])
        if response["state"] == "FAILED":
            break
    else:
        folder = response["predictionInput"]["outputPath"][:-1]
        ! gsutil ls $folder/prediction*

        ! gsutil cat $folder/prediction*
        break
    time.sleep(60)


*Example output*:
```
gs://migration-ucaip-trainingaip-20210325215916/batch_output/prediction.errors_stats-00000-of-00001
gs://migration-ucaip-trainingaip-20210325215916/batch_output/prediction.results-00000-of-00001
{"prediction": [0.04647013917565346, 0.06366760283708572, 0.1313525140285492, 0.11146997660398483, 0.1568831354379654, 0.09669718891382217, 0.18583332002162933, 0.10817062109708786, 0.03371051326394081, 0.06574499607086182], "key": "tmp1.jpg"}
{"prediction": [0.04657613858580589, 0.06360984593629837, 0.13138002157211304, 0.11128606647253036, 0.15718042850494385, 0.096551313996315, 0.1853194385766983, 0.10867659002542496, 0.03375411406159401, 0.06566616892814636], "key": "tmp2.jpg"}
```



## Make online predictions

### Deploy the model

### [projects.models.create](https://cloud.google.com/ai-platform/prediction/docs/reference/rest/v1/projects.models/create)

#### Request

In [None]:
request = cloudml.projects().models().create(
    parent=PARENT
)
request.body = json.loads(json.dumps({"name": "custom_container_" + TIMESTAMP}, indent=2))

print(json.dumps(json.loads(request.to_json()), indent=2))

request = cloudml.projects().models().create(
    parent=PARENT,
    body={
        "name": "custom_container_" + TIMESTAMP
    }
)


*Example output*:
```
{
  "uri": "https://ml.googleapis.com/v1/projects/migration-ucaip-training/models?alt=json",
  "method": "POST",
  "body": {
    "name": "custom_container_20210325215916"
  },
  "headers": {
    "accept": "application/json",
    "accept-encoding": "gzip, deflate",
    "user-agent": "(gzip)",
    "x-goog-api-client": "gdcl/1.12.8 gl-python/3.7.8"
  },
  "methodId": "ml.projects.models.create",
  "resumable": null,
  "response_callbacks": [],
  "_in_error_state": false,
  "body_size": 0,
  "resumable_uri": null,
  "resumable_progress": 0
}
```



#### Call

In [None]:
response = request.execute()


#### Response

In [None]:
print(json.dumps(response, indent=2))


*Example output*:
```
{
  "name": "projects/migration-ucaip-training/models/custom_container_20210325215916",
  "regions": [
    "us-central1"
  ],
  "etag": "gBP35vWqHPE="
}
```



In [None]:
# The full unique ID for the training pipeline
model_id = response["name"]
# The short numeric ID for the training pipeline
model_short_name = model_id.split("/")[-1]

print(model_id)


### [projects.models.versions.create](https://cloud.google.com/ai-platform/prediction/docs/reference/rest/v1/projects.models.versions/create)

#### Request

In [None]:
version = {
  "name": "custom_container_" + TIMESTAMP,
  "deploymentUri": model_artifact_dir,
  "runtimeVersion": "2.1",
  "framework": "TENSORFLOW",
  "pythonVersion": "3.7",
  "machineType": "mls1-c1-m2"
}

request = cloudml.projects().models().versions().create(
    parent=response["name"]
)
request.body = json.loads(json.dumps(version, indent=2))

print(json.dumps(json.loads(request.to_json()), indent=2))

request = cloudml.projects().models().versions().create(
    parent=response["name"],
    body=version
)


*Example output*:
```
{
  "uri": "https://ml.googleapis.com/v1/projects/migration-ucaip-training/models/custom_container_20210325215916/versions?alt=json",
  "method": "POST",
  "body": {
    "name": "custom_container_20210325215916",
    "deploymentUri": "gs://migration-ucaip-trainingaip-20210325215916/custom_container_20210325215916",
    "runtimeVersion": "2.1",
    "framework": "TENSORFLOW",
    "pythonVersion": "3.7",
    "machineType": "mls1-c1-m2"
  },
  "headers": {
    "accept": "application/json",
    "accept-encoding": "gzip, deflate",
    "user-agent": "(gzip)",
    "x-goog-api-client": "gdcl/1.12.8 gl-python/3.7.8"
  },
  "methodId": "ml.projects.models.versions.create",
  "resumable": null,
  "response_callbacks": [],
  "_in_error_state": false,
  "body_size": 0,
  "resumable_uri": null,
  "resumable_progress": 0
}
```



#### Call

In [None]:
response = request.execute()


#### Response

In [None]:
print(json.dumps(response, indent=2))


*Example output*:
```
{
  "name": "projects/migration-ucaip-training/operations/create_custom_container_20210325215916_custom_container_20210325215916-1616710881327",
  "metadata": {
    "@type": "type.googleapis.com/google.cloud.ml.v1.OperationMetadata",
    "createTime": "2021-03-25T22:21:21Z",
    "operationType": "CREATE_VERSION",
    "modelName": "projects/migration-ucaip-training/models/custom_container_20210325215916",
    "version": {
      "name": "projects/migration-ucaip-training/models/custom_container_20210325215916/versions/custom_container_20210325215916",
      "deploymentUri": "gs://migration-ucaip-trainingaip-20210325215916/custom_container_20210325215916",
      "createTime": "2021-03-25T22:21:21Z",
      "runtimeVersion": "2.1",
      "etag": "d2zy+bRwFOw=",
      "framework": "TENSORFLOW",
      "machineType": "mls1-c1-m2",
      "pythonVersion": "3.7"
    }
  }
}
```



In [None]:
# The full unique ID for the model version
model_version_name = response["metadata"]["version"]["name"]

print(model_version_name)


In [None]:
while True:
    response = cloudml.projects().models().versions().get(
        name=model_version_name
    ).execute()
    if response["state"] == "READY":
        print("Model version created.")
        break
    time.sleep(60)


### Prepare input for online prediction

In [None]:
import base64
import cv2
import json
import tensorflow as tf


(_, _), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

test_image_1, test_label_1 = x_test[0], y_test[0]
test_image_2, test_label_2 = x_test[1], y_test[1]

cv2.imwrite('tmp1.jpg', (test_image_1 * 255).astype(np.uint8))
cv2.imwrite('tmp2.jpg', (test_image_2 * 255).astype(np.uint8))


### [projects.predict](https://cloud.google.com/ai-platform/prediction/docs/reference/rest/v1/projects/predict)

#### Request

In [None]:


instances_list = []
for img in ["tmp1.jpg", "tmp2.jpg"]:
    bytes = tf.io.read_file(img)
    b64str = base64.b64encode(bytes.numpy()).decode('utf-8')
    instances_list.append({
        'key': img, 
        input_name: {
            'b64': b64str
        }
    })

request = cloudml.projects().predict(name=model_version_name)
request.body = json.loads(json.dumps({'instances': instances_list}, indent=2))

print(json.dumps(json.loads(request.to_json()), indent=2))

request = cloudml.projects().predict(
    name=model_version_name,
    body={'instances': instances_list}
)


*Example output*:
```
{
  "uri": "https://ml.googleapis.com/v1/projects/migration-ucaip-training/models/custom_container_20210325215916/versions/custom_container_20210325215916:predict?alt=json",
  "method": "POST",
  "body": {
    "instances": [
      {
        "key": "tmp1.jpg",
        "bytes_inputs": {
          "b64": "/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQECAgICAgQDAgICAgUEBAMEBgUGBgYFBgYGBwkIBgcJBwYGCAsICQoKCgoKBggLDAsKDAkKCgr/2wBDAQICAgICAgUDAwUKBwYHCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgr/wAARCAAgACADASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwD6E1zw/qemaZY669mkdtqsTPZTpMH85Y3KMcKeOR36444NZGj2/ibWPHaaPeHSLXRbq3jSw1O7u3V3u9zb0ZAh+QIFO4EkliCBjnwv9lfxtrviTxBbW974le/0nQ/h5ohms7m4b92bhVlkEfPIDuwJ6gyADgCuWh1fxP8As6/tGad8H5PiRrHjW6tNd1O/iXUr5Z7mx0uSZlinHODiRQCqrgGTGPmwPyqfClGlnM6Em3TSi/N3Wtnto015H6y+MK08kp14QSqScle6tFxel0+6aZ9d6/rvhXwH4407wWtq+uSXth9pa5jcwKUBIbyxzkL0Ock8nHQV2x0NtN0Gw8a6PDOunXc3liO5GZIGxwG6YBxx1x0zkV4L8Xfij4k8X/Gr4V+HdJtDpdgui3GoajJBAXlkuGvNoUEDcD5MYyuN3zEnpX0B4Q+Iunafdap8OPFCG/sL+PzLkGNgbQB1O7Jxh1JOCOvHXNfUYrh/LPqMo0oKDgvdl10117nzGD4izR5hGdWcp8zs4+umisflx8DNXi/Z/wDHviPTfiP4g+x2WieFtV03U5r9miLw2ilonTIySWijZCB6Yr2X4R/tQT/tC/s56f8AGn4C/AvxTrXiq7jksW1G78NxRlNiRxIrzO5EwiVHAePAfeoO1lIrqv2pf2Xz+1t+z3feC9E1GLSvE2paQtraa1cISXiEqu9tKVydrbMZ5Kkg8jIr234a/Bq7+EngjQPAng3wzB/ZOl6ZFa2tpp/yeWiqFB2Hq2ASeuTz15r9ixHBa+vSp1JXpxXuy6vyfpbXuz8jocUyWCVSirTb1j09V95e+E3hnwXr8dn8QPjLaSWZBguP+EcudKSW6gnSMfLHOrcQh2djCSAxY5BxkzfEDx1H4n8ZyvpEC2WnMAwighMe8hvl3gZyQCB15K5xWNq3iKbVNVk8MW91NZzxLllkt9jL2z0I/DrXCeG47T4seNL3wN4c1nULKPTY2GoX8YYNcSkfKisxwis2ASMnk9AK7f8AiHuQ47CulWlKzfM7S5W+vRfgZQ47zvA4qNako3irK8eZLpfVn//Z"
        }
      },
      {
        "key": "tmp2.jpg",
        "bytes_inputs": {
          "b64": "/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQECAgICAgQDAgICAgUEBAMEBgUGBgYFBgYGBwkIBgcJBwYGCAsICQoKCgoKBggLDAsKDAkKCgr/2wBDAQICAgICAgUDAwUKBwYHCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgr/wAARCAAgACADASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwD8V5UubRQlxlSvDAtyD6dadbW91fK8lrFI6o6KzrnALHCj8cH8jX3J+1V+wR8adOjsrDxR8EPhzohsg13qKfD+zddRWBF2u0sR42AnIzjJAAzzXmnwh/Yk+D3jX4Q6h478cftgaX4Al/tR4f8AhHdf0eRruVI+IpdkbFiWLsAqgnrXZLBVFWcI6/gc0MVSlSU2eZaX+zdr954Nv/EEt7FNeWyrJHZ2moRn93tYsTuwcg7OBz19q8sa7AUEMf8AvqvoHwX+yz8Vb74gXtn4M+Euq/EbSYpV+y6vf2txptrMOAz+XIysR0xu9M4qf9pn9mf4jJoNprJ+BGgeCn0mHZfQ2OqRl793fAZUDkkAbcd8k1pUw1OUE6e/bf8AEVOs1JqT3P19/aT/AOCMf7RH7Qfx5134zeNf2z7S18Q+PkSWWDSb6406BrSMFYrWNCCAsakDbnOSSeTXg+sf8G3viHwt49ez1jxdY6zqds1veTwT+MzBdqJWnWCYb0DhXe3n2sOGMD4J2HH7IfD3xnc/EPwl4Y8R6t458M28y+EL1NRh1nS3vGXV3a1+w3S4mjCwxxpdCaFSjTNLGRImwk+A6f8AAL9oH4gaX4+tf+Ckn7Vfw4+I2k3fiW6m+HOneFNPn0WDw9piTLLbuUiYGWZsCNYp/tMtqiSbL+b7RMrqvWxVDKamZ89BOg03Q9+deupOpBRotU1CM4OMak/aSUIxkouTbUjmllc0qic60XrGNldX/dtNr/n2+aS5r3XI3ytKz+Jof+CN2r6LYHU/ibqOo2iQzFmmn8eXLfugMbDhwMcdeprg/iV+zX+zx8O9Mu9f8NaRplw9oSr6g0sl0BgdBNMzZ+i9K+svi9P+yv8ADAnRfhl4MfxNdhSDe63fzS2sJHdYpHbfjtu/KvhL9ub4tarruhy2JvJMsdjJFGFj28gKqrgKo9B6VhlvEGMzfDxm8M6N+kpRlJeT5dE/mwoZDiMO+evVb8j/2Q=="
        }
      }
    ]
  },
  "headers": {
    "accept": "application/json",
    "accept-encoding": "gzip, deflate",
    "user-agent": "(gzip)",
    "x-goog-api-client": "gdcl/1.12.8 gl-python/3.7.8"
  },
  "methodId": "ml.projects.predict",
  "resumable": null,
  "response_callbacks": [],
  "_in_error_state": false,
  "body_size": 0,
  "resumable_uri": null,
  "resumable_progress": 0
}
```



#### Call

In [None]:
response = request.execute()


#### Response

In [None]:
print(json.dumps(response, indent=2))


*Example output*:
```
{
  "predictions": [
    {
      "key": "tmp1.jpg",
      "prediction": [
        0.04647013917565346,
        0.06366760283708572,
        0.1313525289297104,
        0.11146996915340424,
        0.15688316524028778,
        0.09669718891382217,
        0.18583332002162933,
        0.10817062109708786,
        0.03371051698923111,
        0.06574499607086182
      ]
    },
    {
      "key": "tmp2.jpg",
      "prediction": [
        0.04657613858580589,
        0.06360984593629837,
        0.13138002157211304,
        0.11128604412078857,
        0.15718042850494385,
        0.09655129164457321,
        0.1853194385766983,
        0.10867657512426376,
        0.03375410661101341,
        0.06566616147756577
      ]
    }
  ]
}
```



### [projects.models.versions.delete](https://cloud.google.com/ai-platform/prediction/docs/reference/rest/v1/projects.models.versions/delete)

#### Call

In [None]:
request = cloudml.projects().models().versions().delete(
    name=model_version_name
)

response = request.execute()


#### Response



In [None]:
print(json.dumps(response, indent=2))


*Example output*:
```
{
  "name": "projects/migration-ucaip-training/operations/delete_custom_container_20210325215916_custom_container_20210325215916-1616710943615",
  "metadata": {
    "@type": "type.googleapis.com/google.cloud.ml.v1.OperationMetadata",
    "createTime": "2021-03-25T22:22:23Z",
    "operationType": "DELETE_VERSION",
    "modelName": "projects/migration-ucaip-training/models/custom_container_20210325215916",
    "version": {
      "name": "projects/migration-ucaip-training/models/custom_container_20210325215916/versions/custom_container_20210325215916",
      "deploymentUri": "gs://migration-ucaip-trainingaip-20210325215916/custom_container_20210325215916",
      "createTime": "2021-03-25T22:21:21Z",
      "runtimeVersion": "2.1",
      "state": "READY",
      "etag": "kfUhdXr8GRg=",
      "framework": "TENSORFLOW",
      "machineType": "mls1-c1-m2",
      "pythonVersion": "3.7"
    }
  }
}
```



# Cleanup

In [None]:
delete_model = True
delete_bucket = True

    
# Delete the model using the AI Platform (Unified) fully qualified identifier for the model
try:
    if delete_model:
        cloudml.projects().models().delete(
            name=model_id
        )
except Exception as e:
    print(e) 

if delete_bucket and 'BUCKET_NAME' in globals():
    ! gsutil rm -r gs://$BUCKET_NAME
