In [2]:
import os
import shutil
import numpy as np
import tensorflow as tf
from datetime import datetime
print(tf.__version__)

1.15.0


In [3]:
PROJECT = "qwiklabs-gcp-ml-49b827b781ab"  # Replace with your PROJECT
REGION = "us-central1"            # Choose an available region for Cloud MLE
TFVERSION = "1.14"                # TF version for CMLE to use

In [4]:
# 나중에 하이퍼파라미터로 쓸수 있는것.

BUCKET     = "qwiklabs-gcp-ml-49b827b781ab"  # Replace with your BUCKET
DATA_DIR   = "gs://{}/babyweight/preproc".format(BUCKET)
DATA_DIR   = "."
OUTPUT_DIR = "babyweight_trained_"+datetime.now().strftime('%Y%m%d_%H%M%S')
PATTERN    = ""
TRAIN_STEPS = 1000
BATCH_SIZE = 128
NNSIZE = [64,32,1]
NNSIZE = [128,64,32]
NEMBEDS = 6
SAVE_CHECKPOINTS_SECS = 30
KEEP_CHECKPOINT_MAX = 10
EVAL_SECS= 30

def parameter_display():
    print('>>>>> BUCKET                : {}'.format(BUCKET))
    print('>>>>> DATA_DIR              : {}'.format(DATA_DIR))
    print('>>>>> OUTPUT_DIR            : {}'.format(OUTPUT_DIR))
    print('>>>>> PATTERN               : {}'.format(PATTERN))
    print('>>>>> TRAIN_STEPS           : {}'.format(TRAIN_STEPS))
    print('>>>>> BATCH_SIZE            : {}'.format(BATCH_SIZE))
    print('>>>>> NNSIZE                : {}'.format(NNSIZE))
    print('>>>>> NEMBEDS               : {}'.format(NEMBEDS))
    print('>>>>> SAVE_CHECKPOINTS_SECS : {}'.format(SAVE_CHECKPOINTS_SECS))
    print('>>>>> KEEP_CHECKPOINT_MAX   : {}'.format(KEEP_CHECKPOINT_MAX))
    print('>>>>> EVAL_SECS             : {}'.format(EVAL_SECS))  

In [5]:
os.environ["BUCKET"] = BUCKET
os.environ["PROJECT"] = PROJECT
os.environ["REGION"] = REGION
os.environ["TFVERSION"] = TFVERSION

In [6]:
# Columns명 지정
CSV_COLUMNS = "weight_pounds,is_male,mother_age,plurality,gestation_weeks".split(',')
LABEL_COLUMN = "weight_pounds"
# Set default values for each CSV column
DEFAULTS = [[0.0], ["null"], [0.0], ["null"], [0.0]]

In [7]:
def add_engineered_features(features):
    features["dummy"] = features["mother_age"]
    
    return features

In [8]:
def read_dataset(data_dir, filename_pattern, mode, batch_size = 512):
    def _input_fn():
        def decode_csv(value_column):
            columns = tf.decode_csv(records = value_column, record_defaults = DEFAULTS)
            # Default feature
            features = dict(zip(CSV_COLUMNS, columns))
            # NEW: Add engineered features
            features = add_engineered_features(features)
            # Default label
            label = features.pop(LABEL_COLUMN)
            return features, label
    
        if PATTERN == "":
            file_path = "{}/{}*".format(data_dir, filename_pattern)
        else:
            file_path = "{}/{}*{}*".format(data_dir, filename_pattern, PATTERN)
        print('>>>>> data filename : {}'.format(file_path))
        
        # Create list of files that match pattern
        file_list = tf.gfile.Glob(filename = file_path)

        # Create dataset from file list
        dataset = (tf.data.TextLineDataset(filenames = file_list)  # Read text file
                     .map(map_func = decode_csv))  # Transform each elem by applying decode_csv fn

        if mode == tf.estimator.ModeKeys.TRAIN:
            num_epochs = None # indefinitely
            dataset = dataset.shuffle(buffer_size = 10 * batch_size)
        else:
            num_epochs = 1 # end-of-input after this

        dataset = dataset.repeat(count = num_epochs).batch(batch_size = batch_size)
        return dataset
    return _input_fn

In [9]:
def get_categorical_indicator(name, values):
    return tf.feature_column.indicator_column(
        categorical_column = tf.feature_column.categorical_column_with_vocabulary_list(key = name, vocabulary_list = values))

def get_feature_cols():
    # Vocabulary List
    voca_list_is_male = ["True","False","Unknown"]
    voca_list_plurality = ["Single(1)","Twins(2)","Triplets(3)","Quadruplets(4)","Quintuplets(5)","Multiple(2+)"]

    # Default Feature column
    fc_is_male = tf.feature_column.categorical_column_with_vocabulary_list(key="is_male", vocabulary_list=voca_list_is_male)
    fc_plurality = tf.feature_column.categorical_column_with_vocabulary_list(key="plurality", vocabulary_list=voca_list_plurality)
    fc_mother_age = tf.feature_column.numeric_column(key = "mother_age")
    fc_gestation_weeks = tf.feature_column.numeric_column(key = "gestation_weeks")
    # DNNRegressor모델인 경우에는, 아래의 함수를 써야 한다.
    # One-Hot Encoding을 해서 넣어야 하는거 같다.
    # fc_is_male   = get_categorical_indicator("is_male", voca_list_is_male)
    # fc_plurality = get_categorical_indicator("plurality", voca_list_plurality)

    
    # ADD Feature column
    fc_dummy = tf.feature_column.numeric_column(key = "dummy")

    # Bucketized columns
    fc_buckets_mother_age = tf.feature_column.bucketized_column(source_column = fc_mother_age, boundaries = np.arange(start = 15, stop = 45, step = 1).tolist())
    fc_buckets_gestation_weeks = tf.feature_column.bucketized_column(source_column = fc_gestation_weeks, boundaries = np.arange(start = 17, stop = 47, step = 1).tolist())
   
    # Embeded Feature columns
    crossed = tf.feature_column.crossed_column(keys=[fc_is_male,fc_plurality,fc_buckets_mother_age,fc_buckets_gestation_weeks], 
                                            hash_bucket_size = 20000)
    fc_embed = tf.feature_column.embedding_column(categorical_column = crossed, dimension = NEMBEDS)

    # Feature columns
    feature_columns = [fc_is_male,
                       fc_plurality,
                       fc_mother_age,
                       fc_gestation_weeks,
                       fc_dummy
                      ]
    
    # Sparse wide columns
    wide = [fc_is_male,fc_plurality,fc_buckets_mother_age,fc_buckets_gestation_weeks]
    
    #Deep colomns
    deep = [fc_mother_age,
            fc_gestation_weeks,
            fc_embed]
    
    return feature_columns, wide, deep

In [10]:
def serving_input_fn():
    feature_placeholders = {
        "is_male"        : tf.placeholder(dtype = tf.string,  shape = [None]),
        "mother_age"     : tf.placeholder(dtype = tf.float32, shape = [None]),
        "plurality"      : tf.placeholder(dtype = tf.string,  shape = [None]),
        "gestation_weeks": tf.placeholder(dtype = tf.float32, shape = [None])
    }
    
    features = add_engineered_features(feature_placeholders)
    
    # feature의 shape=(?,)이면, 아래 방법으로 차원을 증가시킨다.
    features = {
                key: tf.expand_dims(input = tensor, axis = -1)
                for key, tensor in feature_placeholders.items()
               }

    print(features["is_male"])
    return tf.estimator.export.ServingInputReceiver(features = features, receiver_tensors = feature_placeholders)

In [11]:
# BestExporter를 사용하게 되면, 아래 Function이 있어야 하며, compare_fn으로 사용한다.
def _accuracy_bigger(best_eval_result, current_eval_result):
    metric = 'accuracy'
    return best_eval_result[metric] < current_eval_result[metric]

In [None]:
def my_rmse(labels, predictions):
    pred_values = predictions["predictions"]
    return {
        "rmse": tf.metrics.root_mean_squared_error(
            labels=labels,
            predictions=pred_values
        )
    }

In [12]:
def train_and_evaluate(output_dir):
    parameter_display()
    
    feature_columns, wide, deep = get_feature_cols()
        
    run_config = tf.estimator.RunConfig(
        save_checkpoints_secs = SAVE_CHECKPOINTS_SECS,
        keep_checkpoint_max = KEEP_CHECKPOINT_MAX)

    estimator = tf.estimator.DNNLinearCombinedRegressor(
        model_dir = output_dir,
        linear_feature_columns = wide,
        dnn_feature_columns = deep,
        dnn_hidden_units = NNSIZE,
        config = run_config)
 
    estimator = tf.contrib.estimator.add_metrics(estimator, my_rmse)

    train_spec = tf.estimator.TrainSpec(
        input_fn = read_dataset(DATA_DIR, "train", mode = tf.estimator.ModeKeys.TRAIN, batch_size=BATCH_SIZE),
        max_steps = TRAIN_STEPS)
    
    Final_exporter = tf.estimator.FinalExporter('./exporter', serving_input_receiver_fn=serving_input_fn)
    exporters = [Final_exporter]
    
    eval_spec = tf.estimator.EvalSpec(
        input_fn = read_dataset(DATA_DIR, "eval", mode = tf.estimator.ModeKeys.EVAL, batch_size=BATCH_SIZE),
        steps = None,
        start_delay_secs = 60, # start evaluating after N seconds
        throttle_secs = EVAL_SECS,  # evaluate every N seconds
        exporters = exporters)
    
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)

In [65]:
!rm -rf {OUTPUT_DIR}
train_and_evaluate(OUTPUT_DIR)

>>>>> BUCKET                : qwiklabs-gcp-ml-49b827b781ab
>>>>> DATA_DIR              : .
>>>>> OUTPUT_DIR            : babyweight_trained_20191130_051555
>>>>> PATTERN               : 
>>>>> TRAIN_STEPS           : 1000
>>>>> BATCH_SIZE            : 128
>>>>> NNSIZE                : [128, 64, 32]
>>>>> NEMBEDS               : 6
>>>>> SAVE_CHECKPOINTS_SECS : 30
>>>>> KEEP_CHECKPOINT_MAX   : 10
>>>>> EVAL_SECS             : 30
INFO:tensorflow:Using config: {'_experimental_max_worker_delay_secs': None, '_task_type': 'worker', '_master': '', '_save_checkpoints_steps': None, '_model_dir': 'babyweight_trained_20191130_051555', '_protocol': None, '_session_creation_timeout_secs': 7200, '_log_step_count_steps': 100, '_num_worker_replicas': 1, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f7c98252358>, '_task_id': 0, '_tf_random_seed':

In [35]:
%%bash
touch babyweight/trainer/__init__.py

In [55]:
%%writefile babyweight/trainer/task.py
import argparse
import json
import os

from . import model

import tensorflow as tf


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--bucket",
        help="GCS path to data. We assume that data is in \
        gs://BUCKET/babyweight/preproc/",
        required=True
    )
    parser.add_argument(
        "--data_dir",
        help="train and eval data directory",
        required=True
    )
    parser.add_argument(
        "--output_dir",
        help="GCS location to write checkpoints and export models",
        required=True
    )
    parser.add_argument(
        "--pattern",
        help="data file pattern",
        default="of"
    )
    parser.add_argument(
        "--train_steps",
        help="Number of Train Step.",
        type=int,
        default=1000
    )
    parser.add_argument(
        "--batch_size",
        help="Number of examples to compute gradient over.",
        type=int,
        default=512
    )
    parser.add_argument(
        "--job-dir",
        help="this model ignores this field, but it is required by gcloud",
        default="junk"
    )
    parser.add_argument(
        "--nnsize",
        help="Hidden layer sizes to use for DNN feature columns -- provide \
        space-separated layers",
        nargs="+",
        type=int,
        default=[128, 32, 4]
    )
    parser.add_argument(
        "--nembeds",
        help="Embedding size of a cross of n key real-valued parameters",
        type=int,
        default=6
    )
    parser.add_argument(
        "--keep_checkpoints_max",
        help="",
        type=int,
        default=10
    )

    # Parse arguments
    args = parser.parse_args()
    arguments = args.__dict__

    # Pop unnecessary args needed for gcloud
    arguments.pop("job-dir", None)

    # Assign the arguments to the model variables
    output_dir                  = arguments.pop("output_dir")
    model.OUTPUT_DIR            = output_dir
    model.BUCKET                = arguments.pop("bucket")
    model.DATA_DIR              = arguments.pop("data_dir")
    model.PATTERN               = arguments.pop("pattern")
    model.TRAIN_STEPS           = arguments.pop("train_steps")
    model.BATCH_SIZE            = arguments.pop("batch_size")
    model.NNSIZE                = arguments.pop("nnsize")
    model.NEMBEDS               = arguments.pop("nembeds")
    model.KEEP_CHECKPOINT_MAX   = arguments.pop("keep_checkpoints_max")
    
    # Append trial_id to path if we are doing hptuning
    # This code can be removed if you are not using hyperparameter tuning
    output_dir = os.path.join(
        output_dir,
        json.loads(
            os.environ.get("TF_CONFIG", "{}")
        ).get("task", {}).get("trial", "")
    )

    # Run the training job
    model.train_and_evaluate(output_dir)


Overwriting babyweight/trainer/task.py


In [58]:
%%writefile babyweight/trainer/model.py
# -*- coding: utf-8 -*- 
import os
import shutil
import numpy as np
import tensorflow as tf
from datetime import datetime
print(tf.__version__)

def parameter_display():
    print('>>>>> BUCKET                : {}'.format(BUCKET))
    print('>>>>> DATA_DIR              : {}'.format(DATA_DIR))
    print('>>>>> OUTPUT_DIR            : {}'.format(OUTPUT_DIR))
    print('>>>>> PATTERN               : {}'.format(PATTERN))
    print('>>>>> TRAIN_STEPS           : {}'.format(TRAIN_STEPS))
    print('>>>>> BATCH_SIZE            : {}'.format(BATCH_SIZE))
    print('>>>>> NNSIZE                : {}'.format(NNSIZE))
    print('>>>>> NEMBEDS               : {}'.format(NEMBEDS))
    print('>>>>> KEEP_CHECKPOINT_MAX   : {}'.format(KEEP_CHECKPOINT_MAX))
    

######################################################################
# Columns define
CSV_COLUMNS = "weight_pounds,is_male,mother_age,plurality,gestation_weeks".split(',')
LABEL_COLUMN = "weight_pounds"
# Set default values for each CSV column
DEFAULTS = [[0.0], ["null"], [0.0], ["null"], [0.0]]

######################################################################
def add_engineered_features(features):
    features["dummy"] = features["mother_age"]
    return features

######################################################################
def get_categorical_indicator(name, values):
    return tf.feature_column.indicator_column(
        categorical_column = tf.feature_column.categorical_column_with_vocabulary_list(key = name, vocabulary_list = values))

######################################################################
def read_dataset(data_dir, filename_pattern, mode, batch_size = 512):
    def _input_fn():
        def decode_csv(value_column):
            columns = tf.decode_csv(records = value_column, record_defaults = DEFAULTS)
            # Default feature
            features = dict(zip(CSV_COLUMNS, columns))
            # NEW: Add engineered features
            #features = add_engineered_features(features)
            # Default label
            label = features.pop(LABEL_COLUMN)
            return features, label
    
        if PATTERN == "":
            file_path = "{}/{}*".format(data_dir, filename_pattern)
        else:
            file_path = "{}/{}*{}*".format(data_dir, filename_pattern, PATTERN)
        print('>>>>> data filename : {}'.format(file_path))
        
        # Create list of files that match pattern
        file_list = tf.gfile.Glob(filename = file_path)

        # Create dataset from file list
        dataset = (tf.data.TextLineDataset(filenames = file_list)  # Read text file
                     .map(map_func = decode_csv))  # Transform each elem by applying decode_csv fn

        if mode == tf.estimator.ModeKeys.TRAIN:
            num_epochs = None # indefinitely
            dataset = dataset.shuffle(buffer_size = 10 * batch_size)
        else:
            num_epochs = 1 # end-of-input after this

        dataset = dataset.repeat(count = num_epochs).batch(batch_size = batch_size)
        return dataset
    return _input_fn

######################################################################
def get_feature_cols():
    # Vocabulary List
    voca_list_is_male = ["True","False","Unknown"]
    voca_list_plurality = ["Single(1)","Twins(2)","Triplets(3)","Quadruplets(4)","Quintuplets(5)","Multiple(2+)"]

    # Default Feature column
    fc_is_male = tf.feature_column.categorical_column_with_vocabulary_list(key="is_male", vocabulary_list=voca_list_is_male)
    fc_plurality = tf.feature_column.categorical_column_with_vocabulary_list(key="plurality", vocabulary_list=voca_list_plurality)
    fc_mother_age = tf.feature_column.numeric_column(key = "mother_age")
    fc_gestation_weeks = tf.feature_column.numeric_column(key = "gestation_weeks")

    # if DNNRegressor model, use below line
    # fc_is_male   = get_categorical_indicator("is_male", voca_list_is_male)
    # fc_plurality = get_categorical_indicator("plurality", voca_list_plurality)

    
    # ADD Feature column
    fc_dummy = tf.feature_column.numeric_column(key = "dummy")

    # Bucketized columns
    fc_buckets_mother_age = tf.feature_column.bucketized_column(source_column = fc_mother_age, boundaries = np.arange(start = 15, stop = 45, step = 1).tolist())
    fc_buckets_gestation_weeks = tf.feature_column.bucketized_column(source_column = fc_gestation_weeks, boundaries = np.arange(start = 17, stop = 47, step = 1).tolist())
   
    # Embeded Feature columns
    crossed = tf.feature_column.crossed_column(keys=[fc_is_male,fc_plurality,fc_buckets_mother_age,fc_buckets_gestation_weeks], 
                                            hash_bucket_size = 20000)
    fc_embed = tf.feature_column.embedding_column(categorical_column = crossed, dimension = NEMBEDS)

    # Feature columns
    feature_columns = [fc_is_male,
                       fc_plurality,
                       fc_mother_age,
                       fc_gestation_weeks,
                       fc_dummy
                      ]
    
    # Sparse wide columns
    wide = [fc_is_male,fc_plurality,fc_buckets_mother_age,fc_buckets_gestation_weeks]
    
    #Deep colomns
    deep = [fc_mother_age,
            fc_gestation_weeks,
            fc_embed]
    
    return feature_columns, wide, deep

######################################################################
def serving_input_fn():
    feature_placeholders = {
        "is_male"        : tf.placeholder(dtype = tf.string,  shape = [None]),
        "mother_age"     : tf.placeholder(dtype = tf.float32, shape = [None]),
        "plurality"      : tf.placeholder(dtype = tf.string,  shape = [None]),
        "gestation_weeks": tf.placeholder(dtype = tf.float32, shape = [None])
    }
    
    #features = add_engineered_features(feature_placeholders)
    
    # if feature의 shape=(?,), use below line
    features = {
                key: tf.expand_dims(input = tensor, axis = -1)
                for key, tensor in feature_placeholders.items()
               }
    return tf.estimator.export.ServingInputReceiver(features = features, receiver_tensors = feature_placeholders)


######################################################################
def _accuracy_bigger(best_eval_result, current_eval_result):
    metric = 'accuracy'
    return best_eval_result[metric] < current_eval_result[metric]


######################################################################
def my_rmse(labels, predictions):
    pred_values = predictions["predictions"]
    return {
        "rmse": tf.metrics.root_mean_squared_error(
            labels=labels,
            predictions=pred_values
        )
    }

######################################################################
def train_and_evaluate(output_dir):
    EVAL_INTERVAL = 300  # seconds
    
    parameter_display()
    feature_columns, wide, deep = get_feature_cols()
        
    run_config = tf.estimator.RunConfig(
        save_checkpoints_secs = EVAL_INTERVAL,
        keep_checkpoint_max = KEEP_CHECKPOINT_MAX)

    estimator = tf.estimator.DNNLinearCombinedRegressor(
        model_dir = output_dir,
        linear_feature_columns = wide,
        dnn_feature_columns = deep,
        dnn_hidden_units = NNSIZE,
        config = run_config)
 
    estimator = tf.contrib.estimator.add_metrics(estimator, my_rmse)

    train_spec = tf.estimator.TrainSpec(
        input_fn = read_dataset(DATA_DIR, "train", mode = tf.estimator.ModeKeys.TRAIN, batch_size=BATCH_SIZE),
        max_steps = TRAIN_STEPS)
    
    #Final_exporter = tf.estimator.FinalExporter('./exporter', serving_input_receiver_fn=serving_input_fn)
    #exporter = Final_exporter

    exporter = tf.estimator.LatestExporter(
        name="exporter",
        serving_input_receiver_fn=serving_input_fn,
        exports_to_keep=None)
    
    
    eval_spec = tf.estimator.EvalSpec(
        input_fn = read_dataset(DATA_DIR, "eval", mode = tf.estimator.ModeKeys.EVAL, batch_size=BATCH_SIZE),
        steps = None,
        start_delay_secs = 60, # start evaluating after N seconds
        throttle_secs = EVAL_INTERVAL,  # evaluate every N seconds
        exporters = exporter)
    
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
    

Overwriting babyweight/trainer/model.py


# Model을 CMLE에 올려보자.

In [6]:
%%bash
if ! gsutil ls -r gs://$BUCKET | grep -q gs://$BUCKET/babyweight/preproc; then
    gsutil mb -l ${REGION} gs://${BUCKET}
    # copy canonical set of preprocessed files if you didn't do previous notebook
    gsutil -m cp -R gs://cloud-training-demos/babyweight gs://${BUCKET}
fi

In [7]:
%%bash
gsutil ls gs://${BUCKET}/babyweight/preproc/*-00000*

gs://qwiklabs-gcp-ml-49b827b781ab/babyweight/preproc/eval.csv-00000-of-00002
gs://qwiklabs-gcp-ml-49b827b781ab/babyweight/preproc/train.csv-00000-of-00028


In [82]:
# 실행전 변수 정의
DATA_DIR   = "gs://{}/babyweight/preproc".format(BUCKET)
OUTDIR="gs://{}/babyweight/trained_model_20191130".format(BUCKET)
print('DATA_DIR:{} \nOUTDIR  :{}'.format(DATA_DIR, OUTDIR))

DATA_DIR:gs://qwiklabs-gcp-ml-49b827b781ab/babyweight/preproc 
OUTDIR  :gs://qwiklabs-gcp-ml-49b827b781ab/babyweight/trained_model_20191130


## AI-platforma training

In [59]:
%%bash
OUTDIR=gs://${BUCKET}/babyweight/trained_model_test
DATA_DIR=gs://${BUCKET}/babyweight/preproc
JOBNAME=babyweight_$(date -u +%y%m%d_%H%M%S)
echo $OUTDIR $REGION $JOBNAME
gsutil -m rm -rf $OUTDIR
gcloud ai-platform jobs submit training $JOBNAME \
    --region=$REGION \
    --module-name=trainer.task \
    --package-path=$(pwd)/babyweight/trainer \
    --job-dir=$OUTDIR \
    --staging-bucket=gs://$BUCKET \
    --scale-tier=PREMIUM_1 \
    --runtime-version=$TFVERSION \
    -- \
    --bucket=qwiklabs-gcp-ml-49b827b781ab \
    --data_dir=$DATA_DIR \
    --output_dir=$OUTDIR \
    --train_steps=1000 \
    --batch_size=128 \
    --nembeds=6 \
    --keep_checkpoints_max=10    

gs://qwiklabs-gcp-ml-49b827b781ab/babyweight/trained_model_test us-central1 babyweight_191130_105150
jobId: babyweight_191130_105150
state: QUEUED


Removing gs://qwiklabs-gcp-ml-49b827b781ab/babyweight/trained_model_test/#1575110983344751...
Removing gs://qwiklabs-gcp-ml-49b827b781ab/babyweight/trained_model_test/events.out.tfevents.1575110987.cmle-training-worker-85d4e56175-0-9f6gr#1575110997478890...
Removing gs://qwiklabs-gcp-ml-49b827b781ab/babyweight/trained_model_test/events.out.tfevents.1575110983.cmle-training-master-85d4e56175-0-czjrw#1575110984046674...
Removing gs://qwiklabs-gcp-ml-49b827b781ab/babyweight/trained_model_test/checkpoint#1575111017090679...
Removing gs://qwiklabs-gcp-ml-49b827b781ab/babyweight/trained_model_test/graph.pbtxt#1575110986324211...
Removing gs://qwiklabs-gcp-ml-49b827b781ab/babyweight/trained_model_test/model.ckpt-0.data-00000-of-00012#1575110993630936...
Removing gs://qwiklabs-gcp-ml-49b827b781ab/babyweight/trained_model_test/model.ckpt-0.data-00001-of-00012#1575110994376289...
Removing gs://qwiklabs-gcp-ml-49b827b781ab/babyweight/trained_model_test/model.ckpt-0.data-00003-of-00012#15751109954

### 아래꺼 돌리면 Job은 수행되는데, 상세 hyperparameter 내부 job들이 실패로 되어 있음.

## gs://${BUCKET}/babyweight/trained_model_20191130를 모델로 올려보자.

In [13]:
%%bash
if ! gsutil ls -r gs://${BUCKET} | grep -q gs://${BUCKET}/babyweight/trained_model_20191130/; then
    gsutil mb -l ${REGION} gs://${BUCKET}
    # copy canonical model if you didn't do previous notebook
    gsutil -m cp -R gs://cloud-training-demos/babyweight/trained_model gs://${BUCKET}/babyweight/trained_model_20191130
fi

In [16]:
%%bash
MODEL_NAME="babyweight_20191130"
MODEL_VERSION="ml_on_gcp"

# Check to see if the model and version already exist, 
# if so, delete them to deploy anew
if gcloud ai-platform models list | grep "$MODEL_NAME \+ $MODEL_VERSION"; then
    echo "Deleting the version '$MODEL_VERSION' of model '$MODEL_NAME'"
    yes | gcloud ai-platform versions delete ${MODEL_VERSION} --model=$MODEL_NAME
    
    echo "Deleting the model '$MODEL_NAME'"
    yes | gcloud ai-platform models delete ${MODEL_NAME}
else 
    echo "The model '$MODEL_NAME' with version '$MODEL_VERSION' does not exist."
fi

The model 'babyweight_20191130' with version 'ml_on_gcp' does not exist.


### Model 생성

In [60]:
%%bash
MODEL_NAME="babyweight_20191130"
MODEL_VERSION="ml_on_gcp"
MODEL_LOCATION=$(gsutil ls gs://${BUCKET}/babyweight/trained_model_test/export/exporter/ | tail -1)
    
echo "Deploying the model '$MODEL_NAME', version '$MODEL_VERSION' from $MODEL_LOCATION"
echo "... this will take a few minutes"

gcloud ai-platform models create ${MODEL_NAME} --regions $REGION

Deploying the model 'babyweight_20191130', version 'ml_on_gcp' from gs://qwiklabs-gcp-ml-49b827b781ab/babyweight/trained_model_test/export/exporter/1575111495/
... this will take a few minutes


Created ml engine model [projects/qwiklabs-gcp-ml-49b827b781ab/models/babyweight_20191130].


### Model version 생성

In [61]:
%%bash
MODEL_NAME="babyweight_20191130"
MODEL_VERSION="ml_on_gcp_v1"
MODEL_LOCATION=$(gsutil ls gs://${BUCKET}/babyweight/trained_model_test/export/exporter/ | tail -1)

gcloud ai-platform versions create ${MODEL_VERSION} --model ${MODEL_NAME} --origin ${MODEL_LOCATION} --runtime-version ${TFVERSION}

Creating version (this might take a few minutes)......
..................................................................................................................................................................................................................................................done.


# Prediction(배치)

In [77]:
%%writefile inputs.json
{"is_male": "True", "mother_age": 31.0, "plurality": "Single(1)", "gestation_weeks": 39}
{"is_male": "False", "mother_age": 32.0, "plurality": "Single(1)", "gestation_weeks": 39}

Overwriting inputs.json


In [78]:
%%bash
INPUT=gs://${BUCKET}/babyweight/batchpred_20191130/inputs.json
OUTPUT=gs://${BUCKET}/babyweight/batchpred_20191130/outputs

gsutil cp inputs.json $INPUT
gsutil -m rm -rf $OUTPUT 
gcloud ai-platform jobs submit prediction babypred_$(date -u +%y%m%d_%H%M%S) \
    --data-format=TEXT \
    --region ${REGION} \
    --input-paths=$INPUT \
    --output-path=$OUTPUT \
    --model=babyweight_20191130 \
    --version=ml_on_gcp_v1

jobId: babypred_191130_114122
state: QUEUED


Copying file://inputs.json [Content-Type=application/json]...
/ [1 files][  179.0 B/  179.0 B]                                                
Operation completed over 1 objects/179.0 B.                                      
Removing gs://qwiklabs-gcp-ml-49b827b781ab/babyweight/batchpred_20191130/outputs/prediction.errors_stats-00000-of-00001#1575113785651953...
Removing gs://qwiklabs-gcp-ml-49b827b781ab/babyweight/batchpred_20191130/outputs/prediction.results-00000-of-00001#1575113777053769...
/ [2/2 objects] 100% Done                                                       
Operation completed over 2 objects.                                              
Job [babypred_191130_114122] submitted successfully.
Your job is still active. You may view the status of your job with the command

  $ gcloud ai-platform jobs describe babypred_191130_114122

or continue streaming the logs with the command

  $ gcloud ai-platform jobs stream-logs babypred_191130_114122


In [75]:
!gsutil ls gs://$BUCKET/babyweight/batchpred_20191130/outputs

gs://qwiklabs-gcp-ml-49b827b781ab/babyweight/batchpred_20191130/outputs/prediction.errors_stats-00000-of-00001
gs://qwiklabs-gcp-ml-49b827b781ab/babyweight/batchpred_20191130/outputs/prediction.results-00000-of-00001


In [76]:
!gsutil cat gs://$BUCKET/babyweight/batchpred_20191130/outputs/prediction.results*

{"predictions": [7.628013610839844]}
{"predictions": [7.422759056091309]}


## Prediction(API)

In [79]:
from oauth2client.client import GoogleCredentials
import requests
import json

MODEL_NAME = "babyweight_20191130"
MODEL_VERSION = "ml_on_gcp_v1"

token = GoogleCredentials.get_application_default().get_access_token().access_token
api = "https://ml.googleapis.com/v1/projects/{}/models/{}/versions/{}:predict" \
         .format(PROJECT, MODEL_NAME, MODEL_VERSION)
headers = {"Authorization": "Bearer " + token }
data = {
  "instances": [
    {
      "is_male": "True",
      "mother_age": 31.0,
      "plurality": "Single(1)",
      "gestation_weeks": 39
    },
    {
      "is_male": "True",
      "mother_age": 32.0,
      "plurality": "Single(1)",
      "gestation_weeks": 39
    },
    {
      "is_male": "True",
      "mother_age": 26.0,
      "plurality": "Single(1)",
      "gestation_weeks": 39
    },
    {
      "is_male": "False",
      "mother_age": 29.0,
      "plurality": "Single(1)",
      "gestation_weeks": 38
    },
    {
      "is_male": "True",
      "mother_age": 26.0,
      "plurality": "Triplets(3)",
      "gestation_weeks": 39
    },
    {
      "is_male": "Unknown",
      "mother_age": 29.0,
      "plurality": "Multiple(2+)",
      "gestation_weeks": 38
    },
  ]
}
response = requests.post(api, json=data, headers=headers)
print(response.content)

b'{"predictions": [{"predictions": [6.799725532531738]}, {"predictions": [6.524709224700928]}, {"predictions": [7.628015518188477]}, {"predictions": [6.689780235290527]}, {"predictions": [7.317388534545898]}, {"predictions": [6.435290813446045]}]}'
