In [1]:
PROJECT = "qwiklabs-gcp-ml-49b827b781ab"  # Replace with your PROJECT
BUCKET = "qwiklabs-gcp-ml-49b827b781ab"  # Replace with your BUCKET
REGION = "us-central1"            # Choose an available region for AI Platform Training Service
TFVERSION = "1.14"                # TF version for AI Platform Training Service to use

In [2]:
!gcloud config set project {PROJECT}
!gsutil mb -l {REGION} gs://{BUCKET}
!gsutil -m cp *.csv gs://{BUCKET}/taxifare/smallinput/

Updated property [core/project].
Creating gs://qwiklabs-gcp-ml-49b827b781ab/...
ServiceException: 409 Bucket qwiklabs-gcp-ml-49b827b781ab already exists.
CommandException: No URLs matched: *.csv
CommandException: 1 file/object could not be transferred.


In [7]:
!gsutil cp gs://cloud-training-demos/taxifare/small/*.csv .
!ls -l *.csv

Copying gs://cloud-training-demos/taxifare/small/taxi-test.csv...
Copying gs://cloud-training-demos/taxifare/small/taxi-train.csv...              
Copying gs://cloud-training-demos/taxifare/small/taxi-valid.csv...              
/ [3 files][ 10.9 MiB/ 10.9 MiB]                                                
Operation completed over 3 objects/10.9 MiB.                                     
-rw-r--r-- 1 jupyter jupyter 1799474 Dec  5 14:27 taxi-test.csv
-rw-r--r-- 1 jupyter jupyter 7986353 Dec  5 14:27 taxi-train.csv
-rw-r--r-- 1 jupyter jupyter 1673742 Dec  5 14:27 taxi-valid.csv


In [3]:
%%bash
mkdir taxifaremodel
touch taxifaremodel/__init__.py

In [4]:
%%writefile taxifaremodel/model.py
import tensorflow as tf
import shutil

CSV_COLUMN_NAMES = ["fare_amount","dayofweek","hourofday","pickuplon","pickuplat","dropofflon","dropofflat"]
CSV_DEFAULTS = [[0.0],[1],[0],[-74.0], [40.0], [-74.0], [40.7]]
FEATURE_NAMES = CSV_COLUMN_NAMES[1:]

def parse_row(row):
    fields = tf.decode_csv(records = row, record_defaults = CSV_DEFAULTS)
    features = dict(zip(CSV_COLUMN_NAMES, fields))
    label = features.pop("fare_amount")
    return features, label

def read_dataset(csv_path):
    dataset = tf.data.Dataset.list_files(file_pattern = csv_path)
    dataset = dataset.flat_map(lambda filename: tf.data.TextLineDataset(filenames = filename).skip(count = 1))
    dataset = dataset.map(map_func = parse_row)
    return dataset

def train_input_fn(csv_path, batch_size = 128):
    dataset = read_dataset(csv_path)
    dataset = dataset.shuffle(buffer_size = 1000).repeat(count = None).batch(batch_size = batch_size)
    return dataset

def eval_input_fn(csv_path, batch_size = 128):
    dataset = read_dataset(csv_path)
    dataset = dataset.batch(batch_size = batch_size)
    return dataset
  
def serving_input_receiver_fn():
    receiver_tensors = {
        "dayofweek" : tf.placeholder(dtype = tf.int32, shape = [None]), 
        "hourofday" : tf.placeholder(dtype = tf.int32, shape = [None]),
        "pickuplon" : tf.placeholder(dtype = tf.float32, shape = [None]), 
        "pickuplat" : tf.placeholder(dtype = tf.float32, shape = [None]),
        "dropofflat" : tf.placeholder(dtype = tf.float32, shape = [None]),
        "dropofflon" : tf.placeholder(dtype = tf.float32, shape = [None])
    }
    
    features = receiver_tensors
    
    return tf.estimator.export.ServingInputReceiver(features = features, receiver_tensors = receiver_tensors)
      
def my_rmse(labels, predictions):
    pred_values = tf.squeeze(input = predictions["predictions"], axis = -1)
    return {"rmse": tf.metrics.root_mean_squared_error(labels = labels, predictions = pred_values)}

def create_model(model_dir, train_steps):
    config = tf.estimator.RunConfig(
        tf_random_seed = 1,
        save_checkpoints_steps = max(10, train_steps // 10),
        model_dir = model_dir
    )
    
    feature_cols = [tf.feature_column.numeric_column(key = k) for k in FEATURE_NAMES]
    
    model = tf.estimator.DNNRegressor(
        hidden_units = [10,10],
        feature_columns = feature_cols, 
        config = config
    )
    
    model = tf.contrib.estimator.add_metrics(model, my_rmse)
    
    return model

def train_and_evaluate(params):
    OUTDIR = params["output_dir"]
    TRAIN_DATA_PATH = params["train_data_path"]
    EVAL_DATA_PATH = params["eval_data_path"]
    TRAIN_STEPS = params["train_steps"]

    model = create_model(OUTDIR, TRAIN_STEPS)

    train_spec = tf.estimator.TrainSpec(
        input_fn = lambda: train_input_fn(TRAIN_DATA_PATH),
        max_steps = TRAIN_STEPS
    )    
    exporter = tf.estimator.FinalExporter(name = "exporter", serving_input_receiver_fn = serving_input_receiver_fn)
    
    eval_spec = tf.estimator.EvalSpec(
        input_fn = lambda: eval_input_fn(EVAL_DATA_PATH),
        steps = None,
        start_delay_secs = 1,
        throttle_secs = 1,
        exporters = exporter
    )
    
    tf.logging.set_verbosity(tf.logging.INFO)
    
    shutil.rmtree(path = OUTDIR, ignore_errors = True)

    tf.estimator.train_and_evaluate(estimator = model, train_spec = train_spec, eval_spec = eval_spec)

Writing taxifaremodel/model.py


In [5]:
%%writefile taxifaremodel/task.py
import argparse
import json
import os

from . import model


if __name__ == "__main__":
    
    parser = argparse.ArgumentParser()
    
    parser.add_argument(
        "--train_data_path",
        help = "GCS or local path to training data",
        required = True
    )
    parser.add_argument(
        "--train_steps",
        help = "Steps to run the training job for (default: 1000)",
        type = int,
        default = 1000
    )
    parser.add_argument(
        "--eval_data_path",
        help = "GCS or local path to evaluation data",
        required = True
    )
    parser.add_argument(
        "--output_dir",
        help = "GCS location to write checkpoints and export models",
        required = True
    )
    parser.add_argument(
        "--job-dir",
        help = "This is not used by our model, but it is required by gcloud",
    )
    args = parser.parse_args().__dict__

    model.train_and_evaluate(args)

Writing taxifaremodel/task.py


In [8]:
%%time
!gcloud ai-platform local train \
    --package-path=taxifaremodel \
    --module-name=taxifaremodel.task \
    -- \
    --train_data_path=taxi-train.csv \
    --eval_data_path=taxi-valid.csv  \
    --train_steps=1 \
    --output_dir=taxi_trained 

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



INFO:tensorflow:Not using Distribute Coordinator.
INFO:tensorflow:Running training and evaluation locally (non-distributed).
INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after every checkpoint. Checkpoint frequency is determined based on RunConfig arguments: save_checkpoints_steps 10 or save_checkpoints_secs None.
Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


INFO:tensorflow:Calling model_fn.
INFO:tenso

In [9]:
OUTDIR = "gs://{}/taxifare/trained_small".format(BUCKET)

In [11]:
!gsutil -m rm -rf {OUTDIR} # start fresh each time
!gcloud ai-platform jobs submit training taxifare_$(date -u +%y%m%d_%H%M%S) \
    --package-path=taxifaremodel \
    --module-name=taxifaremodel.task \
    --job-dir=gs://{BUCKET}/taxifare \
    --python-version=3.5 \
    --runtime-version={TFVERSION} \
    --region={REGION} \
    -- \
    --train_data_path=gs://{BUCKET}/taxifare/smallinput/taxi-train.csv \
    --eval_data_path=gs://{BUCKET}/taxifare/smallinput/taxi-valid.csv  \
    --train_steps=1000 \
    --output_dir={OUTDIR}

Removing gs://qwiklabs-gcp-ml-49b827b781ab/taxifare/trained_small/#1574401743465196...
Removing gs://qwiklabs-gcp-ml-49b827b781ab/taxifare/trained_small/checkpoint#1574401874746997...
Removing gs://qwiklabs-gcp-ml-49b827b781ab/taxifare/trained_small/eval/#1574401765858330...
Removing gs://qwiklabs-gcp-ml-49b827b781ab/taxifare/trained_small/eval/events.out.tfevents.1574401765.cmle-training-12332736599289437873#1574401881625500...
Removing gs://qwiklabs-gcp-ml-49b827b781ab/taxifare/trained_small/events.out.tfevents.1574401743.cmle-training-12332736599289437873#1574401892297394...
Removing gs://qwiklabs-gcp-ml-49b827b781ab/taxifare/trained_small/export/#1574401882654989...
Removing gs://qwiklabs-gcp-ml-49b827b781ab/taxifare/trained_small/export/exporter/#1574401883071670...
Removing gs://qwiklabs-gcp-ml-49b827b781ab/taxifare/trained_small/export/exporter/1574401881/#1574401890248355...
Removing gs://qwiklabs-gcp-ml-49b827b781ab/taxifare/trained_small/export/exporter/1574401881/saved_model