In [2]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from absl import app
from absl import flags

import sys
import os.path
import logging

import os
from six.moves import urllib
import tempfile

import math
import numpy as np
import pandas as pd
import tensorflow as tf

from enum import Enum

import datetime
from tensorflow import keras
from tensorflow.keras.callbacks import *

from google.cloud import bigquery
from google.api_core.exceptions import GoogleAPIError

from tensorflow.python.framework import ops
from tensorflow.python.framework import dtypes
from tensorflow_io.bigquery import BigQueryClient
from tensorflow_io.bigquery import BigQueryReadSession
from tensorflow.python.client import device_lib

from tensorflow.python.data.experimental.ops import interleave_ops
from tensorflow.python.data.ops import dataset_ops

import google.cloud.logging

LOCATION = 'us'
PROJECT_ID = "alekseyv-scalableai-dev"
GOOGLE_APPLICATION_CREDENTIALS = "alekseyv-scalableai-dev-077efe757ef6.json"

# # Download options.
# DATA_URL = 'gs://alekseyv-scalableai-dev-public-bucket/criteo_kaggle.tar.gz'

# DATASET_ID = 'criteo_kaggle'

BATCH_SIZE = 128

TARGET_TYPE = Enum('TARGET_TYPE', 'local cloud')

CSV_SCHEMA = [
      bigquery.SchemaField("label", "INTEGER", mode='REQUIRED'),
      # using strings because of https://github.com/tensorflow/io/issues/619
      bigquery.SchemaField("int1", "INTEGER"),
      bigquery.SchemaField("int2", "INTEGER"),
      bigquery.SchemaField("int3", "INTEGER"),
      bigquery.SchemaField("int4", "INTEGER"),
      bigquery.SchemaField("int5", "INTEGER"),
      bigquery.SchemaField("int6", "INTEGER"),
      bigquery.SchemaField("int7", "INTEGER"),
      bigquery.SchemaField("int8", "INTEGER"),
      bigquery.SchemaField("int9", "INTEGER"),
      bigquery.SchemaField("int10", "INTEGER"),
      bigquery.SchemaField("int11", "INTEGER"),
      bigquery.SchemaField("int12", "INTEGER"),
      bigquery.SchemaField("int13", "INTEGER"),
      bigquery.SchemaField("cat1", "STRING"),
      bigquery.SchemaField("cat2", "STRING"),
      bigquery.SchemaField("cat3", "STRING"),
      bigquery.SchemaField("cat4", "STRING"),
      bigquery.SchemaField("cat5", "STRING"),
      bigquery.SchemaField("cat6", "STRING"),
      bigquery.SchemaField("cat7", "STRING"),
      bigquery.SchemaField("cat8", "STRING"),
      bigquery.SchemaField("cat9", "STRING"),
      bigquery.SchemaField("cat10", "STRING"),
      bigquery.SchemaField("cat11", "STRING"),
      bigquery.SchemaField("cat12", "STRING"),
      bigquery.SchemaField("cat13", "STRING"),
      bigquery.SchemaField("cat14", "STRING"),
      bigquery.SchemaField("cat15", "STRING"),
      bigquery.SchemaField("cat16", "STRING"),
      bigquery.SchemaField("cat17", "STRING"),
      bigquery.SchemaField("cat18", "STRING"),
      bigquery.SchemaField("cat19", "STRING"),
      bigquery.SchemaField("cat20", "STRING"),
      bigquery.SchemaField("cat21", "STRING"),
      bigquery.SchemaField("cat22", "STRING"),
      bigquery.SchemaField("cat23", "STRING"),
      bigquery.SchemaField("cat24", "STRING"),
      bigquery.SchemaField("cat25", "STRING"),
      bigquery.SchemaField("cat26", "STRING")
  ]

# hack because model_to_estimator does not understand input feature names, see
# https://cs.corp.google.com/piper///depot/google3/third_party/tensorflow_estimator/python/estimator/keras.py?rcl=282034610&l=151
KERAS_TO_ESTIMATOR_FEATURE_NAMES = {}
for i in range(0, len(CSV_SCHEMA)):
  if i != 0:  # skip label
    KERAS_TO_ESTIMATOR_FEATURE_NAMES[CSV_SCHEMA[i].name] = 'input_{}'.format(i)

print('KERAS_TO_ESTIMATOR_FEATURE_NAMES')
print(KERAS_TO_ESTIMATOR_FEATURE_NAMES)

def get_mean_and_std_dicts():
  #client = bigquery.Client(location="US", project=PROJECT_ID)
  client = bigquery.Client(project=PROJECT_ID)
  query = """
    select
    AVG(int1) as avg_int1, STDDEV(int1) as std_int1,
    AVG(int2) as avg_int2, STDDEV(int2) as std_int2,
    AVG(int3) as avg_int3, STDDEV(int3) as std_int3,
    AVG(int4) as avg_int4, STDDEV(int4) as std_int4,
    AVG(int5) as avg_int5, STDDEV(int5) as std_int5,
    AVG(int6) as avg_int6, STDDEV(int6) as std_int6,
    AVG(int7) as avg_int7, STDDEV(int7) as std_int7,
    AVG(int8) as avg_int8, STDDEV(int8) as std_int8,
    AVG(int9) as avg_int9, STDDEV(int9) as std_int9,
    AVG(int10) as avg_int10, STDDEV(int10) as std_int10,
    AVG(int11) as avg_int11, STDDEV(int11) as std_int11,
    AVG(int12) as avg_int12, STDDEV(int12) as std_int12,
    AVG(int13) as avg_int13, STDDEV(int13) as std_int13
    from `alekseyv-scalableai-dev.criteo_kaggle.days`
  """
  query_job = client.query(
      query,
      location="US",
  )  # API request - starts the query

  df = query_job.to_dataframe()
  #print(query_job.result())
  #print(query_job.errors)
  #print(df)

  mean_dict = dict((field[0].replace('avg_', ''), df[field[0]][0]) for field in df.items() if field[0].startswith('avg'))
  std_dict = dict((field[0].replace('std_', ''), df[field[0]][0]) for field in df.items() if field[0].startswith('std'))
  return (mean_dict, std_dict)

def transofrom_row(row_dict, mean_dict, std_dict):
  dict_without_label = row_dict.copy()
  #tf.print(dict_without_label)
  label = dict_without_label.pop('label')
  for field in CSV_SCHEMA:
    if (field.name.startswith('int')):
        if dict_without_label[field.name] == 0:
            value = float(dict_without_label[field.name])
            dict_without_label[field.name] = (value - mean_dict[field.name]) / std_dict[field.name]
        else:
            dict_without_label[field.name] = 0.0 # don't use normalized 0 value for nulls

  #dict_with_esitmator_keys = { KERAS_TO_ESTIMATOR_FEATURE_NAMES[k]:v for k,v in dict_without_label.items() }
  dict_with_esitmator_keys = { k:v for k,v in dict_without_label.items() }

  return (dict_with_esitmator_keys, label)

def read_bigquery(dataset_id, table_name):

  (mean_dict, std_dict) = get_mean_and_std_dicts()
  tensorflow_io_bigquery_client = BigQueryClient()
  read_session = tensorflow_io_bigquery_client.read_session(
      "projects/" + PROJECT_ID,
      PROJECT_ID, table_name, dataset_id,
      list(field.name for field in CSV_SCHEMA),
      list(dtypes.int64 if field.field_type == 'INTEGER'
           else dtypes.string for field in CSV_SCHEMA),
      requested_streams=10)

  #dataset = read_session.parallel_read_rows()

  streams = read_session.get_streams()
  tf.print('bq streams: !!!!!!!!!!!!!!!!!!!!!!')
  tf.print(streams)
  streams_count = 10 # len(streams)
  #streams_count = read_session.get_streams().shape
  tf.print('big query read session returned {} streams'.format(streams_count))

  streams_ds = dataset_ops.Dataset.from_tensor_slices(streams).shuffle(buffer_size=streams_count)
  dataset = streams_ds.interleave(
            read_session.read_rows,
            cycle_length=streams_count,
            num_parallel_calls=streams_count)
  transformed_ds = dataset.map (lambda row: transofrom_row(row, mean_dict, std_dict), num_parallel_calls=streams_count).prefetch(10000)

  # Interleave dataset is not shardable, turning off sharding
  # See https://www.tensorflow.org/tutorials/distribute/multi_worker_with_keras#dataset_sharding_and_batch_size
  # Instead we are shuffling data.
  options = tf.data.Options()
#  options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF
  result = transformed_ds.with_options(options)
  tf.print(str(result))
  return result

def get_vocabulary_size_dict():
  client = bigquery.Client(location="US", project=PROJECT_ID)
  query = """
    SELECT
    COUNT(DISTINCT cat1) as cat1,
    COUNT(DISTINCT cat2) as cat2,
    COUNT(DISTINCT cat3) as cat3,
    COUNT(DISTINCT cat4) as cat4,
    COUNT(DISTINCT cat5) as cat5,
    COUNT(DISTINCT cat6) as cat6,
    COUNT(DISTINCT cat7) as cat7,
    COUNT(DISTINCT cat8) as cat8,
    COUNT(DISTINCT cat9) as cat9,
    COUNT(DISTINCT cat10) as cat10,
    COUNT(DISTINCT cat11) as cat11,
    COUNT(DISTINCT cat12) as cat12,
    COUNT(DISTINCT cat13) as cat13,
    COUNT(DISTINCT cat14) as cat14,
    COUNT(DISTINCT cat15) as cat15,
    COUNT(DISTINCT cat16) as cat16,
    COUNT(DISTINCT cat17) as cat17,
    COUNT(DISTINCT cat18) as cat18,
    COUNT(DISTINCT cat19) as cat19,
    COUNT(DISTINCT cat20) as cat20,
    COUNT(DISTINCT cat21) as cat21,
    COUNT(DISTINCT cat22) as cat22,
    COUNT(DISTINCT cat23) as cat23,
    COUNT(DISTINCT cat24) as cat24,
    COUNT(DISTINCT cat25) as cat25,
    COUNT(DISTINCT cat26) as cat26
    FROM
      `alekseyv-scalableai-dev.criteo_kaggle.days`
  """
  query_job = client.query(
      query,
      location="US",
  )  # API request - starts the query

  df = query_job.to_dataframe()
  #print(query_job.result())
  #print(query_job.errors)
  #print(df)
  dictionary = dict((field[0], df[field[0]][0]) for field in df.items())
  #print(dir(df))
  return dictionary

def create_categorical_feature_column(categorical_vocabulary_size_dict, key):
  hash_bucket_size = min(categorical_vocabulary_size_dict[key], 100000)
  # TODO: consider using categorical_column_with_vocabulary_list
  categorical_feature_column = tf.feature_column.categorical_column_with_hash_bucket(
    #KERAS_TO_ESTIMATOR_FEATURE_NAMES[key],
    key,
    hash_bucket_size,
    dtype=tf.dtypes.string
  )
  if hash_bucket_size < 10:
    return tf.feature_column.indicator_column(categorical_feature_column)

  embedding_feature_column = tf.feature_column.embedding_column(
      categorical_feature_column,
      int(min(50, math.floor(6 * hash_bucket_size**0.25))))
  return embedding_feature_column

def create_feature_columns(categorical_vocabulary_size_dict):
  feature_columns = []
  feature_columns.extend(list(tf.feature_column.numeric_column(KERAS_TO_ESTIMATOR_FEATURE_NAMES[field.name], dtype=tf.dtypes.float32)  for field in CSV_SCHEMA if field.field_type == 'INTEGER' and field.name != 'label'))
  feature_columns.extend(list(create_categorical_feature_column(categorical_vocabulary_size_dict, key) for key, _ in categorical_vocabulary_size_dict.items()))
  return feature_columns

def create_keras_model():
  categorical_vocabulary_size_dict = get_vocabulary_size_dict()
  feature_columns = create_feature_columns(categorical_vocabulary_size_dict)
  print("categorical_vocabulary_size_dict: " + str(categorical_vocabulary_size_dict))
  feature_layer = tf.keras.layers.DenseFeatures(feature_columns, name="feature_layer")
  Dense = tf.keras.layers.Dense
  model = tf.keras.Sequential(
  [
      feature_layer,
      Dense(2560, activation=tf.nn.relu),
      Dense(1024, activation=tf.nn.relu),
      Dense(256, activation=tf.nn.relu),
      Dense(1, activation=tf.nn.sigmoid)
  ])

  # Compile Keras model
  model.compile(
      # cannot use Adagrad with mirroredstartegy https://github.com/tensorflow/tensorflow/issues/19551
      #optimizer=tf.optimizers.Adagrad(learning_rate=0.05),
      optimizer=tf.optimizers.SGD(learning_rate=0.05),
      loss=tf.keras.losses.BinaryCrossentropy(),
      metrics=['accuracy'])
  # HACK: https://b.corp.google.com/issues/114035274
  #model._is_graph_network = True
  #model.summary()
  return model

def train_keras_model(model_dir):
  logging.info('training keras model')
  #strategy = tf.distribute.experimental.ParameterServerStrategy()
  strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy() # doesn't work because of https://b.corp.google.com/issues/142700914
  #strategy = tf.distribute.MirroredStrategy()
  #strategy = tf.distribute.OneDeviceStrategy(device="/cpu:0")
  with strategy.scope():
    model = create_keras_model()
    #training_ds = read_bigquery('criteo_kaggle','days_strings').take(1000000).shuffle(10000).batch(BATCH_SIZE).prefetch(100)
    #training_ds = read_bigquery('criteo_kaggle','days').skip(100000).take(50000).shuffle(10000).batch(BATCH_SIZE)
    training_ds = read_bigquery('criteo_kaggle','days').take(1000000).shuffle(10000).batch(BATCH_SIZE)
    print('checking dataset')

    log_dir= model_dir + "/" + os.environ['HOSTNAME'] + "/logs/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1, embeddings_freq=1, profile_batch=0)

    checkpoints_dir = model_dir + "/" + os.environ['HOSTNAME'] + "/checkpoints"
    if not os.path.exists(checkpoints_dir):
        os.makedirs(checkpoints_dir)
    checkpoints_file_path = checkpoints_dir + "/epochs:{epoch:03d}-accuracy:{accuracy:.3f}.hdf5"
    # crashing https://github.com/tensorflow/tensorflow/issues/27688
    #checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(checkpoints_file_path, verbose=1, mode='max')

    fit_verbosity = 1 if TARGET == TARGET_TYPE.local else 2
    model.fit(training_ds, epochs=2, verbose=fit_verbosity,
    callbacks=[tensorboard_callback]
    )

  return model

def evaluate_keras_model(model):
  logging.info('evaluating keras model')
  eval_ds = read_bigquery('criteo_kaggle','days').skip(100000).take(50 * BATCH_SIZE).batch(BATCH_SIZE)
  loss, accuracy = model.evaluate(eval_ds)
  logging.info("Eval - Loss: {}, Accuracy: {}".format(loss, accuracy))

def input_fn():
  training_ds = read_bigquery('criteo_kaggle','days').take(1000000).shuffle(10000).batch(BATCH_SIZE)
  return training_ds

'''
FLAGS = flags.FLAGS
flags.DEFINE_string("job-dir", "", "Job directory")


def main(argv):
    if len(argv) < 1:
      raise app.UsageError("Too few command-line arguments.")

    tf.compat.v1.enable_eager_execution()

    model_dir = os.path.join(sys.argv[1], 'model.joblib')
    logging.info('Model will be saved to "%s..."', model_dir)

    # #tf.debugging.set_log_device_placement(True)
    # print("tf.config.experimental.list_logical_devices(GPU): " + str(tf.config.experimental.list_logical_devices('GPU')))
    # print("tf.config.experimental.list_physical_devices(GPU): " + str(tf.config.experimental.list_physical_devices('GPU')))
    # print("device_lib.list_local_devices(): " + str(device_lib.list_local_devices()))
    # print("tf.test.is_gpu_available(): " + str(tf.test.is_gpu_available()))

    #model = train_keras_model(model_dir)
    #evaluate_keras_model(model)

    model = create_keras_model()
    print("model.input_names:")
    print(model._is_graph_network)
    print(dir(model))

    tf.keras.backend.set_learning_phase(True)
    # Define DistributionStrategies and convert the Keras Model to an
    # Estimator that utilizes these DistributionStrateges.
    # Evaluator is a single worker, so using MirroredStrategy.
    # config = tf.estimator.RunConfig(
    #         train_distribute=tf.distribute.MirroredStrategy(),
    #         eval_distribute=tf.distribute.MirroredStrategy())
    # keras_estimator = tf.keras.estimator.model_to_estimator(
    #     keras_model=model, config=config, model_dir=model_dir)
    keras_estimator = tf.keras.estimator.model_to_estimator(
        keras_model=model, model_dir=model_dir)

    logging.info('!!!!!!!!!!!!! training MirroredStrategy on keras_estimator !!!!!!!!!!!!!!!!!!!')
    tf.estimator.train_and_evaluate(
        keras_estimator,
        train_spec=tf.estimator.TrainSpec(input_fn=input_fn, max_steps=5),
        eval_spec=tf.estimator.EvalSpec(input_fn=input_fn))


if __name__ == '__main__':
  logging_client = google.cloud.logging.Client()
  logging_client.setup_logging()
  logging.warning('>>>>>>>>>>>>>>>>>>>>>>>>>> app started logging <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<')
  print('>>>>>>>>>>>>>. executable <<<<<<<<<<<<<<<<<<<<<<<<')
  print(sys.executable)
  print(sys.version)
  print(sys.version_info)
  logging.warning(os.system('env'))

  #print('pip')
  #print(os.system('pip --version'))
  #print(os.system('pip list'))

  os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = GOOGLE_APPLICATION_CREDENTIALS
  os.environ['PROJECT_ID'] = PROJECT_ID
  print(os.system('pwd'))
  print(os.system('ls -al'))
  if (os.environ.get('CLOUDSDK_METRICS_COMMAND_NAME') == 'gcloud.ai-platform.local.train'):
    TARGET = TARGET_TYPE.local
    logging.warning('training locally')
    logging.warning('removing TF_CONFIG')
    os.environ.pop('TF_CONFIG')
  else:
    TARGET = TARGET_TYPE.cloud
    logging.warning('training in cloud')
    os.system('gsutil cp gs://alekseyv-scalableai-dev-private-bucket/criteo/alekseyv-scalableai-dev-077efe757ef6.json .')
    os.environ[ "GOOGLE_APPLICATION_CREDENTIALS"] = os.getcwd() + '/' + GOOGLE_APPLICATION_CREDENTIALS
    #os.system('gsutil cp gs://alekseyv-scalableai-dev-private-bucket/criteo/tensorflow_io-0.10.0-cp27-cp27mu-manylinux2010_x86_64.wh .')
    #os.system('pip install --no-deps tensorflow_io-0.10.0-cp27-cp27mu-manylinux2010_x86_64.whl')

  TF_CONFIG = os.environ.get('TF_CONFIG')
  if TF_CONFIG and '"master"' in TF_CONFIG:
    logging.warning('TF_CONFIG before modification:' + str(os.environ['TF_CONFIG']))
    os.environ['TF_CONFIG'] = TF_CONFIG.replace('"master"', '"chief"')

  if TF_CONFIG:
    logging.warning('TF_CONFIG:' + str(os.environ['TF_CONFIG']))
  logging.warning(os.system('cat ${GOOGLE_APPLICATION_CREDENTIALS}'))
  app.run(main)

'''

KERAS_TO_ESTIMATOR_FEATURE_NAMES
{'int1': 'input_1', 'int2': 'input_2', 'int3': 'input_3', 'int4': 'input_4', 'int5': 'input_5', 'int6': 'input_6', 'int7': 'input_7', 'int8': 'input_8', 'int9': 'input_9', 'int10': 'input_10', 'int11': 'input_11', 'int12': 'input_12', 'int13': 'input_13', 'cat1': 'input_14', 'cat2': 'input_15', 'cat3': 'input_16', 'cat4': 'input_17', 'cat5': 'input_18', 'cat6': 'input_19', 'cat7': 'input_20', 'cat8': 'input_21', 'cat9': 'input_22', 'cat10': 'input_23', 'cat11': 'input_24', 'cat12': 'input_25', 'cat13': 'input_26', 'cat14': 'input_27', 'cat15': 'input_28', 'cat16': 'input_29', 'cat17': 'input_30', 'cat18': 'input_31', 'cat19': 'input_32', 'cat20': 'input_33', 'cat21': 'input_34', 'cat22': 'input_35', 'cat23': 'input_36', 'cat24': 'input_37', 'cat25': 'input_38', 'cat26': 'input_39'}




In [50]:
def read_gcs(table_name, **kwargs):
  gcs_filename_glob = 'gs://alekseyv-scalableai-dev-public-bucket/criteo_kaggle_from_bq/{}*'.format(table_name)
  selected_columns = list(field.name for field in CSV_SCHEMA)
  column_names = selected_columns + ['row_hash']

  dataset = tf.data.experimental.make_csv_dataset(
      gcs_filename_glob,
      batch_size=5,
      column_names = column_names,
      select_columns = selected_columns,
      num_epochs=1,
      field_delim='\t',
      header=False,
      ignore_errors=False,
      **kwargs)
  return dataset


#dataset = read_gcs('test_small').take(10)
dataset = read_gcs('test_small').apply(tf.data.experimental.unbatch()).map(lambda row: row).take(2)
row_index = 0
for row in dataset:
  print("row %d: %s \n\n" % (row_index, row))
  row_index += 1
    
#dataset = read_gcs('test_small').take(10)
dataset = read_bigquery('criteo_kaggle','test_small').take(2)
row_index = 0
for row in dataset:
  print("row %d: %s \n\n" % (row_index, row))
  row_index += 1    

Instructions for updating:
Use `tf.data.Dataset.unbatch()`.


Instructions for updating:
Use `tf.data.Dataset.unbatch()`.


row 0: OrderedDict([('label', <tf.Tensor: id=82551, shape=(), dtype=int32, numpy=0>), ('int1', <tf.Tensor: id=82538, shape=(), dtype=int32, numpy=0>), ('int2', <tf.Tensor: id=82543, shape=(), dtype=int32, numpy=61>), ('int3', <tf.Tensor: id=82544, shape=(), dtype=int32, numpy=3>), ('int4', <tf.Tensor: id=82545, shape=(), dtype=int32, numpy=0>), ('int5', <tf.Tensor: id=82546, shape=(), dtype=int32, numpy=0>), ('int6', <tf.Tensor: id=82547, shape=(), dtype=int32, numpy=0>), ('int7', <tf.Tensor: id=82548, shape=(), dtype=int32, numpy=0>), ('int8', <tf.Tensor: id=82549, shape=(), dtype=int32, numpy=0>), ('int9', <tf.Tensor: id=82550, shape=(), dtype=int32, numpy=0>), ('int10', <tf.Tensor: id=82539, shape=(), dtype=int32, numpy=0>), ('int11', <tf.Tensor: id=82540, shape=(), dtype=int32, numpy=0>), ('int12', <tf.Tensor: id=82541, shape=(), dtype=int32, numpy=0>), ('int13', <tf.Tensor: id=82542, shape=(), dtype=int32, numpy=0>), ('cat1', <tf.Tensor: id=82512, shape=(), dtype=string, numpy=b'0



bq streams: !!!!!!!!!!!!!!!!!!!!!!
["projects/alekseyv-scalableai-dev/locations/us/streams/EgxzaHhoanpHT2hlcTEaAmlyKAE"]
big query read session returned 10 streams
<DatasetV1Adapter shapes: ({cat1: (), cat10: (), cat11: (), cat12: (), cat13: (), cat14: (), cat15: (), cat16: (), cat17: (), cat18: (), cat19: (), cat2: (), cat20: (), cat21: (), cat22: (), cat23: (), cat24: (), cat25: (), cat26: (), cat3: (), cat4: (), cat5: (), cat6: (), cat7: (), cat8: (), cat9: (), int1: (), int10: (), int11: (), int12: (), int13: (), int2: (), int3: (), int4: (), int5: (), int6: (), int7: (), int8: (), int9: ()}, ()), types: ({cat1: tf.string, cat10: tf.string, cat11: tf.string, cat12: tf.string, cat13: tf.string, cat14: tf.string, cat15: tf.string, cat16: tf.string, cat17: tf.string, cat18: tf.string, cat19: tf.string, cat2: tf.string, cat20: tf.string, cat21: tf.string, cat22: tf.string, cat23: tf.string, cat24: tf.string, cat25: tf.string, cat26: tf.string, cat3: tf.string, cat4: tf.string, cat5: tf

In [73]:
@tf.function
def transofrom_row_gcs(row_dict):
  dict_without_label = row_dict.copy()
  label = dict_without_label.pop('label')
  for field in CSV_SCHEMA:
    if (field.name.startswith('int')):
        if dict_without_label[field.name] == 0:
            value = float(dict_without_label[field.name])
            dict_without_label[field.name] = (value - 0) / 1
        else:
            dict_without_label[field.name] = 0.0 # don't use normalized 0 value for nulls
    elif (field.name != 'label'):
      dict_without_label[field.name] = str(dict_without_label[field.name])

  dict_with_esitmator_keys = { k:v for k,v in dict_without_label.items() }
  return (dict_with_esitmator_keys, label)

def transofrom_row_gcs2(row_dict):
  dict_without_label = row_dict.copy()
  label = tf.reshape(dict_without_label.pop('label'), [])
  for field in CSV_SCHEMA:
    if (field.name.startswith('int')):
        if dict_without_label[field.name] == 0:
            value = float(tf.reshape(dict_without_label[field.name], []))
            dict_without_label[field.name] = (value - 0) / 1
        else:
            dict_without_label[field.name] = 0.0 # don't use normalized 0 value for nulls
    elif (field.name != 'label'):
      dict_without_label[field.name] = str(tf.reshape(dict_without_label[field.name], []))

  dict_with_esitmator_keys = { k:v for k,v in dict_without_label.items() }
  return (dict_with_esitmator_keys, label)

def read_gcs(table_name, **kwargs):
  gcs_filename_glob = 'gs://alekseyv-scalableai-dev-public-bucket/criteo_kaggle_from_bq/{}*'.format(table_name)
  selected_columns = list(field.name for field in CSV_SCHEMA)
  column_names = selected_columns + ['row_hash']

  dataset = tf.data.experimental.make_csv_dataset(
      gcs_filename_glob,
      batch_size=1,
      column_names = column_names,
      select_columns = selected_columns,
      num_epochs=1,
      field_delim='\t',
      header=False,
      ignore_errors=False,
      **kwargs)
  return dataset

dataset = read_gcs('test_small') \
 .apply(tf.data.experimental.unbatch()) \
 .map(lambda row: transofrom_row(row)).take(5)

row_index = 0
for row in dataset:
  print("row %d: %s \n\n" % (row_index, row))
  row_index += 1    







TypeError: in converted code:

    <ipython-input-73-3e390697af60>:53 <lambda>
        .map(lambda row: transofrom_row(row)).take(5)

    TypeError: transofrom_row() missing 2 required positional arguments: 'mean_dict' and 'std_dict'


In [71]:
import sys
sys.version
import tensorflow as tf
print(tf.version)
#tf.dtypes.float32

AttributeError: module 'tensorflow_core._api.v2.version' has no attribute 'version'

In [12]:
%pip --version

pip 19.3.1 from /usr/local/lib/python3.7/dist-packages/pip (python 3.7)
Note: you may need to restart the kernel to use updated packages.


In [13]:
training_ds = read_bigquery('criteo_kaggle','days').take(20).shuffle(2)
row_index = 0
for row in training_ds.take(10):
    #print(">>>>>> row['title'] %s" % row['title'] )
    print(">>>>>> row %d: %s" % (row_index, row))
    row_index += 1




bq streams: !!!!!!!!!!!!!!!!!!!!!!
["projects/alekseyv-scalableai-dev/locations/us/streams/EghhZWh5azlWeBoCaXIoAQ" "projects/alekseyv-scalableai-dev/locations/us/streams/CAESCGFlaHlrOVZ4GgJpcigB" "projects/alekseyv-scalableai-dev/locations/us/streams/CAISCGFlaHlrOVZ4GgJpcigB" ... "projects/alekseyv-scalableai-dev/locations/us/streams/CAcSCGFlaHlrOVZ4GgJpcigB" "projects/alekseyv-scalableai-dev/locations/us/streams/CAgSCGFlaHlrOVZ4GgJpcigB" "projects/alekseyv-scalableai-dev/locations/us/streams/CAkSCGFlaHlrOVZ4GgJpcigB"]
big query read session returned 10 streams
<DatasetV1Adapter shapes: ({cat1: (), cat10: (), cat11: (), cat12: (), cat13: (), cat14: (), cat15: (), cat16: (), cat17: (), cat18: (), cat19: (), cat2: (), cat20: (), cat21: (), cat22: (), cat23: (), cat24: (), cat25: (), cat26: (), cat3: (), cat4: (), cat5: (), cat6: (), cat7: (), cat8: (), cat9: (), int1: (), int10: (), int11: (), int12: (), int13: (), int2: (), int3: (), int4: (), int5: (), int6: (), int7: (), int8: (), int

>>>>>> row 5: ({'cat1': <tf.Tensor: id=639, shape=(), dtype=string, numpy=b'70d60005'>, 'cat10': <tf.Tensor: id=640, shape=(), dtype=string, numpy=b'0466803a'>, 'cat11': <tf.Tensor: id=641, shape=(), dtype=string, numpy=b'3899cbf0'>, 'cat12': <tf.Tensor: id=642, shape=(), dtype=string, numpy=b'd72a8b65'>, 'cat13': <tf.Tensor: id=643, shape=(), dtype=string, numpy=b'5bdabfc1'>, 'cat14': <tf.Tensor: id=644, shape=(), dtype=string, numpy=b'07d13a8f'>, 'cat15': <tf.Tensor: id=645, shape=(), dtype=string, numpy=b'0507b832'>, 'cat16': <tf.Tensor: id=646, shape=(), dtype=string, numpy=b'4b6488bc'>, 'cat17': <tf.Tensor: id=647, shape=(), dtype=string, numpy=b'e5ba7672'>, 'cat18': <tf.Tensor: id=648, shape=(), dtype=string, numpy=b'b76fb0de'>, 'cat19': <tf.Tensor: id=649, shape=(), dtype=string, numpy=b'21ddcdc9'>, 'cat2': <tf.Tensor: id=650, shape=(), dtype=string, numpy=b'3ab4d7f5'>, 'cat20': <tf.Tensor: id=651, shape=(), dtype=string, numpy=b'a458ea53'>, 'cat21': <tf.Tensor: id=652, shape=()

In [14]:
categorical_vocabulary_size_dict = get_vocabulary_size_dict()
#feature_columns = create_feature_columns(categorical_vocabulary_size_dict)



In [21]:
def create_input_layer(categorical_vocabulary_size_dict):
    numeric_feature_columns = list(tf.feature_column.numeric_column(field.name, dtype=tf.dtypes.float32)  for field in CSV_SCHEMA if field.field_type == 'INTEGER' and field.name != 'label')
    numerical_input_layers = {
       feature_column.name: tf.keras.layers.Input(name=feature_column.name, shape=(1,), dtype=tf.float32)
       for feature_column in numeric_feature_columns
    }
    categorical_feature_columns = list(create_categorical_feature_column(categorical_vocabulary_size_dict, key) for key, _ in categorical_vocabulary_size_dict.items())
    #print("categorical_feature_columns: " + str(categorical_feature_columns))
    categorical_input_layers = {
       feature_column.categorical_column.name: tf.keras.layers.Input(name=feature_column.categorical_column.name, shape=(), dtype=tf.string)
       for feature_column in categorical_feature_columns
    }
    #print("categorical_input_layers: " + str(categorical_input_layers))
    input_layers = numerical_input_layers.copy()
    input_layers.update(categorical_input_layers)

    return (input_layers, numeric_feature_columns + categorical_feature_columns)

def create_keras_model_functional(categorical_vocabulary_size_dict): 
    (feature_layer_inputs, feature_columns) = create_input_layer(categorical_vocabulary_size_dict)
    #print(input_layers)
    #x = tf.keras.layers.DenseFeatures(feature_columns)(input_layers)
    
    print("")
    print("feature_columns: " + str(feature_columns))
    print("")
    print("feature_layer_inputs: " + str(feature_layer_inputs))
    print("")
    
    feature_layer = tf.keras.layers.DenseFeatures(feature_columns)
    feature_layer_outputs = feature_layer(feature_layer_inputs)
    
    
    x = tf.keras.layers.Dense(2560, activation=tf.nn.relu)(feature_layer_outputs)
    x = tf.keras.layers.Dense(1024, activation=tf.nn.relu)(x)
    x = tf.keras.layers.Dense(256, activation=tf.nn.relu)(x)
    outputs = tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)(x)
    #output = tf.squeeze(output, -1)
    inputs=[v for v in feature_layer_inputs.values()]
    
    print("inputs: " + str(inputs))
    print("outputs: " + str(outputs))
    
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    
    # Compile Keras model
    model.compile(
      # cannot use Adagrad with mirroredstartegy https://github.com/tensorflow/tensorflow/issues/19551
      #optimizer=tf.optimizers.Adagrad(learning_rate=0.05),
      optimizer=tf.optimizers.SGD(learning_rate=0.05),
      loss=tf.keras.losses.BinaryCrossentropy(),
      metrics=['accuracy'])
    print("model: " + str(model.summary()))    
    return model    
    

#strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy() # doesn't work because of https://b.corp.google.com/issues/142700914
strategy = tf.distribute.MirroredStrategy()
#strategy = tf.distribute.OneDeviceStrategy(device="/cpu:0")
with strategy.scope():    
    model = create_keras_model_functional(categorical_vocabulary_size_dict)
    model_dir = './trained/criteo_kaggle7/model.joblib'
    #model.fit(training_ds)
    keras_estimator = tf.keras.estimator.model_to_estimator(
        keras_model=model, model_dir=model_dir)
    logging.info('!!!!!!!!!!!!! training MirroredStrategy on keras_estimator !!!!!!!!!!!!!!!!!!!')
    tf.estimator.train_and_evaluate(
        keras_estimator,
        train_spec=tf.estimator.TrainSpec(input_fn=input_fn, max_steps=20),
        eval_spec=tf.estimator.EvalSpec(input_fn=input_fn))






feature_columns: [NumericColumn(key='int1', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='int2', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='int3', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='int4', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='int5', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='int6', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='int7', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='int8', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='int9', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericColumn(key='int10', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), NumericCol

inputs: [<tf.Tensor 'int1_6:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'int2_6:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'int3_6:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'int4_6:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'int5_6:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'int6_6:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'int7_6:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'int8_6:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'int9_6:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'int10_6:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'int11_6:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'int12_6:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'int13_6:0' shape=(None, 1) dtype=float32>, <tf.Tensor 'cat1_1:0' shape=(None,) dtype=string>, <tf.Tensor 'cat2_1:0' shape=(None,) dtype=string>, <tf.Tensor 'cat3_1:0' shape=(None,) dtype=string>, <tf.Tensor 'cat4_1:0' shape=(None,) dtype=string>, <tf.Tensor 'cat5_1:0' shape=(None,) dtype=string>, <tf.Tensor 'cat6_1:0' shape=(N

INFO:tensorflow:Using default config.


INFO:tensorflow:Using the Keras model provided.


INFO:tensorflow:Using the Keras model provided.


INFO:tensorflow:Using config: {'_model_dir': './trained/criteo_kaggle7/model.joblib', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7febac737c90>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


INFO:tensorflow:Using config: {'_model_dir': './trained/criteo_kaggle7/model.joblib', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7febac737c90>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


INFO:tensorflow:Not using Distribute Coordinator.


INFO:tensorflow:Not using Distribute Coordinator.


INFO:tensorflow:Running training and evaluation locally (non-distributed).


INFO:tensorflow:Running training and evaluation locally (non-distributed).


INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after every checkpoint. Checkpoint frequency is determined based on RunConfig arguments: save_checkpoints_steps None or save_checkpoints_secs 600.


INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after every checkpoint. Checkpoint frequency is determined based on RunConfig arguments: save_checkpoints_steps None or save_checkpoints_secs 600.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Warm-starting with WarmStartSettings: WarmStartSettings(ckpt_to_initialize_from='./trained/criteo_kaggle7/model.joblib/keras/keras_model.ckpt', vars_to_warm_start='.*', var_name_to_vocab_info={}, var_name_to_prev_var_name={})


INFO:tensorflow:Warm-starting with WarmStartSettings: WarmStartSettings(ckpt_to_initialize_from='./trained/criteo_kaggle7/model.joblib/keras/keras_model.ckpt', vars_to_warm_start='.*', var_name_to_vocab_info={}, var_name_to_prev_var_name={})


INFO:tensorflow:Warm-starting from: ./trained/criteo_kaggle7/model.joblib/keras/keras_model.ckpt


INFO:tensorflow:Warm-starting from: ./trained/criteo_kaggle7/model.joblib/keras/keras_model.ckpt


INFO:tensorflow:Warm-starting variables only in TRAINABLE_VARIABLES.


INFO:tensorflow:Warm-starting variables only in TRAINABLE_VARIABLES.


INFO:tensorflow:Warm-started 32 variables.


INFO:tensorflow:Warm-started 32 variables.


INFO:tensorflow:Create CheckpointSaverHook.


INFO:tensorflow:Create CheckpointSaverHook.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Saving checkpoints for 0 into ./trained/criteo_kaggle7/model.joblib/model.ckpt.


INFO:tensorflow:Saving checkpoints for 0 into ./trained/criteo_kaggle7/model.joblib/model.ckpt.


INFO:tensorflow:loss = 0.7062943, step = 0


INFO:tensorflow:loss = 0.7062943, step = 0


INFO:tensorflow:Saving checkpoints for 20 into ./trained/criteo_kaggle7/model.joblib/model.ckpt.


INFO:tensorflow:Saving checkpoints for 20 into ./trained/criteo_kaggle7/model.joblib/model.ckpt.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Starting evaluation at 2019-12-09T11:36:06Z


INFO:tensorflow:Starting evaluation at 2019-12-09T11:36:06Z


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Restoring parameters from ./trained/criteo_kaggle7/model.joblib/model.ckpt-20


INFO:tensorflow:Restoring parameters from ./trained/criteo_kaggle7/model.joblib/model.ckpt-20


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Evaluation [10/100]


INFO:tensorflow:Evaluation [10/100]


INFO:tensorflow:Evaluation [20/100]


INFO:tensorflow:Evaluation [20/100]


INFO:tensorflow:Evaluation [30/100]


INFO:tensorflow:Evaluation [30/100]


INFO:tensorflow:Evaluation [40/100]


INFO:tensorflow:Evaluation [40/100]


INFO:tensorflow:Evaluation [50/100]


INFO:tensorflow:Evaluation [50/100]


INFO:tensorflow:Evaluation [60/100]


INFO:tensorflow:Evaluation [60/100]


INFO:tensorflow:Evaluation [70/100]


INFO:tensorflow:Evaluation [70/100]


INFO:tensorflow:Evaluation [80/100]


INFO:tensorflow:Evaluation [80/100]


INFO:tensorflow:Evaluation [90/100]


INFO:tensorflow:Evaluation [90/100]


INFO:tensorflow:Evaluation [100/100]


INFO:tensorflow:Evaluation [100/100]


INFO:tensorflow:Finished evaluation at 2019-12-09-11:36:13


INFO:tensorflow:Finished evaluation at 2019-12-09-11:36:13


INFO:tensorflow:Saving dict for global step 20: accuracy = 0.5771875, global_step = 20, loss = 0.67990685


INFO:tensorflow:Saving dict for global step 20: accuracy = 0.5771875, global_step = 20, loss = 0.67990685


INFO:tensorflow:Saving 'checkpoint_path' summary for global step 20: ./trained/criteo_kaggle7/model.joblib/model.ckpt-20


INFO:tensorflow:Saving 'checkpoint_path' summary for global step 20: ./trained/criteo_kaggle7/model.joblib/model.ckpt-20


INFO:tensorflow:Loss for final step: 0.6742929.


INFO:tensorflow:Loss for final step: 0.6742929.


In [None]:
def create_categorical_feature_column(categorical_vocabulary_size_dict, key):
  hash_bucket_size = min(categorical_vocabulary_size_dict[key], 100000)
  # TODO: consider using categorical_column_with_vocabulary_list
  categorical_feature_column = tf.feature_column.categorical_column_with_hash_bucket(
    #KERAS_TO_ESTIMATOR_FEATURE_NAMES[key],

    hash_bucket_size,
    dtype=tf.dtypes.string
  )
  if hash_bucket_size < 10:
    return tf.feature_column.indicator_column(categorical_feature_column)

  embedding_feature_column = tf.feature_column.embedding_column(
      categorical_feature_column,
      int(min(50, math.floor(6 * hash_bucket_size**0.25))))
  return embedding_feature_column

def create_feature_columns(categorical_vocabulary_size_dict):
  feature_columns = []
  feature_columns.extend(list(tf.feature_column.numeric_column(KERAS_TO_ESTIMATOR_FEATURE_NAMES[field.name], dtype=tf.dtypes.float32)  for field in CSV_SCHEMA if field.field_type == 'INTEGER' and field.name != 'label'))
  feature_columns.extend(list(create_categorical_feature_column(categorical_vocabulary_size_dict, key) for key, _ in categorical_vocabulary_size_dict.items()))
  return feature_columns



def create_keras_model_sequential():
  categorical_vocabulary_size_dict = get_vocabulary_size_dict()
  feature_columns = create_feature_columns(categorical_vocabulary_size_dict)
  print("categorical_vocabulary_size_dict: " + str(categorical_vocabulary_size_dict))
  model = tf.keras.Sequential(
  [
      tf.keras.layers.DenseFeatures(feature_columns)
      tf.keras.layers.Dense(2560, activation=tf.nn.relu),
      tf.keras.layers.Dense(1024, activation=tf.nn.relu),
      tf.keras.layers.Dense(256, activation=tf.nn.relu),
      tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)
  ])

  # Compile Keras model
  model.compile(
      # cannot use Adagrad with mirroredstartegy https://github.com/tensorflow/tensorflow/issues/19551
      #optimizer=tf.optimizers.Adagrad(learning_rate=0.05),
      optimizer=tf.optimizers.SGD(learning_rate=0.05),
      loss=tf.keras.losses.BinaryCrossentropy(),
      metrics=['accuracy'])
  # HACK: https://b.corp.google.com/issues/114035274
  #model._is_graph_network = True
  #model.summary()
  return model


model = create_keras_model_sequential()

model_dir = './trained/criteo_kaggle3/model.joblib'


keras_estimator = tf.keras.estimator.model_to_estimator(
    keras_model=model, model_dir=model_dir)

logging.info('!!!!!!!!!!!!! training MirroredStrategy on keras_estimator !!!!!!!!!!!!!!!!!!!')
tf.estimator.train_and_evaluate(
    keras_estimator,
    train_spec=tf.estimator.TrainSpec(input_fn=input_fn, max_steps=2),
    eval_spec=tf.estimator.EvalSpec(input_fn=input_fn))

In [32]:
model_dir = './trained/criteo_kaggle_estimator/model.joblib'

def create_categorical_feature_column(categorical_vocabulary_size_dict, key):
  hash_bucket_size = min(categorical_vocabulary_size_dict[key], 100000)
  # TODO: consider using categorical_column_with_vocabulary_list
  categorical_feature_column = tf.feature_column.categorical_column_with_hash_bucket(
    #KERAS_TO_ESTIMATOR_FEATURE_NAMES[key],
    key,
    hash_bucket_size,
    dtype=tf.dtypes.string
  )
  if hash_bucket_size < 10:
    return tf.feature_column.indicator_column(categorical_feature_column)

  embedding_feature_column = tf.feature_column.embedding_column(
      categorical_feature_column,
      int(min(50, math.floor(6 * hash_bucket_size**0.25))))
  return embedding_feature_column

def create_feature_columns(categorical_vocabulary_size_dict):
  feature_columns = []
  feature_columns.extend(list(tf.feature_column.numeric_column(field.name, dtype=tf.dtypes.float32)  for field in CSV_SCHEMA if field.field_type == 'INTEGER' and field.name != 'label'))
  feature_columns.extend(list(create_categorical_feature_column(categorical_vocabulary_size_dict, key) for key, _ in categorical_vocabulary_size_dict.items()))
  return feature_columns

categorical_vocabulary_size_dict = get_vocabulary_size_dict()
feature_columns = create_feature_columns(categorical_vocabulary_size_dict)

config = tf.estimator.RunConfig(
        train_distribute=tf.distribute.MirroredStrategy(),
        eval_distribute=tf.distribute.MirroredStrategy())
config = tf.estimator.RunConfig(
        train_distribute=tf.distribute.OneDeviceStrategy(device="/cpu:0"),
        eval_distribute=tf.distribute.OneDeviceStrategy(device="/cpu:0"))

#model_dir=model_dir,
estimator = tf.estimator.DNNClassifier(
    optimizer=tf.optimizers.SGD(learning_rate=0.05),
    feature_columns=feature_columns,
    hidden_units=[2560, 1024, 256],
    
    n_classes=2,
    config=config)

tf.estimator.train_and_evaluate(
    estimator,
    train_spec=tf.estimator.TrainSpec(input_fn=input_fn, max_steps=2),
    eval_spec=tf.estimator.EvalSpec(input_fn=input_fn))











INFO:tensorflow:Initializing RunConfig with distribution strategies.


INFO:tensorflow:Initializing RunConfig with distribution strategies.


INFO:tensorflow:Not using Distribute Coordinator.


INFO:tensorflow:Not using Distribute Coordinator.


INFO:tensorflow:Initializing RunConfig with distribution strategies.


INFO:tensorflow:Initializing RunConfig with distribution strategies.


INFO:tensorflow:Not using Distribute Coordinator.


INFO:tensorflow:Not using Distribute Coordinator.






INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmprhp54hnl', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': <tensorflow.python.distribute.one_device_strategy.OneDeviceStrategy object at 0x7febc4582090>, '_device_fn': None, '_protocol': None, '_eval_distribute': <tensorflow.python.distribute.one_device_strategy.OneDeviceStrategy object at 0x7febd4738d10>, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7febd47382d0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluatio

INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmprhp54hnl', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': <tensorflow.python.distribute.one_device_strategy.OneDeviceStrategy object at 0x7febc4582090>, '_device_fn': None, '_protocol': None, '_eval_distribute': <tensorflow.python.distribute.one_device_strategy.OneDeviceStrategy object at 0x7febd4738d10>, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7febd47382d0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluatio

INFO:tensorflow:Not using Distribute Coordinator.


INFO:tensorflow:Not using Distribute Coordinator.


INFO:tensorflow:Running training and evaluation locally (non-distributed).


INFO:tensorflow:Running training and evaluation locally (non-distributed).


INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after every checkpoint. Checkpoint frequency is determined based on RunConfig arguments: save_checkpoints_steps None or save_checkpoints_secs 600.


INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after every checkpoint. Checkpoint frequency is determined based on RunConfig arguments: save_checkpoints_steps None or save_checkpoints_secs 600.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Create CheckpointSaverHook.


INFO:tensorflow:Create CheckpointSaverHook.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmprhp54hnl/model.ckpt.


INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmprhp54hnl/model.ckpt.


INFO:tensorflow:loss = 0.72376955, step = 0


INFO:tensorflow:loss = 0.72376955, step = 0


INFO:tensorflow:Saving checkpoints for 2 into /tmp/tmprhp54hnl/model.ckpt.


INFO:tensorflow:Saving checkpoints for 2 into /tmp/tmprhp54hnl/model.ckpt.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Starting evaluation at 2019-12-10T15:14:55Z


INFO:tensorflow:Starting evaluation at 2019-12-10T15:14:55Z


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Restoring parameters from /tmp/tmprhp54hnl/model.ckpt-2


INFO:tensorflow:Restoring parameters from /tmp/tmprhp54hnl/model.ckpt-2


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Evaluation [10/100]


INFO:tensorflow:Evaluation [10/100]


INFO:tensorflow:Evaluation [20/100]


INFO:tensorflow:Evaluation [20/100]


INFO:tensorflow:Evaluation [30/100]


INFO:tensorflow:Evaluation [30/100]


INFO:tensorflow:Evaluation [40/100]


INFO:tensorflow:Evaluation [40/100]


INFO:tensorflow:Evaluation [50/100]


INFO:tensorflow:Evaluation [50/100]


INFO:tensorflow:Evaluation [60/100]


INFO:tensorflow:Evaluation [60/100]


INFO:tensorflow:Evaluation [70/100]


INFO:tensorflow:Evaluation [70/100]


INFO:tensorflow:Evaluation [80/100]


INFO:tensorflow:Evaluation [80/100]


INFO:tensorflow:Evaluation [90/100]


INFO:tensorflow:Evaluation [90/100]


INFO:tensorflow:Evaluation [100/100]


INFO:tensorflow:Evaluation [100/100]


INFO:tensorflow:Finished evaluation at 2019-12-10-15:15:02


INFO:tensorflow:Finished evaluation at 2019-12-10-15:15:02


INFO:tensorflow:Saving dict for global step 2: accuracy = 0.42476562, accuracy_baseline = 0.58671874, auc = 0.49523786, auc_precision_recall = 0.4172819, average_loss = 0.6997528, global_step = 2, label/mean = 0.41328126, loss = 0.6997528, precision = 0.41308177, prediction/mean = 0.5165568, recall = 0.9311909


INFO:tensorflow:Saving dict for global step 2: accuracy = 0.42476562, accuracy_baseline = 0.58671874, auc = 0.49523786, auc_precision_recall = 0.4172819, average_loss = 0.6997528, global_step = 2, label/mean = 0.41328126, loss = 0.6997528, precision = 0.41308177, prediction/mean = 0.5165568, recall = 0.9311909


INFO:tensorflow:Saving 'checkpoint_path' summary for global step 2: /tmp/tmprhp54hnl/model.ckpt-2


INFO:tensorflow:Saving 'checkpoint_path' summary for global step 2: /tmp/tmprhp54hnl/model.ckpt-2


INFO:tensorflow:Loss for final step: 0.7019203.


INFO:tensorflow:Loss for final step: 0.7019203.


({'accuracy': 0.42476562,
  'accuracy_baseline': 0.58671874,
  'auc': 0.49523786,
  'auc_precision_recall': 0.4172819,
  'average_loss': 0.6997528,
  'label/mean': 0.41328126,
  'loss': 0.6997528,
  'precision': 0.41308177,
  'prediction/mean': 0.5165568,
  'recall': 0.9311909,
  'global_step': 2},
 [])