In [34]:
%%writefile younseun_input/train_data.csv
country,age,sex,height
kr,10,boy,120.0
kr,10,girl,110.0
kr,13,boy,150.0
kr,13,girl,140.0
kr,16,boy,170.0
kr,16,girl,155.0
kr,19,boy,180.0
kr,19,girl,165.0
kr,22,boy,182.0
kr,22,girl,167.0
ca,10,boy,122.0
ca,10,girl,113.0
ca,13,boy,153.0
ca,13,girl,143.0
ca,16,boy,173.0
ca,16,girl,157.0
ca,19,boy,182.0
ca,19,girl,165.0
ca,22,boy,185.0
ca,22,girl,169.0

Overwriting younseun_input/train_data.csv


In [35]:
import tensorflow as tf
import pandas as pd
import numpy as np
import shutil
print(tf.__version__)


1.15.0


In [36]:
CSV_COLUMN_NAMES = ["country","age","sex","height"]
CSV_DEFAULTS = [['z1'],[10],['none'],[150]]
CSV_COLUMN_NAMES,CSV_DEFAULTS

(['country', 'age', 'sex', 'height'], [['z1'], [10], ['none'], [150]])

In [37]:
def parse_row(row):
    fields = tf.decode_csv(records = row, record_defaults = CSV_DEFAULTS)
    features = dict(zip(CSV_COLUMN_NAMES, fields))
    label = features.pop("height")
    return features, label

In [38]:
def read_dataset(csv_path):
    dataset = tf.data.TextLineDataset(filenames = csv_path).skip(count = 1) # skip header
    dataset = dataset.map(map_func = parse_row)
    return dataset

In [39]:
def train_input_fn(csv_path, batch_size = 3):
    dataset = read_dataset(csv_path)
    dataset = dataset.shuffle(buffer_size = 1000).repeat(count = None).batch(batch_size = batch_size)
    print(dataset)
    return dataset

In [40]:
def eval_input_fn(csv_path, batch_size = 3):
    dataset = read_dataset(csv_path)
    dataset = dataset.batch(batch_size = batch_size)
    return dataset

In [41]:
FEATURE_NAMES = CSV_COLUMN_NAMES[:-1] # all but first column

feature_cols = [tf.feature_column.categorical_column_with_vocabulary_list(key = 'country',
                                                       vocabulary_list = ['kr','ca']) ,
                tf.feature_column.numeric_column(key = 'age') ,
                tf.feature_column.categorical_column_with_identity(key = 'sex', num_buckets = 3) ,
                ]
feature_cols

[VocabularyListCategoricalColumn(key='country', vocabulary_list=('kr', 'ca'), dtype=tf.string, default_value=-1, num_oov_buckets=0),
 NumericColumn(key='age', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None),
 IdentityCategoricalColumn(key='sex', number_buckets=3, default_value=None)]

In [42]:
# Data 확인
df_train = pd.read_csv(filepath_or_buffer = "./younseun_input/train_data.csv")
df_train

Unnamed: 0,country,age,sex,height
0,kr,10,boy,120.0
1,kr,10,girl,110.0
2,kr,13,boy,150.0
3,kr,13,girl,140.0
4,kr,16,boy,170.0
5,kr,16,girl,155.0
6,kr,19,boy,180.0
7,kr,19,girl,165.0
8,kr,22,boy,182.0
9,kr,22,girl,167.0


In [43]:
def serving_input_receiver_fn():
    receiver_tensors = {'country':tf.placeholder(dtype = tf.string,shape=[None]),
                        'age'    :tf.placeholder(dtype = tf.float32,shape=[None]),
                        'sex'    :tf.placeholder(dtype = tf.stgring,shape=[None]),
                        }
    features = receiver_tensors
    return tf.estimator.export.ServingInputReceiver(features = features, receiver_tensors = receiver_tensors)


In [44]:
OUTDIR = "younseun_trained"

config = tf.estimator.RunConfig(
    model_dir=OUTDIR,
    save_summary_steps=100,
    save_checkpoints_steps=2000
)

In [45]:
myopt = tf.train.AdamOptimizer(learning_rate=0.01)
model = tf.estimator.DNNRegressor(model_dir=OUTDIR,
                                  hidden_units=[10, 10],
                                  feature_columns=feature_cols,
                                  activation_fn=tf.nn.relu,
                                  optimizer=myopt,
                                  config = config)

INFO:tensorflow:Using config: {'_tf_random_seed': None, '_log_step_count_steps': 100, '_master': '', '_keep_checkpoint_max': 5, '_train_distribute': None, '_save_summary_steps': 100, '_session_creation_timeout_secs': 7200, '_global_id_in_cluster': 0, '_evaluation_master': '', '_task_type': 'worker', '_experimental_distribute': None, '_save_checkpoints_steps': 2000, '_task_id': 0, '_experimental_max_worker_delay_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f339c288438>, '_device_fn': None, '_num_ps_replicas': 0, '_eval_distribute': None, '_num_worker_replicas': 1, '_protocol': None, '_model_dir': 'younseun_trained', '_service': None, '_keep_checkpoint_every_n_hours': 10000, '_is_chief': True, '_save_checkpoints_secs': None}


In [46]:
train_spec = tf.estimator.TrainSpec(
    input_fn = lambda: train_input_fn('./younseun_input/train_data.csv'),
    max_steps = 500
)

In [47]:
exporter = tf.estimator.FinalExporter('./exporter', serving_input_receiver_fn=serving_input_receiver_fn)

In [48]:
eval_spec = tf.estimator.EvalSpec(
    input_fn = lambda: eval_input_fn("./younseun_input/train_data.csv"),
    steps = None,
    start_delay_secs = 1,
    throttle_secs = 1,
    exporters = exporter,
)

In [49]:
tf.logging.set_verbosity(tf.logging.INFO) 
shutil.rmtree(path = OUTDIR, ignore_errors = True)
tf.summary.FileWriterCache.clear() # ensure filewriter cache is clear for TensorBoard events file

tf.estimator.train_and_evaluate(estimator = model, 
                                train_spec = train_spec, 
                                eval_spec = eval_spec)

INFO:tensorflow:Not using Distribute Coordinator.
INFO:tensorflow:Running training and evaluation locally (non-distributed).
INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after every checkpoint. Checkpoint frequency is determined based on RunConfig arguments: save_checkpoints_steps 2000 or save_checkpoints_secs None.
<DatasetV1Adapter shapes: ({country: (?,), age: (?,), sex: (?,)}, (?,)), types: ({country: tf.string, age: tf.int32, sex: tf.string}, tf.int32)>
INFO:tensorflow:Calling model_fn.


ValueError: Items of feature_columns must be a <class 'tensorflow.python.feature_column.feature_column_v2.DenseColumn'>. You can wrap a categorical column with an embedding_column or indicator_column. Given: VocabularyListCategoricalColumn(key='country', vocabulary_list=('kr', 'ca'), dtype=tf.string, default_value=-1, num_oov_buckets=0)