# tf.estimator Quickstart
https://www.tensorflow.org/get_started/estimator

在第一篇《01-Getting Started With TensorFlow.ipynb》中就已经涉及到estimator，当时没有展开，这一节中会详细介绍。

In [1]:
import os
import urllib

import numpy as np
import tensorflow as tf
import urllib.request


# Data sets.
# The original tutorial used bare file names; absolute paths are used here so
# the code behaves the same no matter how or from where it is executed.
_IRIS_DATA_DIR = "/home/w/tmp/tensorflow/05-tf.estimator Quickstart"
_IRIS_DOWNLOAD_ROOT = "http://download.tensorflow.org/data"

# Local CSV path of each data set, paired with the URL it is fetched from.
IRIS_TRAINING = _IRIS_DATA_DIR + "/iris_training.csv"
IRIS_TRAINING_URL = _IRIS_DOWNLOAD_ROOT + "/iris_training.csv"

IRIS_TEST = _IRIS_DATA_DIR + "/iris_test.csv"
IRIS_TEST_URL = _IRIS_DOWNLOAD_ROOT + "/iris_test.csv"

In [2]:
# Download the training and test CSV files unless they are already on disk.
for csv_path, csv_url in ((IRIS_TRAINING, IRIS_TRAINING_URL),
                          (IRIS_TEST, IRIS_TEST_URL)):
    if os.path.exists(csv_path):
        continue
    # open() does not create missing directories, so make sure the target
    # directory exists first (skipped for a bare file name).
    csv_dir = os.path.dirname(csv_path)
    if csv_dir:
        os.makedirs(csv_dir, exist_ok=True)
    # Fetch the whole payload before creating the file: if the download
    # fails, no empty file is left behind for the exists() check above to
    # mistake for a finished download on the next run.
    with urllib.request.urlopen(csv_url) as response:
        payload = response.read()
    with open(csv_path, "wb") as f:
        f.write(payload)

In [3]:
# Load the training and test data sets from the CSV files.
# The builtin `int` is used as the label dtype: `np.int` was only an alias
# for it and is no longer available in recent NumPy releases.
training_set = tf.contrib.learn.datasets.base.load_csv_with_header(
    filename=IRIS_TRAINING,
    target_dtype=int,
    features_dtype=np.float32)

test_set = tf.contrib.learn.datasets.base.load_csv_with_header(
    filename=IRIS_TEST,
    target_dtype=int,
    features_dtype=np.float32)

# Specify that all features have real-value data: one numeric column named
# 'x' with shape [4].
feature_columns = [tf.feature_column.numeric_column('x', shape=[4])]


In [8]:
# Build a DNN classifier with three hidden layers of 10, 20 and 10 units
# that distinguishes 3 classes.
classifier = tf.estimator.DNNClassifier(
    feature_columns=feature_columns,
    hidden_units=[10, 20, 10],
    n_classes=3,
    model_dir='/home/w/tmp/tensorflow/05-tf.estimator Quickstart/iris_model')


INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/home/w/tmp/tensorflow/05-tf.estimator Quickstart/iris_model', '_tf_random_seed': 1, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_save_checkpoints_steps': None, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100}


In [9]:
# Training input function built from the NumPy arrays of the training set
# (shuffled, num_epochs=None).
train_features = {'x': np.array(training_set.data)}
train_labels = np.array(training_set.target)
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x=train_features,
    y=train_labels,
    num_epochs=None,
    shuffle=True)

# Train the model for 2000 steps. Kept as the last expression of the cell so
# the notebook displays the returned object.
classifier.train(input_fn=train_input_fn, steps=2000)


INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Restoring parameters from /home/w/tmp/tensorflow/05-tf.estimator Quickstart/iris_model\model.ckpt-4000
INFO:tensorflow:Saving checkpoints for 4001 into /home/w/tmp/tensorflow/05-tf.estimator Quickstart/iris_model\model.ckpt.
INFO:tensorflow:loss = 3.31549, step = 4001
INFO:tensorflow:global_step/sec: 484.828
INFO:tensorflow:loss = 6.10325, step = 4101 (0.207 sec)
INFO:tensorflow:global_step/sec: 518.832
INFO:tensorflow:loss = 12.3425, step = 4201 (0.193 sec)
INFO:tensorflow:global_step/sec: 504.42
INFO:tensorflow:loss = 7.85241, step = 4301 (0.198 sec)
INFO:tensorflow:global_step/sec: 505.695
INFO:tensorflow:loss = 6.06133, step = 4401 (0.198 sec)
INFO:tensorflow:global_step/sec: 516.152
INFO:tensorflow:loss = 4.94475, step = 4501 (0.193 sec)
INFO:tensorflow:global_step/sec: 499.375
INFO:tensorflow:loss = 5.67336, step = 4601 (0.201 sec)
INFO:tensorflow:global_step/sec: 520.182
INFO:tensorflow:loss = 15.051, step = 4701 (0.192

<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x1a12a367b70>

In [10]:
# Evaluation input function built from the NumPy arrays of the test set
# (unshuffled, num_epochs=1).
test_features = {'x': np.array(test_set.data)}
test_labels = np.array(test_set.target)
test_input_fn = tf.estimator.inputs.numpy_input_fn(
    x=test_features,
    y=test_labels,
    num_epochs=1,
    shuffle=False)

# Run the evaluation and keep only the "accuracy" entry of the result.
eval_metrics = classifier.evaluate(input_fn=test_input_fn)
accuracy_score = eval_metrics["accuracy"]

# Left as a bare expression so the notebook shows the formatted string as the
# result of the cell.
"\nTest Accuracy: {0:f}\n".format(accuracy_score)


INFO:tensorflow:Starting evaluation at 2017-09-02-21:12:28
INFO:tensorflow:Restoring parameters from /home/w/tmp/tensorflow/05-tf.estimator Quickstart/iris_model\model.ckpt-6000
INFO:tensorflow:Finished evaluation at 2017-09-02-21:12:28
INFO:tensorflow:Saving dict for global step 6000: accuracy = 0.966667, average_loss = 0.0606162, global_step = 6000, loss = 1.81849


'\nTest Accuracy: 0.966667\n'

In [11]:
# Classify two new flower samples (four feature values each).
new_samples = np.array([
    [6.4, 3.2, 4.5, 1.5],
    [5.8, 3.1, 5.0, 1.7],
], dtype=np.float32)

# Prediction input: features only, one unshuffled pass over the samples.
predict_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"x": new_samples},
    num_epochs=1,
    shuffle=False)

# Materialise all predictions, then collect the "classes" entry of each one.
predictions = list(classifier.predict(input_fn=predict_input_fn))
predicted_classes = []
for prediction in predictions:
    predicted_classes.append(prediction["classes"])

print("New Samples, Class Predictions:    {}\n".format(predicted_classes))


INFO:tensorflow:Restoring parameters from /home/w/tmp/tensorflow/05-tf.estimator Quickstart/iris_model\model.ckpt-6000
New Samples, Class Predictions:    [array([b'1'], dtype=object), array([b'2'], dtype=object)]



## 完整代码

In [3]:
import os
import urllib

import numpy as np
import tensorflow as tf
import urllib.request

# Limit GPU memory usage.
# gpu_options is attached to the ConfigProto so that the 0.9 memory fraction
# actually becomes part of the session configuration.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
config = tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options)
config.gpu_options.allow_growth = True
# NOTE(review): `sess` is not handed to the estimator created later in this
# notebook, so these settings presumably only affect this session -- confirm.
sess = tf.Session(config=config)

# Data sets.
# The original tutorial used bare file names; absolute paths are used here so
# the code behaves the same no matter how or from where it is executed.
_IRIS_DATA_DIR = "/home/w/tmp/tensorflow/05-tf.estimator Quickstart"
_IRIS_DOWNLOAD_ROOT = "http://download.tensorflow.org/data"

# Local CSV path of each data set, paired with the URL it is fetched from.
IRIS_TRAINING = _IRIS_DATA_DIR + "/iris_training.csv"
IRIS_TRAINING_URL = _IRIS_DOWNLOAD_ROOT + "/iris_training.csv"

IRIS_TEST = _IRIS_DATA_DIR + "/iris_test.csv"
IRIS_TEST_URL = _IRIS_DOWNLOAD_ROOT + "/iris_test.csv"

# Download the training and test CSV files unless they are already on disk.
for csv_path, csv_url in ((IRIS_TRAINING, IRIS_TRAINING_URL),
                          (IRIS_TEST, IRIS_TEST_URL)):
    if os.path.exists(csv_path):
        continue
    # open() does not create missing directories, so make sure the target
    # directory exists first (skipped for a bare file name).
    csv_dir = os.path.dirname(csv_path)
    if csv_dir:
        os.makedirs(csv_dir, exist_ok=True)
    # Fetch the whole payload before creating the file: if the download
    # fails, no empty file is left behind for the exists() check above to
    # mistake for a finished download on the next run.
    with urllib.request.urlopen(csv_url) as response:
        payload = response.read()
    with open(csv_path, "wb") as f:
        f.write(payload)

In [2]:
# Load the data.
# The builtin `int` is used as the label dtype: `np.int` was only an alias
# for it and is no longer available in recent NumPy releases.
training_set = tf.contrib.learn.datasets.base.load_csv_with_header(
    filename=IRIS_TRAINING,
    target_dtype=int,
    features_dtype=np.float32)

test_set = tf.contrib.learn.datasets.base.load_csv_with_header(
    filename=IRIS_TEST,
    target_dtype=int,
    features_dtype=np.float32)

# Define the input functions for training (shuffled, num_epochs=None) and
# for evaluation (unshuffled, num_epochs=1).
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x': np.array(training_set.data)},
    y=np.array(training_set.target),
    num_epochs=None,
    shuffle=True)

test_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x': np.array(test_set.data)},
    y=np.array(test_set.target),
    num_epochs=1,
    shuffle=False)


# Define the dimensions of the input data: one numeric column 'x' of shape [4].
feature_columns = [tf.feature_column.numeric_column('x', shape=[4])]

# Build the DNN.
# hidden_units: three hidden layers with 10, 20 and 10 units
# n_classes=3: the classifier distinguishes 3 classes
classifier = tf.estimator.DNNClassifier(
    feature_columns=feature_columns,
    hidden_units=[10, 20, 10],
    n_classes=3,
    model_dir='/home/w/tmp/tensorflow/05-tf.estimator Quickstart/iris_model')


# Train the model
classifier.train(input_fn=train_input_fn, steps=2000)


# Evaluate the model's accuracy
accuracy_score = classifier.evaluate(input_fn=test_input_fn)["accuracy"]

print("\nTest Accuracy: {0:f}\n".format(accuracy_score))

# Classify two new flower samples.
new_samples = np.array(
  [[6.4, 3.2, 4.5, 1.5],
   [5.8, 3.1, 5.0, 1.7]], dtype=np.float32)
predict_input_fn = tf.estimator.inputs.numpy_input_fn(
  x={"x": new_samples},
  num_epochs=1,
  shuffle=False)

# Collect the "classes" entry of every prediction.
predictions = list(classifier.predict(input_fn=predict_input_fn))
predicted_classes = [p["classes"] for p in predictions]

print("New Samples, Class Predictions:    {}\n".format(predicted_classes))


SyntaxError: invalid syntax (<ipython-input-2-1ad6edb5250f>, line 31)

## 练习版

In [1]:
# 导入数据
train_set=
test_set=

# 定义输入数据集
train_input_fn=
test_input_fn=

# 定义输入数据的维度
feature_columns=

# 定义模型：建立3层DNN模型，每层分别为10,20,10个单元
classifter=

# 训练模型
classifter.train(input_fn=train_input_fn,steps=2000)

# 评估模型准确性
accuracy_score=classifter.evaluate()

print(accuracy_score)


SyntaxError: invalid syntax (<ipython-input-1-d91f75b38196>, line 2)

In [5]:
# Load the data.
# The builtin `int` is used as the label dtype: `np.int` was only an alias
# for it and is no longer available in recent NumPy releases.
training_set = tf.contrib.learn.datasets.base.load_csv_with_header(
    filename=IRIS_TRAINING,
    target_dtype=int,
    features_dtype=np.float32)

test_set = tf.contrib.learn.datasets.base.load_csv_with_header(
    filename=IRIS_TEST,
    target_dtype=int,
    features_dtype=np.float32)

# Define the input functions for training (shuffled, num_epochs=None) and
# for evaluation (unshuffled, num_epochs=1).
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x': np.array(training_set.data)},
    y=np.array(training_set.target),
    num_epochs=None,
    shuffle=True)

test_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x': np.array(test_set.data)},
    y=np.array(test_set.target),
    num_epochs=1,
    shuffle=False)


# Define the dimensions of the input data: one numeric column 'x' of shape [4].
feature_columns = [tf.feature_column.numeric_column('x', shape=[4])]

# Build a DNN with three hidden layers of 10, 20 and 10 units.
# No model_dir is passed in this cell.
classifier = tf.estimator.DNNClassifier(
    feature_columns=feature_columns,
    hidden_units=[10, 20, 10],
    n_classes=3)


# Train the model
classifier.train(input_fn=train_input_fn, steps=2000)


# Evaluate the model's accuracy
accuracy_score = classifier.evaluate(input_fn=test_input_fn)["accuracy"]

print("\nTest Accuracy: {0:f}\n".format(accuracy_score))

# Classify two new flower samples.
new_samples = np.array(
  [[6.4, 3.2, 4.5, 1.5],
   [5.8, 3.1, 5.0, 1.7]], dtype=np.float32)
predict_input_fn = tf.estimator.inputs.numpy_input_fn(
  x={"x": new_samples},
  num_epochs=1,
  shuffle=False)

# Collect the "classes" entry of every prediction.
predictions = list(classifier.predict(input_fn=predict_input_fn))
predicted_classes = [p["classes"] for p in predictions]

print("New Samples, Class Predictions:    {}\n".format(predicted_classes))


INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\w\\AppData\\Local\\Temp\\tmphtd2ml81', '_tf_random_seed': 1, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_save_checkpoints_steps': None, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100}
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into C:\Users\w\AppData\Local\Temp\tmphtd2ml81\model.ckpt.
INFO:tensorflow:loss = 303.836, step = 1
INFO:tensorflow:global_step/sec: 449.888
INFO:tensorflow:loss = 9.76834, step = 101 (0.224 sec)
INFO:tensorflow:global_step/sec: 473.343
INFO:tensorflow:loss = 14.4561, step = 201 (0.212 sec)
INFO:tensorflow:global_step/sec: 450.902
INFO:tensorflow:loss = 15.1062, step = 301 (0.222 sec)
INFO:tensorflow:global_step/sec: 484.891
INFO:tensorflow:loss = 6.38331, step = 401 (0.205 sec)
INFO:tensorflow:global_step/sec: 514.82
INFO:tensorflow:loss