In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import sklearn 
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
import os
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

In [2]:
# Location of the Titanic passenger CSV, relative to the notebook's working directory.
titanic = "./data/titanic.csv"
# Read the raw passenger table into a DataFrame.
titanic_data = pd.read_csv(filepath_or_buffer=titanic)

In [3]:
# Preprocessing: back-fill each missing value from the next valid row in the
# same column, then drop any rows that still contain NaN (trailing gaps that
# have no later value to copy from).
# DataFrame.bfill() is used instead of fillna(method="bfill") because the
# `method` argument of fillna is deprecated in recent pandas releases.
titanic_data = titanic_data.bfill().dropna()

In [4]:
# Split the table into the label series (y) and the feature frame (X).
# Indexing plus drop() leaves `titanic_data` itself untouched, so this cell can
# be re-run safely; pop("Survived") would remove the column in place and raise
# KeyError on a second run.
y = titanic_data["Survived"]
X = titanic_data.drop(columns=["Survived"])

In [5]:
# Hold out 30% of the rows for testing; the fixed random_state keeps the split
# reproducible. train_test_split is already imported in the notebook's first
# cell, so it is not re-imported here.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=666
)

# Print the shape of each split as a quick sanity check.
for split in (X_train, X_test, y_train, y_test):
    print(split.shape)

(623, 11)
(267, 11)
(623,)
(267,)


In [6]:
# Build the tf.data input pipeline consumed by the estimators.
def make_dataset(X, y, shuffle=True, epoches=100, batch_size=32):
    """Wrap features and labels in a repeated, batched ``tf.data.Dataset``.

    Args:
        X: Column-oriented features; anything ``dict(X)`` can turn into a
            ``{column name: values}`` mapping (e.g. a pandas DataFrame).
        y: Labels, one per example, aligned with the rows of ``X``.
        shuffle: Whether to shuffle the examples before repeating.
        epoches: Number of passes over the data (forwarded to
            ``Dataset.repeat``).
        batch_size: Number of examples per batch.

    Returns:
        The dataset produced by slicing ``(dict(X), y)``, optionally
        shuffling, then repeating and batching.
    """
    dataset = tf.data.Dataset.from_tensor_slices((dict(X), y))
    if shuffle:
        # A uniform shuffle needs a buffer at least as large as the dataset;
        # a fixed buffer of 100 only mixes examples within a sliding window.
        # max(..., 1) keeps the buffer size valid for an empty label set.
        dataset = dataset.shuffle(max(len(y), 1))
    dataset = dataset.repeat(epoches).batch(batch_size)
    return dataset

In [7]:
# Build the feature columns, split into categorical and numeric inputs.
category_columns = ["Pclass", "Sex", "SibSp", "Parch", "Embarked"]
numeric_columns = ["Age", "Fare"]
feature_columns = []

# Categorical inputs: one-hot (indicator) encoding over the values observed in the data.
for name in category_columns:
    vocabulary = titanic_data[name].unique()
    print(name, vocabulary)
    categorical = tf.feature_column.categorical_column_with_vocabulary_list(name, vocabulary)
    feature_columns.append(tf.feature_column.indicator_column(categorical))

# Numeric inputs are passed through as plain numeric columns.
feature_columns.extend(
    tf.feature_column.numeric_column(name) for name in numeric_columns
)

Pclass [3 1 2]
Sex ['male' 'female']
SibSp [1 0 3 4 2 5 8]
Parch [0 1 2 5 3 4 6]
Embarked ['S' 'C' 'Q']


In [8]:
# Method 1: LinearClassifier
linear_dir = "linearModel"
# exist_ok=True creates the model directory only when it is missing, without a
# separate (race-prone) existence check.
os.makedirs(linear_dir, exist_ok=True)

# 1. Build the estimator: a two-class linear classifier that uses linear_dir
#    as its model directory.
linear_estimator = tf.estimator.LinearClassifier(
    feature_columns=feature_columns,
    model_dir=linear_dir,
    n_classes=2,
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'linearModel', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [9]:
# 2. Train
def linear_train_input_fn():
    """Return the training dataset in batches of 32 (other make_dataset defaults apply)."""
    return make_dataset(X_train, y_train, batch_size=32)


linear_estimator.train(input_fn=linear_train_input_fn)

Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
INFO:tensorflow:Calling model_fn.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Instructions for updating:
Please use `layer.add_weight` method instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from linearModel\model.ckpt-6721
Instructions for updating:
Use standard file utilities to get mtimes.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done run

<tensorflow_estimator.python.estimator.canned.linear.LinearClassifierV2 at 0x1ee41e4bc70>

In [10]:
# 3. Evaluate
# A single, unshuffled pass covers every test example exactly once. With the
# make_dataset defaults (shuffle=True, epoches=100) the test set would be
# replayed 100 times, which only costs time.
linear_estimator.evaluate(
    input_fn=lambda: make_dataset(
        X_test, y_test, shuffle=False, epoches=1, batch_size=32
    )
)

INFO:tensorflow:Calling model_fn.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2020-09-21T18:04:11Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from linearModel\model.ckpt-8668
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Inference Time : 0.98560s
INFO:tensorflow:Finished evaluation at 2020-09-21-18:04:12
INFO:tensorflow:Saving dict for global step 8668: accuracy = 0.76779026, accuracy_baseline = 0.5917603, auc = 0.8287656, auc_precision_recall = 0.79127914, average_loss = 0.5006572, global_step = 8668, label/mean = 0.4082397, loss = 0.5005823, precision = 0.7373737, predicti

{'accuracy': 0.76779026,
 'accuracy_baseline': 0.5917603,
 'auc': 0.8287656,
 'auc_precision_recall': 0.79127914,
 'average_loss': 0.5006572,
 'label/mean': 0.4082397,
 'loss': 0.5005823,
 'precision': 0.7373737,
 'prediction/mean': 0.3984509,
 'recall': 0.66972476,
 'global_step': 8668}

In [11]:
# Method 2: DNNClassifier
dnn_model = "./dnnModel"
# exist_ok=True creates the model directory only when it is missing, without a
# separate (race-prone) existence check.
os.makedirs(dnn_model, exist_ok=True)

# Two-class DNN classifier with two hidden layers of 128 ReLU units, built
# with the 'Adam' optimizer and using dnn_model as its model directory.
dnn_estimator = tf.estimator.DNNClassifier(
    hidden_units=[128, 128],
    feature_columns=feature_columns,
    model_dir=dnn_model,
    n_classes=2,
    optimizer='Adam',
    activation_fn=tf.nn.relu,
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': './dnnModel', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [12]:
def dnn_train_input_fn():
    """Return the training dataset in batches of 32 (other make_dataset defaults apply)."""
    return make_dataset(X_train, y_train, batch_size=32)


dnn_estimator.train(input_fn=dnn_train_input_fn)

INFO:tensorflow:Calling model_fn.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./dnnModel\model.ckpt-4450
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Calling checkpoint listeners before saving checkpoint 4450...
INFO:tensorflow:Saving checkpoints for 4450 into ./dnnModel\model.ckpt.
INFO:tensorflow:Calling checkpoint listeners after saving checkpoint 4450...
INFO:tensorflow:loss = 0.21558446, step = 4450
INFO:tensorflow:global_step/sec: 658.5
INFO:tensorflow:loss = 0.2341446, step = 4550 (0.152 sec)
INFO:tensorflow:global

<tensorflow_estimator.python.estimator.canned.dnn.DNNClassifierV2 at 0x1ee68bcd070>

In [13]:
# Evaluate on a single, unshuffled pass over the test set: shuffling adds
# nothing to evaluation and only makes batch composition vary between runs.
dnn_estimator.evaluate(
    input_fn=lambda: make_dataset(
        X_test, y_test, shuffle=False, epoches=1, batch_size=32
    )
)

INFO:tensorflow:Calling model_fn.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2020-09-21T18:04:22Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./dnnModel\model.ckpt-6397
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Inference Time : 0.39932s
INFO:tensorflow:Finished evaluation at 2020-09-21-18:04:22
INFO:tensorflow:Saving dict for global step 6397: accuracy = 0.7827715, accuracy_baseline = 0.5917603, auc = 0.8616015, auc_precision_recall = 0.8194231, average_loss = 0.69451654, global_step = 6397, label/mean = 0.4082397, loss = 0.66541535, precision = 0.8072289, predictio

{'accuracy': 0.7827715,
 'accuracy_baseline': 0.5917603,
 'auc': 0.8616015,
 'auc_precision_recall': 0.8194231,
 'average_loss': 0.69451654,
 'label/mean': 0.4082397,
 'loss': 0.66541535,
 'precision': 0.8072289,
 'prediction/mean': 0.36645186,
 'recall': 0.6146789,
 'global_step': 6397}