In [153]:
# 准备工作

import warnings

import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import accuracy_score

import matplotlib.pyplot as plt

%matplotlib inline

In [154]:
# 加载数据
train_tf = pd.read_csv("./datasets/train.csv")
test_tf = pd.read_csv("./datasets/test.csv")
train_data = train_tf.drop(["label"], axis=1)
train_label = train_tf["label"]

In [155]:
train_label_one_hot = pd.get_dummies(train_label)

In [156]:
X_train, X_test, y_train,  y_test = train_test_split(train_data, 
                                                     train_label_one_hot, test_size=0.2, 
                                                     random_state=42, stratify=train_label)


In [157]:
m, n = X_train.shape
X_train.shape

(33600, 784)

In [158]:
# 神经网络参数
learning_rate = 0.001
num_steps = 500
batch_size = 100
display_step = 50

num_input = 784
num_classes = 10
dropout = 0.75

# tf 计算图输入
X = tf.placeholder(tf.float32, [None, num_input])
Y = tf.placeholder(tf.float32, [None, num_classes])
keep_prob = tf.placeholder(tf.float32)

In [159]:
# conv2d的封装层
def conv2d(x, W, b, strides=1):
    # strides 步长为1
    x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding="SAME")
    x = tf.nn.bias_add(x, b)
    return tf.nn.relu(x)

In [160]:
# 池化层的封装
def maxpool2d(x, k=2):  
    # k 池化大小
    result = tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1], 
                           padding="SAME")
    return result
    

In [161]:
# cnn模型结构
def conv_net(x, weights, biases, dropout):
    # input: [batch_size, height, weight, channel]
    x = tf.reshape(x, shape=[-1, 28, 28, 1])
    
    # 卷积层
    conv1 = conv2d(x, weights['wc1'], biases['bc1'])
    conv1 = maxpool2d(conv1, 2)
    
#     conv2 = conv2d(conv1, weights['wc2'], biases['bc2'])
#     conv2 = maxpool2d(conv2, 2)
    
    # 全连接层
    fc1 = tf.reshape(conv1, [-1, weights['wd1'].get_shape().as_list()[0]])
    fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1'])
    fc1 = tf.nn.relu(fc1)
    
    # dropout
    fc1 = tf.nn.dropout(fc1, dropout)

    out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
    return out

In [162]:
# 权重和偏置
weights = {
     # 5*5 conc, 1 input, 32 outputs
    'wc1':tf.Variable(tf.random_normal([5, 5, 1, 10])),
#     'wc2':tf.Variable(tf.random_normal([5, 5, 1, 1])),
    'wd1':tf.Variable(tf.random_normal([14 * 14 * 10, 100])), 
    'out':tf.Variable(tf.random_normal([100, num_classes]))
}
biases = {
    'bc1':tf.Variable(tf.random_normal([10])),
#     'bc2':tf.Variable(tf.random_normal([1])),
    'bd1':tf.Variable(tf.random_normal([100])),
    'out':tf.Variable(tf.random_normal([num_classes]))
}

In [163]:
# init = tf.global_variables_initializer()
# with tf.Session() as sess:
#     sess.run(init)
#     a = weights['wd1'].get_shape()
#     print(type(a))

In [164]:
# 输出结果
logits = conv_net(X, weights, biases, keep_prob)
prediction = tf.nn.softmax(logits)

In [165]:
# 定义损失和优化器
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

In [166]:
# 评估模型
correct_pred = tf.equal(tf.argmax(prediction, 1), tf.argmax(Y, 1)) # true or false
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

In [167]:
# 变量初始化
init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [168]:
# 开始训练
with tf.Session() as sess:
    sess.run(init)
    for step in range(num_steps):
        index = np.random.randint(m - batch_size)
        batch_x = X_train[index: index + batch_size].values
        batch_y = y_train[index: index + batch_size].values
        sess.run(train_op, feed_dict={X:batch_x, Y:batch_y, keep_prob: dropout})
        if step % display_step == 0:
            # 计算batch 的损失和精确度
            loss, acc = sess.run([loss_op, accuracy], feed_dict={X: batch_x, Y:batch_y, keep_prob:1.0})
            print("Step " + str(step) + ", Minibatch Loss= " + "{:.4f}".format(loss) + 
                  ", Training Accuracy= " + "{:.3f}".format(acc) )
    print("Optimization Finished!")
    
#     print("Testing Accuracy:", sess.run(accuracy, feed_dict={X:X_test, Y:y_test, keep_prob:1.0}))
#     saver.save(sess, "./model/06_final.ckpt")

Step 0, Minibatch Loss= 226833.7188, Training Accuracy= 0.140
Step 50, Minibatch Loss= 45328.6914, Training Accuracy= 0.330
Step 100, Minibatch Loss= 23314.9805, Training Accuracy= 0.570
Step 150, Minibatch Loss= 14519.3154, Training Accuracy= 0.650
Step 200, Minibatch Loss= 13341.2871, Training Accuracy= 0.590
Step 250, Minibatch Loss= 10508.9121, Training Accuracy= 0.720
Step 300, Minibatch Loss= 8423.2822, Training Accuracy= 0.740
Step 350, Minibatch Loss= 3871.4299, Training Accuracy= 0.800
Step 400, Minibatch Loss= 6507.9424, Training Accuracy= 0.740
Step 450, Minibatch Loss= 2920.6125, Training Accuracy= 0.770
Optimization Finished!


In [142]:
# with tf.Session() as sess:
#     saver.restore(sess, "./model/06_final.ckpt")
#     Z = sess.run(logits, feed_dict={X:test_tf.values, keep_prob:1.0})
#     p_pred = np.argmax(Z, 1)

## 高级API

In [176]:
X_train, X_test, y_train,  y_test = train_test_split(train_data, 
                                                     train_label, test_size=0.2, 
                                                     random_state=42, stratify=train_label)

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [224]:
def conv_net2(X,n_classes, dropout, reuse, is_training):
    with tf.variable_scope("ConvNet", reuse=reuse):
        X = tf.cast(X, tf.float32)
        x = tf.reshape(X, shape=[-1, 28, 28, 1])
        conv1 = tf.layers.conv2d(x, 1, 5, activation=tf.nn.relu)
        conv1 = tf.layers.max_pooling2d(conv1, 2, 2)
        
        fc1 = tf.contrib.layers.flatten(conv1)
        fc1 = tf.layers.dense(fc1, 100)
        fc1 = tf.layers.dropout(fc1, rate=dropout, training=is_training)
        
        out = tf.layers.dense(fc1, 10)
    
    return out

In [225]:
def model_fn(features, labels, mode):
    logits_train = conv_net2(features, num_classes, dropout, reuse=False, is_training=True)
    logits_test = conv_net2(features, num_classes, dropout, reuse=True, is_training=False)
    
    pred_classes = tf.argmax(logits_test, axis=1)
#     pred_proba = tf.nn.softmax(logits_test)
    
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode, predictions=pred_classes)
    
    loss_op = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_train, 
                                                                           labels=tf.cast(labels, dtype=tf.int32)))
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss_op, global_step=tf.train.get_global_step())
    
    acc_op = tf.metrics.accuracy(labels=labels, predictions=pred_classes)
    
    estim_specs = tf.estimator.EstimatorSpec(mode=mode,predictions=pred_classes, 
                                        loss=loss_op, train_op=train_op, 
                                        eval_metric_ops={"accuracy":acc_op})
    return estim_specs

In [226]:
model = tf.estimator.Estimator(model_fn)
input_fn = tf.estimator.inputs.numpy_input_fn(x=X_train.values,
                                              y=y_train.values, 
                                              batch_size=batch_size, 
                                             num_epochs=None, shuffle=True)
model.train(input_fn, steps=num_steps)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/var/folders/j0/mlln6qvj2kl42rr7mg80lsxc0000gn/T/tmpq4y4grbl', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x1a19f84a90>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1 into /var/folders/j0

<tensorflow.python.estimator.estimator.Estimator at 0x1a19f84978>

In [227]:
# 评估模型
input_fn = tf.estimator.inputs.numpy_input_fn(x = X_test.values, 
                                             y = y_test.values, 
                                             batch_size=batch_size, 
                                             shuffle=True)
model.evaluate(input_fn)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-05-30-08:09:34
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/j0/mlln6qvj2kl42rr7mg80lsxc0000gn/T/tmpq4y4grbl/model.ckpt-500
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-05-30-08:09:34
INFO:tensorflow:Saving dict for global step 500: accuracy = 0.19154762, global_step = 500, loss = 3.0313046


{'accuracy': 0.19154762, 'global_step': 500, 'loss': 3.0313046}

In [228]:
# 进行预测
input_fn = tf.estimator.inputs.numpy_input_fn(x = X_test.values,  
                                             shuffle=True)
y_pred = np.array( list( (model.predict(input_fn)) ) )

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/j0/mlln6qvj2kl42rr7mg80lsxc0000gn/T/tmpq4y4grbl/model.ckpt-500
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
