In [1]:
import tensorflow as tf
import mnist_inference
import Layer2Model
import os
import pandas as pd
from sklearn.preprocessing import normalize
import numpy as np

In [2]:
BATCH_SIZE = 100 
LEARNING_RATE_BASE = 0.8
LEARNING_RATE_DECAY = 0.99
REGULARIZATION_RATE = 0.0001
TRAINING_STEPS = 200001
MOVING_AVERAGE_DECAY = 0.99 
MODEL_SAVE_PATH = "TianChi_Model/"
MODEL_NAME = "tianchi_model"
two_layers_model_fileName = '2HiddenLayer'
TENSORBOARD_LOG = 'tensor_board'

In [3]:
def startTrain(trainX, trainY, model_path, model_name, model):
    dataSize = len(trainY)
    
    with tf.device('/device:GPU:0'):
    # 定义输入输出placeholder。
        x = tf.placeholder(tf.float32, [None, model.INPUT_NODE], name='x-input')
        y_ = tf.placeholder(tf.float32, [None, model.OUTPUT_NODE], name='y-input')

        regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
        y = model.inference(x, regularizer)
        global_step = tf.Variable(0, trainable=False)
    
    # 定义损失函数、学习率、滑动平均操作以及训练过程。
        variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY, global_step)
        variables_averages_op = variable_averages.apply(tf.trainable_variables())
#     cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y, labels=tf.argmax(y_, 1))
#     cross_entropy_mean = tf.reduce_mean(cross_entropy)
        beginLoss = tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(y, y_))))
#     loss = beginLoss + tf.add_n(tf.get_collection('losses'))
        loss = beginLoss
        learning_rate = tf.train.exponential_decay(
            LEARNING_RATE_BASE,
            global_step,
            dataSize / BATCH_SIZE, LEARNING_RATE_DECAY,
            staircase=True)
        train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step=global_step)
        with tf.control_dependencies([train_step, variables_averages_op]):
            train_op = tf.no_op(name='train')
        
    # 初始化TensorFlow持久化类。
    saver = tf.train.Saver()
    config = tf.ConfigProto(allow_soft_placement = True, log_device_placement=True)
    with tf.Session(config = config) as sess:
        tf.global_variables_initializer().run()
        
        head = 0
        for i in range(TRAINING_STEPS):
            tail = head+BATCH_SIZE
            if tail > dataSize:
                xs = np.concatenate((trainX[head: BATCH_SIZE], trainX[0: tail-BATCH_SIZE]))
                ys = np.concatenate((trainY[head: BATCH_SIZE], trainY[0: tail-BATCH_SIZE]))
                head = tail - BATCH_SIZE
            else:
                xs, ys = trainX[head: head+BATCH_SIZE-1], trainY[head: head+BATCH_SIZE-1]
                head = tail
            
            _, loss_value, step = sess.run([train_op, loss, global_step], feed_dict={x: xs, y_: ys})
            if i % 1000 == 0:
                print("After", step," training step(s), loss on training batch is ", loss_value)
            if i % 10000 == 0:
                saver.save(sess, os.path.join(model_path, model_name), global_step=global_step)
                testLoss = sess.run([loss], feed_dict={x: testX, y_: testY})
                print("After", step," training step(s), loss on test set is ", testLoss)

In [4]:
# handle data, produce train and test input

df = pd.read_csv('final.csv')
# random shift the df
df = df.sample(frac=1).reset_index(drop=True)

normalizeColumns = ['compartment','TR','displacement','price_level','power','level_id',
                    'cylinder_number','engine_torque','car_length','car_height','car_width','total_quality','equipment_quality',
                    'rated_passenger','wheelbase','front_track','rear_track']
leftDf = df.drop(normalizeColumns, axis =1 ).drop(['sale_quantity'], axis = 1)

normalizeDf = df[normalizeColumns]
normalizeDf = (normalizeDf-normalizeDf.min())/(normalizeDf.max()-normalizeDf.min())
inputDf = pd.concat([leftDf, normalizeDf], axis = 1)
inputX = inputDf.values
resultArray = df['sale_quantity'].values
inputY = resultArray.reshape((len(resultArray),1))
trainX = inputX[0:18000]
trainY = inputY[0:18000]
testX = inputX[18000:]
testY = inputY[18000:]

In [None]:
def continueTrain(trainX, trainY, sess, continue_steps):
    head = 0
    dataSize = len(trainY)
    for i in range(continue_steps):
        tail = head+BATCH_SIZE
        if tail > dataSize:
            xs = np.concatenate((trainX[head: BATCH_SIZE], trainX[0: tail-BATCH_SIZE]))
            ys = np.concatenate((trainY[head: BATCH_SIZE], trainY[0: tail-BATCH_SIZE]))
            head = tail - BATCH_SIZE
        else:
            xs, ys = trainX[head: head+BATCH_SIZE-1], trainY[head: head+BATCH_SIZE-1]
            head = tail
            
        _, loss_value, step = sess.run([train_op, loss, global_step], feed_dict={x: xs, y_: ys})
        if i % 1000 == 0:
            print("After", step," training step(s), loss on training batch is ", loss_value)
        if i % 10000 == 0:
            saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME), global_step=global_step)
            testLoss = sess.run([loss], feed_dict={x: testX, y_: testY})
            print("After", step," training step(s), loss on test set is ", restoredSession.run([loss], feed_dict={x: testX, y_: testY}))

In [None]:
startTrain(trainX, trainY, MODEL_SAVE_PATH, MODEL_NAME, mnist_inference)

After 1  training step(s), loss on training batch is  241.81886
After 1  training step(s), loss on test set is  [254.51508]
After 1001  training step(s), loss on training batch is  164.55571
After 2001  training step(s), loss on training batch is  145.78275
After 3001  training step(s), loss on training batch is  137.77367
After 4001  training step(s), loss on training batch is  130.30962
After 5001  training step(s), loss on training batch is  124.78074
After 6001  training step(s), loss on training batch is  117.69403
After 7001  training step(s), loss on training batch is  113.80787
After 8001  training step(s), loss on training batch is  109.756874
After 9001  training step(s), loss on training batch is  108.107475
After 10001  training step(s), loss on training batch is  105.22526
After 10001  training step(s), loss on test set is  [197.53035]
After 11001  training step(s), loss on training batch is  99.80518
After 12001  training step(s), loss on training batch is  101.03128
Afte

After 111001  training step(s), loss on training batch is  74.13375
After 112001  training step(s), loss on training batch is  74.12999
After 113001  training step(s), loss on training batch is  74.12647
After 114001  training step(s), loss on training batch is  74.12302
After 115001  training step(s), loss on training batch is  74.11981
After 116001  training step(s), loss on training batch is  74.11674
After 117001  training step(s), loss on training batch is  74.11384
After 118001  training step(s), loss on training batch is  74.11122
After 119001  training step(s), loss on training batch is  74.10871
After 120001  training step(s), loss on training batch is  74.106255
After 120001  training step(s), loss on test set is  [211.29446]
After 121001  training step(s), loss on training batch is  74.104004
After 122001  training step(s), loss on training batch is  74.1018
After 123001  training step(s), loss on training batch is  74.09965
After 124001  training step(s), loss on training b

In [None]:
# # 初始化TensorFlow持久化类。
# saver = tf.train.Saver()
# config = tf.ConfigProto(allow_soft_placement = True, log_device_placement=True)
# sess = tf.Session(config = config)
# with sess:
#     tf.global_variables_initializer().run()
        
#     head = 0
#     for i in range(TRAINING_STEPS):
#         tail = head+BATCH_SIZE
#         if tail > dataSize:
#             xs = np.concatenate((trainX[head: BATCH_SIZE], trainX[0: tail-BATCH_SIZE]))
#             ys = np.concatenate((trainY[head: BATCH_SIZE], trainY[0: tail-BATCH_SIZE]))
#             head = tail - BATCH_SIZE
#         else:
#             xs, ys = trainX[head: head+BATCH_SIZE-1], trainY[head: head+BATCH_SIZE-1]
#             head = tail
            
#         _, loss_value, step = sess.run([train_op, loss, global_step], feed_dict={x: xs, y_: ys})
#         if i % 1000 == 0:
#             print("After", step," training step(s), loss on training batch is ", loss_value)
#         if i % 5000 == 0:
#             saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME), global_step=global_step)

In [None]:
# config = tf.ConfigProto(allow_soft_placement = True, log_device_placement=True)
# restoredSession = tf.Session(config=config)
# restoredSaver = tf.train.import_meta_graph(os.path.join(MODEL_SAVE_PATH, 'tianchi_model-30001.meta'))
# restoredSaver.restore(restoredSession, os.path.join(MODEL_SAVE_PATH, 'tianchi_model-30001'))
# # restoredSession.run([loss], feed_dict={x: testX, y_: testY})
# continueTrain(trainX, trainY, restoredSession, 100000)