In [1]:
import sys
sys.path.append('../Train')
from parse import parse
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt  
import tensorflow as tf
import mnist_inference
import re
import os

### Preprocess the prediction set

In [2]:
# Input CSV locations, relative to the notebook's working directory.
# trainset_df_path: table read into trainset_df.
# predict_df_path: table read into predict_df (and re-read when the result table is built).
trainset_df_path = "../CleanData/withMax_trainData.csv"
predict_df_path = "../RawData/yancheng_testA_20171225.csv"

In [3]:
# Read both comma-separated input tables into DataFrames.
trainset_df, predict_df = (
    pd.read_csv(csv_path, sep=',')
    for csv_path in (trainset_df_path, predict_df_path)
)

In [4]:
# re.match anchors at the start of the string, so these patterns select column
# names that BEGIN with 'sale_quantity', 'year' or 'month'.
facet_filter_pattern = re.compile('(sale_quantity)|(year)|(month)')
year_month_pattern = re.compile('(year)|(month)')

# Every training column except those starting with sale_quantity / year / month.
facet_columns = [
    name for name in trainset_df.columns.values
    if facet_filter_pattern.match(name) is None
]
# Training columns whose name contains 'class_id' anywhere.
class_id_columns = [
    name for name in trainset_df.columns.values
    if 'class_id' in name
]
# Training columns whose name starts with 'year' or 'month'.
year_month_columns = [
    name for name in trainset_df.columns.values
    if year_month_pattern.match(name) is not None
]

In [5]:
# One row per distinct combination of facet-column values; rows that differ
# only in the non-facet columns (sale_quantity / year / month) are collapsed.
unique_trainset_df = trainset_df.drop_duplicates(subset=facet_columns)

In [6]:
# Split predict_date (read as text) into integer year (first four characters)
# and integer month (remaining characters).
predict_date_text = predict_df['predict_date'].astype('str')
predict_df['year'] = predict_date_text.map(lambda text: int(text[:4]))
predict_df['month'] = predict_date_text.map(lambda text: int(text[4:]))

# One-hot encode class_id / year / month, then attach the facet columns of the
# training rows that share the same one-hot class_id values.
predict_df = (
    pd.get_dummies(predict_df.drop(['predict_date'], axis=1),
                   columns=['class_id', 'year', 'month'])
    .merge(unique_trainset_df[facet_columns], on=class_id_columns)
)

# Year/month indicator columns known to the training set but absent here are
# added as all-zero columns before aligning to the training column order.
present_date_columns = {c for c in predict_df.columns.values if year_month_pattern.match(c)}
for missing_column in set(year_month_columns) - present_date_columns:
    predict_df.loc[:, missing_column] = pd.Series(np.zeros(len(predict_df)), index=predict_df.index)

# Same columns, same order as the training frame; columns missing from
# predict_df (such as sale_quantity in the displayed output) become NaN.
predict_df = predict_df.reindex(columns=trainset_df.columns.values)

In [7]:
# Preview the first five aligned prediction rows.
predict_df.head(n=5)

Unnamed: 0,sale_quantity,compartment,level_id,TR,displacement,if_charging,price_level,if_MPV_id,if_luxurious_id,power,...,newenergy_type_id_4,emission_standards_id_1,emission_standards_id_2,emission_standards_id_3,emission_standards_id_5,gearbox_type_AMT,gearbox_type_AT,gearbox_type_CVT,gearbox_type_DCT,gearbox_type_MT
0,,2,3.0,6.0,1.6,1,12.5,1,0,91.6,...,0,1,0,0,0,0,0,0,0,1
1,,2,3.0,6.0,1.6,1,12.5,1,0,91.6,...,0,1,0,0,0,0,1,0,0,0
2,,2,3.0,6.0,1.6,1,12.5,1,0,91.6,...,0,1,0,0,0,0,0,0,1,0
3,,2,3.0,6.0,1.6,1,12.5,1,0,91.6,...,0,1,0,0,0,0,0,1,0,0
4,,2,3.0,6.0,1.6,1,12.5,1,0,91.6,...,0,0,0,1,0,0,0,0,1,0


### predict

In [8]:
# Checkpoint location: directory and file-name prefix used when restoring the model.
MODEL_SAVE_PATH = '../model/2018-01-20/'
MODEL_NAME = '1HiddenLayer'

In [9]:
def predict_on_model_1(df):
    """Run the saved two-layer network on ``df`` and return its output.

    The checkpoint ``MODEL_NAME + '-30001'`` under ``MODEL_SAVE_PATH`` is
    restored, its ``layer1`` / ``layer2`` weights and biases are read, and the
    forward pass ``relu(relu(x @ w_1 + B_1) @ w_2 + b_2)`` is evaluated on the
    normalized feature matrix.

    The model is loaded into a private ``tf.Graph`` and the session is closed
    before returning, so the function can be called repeatedly without leaking
    sessions or piling duplicate nodes into the default graph.

    Args:
        df: DataFrame of model inputs. It must contain every column listed in
            ``normalizeColumns``; the total column count must equal
            ``mnist_inference.INPUT_NODE`` (enforced by the placeholder shape).

    Returns:
        The value of the second layer as returned by ``Session.run``
        (one row per row of ``df``).
    """

    def normalize(df):
        """Min-max scale the continuous columns; return the feature matrix.

        Columns not listed in ``normalizeColumns`` are kept unchanged and come
        first; the scaled columns are appended after them.
        """
        normalizeColumns = ['compartment','TR','displacement','price_level','power','level_id',
                    'cylinder_number','engine_torque','car_length','car_height','car_width','total_quality','equipment_quality',
                    'rated_passenger','wheelbase','front_track','rear_track']
        leftDf = df.drop(normalizeColumns, axis=1)
        normalizeDf = df[normalizeColumns]
        # NOTE(review): scaling uses the min/max of `df` itself -- confirm this
        # matches how the training inputs were scaled.
        column_min = normalizeDf.min()
        # A column that is constant within `df` has a zero range; dividing by it
        # would turn the whole column (and therefore every prediction) into NaN.
        # Use a unit range instead so such a column scales to 0.
        column_range = (normalizeDf.max() - column_min).replace(0, 1)
        normalizeDf = (normalizeDf - column_min) / column_range
        inputDf_1 = pd.concat([leftDf, normalizeDf], axis=1)
        return inputDf_1.values

    config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)
    checkpoint_prefix = os.path.join(MODEL_SAVE_PATH, MODEL_NAME + '-30001')

    # Import into a dedicated graph: importing the meta graph into the default
    # graph on every call would create renamed duplicate nodes, after which the
    # tensor names below would no longer refer to the restored variables.
    graph = tf.Graph()
    with graph.as_default():
        # The context manager closes the session (and frees its resources)
        # even if restoring or running fails.
        with tf.Session(graph=graph, config=config) as restoredSession:
            restoredSaver = tf.train.import_meta_graph(checkpoint_prefix + '.meta')
            restoredSaver.restore(restoredSession, checkpoint_prefix)
            w_1 = restoredSession.run('layer1/weights:0')
            B_1 = restoredSession.run('layer1/biases:0')
            w_2 = restoredSession.run('layer2/weights:0')
            b_2 = restoredSession.run('layer2/biases:0')

            # Feed the features straight into the forward pass; the placeholder
            # casts them to float32 and validates the column count.
            x = tf.placeholder(tf.float32, [None, mnist_inference.INPUT_NODE], name='x-input')
            layer1 = tf.nn.relu(tf.matmul(x, w_1) + B_1)
            layer2 = tf.nn.relu(tf.matmul(layer1, w_2) + b_2)

            return restoredSession.run(layer2, feed_dict={x: normalize(df)})

In [10]:
# Model inputs are the training columns without the label. The two columns this
# cell adds to predict_df are dropped as well (ignored on the first run), so the
# cell can be re-run without feeding its own outputs back into the model.
predict_quantity = predict_on_model_1(
    predict_df.drop(['sale_quantity', 'predict_quantity', 'class_id'], axis=1, errors='ignore')
)
predict_df['predict_quantity'] = predict_quantity

# Recover the numeric class id from the one-hot columns: take, per row, the
# first class_id column equal to 1 and strip the 'class_id_' prefix from its
# name. Done column-wise instead of with a Python-level apply over every row.
predict_df['class_id'] = (
    (predict_df[class_id_columns] == 1)
    .idxmax(axis=1)
    .str[len('class_id_'):]
    .astype('int64')
)

# A class can appear in several merged rows; report the mean prediction per class.
predict_result = predict_df.groupby(['class_id'])['predict_quantity'].mean()

In [11]:
# Re-read the original prediction table, index it by class_id, and overwrite its
# predict_quantity values with the per-class predictions (aligned on the index).
predicted_df = pd.read_csv(predict_df_path, sep=',').set_index('class_id')
predicted_df.update(predict_result)
predicted_df

Unnamed: 0_level_0,predict_date,predict_quantity
class_id,Unnamed: 1_level_1,Unnamed: 2_level_1
103507,201711,273.636414
124140,201711,208.460739
125403,201711,187.138596
136916,201711,187.310471
169673,201711,106.094765
175962,201711,190.464020
178529,201711,415.284821
186250,201711,289.621002
194201,201711,330.379883
194450,201711,99.970688


In [29]:
# Round the predicted quantities to the nearest whole number (values stay float).
predicted_df['predict_quantity'] = predicted_df['predict_quantity'].round()