In [165]:
#This script reads in cleaned and merged data and trains a regression model to predict the number of bikes
#departing a bike station given the following parameters:
#time bucket, station latitude and longitude, hourly temperature, precipitation, and whether the day is a holiday/weekend or not
#(note: the holiday/weekend flag is not among the FEATURES actually used below).
#Training takes about 12 hours on my Surface Pro 4.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import itertools

import numpy as np
import pandas as pd
import tensorflow as tf

# Log at INFO level so TensorFlow reports checkpoint saves and loss values while training.
tf.logging.set_verbosity(tf.logging.INFO)

#generate tensorflow input function for gradient descent. 
def get_input_fn(data_set, num_epochs, shuffle):
    """Wrap a pandas data set in a TensorFlow Estimator input function.

    The feature columns are taken from the module-level FEATURES list and the
    target from the module-level LABEL name. Values are copied out of
    `data_set` so that the features and the label share a fresh 0..n-1 index.

    Args:
        data_set: pandas DataFrame containing every FEATURES column and LABEL.
        num_epochs: number of passes over the data the input function yields.
        shuffle: whether the rows are shuffled when read.

    Returns:
        The input function produced by tf.estimator.inputs.pandas_input_fn.
    """
    feature_arrays = {}
    for name in FEATURES:
        feature_arrays[name] = data_set[name].values
    features = pd.DataFrame(feature_arrays)
    labels = pd.Series(data_set[LABEL].values)

    # One batch spans the whole data set, so a single step is a full pass.
    row_count = data_set.shape[0]

    return tf.estimator.inputs.pandas_input_fn(
        x=features,
        y=labels,
        batch_size=row_count,
        num_epochs=num_epochs,
        shuffle=shuffle)
     
if __name__ == '__main__':

    COLUMNS = ['Start_Time','Start_Station_Latitude','Start_Station_Longitude','Count','Precipitation','Temperature']
    FEATURES = ['Start_Time','Start_Station_Latitude','Start_Station_Longitude','Precipitation','Temperature']
    LABEL = 'Count'

    # Fixed seed for the train/validation/test split. The model resumes from
    # the checkpoints in model_dir on every run, so the split must be identical
    # across runs; otherwise rows that were validation/test data in one run
    # become training data in the next and the evaluation is contaminated.
    SPLIT_SEED = 0

    #read data
    data = pd.read_csv('demand_prediction_data.csv')

    #normalize feature and label vectors
    # NOTE(review): the statistics are computed on the full data set, before
    # the split, so validation/test rows influence the scaling - confirm
    # whether that is acceptable.
    for k in FEATURES:
        span = data[k].max() - data[k].min()
        if span == 0:
            # A constant column carries no information; avoid 0/0 -> NaN.
            data[k] = 0.0
        else:
            data[k] = (data[k] - data[k].mean()) / span
    data[LABEL] = data[LABEL] / 10

    #randomly (but reproducibly) split data set into 70% training set,
    #20% validation set, and 10% test set.
    rng = np.random.RandomState(SPLIT_SEED)
    length = len(data[LABEL])
    print(length)
    selection = rng.rand(length)
    training = (selection < 0.7)
    training_set = data[training]
    valid_test = data[~training]

    # Two thirds of the remaining 30% is the validation set (20% overall),
    # the last third is the test set (10% overall).
    length = len(valid_test[LABEL])
    selection = rng.rand(length)
    valid = (selection < 2.0 / 3.0)
    validation_set = valid_test[valid]
    test_set = valid_test[~valid]

    #define feature columns
    feature_cols = [tf.feature_column.numeric_column(k) for k in FEATURES]

    #Implement regression on Tensorflow.
    # The estimator is built once: repeated train() calls continue from the
    # latest checkpoint in model_dir, so re-creating it every round only adds
    # overhead. (An earlier variant used tf.estimator.LinearRegressor with an
    # FtrlOptimizer and model_dir "/tmp/demandLinear1".)
    regressor = tf.estimator.DNNRegressor(
        feature_columns=feature_cols,
        hidden_units=[64, 32, 64],
        optimizer=tf.train.AdamOptimizer(
            learning_rate=0.001,
            beta1=0.9,
            beta2=0.999,
            epsilon=1e-8),
        model_dir="/tmp/demandDNN23")

    # Build the input functions once as well; each one copies its data set.
    train_input_fn = get_input_fn(training_set, num_epochs=10, shuffle=True)
    valid_input_fn = get_input_fn(validation_set, num_epochs=1, shuffle=False)

    #Evaluate on validation set every 10 epochs, and train for 10000 epochs.
    # Each batch is the full training set, so 10 steps == 10 epochs.
    for _round in range(1000):
        regressor.train(input_fn=train_input_fn, steps=10)
        ev = regressor.evaluate(input_fn=valid_input_fn)

    #To generate examples of predictions, randomly select up to 100 rows from the test set.
    sample_size = min(100, len(test_set))
    snip = test_set.sample(sample_size)
    pred = regressor.predict(
        input_fn=get_input_fn(snip, num_epochs=1, shuffle=False))
    predictions = [p["predictions"] for p in itertools.islice(pred, sample_size)]
    # Each prediction is a one-element array; take the scalar explicitly.
    pre = [float(np.ravel(element)[0]) for element in predictions]

    # Both columns are in the scaled label units (Count / 10).
    comparison = pd.DataFrame()
    comparison['Actual_Count'] = snip['Count']
    comparison['Predicted_Count'] = pre
    print(comparison.sample(frac=1))


1376328
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/demandDNN23', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x0000022303D69630>, '_task_type': 'worker', '_task_id': 0, '_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Restoring parameters from /tmp/demandDNN23\model.ckpt-7861
INFO:tensorflow:Saving checkpoints for 7862 into /tmp/demandDNN23\model.ckpt.
INFO:tensorflow:loss = 157924.0, step = 7862
INFO:tensorflow:Saving checkpoints for 7871 into /tmp/demandDNN23\model.ckpt.
INFO:tensorflow:Loss for final step: 157466.0.
INFO:tensorflow:Starting evaluation at 201