[View in Colaboratory](https://colab.research.google.com/github/wichersq/alarm-predict-ML/blob/master/timePredML.ipynb)

In [0]:
import tensorflow as tf
import pandas as pd
from google.colab import files

In [2]:
# Direct-download URL (Google Drive) of the CSV this notebook works on.
FILE_NAME_WITH_TOTAL_DURATION = 'https://drive.google.com/uc?export=download&id=1bj8g_OIbA6hZAzY9JdtsChRlNBQZ3KLS'
data_df = pd.read_csv(FILE_NAME_WITH_TOTAL_DURATION)

# Preview the first ten rows of the raw frame.
print(data_df.head(n=10))

# Labels of every column that holds at least one missing value.
null_columns = data_df.columns[data_df.isna().any(axis=0)]

                         Business Name  \
0                   Maison D'Alexandre   
1                     Johnson Law Firm   
2                                 DROM   
3  Scrub Pro Uniforms & Gallo Clothing   
4                           DC Trekker   
5                Kohl's Fredericksburg   
6                       Investors Bank   
7                       Barnes & Noble   
8       Market Cafe & Brick Oven Pizza   
9                         Jos. A. Bank   

                                 Destination Address  \
0                   33 Lewis St, Greenwich, CT 06830   
1            302 W Broad St, Elizabethtown, NC 28337   
2                    85 Avenue A, New York, NY 10009   
3              78 Mountain Rd, Glen Burnie, MD 21060   
4           1422 Harvard St NW, Washington, DC 20009   
5  1571 Carl D. Silver Parkway, Fredericksburg, V...   
6  675 Hempstead Turnpike, Franklin Square, NY 11010   
7                601 E Pratt St, Baltimore, MD 21202   
8              425 Lexington Ave,

In [0]:
def get_open_close_time(df_input_hour, day_in_week):
  """Pick, for every row, the close/open value that belongs to that row's day.

  Args:
    df_input_hour: DataFrame whose columns are laid out as consecutive
      (close, open) pairs, one pair per day. The pair for day ``d`` is read
      from column positions ``d*2 - 2`` and ``d*2 - 1``; for ``d == 0`` these
      are ``-2`` and ``-1``, i.e. the LAST pair of columns.
    day_in_week: sequence (list, Series, ...) of integer day numbers, one per
      row of ``df_input_hour`` and in the same row order.

  Returns:
    A tuple ``(close_col, open_col)`` of two lists, each with one entry per
    element of ``day_in_week``.
  """
  open_col = []
  close_col = []
  # Walk both inputs strictly by position. Looking the day up by label
  # (day_in_week[n]) while reading the row by position (iloc[n]) breaks as
  # soon as the index is not 0..n-1, e.g. after filtering or sampling.
  for n, day in enumerate(list(day_in_week)):
    row = df_input_hour.iloc[n]
    # .iloc makes the positional intent explicit; plain row[int] on a
    # string-labelled Series relies on a deprecated pandas fallback.
    close_col.append(row.iloc[(day * 2) - 2])
    open_col.append(row.iloc[(day * 2) - 1])
  return close_col, open_col

In [4]:
input_df = data_df.copy()

# Single text field combining the business name and its address.
input_df['Destination'] = data_df['Business Name'] + ', ' + data_df["Destination Address"]

# Per-day (close, open) column pairs, Day1..Day6 followed by Day0.
store_hour_df = data_df[[
  'Day1_Close', 'Day1_Open',
  'Day2_Close', 'Day2_Open',
  'Day3_Close', 'Day3_Open',
  'Day4_Close', 'Day4_Open',
  'Day5_Close', 'Day5_Open',
  'Day6_Close', 'Day6_Open',
  'Day0_Close', 'Day0_Open',
]]

# Keep only the close/open value matching each row's 'Day of the Week'.
close_times, open_times = get_open_close_time(store_hour_df, data_df['Day of the Week'])
input_df['Close_Time'] = close_times
input_df['Open_Time'] = open_times

# Indicator columns: 1 where the source value is present, 0 where it is NaN.
presence_flags = {
  'Does_Reviews_Exist?': 'Reviews',
  'Does_Rating_Exist?': 'Rating',
  'Does_Price_Lv_Exist?': 'Price Level',
  'Does_Walking_Exist?': 'Walking_Duration',
  'Does_Transit_Exist?': 'Transit_Duration',
  'Does_Close/Open_Time_Exist?': 'Open_Time',
}
for flag_col, source_col in presence_flags.items():
  input_df[flag_col] = input_df[source_col].notnull().astype(int)

# Replace every remaining NaN with the sentinel -1 (the flags above record
# which values were originally missing).
input_df = input_df.fillna(-1)

def linear_scale(series):
  """Divide every value of ``series`` by the series' range (max - min).

  Args:
    series: numeric pandas Series.

  Returns:
    A new Series with each value divided by ``max - min``. When the range is
    zero (all values identical) the values are returned unchanged, as floats,
    instead of becoming inf/NaN through a division by zero.

  NOTE(review): the minimum is only used to compute the range; values are not
  shifted by it, so the output is not confined to [0, 1] unless the minimum
  is 0. Confirm whether ``(x - min) / range`` was intended.
  """
  min_val = series.min()
  max_val = series.max()
  scale = (max_val - min_val)
  if scale == 0:
    # Constant column: nothing to scale. Dividing by 1 keeps the values and
    # still yields a float result like the normal path.
    scale = 1
  # Vectorised division; same result as applying x / scale element by element.
  return series / scale

# Rescale the 'Reviews' column by its observed range.
input_df['Reviews'] = linear_scale(input_df['Reviews'])

print(input_df.head(n=10))

# 80/20 split with a fixed seed: the test set is every row whose index label
# was not drawn into the training sample.
train_df = input_df.sample(frac=0.8, random_state=0)
test_df = input_df.loc[~input_df.index.isin(train_df.index)]

                         Business Name  \
0                   Maison D'Alexandre   
1                     Johnson Law Firm   
2                                 DROM   
3  Scrub Pro Uniforms & Gallo Clothing   
4                           DC Trekker   
5                Kohl's Fredericksburg   
6                       Investors Bank   
7                       Barnes & Noble   
8       Market Cafe & Brick Oven Pizza   
9                         Jos. A. Bank   

                                 Destination Address  \
0                   33 Lewis St, Greenwich, CT 06830   
1            302 W Broad St, Elizabethtown, NC 28337   
2                    85 Avenue A, New York, NY 10009   
3              78 Mountain Rd, Glen Burnie, MD 21060   
4           1422 Harvard St NW, Washington, DC 20009   
5  1571 Carl D. Silver Parkway, Fredericksburg, V...   
6  675 Hempstead Turnpike, Franklin Square, NY 11010   
7                601 E Pratt St, Baltimore, MD 21202   
8              425 Lexington Ave,

In [0]:
def dumb_model(x):
  """Baseline model: the first input column plus one trainable offset.

  The original author recorded an error of about 3500 for this model.

  NOTE(review): ``x[:, 0]`` drops the second axis, so the result has one
  dimension fewer than ``x``, whereas NN_model ends in a ``units = 1`` dense
  layer. Confirm the shapes line up with the target placeholder before
  swapping this in for NN_model.
  """
  first_feature = x[:, 0]
  offset = tf.Variable(0.0)
  return first_feature + offset

In [0]:
def NN_model(x):
  """Feed ``x`` through a stack of dense layers and return the final output.

  Four ReLU-activated dense layers of 100, 50, 10 and 5 units are followed by
  a single-unit dense layer with no activation.
  """
  hidden = x
  for width in (100, 50, 10, 5):
    hidden = tf.layers.dense(inputs=hidden, units=width, activation=tf.nn.relu)
  return tf.layers.dense(inputs=hidden, units=1)

In [0]:
tf.reset_default_graph()

# Feature columns fed to the model. 'Rating' and 'Does_Rating_Exist?' are
# currently not part of the feature set.
list_samp_x = [
  'Driving_Duration',
  'Reviews',
  'Does_Reviews_Exist?',
  'Price Level',
  'Does_Price_Lv_Exist?',
]
x = tf.placeholder(dtype=tf.float32, shape=(None, len(list_samp_x)))
y_ = tf.placeholder(dtype=tf.float32, shape=(None, 1))
pred = NN_model(x)

# loss: mean squared difference (minimised by the optimizer below).
# error: mean absolute difference.
loss = tf.reduce_mean(tf.square(y_ - pred))
error = tf.reduce_mean(tf.abs(y_ - pred))

opt = tf.train.AdamOptimizer(.0003).minimize(loss)

sess = tf.Session()
sess.run(tf.global_variables_initializer())
loss_vals = []

In [0]:
# The held-out matrices never change, so build them once instead of on every
# evaluation step. Column selection + `.values` replaces DataFrame.as_matrix,
# which pandas deprecated in 0.23 and removed in 1.0.
test_x = test_df[list_samp_x].values
test_y = test_df[['Total_Driving']].values

for i in range(60000):
  # Draw a fresh random mini-batch of 100 training rows for each step.
  batch = train_df.sample(100)
  samp_x = batch[list_samp_x].values
  samp_y = batch[['Total_Driving']].values
  _, loss_val, error_val = sess.run([opt, loss, error], feed_dict={x:samp_x, y_:samp_y})
  if i % 2000 == 0:
    print("loss", loss_val, "\n error", error_val)

    # Evaluate on the whole held-out set. `opt` is not run here, so this step
    # does not update any weights. Separate names keep the training batch and
    # its metrics from being overwritten.
    test_loss_val, test_error_val = sess.run([loss, error], feed_dict={x:test_x, y_:test_y})
    print("testing_loss", test_loss_val, "\n testing_error", test_error_val)

# Experiment log (values as recorded by the original author; "optimizer" is
# presumably the Adam learning rate -- confirm):
#   optimizer .0003, 30000 iterations -> error 253.36313
#   optimizer .0005, 30000 iterations -> error 228.12729
#   optimizer .0003, 60000 iterations -> error 171.95436
#   without price level               -> error 187.73192


loss 93508280.0 
 error 8309.803
testing_loss 71285290.0 
 testing_error 7321.413
loss 2407835.8 
 error 1308.0309
testing_loss 2593065.0 
 testing_error 1378.1989
loss 1956840.5 
 error 1180.5688
testing_loss 2565315.0 
 testing_error 1364.2834
loss 2096750.2 
 error 1215.9193
testing_loss 2530575.5 
 testing_error 1363.4529
loss 2222925.8 
 error 1226.636
testing_loss 2487504.0 
 testing_error 1354.5975
loss 2232885.8 
 error 1327.0277
testing_loss 2376100.5 
 testing_error 1312.4169
loss 1431017.1 
 error 1003.68024
testing_loss 1907405.8 
 testing_error 1173.8478
loss 631491.9 
 error 631.1598
testing_loss 680082.2 
 testing_error 671.02545
loss 297794.75 
 error 467.0543
testing_loss 320418.47 
 testing_error 468.15598
loss 149753.12 
 error 251.41924
testing_loss 216173.89 
 testing_error 325.41824
loss 171102.84 
 error 275.5192
testing_loss 188293.52 
 testing_error 269.03787
loss 152336.92 
 error 258.4233
testing_loss 178344.53 
 testing_error 286.3161
loss 177552.7 
 error 3