Grab the relevant information for the bus stops on a specific route and join the data together.

In [1]:
import requests
import pandas as pd
import glob
import json
import time
import os
import platform
from analysis_functions import *

# Credentials are read from config.json (under its 'DEFAULT' key) so they are
# not stored in the notebook itself.
with open('config.json', 'r') as f:
    config = json.load(f)

SWIFTLY_API_KEY = config['DEFAULT']['SWIFTLY_API_KEY']
MSSQL_USERNAME = config['DEFAULT']['MSSQL_USERNAME']
MSSQL_PASSWORD = config['DEFAULT']['MSSQL_PASSWORD']

# Raw string: the server name contains a literal backslash, and "\E" is not a
# valid escape sequence in a normal string literal (newer Python versions warn
# about it). The resulting value is unchanged.
MSSQL_SERVER = r'ELTDBPRD\ELTDBPRD'
MSSQL_DATABASE = 'ACS_13'

# The database driver is chosen per operating system: pymssql on macOS,
# pyodbc on Windows.
system_name = platform.system()
if system_name == 'Darwin':
    import pymssql
    connection = pymssql.connect(server=MSSQL_SERVER, user=MSSQL_USERNAME, password=MSSQL_PASSWORD, database=MSSQL_DATABASE)
elif system_name == 'Windows':
    import pyodbc
    connection_string = 'DRIVER={SQL Server};SERVER=%s;DATABASE=%s;UID=%s;PWD=%s' % (MSSQL_SERVER, MSSQL_DATABASE, MSSQL_USERNAME, MSSQL_PASSWORD)
    connection = pyodbc.connect(connection_string)
else:
    # No driver is configured for this platform. Bind the name anyway so the
    # cells that do not need the database still run, and say why up front
    # instead of failing later with an unrelated NameError.
    import warnings
    connection = None
    warnings.warn(
        'No SQL Server driver configured for platform %r; '
        'database-backed cells will not work.' % system_name
    )


# DEBUG = True

# Passed to dwell_runtime and stop_frequency_percent to select which days to use.
# NOTE(review): these look like day-of-month numbers for October 2017 - confirm.
days_to_consider = [2,3,4,5,9,10,11,12,16,17,18,19,23,24,25,26,30]

In [2]:
# Route number analysed by this notebook; passed to the ridership, dwell/runtime
# and stop-frequency helpers below.
line_number = 22

In [None]:
# Pull the ridership-by-stop table for the selected route.
# (pull_ridership_by_stop is presumably provided by the analysis_functions star import.)
rid_by_stop_df = pull_ridership_by_stop(line_number)
# Preview the first rows as the cell output.
rid_by_stop_df.head()

Unnamed: 0,STOP_ID,DIRECTION_NAME,TIME_PERIOD,ALIGHT_ALL,AVG_SERVICED,BOARD_ALL,LOAD_ALL,SORT_ORDER,TIME_PERIOD_SORT
186,328,EAST,AM Early,0.49,0.99,43.87,43.99,10,1
192,329,EAST,AM Early,0.0,0.19,0.64,44.62,20,1
198,330,EAST,AM Early,0.0,0.05,0.16,44.79,30,1
204,331,EAST,AM Early,0.0,0.01,0.04,44.83,40,1
210,332,EAST,AM Early,0.0,0.0,0.0,44.83,50,1


In [None]:
# dwell_runtime returns three tables for the route, restricted to days_to_consider.
# NOTE(review): judging by the names these hold dwell/run times, stop path lengths
# and minimum travel times - confirm against analysis_functions.
df_dwell_runtime, df_stop_path_length, df_min_travel_time = dwell_runtime(line_number, days_to_consider)

In [None]:
# Outer-join the three dwell/runtime tables onto the ridership table, one after
# another. No explicit keys are given, so each join uses whatever columns the
# two sides have in common.
rid_dwell = rid_by_stop_df
for extra_table in (df_dwell_runtime, df_stop_path_length, df_min_travel_time):
    rid_dwell = pd.merge(rid_dwell, extra_table, how='outer')

In [None]:
# Midday - 9 to 2:30
# PM Peak - 2:30 to 6:30
# PM Late - 6:30 to 9:59
# PM Nite - 10pm to 12am
# PM Nite - 12am to 3am

In [None]:
# Pull early/late data from Swiftly once per service period and tag each
# result with the period it belongs to.
SWIFTLY_DATE_RANGE = '10012017-10302017'

# (TIME_PERIOD label, Swiftly timeRange) pairs, in request order.
# 'PM Nite' appears twice because it wraps past midnight.
# NOTE(review): no range covers 0830-0859 (AM Peak ends at 0829, Midday starts
# at 0900) - confirm whether that gap is intentional.
TIME_PERIOD_RANGES = [
    ('AM Early', '0300-0459'),
    ('AM Peak', '0500-0829'),
    ('Midday', '0900-1429'),
    ('PM Peak', '1430-1829'),
    ('PM Late', '1830-2200'),  # previously '1830-2159'
    ('PM Nite', '2200-2359'),
    ('PM Nite', '0000-0259'),
]

frames = []
for request_index, (period_label, time_range) in enumerate(TIME_PERIOD_RANGES):
    if request_index:
        # Pause between consecutive requests (presumably to stay under the API
        # rate limit); no pause is needed before the first or after the last one.
        time.sleep(10)
    # Use the notebook-wide route number instead of a hard-coded '22' so that
    # changing line_number keeps every data source on the same route.
    df = pull_early_late_by_stop(str(line_number), SWIFTLY_API_KEY, dateRange=SWIFTLY_DATE_RANGE, timeRange=time_range)
    df['TIME_PERIOD'] = period_label
    frames.append(df)

In [None]:
# Stack the per-period pulls into one table and flag every row as a timepoint.
times = pd.concat(frames)
times['TIMEPOINT'] = True
# direction_id is dropped; the join below identifies direction by DIRECTION_NAME.
times.drop(columns='direction_id', inplace=True)

# Outer-join onto the ridership/dwell table by stop, direction and time period.
# The result is kept in memory only; exporting it to "bus_analysis.csv" is
# currently disabled.
merge_keys = ['STOP_ID', 'DIRECTION_NAME', 'TIME_PERIOD']
bus_df = rid_dwell.merge(times, how='outer', on=merge_keys)

# Preview the stacked early/late table as the cell output.
times.head()

In [None]:
# Query stop-visit counts and sampled-trip counts for the route over the given
# date window, using the open database connection.
stops_visited_counts, trips_sampled_count = stop_frequency_percent(
    connection,
    line_number,
    days_to_consider,
    date_range="'2017-10-01' and '2017-11-1'",
)
# Drop the route id column from both results before they are merged.
for frequency_table in (stops_visited_counts, trips_sampled_count):
    del frequency_table['current_route_id']

In [None]:
# Outer-join the stop-visit counts, then the sampled-trip counts, onto the
# ridership/dwell table (joining on whatever columns the two sides share).
with_stop_counts = pd.merge(rid_dwell, stops_visited_counts, how="outer")
bus_df_frequency = pd.merge(with_stop_counts, trips_sampled_count, how="outer")

# Share of sampled trips that actually stopped, rounded to two decimals.
# fill_value=0 treats a value missing on only one side as 0 before dividing.
times_stopped = bus_df_frequency['number_of_times_stopped']
trips_sampled = bus_df_frequency['total_trips_sampled']
bus_df_frequency['percent_stopped'] = times_stopped.divide(trips_sampled, fill_value=0).round(2)

In [None]:
# Average travel speed = stop path length divided by mean travel time.
METERS_PER_SECOND_TO_MPH = 2.23694
speed_meters_per_second = (
    bus_df_frequency['stop_path_length_meters'] / bus_df_frequency['travel_time_secs_mean']
)
bus_df_frequency['travel_speed_meters_second'] = speed_meters_per_second.round(2)
# Convert the unrounded speed, so the mph figure is not affected by the m/s rounding.
bus_df_frequency['travel_speed_miles_per_hour'] = (
    speed_meters_per_second * METERS_PER_SECOND_TO_MPH
).round(1)

In [None]:
# Write the combined per-stop table to CSV in the working directory, without the index column.
bus_df_frequency.to_csv("bus_stop_data_analysis_dwell.csv",index=False)