## Initial Processing
### Grab the relevant information for the bus stops on a specific route and join the data together.

In [None]:
import requests
import pandas as pd
import glob
import json
import time
import os
import platform
# from analysis_functions import *
from analysis_functions import read_in_dwell_runtime, timepoint_finder 
from analysis_functions import pull_ridership_by_stop, dwell_runtime, stop_frequency_percent


# Load API and database credentials from the local config.json file.
with open('config.json', 'r') as f:
    config = json.load(f)

SWIFTLY_API_KEY = config['DEFAULT']['SWIFTLY_API_KEY']
MSSQL_USERNAME = config['DEFAULT']['MSSQL_USERNAME']
MSSQL_PASSWORD = config['DEFAULT']['MSSQL_PASSWORD']

# Raw string: the server name contains a literal backslash, and "\E" is not
# a valid escape sequence (newer Python versions warn about it).
MSSQL_SERVER = r'ELTDBPRD\ELTDBPRD'
MSSQL_DATABASE = 'ACS_13'

# Open the SQL Server connection with the driver used on each platform:
# pymssql on macOS, pyodbc on Windows.
if platform.system() == 'Darwin':
    import pymssql
    connection = pymssql.connect(
        server=MSSQL_SERVER,
        user=MSSQL_USERNAME,
        password=MSSQL_PASSWORD,
        database=MSSQL_DATABASE,
    )
elif platform.system() == 'Windows':
    import pyodbc
    connection_string = 'DRIVER={SQL Server};SERVER=%s;DATABASE=%s;UID=%s;PWD=%s' % (
        MSSQL_SERVER, MSSQL_DATABASE, MSSQL_USERNAME, MSSQL_PASSWORD)
    connection = pyodbc.connect(connection_string)
else:
    # Fail here with a clear message instead of leaving `connection`
    # undefined and hitting a NameError when it is first used.
    raise RuntimeError(
        'No database driver configured for platform %r; '
        'expected Darwin or Windows.' % platform.system()
    )


# DEBUG = True



## Editable Parameters

In [None]:
# Lines (routes) to process, in the order they will be run.
line_numbers = [
    22, 181, 68, 23, 66, 25, 10, 200,
    55, 304, 19, 64, 72, 70, 180, 62,
    71, 57, 121, 168, 26, 60, 102, 27,
    35, 48, 65, 32, 328, 103, 104, 522,
    61, 122, 77, 73, 46, 101, 54, 88,
    89, 40, 81, 52, 53, 58, 31, 63,
    323, 82, 47, 120, 182, 201, 330, 140,
    49, 37, 16, 13, 42, 18, 14, 17,
    45, 39, 321, 185, 34, 900, 901, 902,
]

# Skip VTA special lines 831, 827, 828, 826, 825, 823, 95, 12, 231, 235

# Days of the month to include; passed to dwell_runtime and
# stop_frequency_percent.
days_to_consider = [
    2, 3, 4, 5,
    9, 10, 11, 12,
    16, 17, 18, 19,
    23, 24, 25, 26,
    30,
]

# Month and year passed to read_in_dwell_runtime.
# NOTE(review): year_to_consider is 2018, but date_range_to_consider and the
# GTFS feed URL below both refer to 2017 -- confirm which period is intended.
month_to_consider = 10
year_to_consider = 2018

# Date bounds passed to stop_frequency_percent as `date_range`.
date_range_to_consider = "'2017-10-01' and '2017-11-1'"

# GTFS feed download URL passed to timepoint_finder.
transitfeeds_url_relevant_gtfs = 'http://transitfeeds.com/p/vta/45/20170929/download'

In [None]:
# Read the dwell/runtime source data for the configured month and year.
swiftly_source_data_df = read_in_dwell_runtime(
    month=month_to_consider,
    year=year_to_consider,
)

# Build the timepoint table from the configured GTFS feed URL; it is
# left-joined onto each line's results further down.
timepoints = timepoint_finder(transitfeeds_url=transitfeeds_url_relevant_gtfs)

In [None]:
# Conversion factor from meters/second to miles/hour.
METERS_PER_SECOND_TO_MPH = 2.23694

# Ensure the output directory exists so to_csv does not fail on a fresh checkout.
os.makedirs("results", exist_ok=True)

# For every configured line: pull ridership, dwell/runtime and stop-frequency
# data, join them into one table, derive stop percentage and travel speeds,
# flag timepoints, and write one CSV per line.
for line_number in line_numbers:
    print(line_number)  # progress indicator

    rid_by_stop_df = pull_ridership_by_stop(line_number)
    df_dwell_runtime, df_stop_path_length, df_min_travel_time = dwell_runtime(
        swiftly_source_data_df, line_number, days_to_consider
    )

    # Outer-join the ridership table with each dwell/runtime table in turn.
    # No `on=` is given, so pandas joins on the columns the frames share.
    rid_dwell = rid_by_stop_df
    for frame in (df_dwell_runtime, df_stop_path_length, df_min_travel_time):
        rid_dwell = pd.merge(rid_dwell, frame, how='outer')

    stops_visited_counts, trips_sampled_count = stop_frequency_percent(
        connection, line_number, days_to_consider,
        date_range=date_range_to_consider,
    )
    # Drop the route id column from both frames before merging them in.
    del stops_visited_counts['current_route_id']
    del trips_sampled_count['current_route_id']

    bus_df_frequency = pd.merge(rid_dwell, stops_visited_counts, how="outer")
    bus_df_frequency = pd.merge(bus_df_frequency, trips_sampled_count, how="outer")

    # Share of sampled trips that stopped at each stop. fill_value=0 treats a
    # value missing on one side as 0 before dividing.
    bus_df_frequency['percent_stopped'] = (
        bus_df_frequency['number_of_times_stopped']
        .divide(bus_df_frequency['total_trips_sampled'], fill_value=0)
        .round(2)
    )

    # Compute the unrounded speed once; round each unit separately so the mph
    # value is not derived from the already-rounded m/s value.
    speed_m_per_s = (
        bus_df_frequency['stop_path_length_meters']
        / bus_df_frequency['travel_time_secs_mean']
    )
    bus_df_frequency['travel_speed_meters_second'] = speed_m_per_s.round(2)
    bus_df_frequency['travel_speed_miles_per_hour'] = (
        speed_m_per_s * METERS_PER_SECOND_TO_MPH
    ).round(2)

    # Set route_id before the timepoint join below.
    bus_df_frequency['route_id'] = line_number

    # Left join keeps every row; rows with no match in `timepoints` get
    # timepoint = 0.
    bus_array = pd.merge(bus_df_frequency, timepoints, how='left')
    bus_array.loc[bus_array['timepoint'].isnull(), 'timepoint'] = 0

    bus_array.to_csv(
        "results/bus_stop_data_analysis_dwell_" + str(line_number) + ".csv",
        index=False,
    )