In [1]:
import geopandas as gpd
import requests
import pandas as pd
import os
import shutil
import glob
import numpy as np
# Input files, both read from the notebook's working directory.
bus_stop_inventory_path = 'Bus_Stop_Inventory.geojson'
routes_shapes_path = 'routes_shapes.geojson'

# To refresh the local bus stop inventory, uncomment and run:
# url = 'https://opendata.arcgis.com/datasets/490db54636704d35aae661a12c12e9a0_0.geojson'
# r = requests.get(url, allow_redirects=True)
# open('Bus_Stop_Inventory.geojson', 'wb').write(r.content)

# Route shapes; their CRS is kept in `crs` for the geospatial exports below.
routes_shapes = gpd.read_file(routes_shapes_path)
crs = routes_shapes.crs

# Stop inventory (looked up by `trapeze_id` further down).
geo_df = gpd.read_file(bus_stop_inventory_path)

In [None]:
# The route-shape join keys are cast to int so they can be merged against the
# per-route tables later in this cell.
for id_column in ('routeShortName', 'directionId', 'stopId'):
    routes_shapes[id_column] = routes_shapes[id_column].astype(int)

# Accumulators filled by the per-file loop below (missing_rows is only used by
# the commented-out diagnostic).
frames, stops_frames, missing_rows = [], [], []

# 3.28084 ft per metre: 'distance to next stop' is stop_path_length_meters
# scaled by this factor.
FEET_PER_METER = 3.28084

# Columns whose value is moved up one row, so that each stop carries the
# figures recorded on the row of the stop that follows it.
NEXT_STOP_COLUMNS = [
    'distance to next stop', 'travel_speed_miles_per_hour', 'travel_time_secs_mean',
    'travel_time_min_secs', 'travel delay', 'travel_time_secs_std',
]

columns_to_keep = ['STOP_ID', 'TIME_PERIOD','DIRECTION_NAME','route_id','SORT_ORDER','BOARD_ALL', 'ALIGHT_ALL','daily activity','timepoint','TRIPS_ALL','TRIPS_GROSS', 'activity','dwell_time_secs_mean', 'dwell_time_secs_std', 'percent_stopped',
                   'boardings per Obs', 'alightings per Obs', 'activity per Obs', 'distance to next stop', 'travel_speed_miles_per_hour', 'travel_time_secs_mean', 'travel_time_min_secs', 'travel delay',
                   'travel_time_secs_std', 'LOAD_ALL', 'dwell seconds times load', 'load divided by ob activity', 'activity divided by dwell', 'Load times travel delay', 'direction_id','BOARD_ALL_DAILY', 'ALIGHT_ALL_DAILY', 'LOAD_ALL_DAILY',]

# Report-friendly names for the kept columns (columns not listed keep their name).
report_column_names = {
    'STOP_ID':'stop id', 'TIME_PERIOD':'time period', 'DIRECTION_NAME':'direction name', 'SORT_ORDER':'sort order',
    'travel_time_min_secs':'minimum travel time', 'travel_time_secs_mean':'travel time', 'travel_time_secs_std':'travel time STD',
    'travel_speed_miles_per_hour':'MPH', 'dwell_time_secs_mean':'dwell time', 'dwell_time_secs_std':'dwell time std',
    'BOARD_ALL': 'Boardings', 'ALIGHT_ALL':'Alightings', 'LOAD_ALL':'Load',
}

stops_columns_to_keep = ['stop id', 'time period', 'direction name', 'timepoint', 'Boardings',
   'Alightings', 'activity','daily activity','dwell time', 'dwell time std',
   'percent_stopped', 'boardings per Obs', 'alightings per Obs',
   'activity per Obs', 'distance to next stop', 'Load',
   'dwell seconds times load', 'load divided by ob activity',
   'activity divided by dwell', 'trapeze_id',
   'stopname', 'st_loc', 'route_id','routes_listed']


def compute_peak_stop_metrics(df):
    """Return the AM/PM-peak rows of one results table with derived metrics added.

    Parameters
    ----------
    df : pandas.DataFrame
        Contents of one ``results/*.csv`` file. The columns read here are
        route_id, STOP_ID, direction_id, TIME_PERIOD, DIRECTION_NAME,
        SORT_ORDER, BOARD_ALL, ALIGHT_ALL, LOAD_ALL, dwell_time_secs_mean,
        percent_stopped, stop_path_length_meters, travel_speed_miles_per_hour,
        travel_time_secs_mean, travel_time_min_secs and travel_time_secs_std.

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is not modified) restricted to the 'AM Peak'
        and 'PM Peak' rows, sorted by TIME_PERIOD, DIRECTION_NAME and
        SORT_ORDER, with the daily totals and derived columns added and
        +/-inf replaced by NaN.

    Raises
    ------
    IndexError
        If ``route_id`` contains no non-null value.
    """
    # Stamp every row with the most frequent route_id (presumably each file
    # covers a single route and some rows have it blank -- confirm upstream).
    # `.index[0]` avoids depending on the column name produced by
    # value_counts().reset_index(), which changed in pandas 2.0.
    df = df.assign(route_id=df['route_id'].value_counts().index[0])

    # Daily totals per stop and direction, computed over every time period
    # before the peak-only filter is applied.
    daily_counts = (
        df.groupby(['STOP_ID', 'direction_id'])[['BOARD_ALL', 'ALIGHT_ALL', 'LOAD_ALL']]
        .sum()
        .reset_index()
        .rename(columns={"BOARD_ALL": "BOARD_ALL_DAILY",
                         "ALIGHT_ALL": "ALIGHT_ALL_DAILY",
                         "LOAD_ALL": "LOAD_ALL_DAILY"})
    )
    df = pd.merge(df, daily_counts, on=['STOP_ID', 'direction_id'], how='outer')

    # .copy() so the column assignments below act on an independent frame.
    df = df[df['TIME_PERIOD'].isin(['AM Peak', 'PM Peak'])].copy()
    df = df.sort_values(by=['TIME_PERIOD', 'DIRECTION_NAME', 'SORT_ORDER'])

    df['travel delay'] = df['travel_time_secs_mean'] - df['travel_time_min_secs']
    df['activity'] = df['BOARD_ALL'] + df['ALIGHT_ALL']
    df['daily activity'] = df['BOARD_ALL_DAILY'] + df['ALIGHT_ALL_DAILY']
    df['activity divided by dwell'] = df['activity'] / df['dwell_time_secs_mean']
    df['distance to next stop'] = df['stop_path_length_meters'] * FEET_PER_METER

    # Shift within each (time period, direction) run of stops. A plain
    # frame-wide shift would hand the last stop of one run the values of the
    # first stop of the following run; here it gets NaN instead. Rows with a
    # missing DIRECTION_NAME are not part of any group and also get NaN.
    shifted = df.groupby(['TIME_PERIOD', 'DIRECTION_NAME'])[NEXT_STOP_COLUMNS].shift(periods=-1)
    for column in NEXT_STOP_COLUMNS:
        df[column] = shifted[column]

    df['boardings per Obs'] = df['BOARD_ALL'] / df['percent_stopped']
    df['alightings per Obs'] = df['ALIGHT_ALL'] / df['percent_stopped']
    df['activity per Obs'] = df['activity'] / df['percent_stopped']
    df['dwell seconds times load'] = df['dwell_time_secs_mean'] * df['LOAD_ALL']
    df['load divided by ob activity'] = df['LOAD_ALL'] / df['activity per Obs']
    # Uses the already shifted 'travel delay'.
    df['Load times travel delay'] = df['LOAD_ALL'] * df['travel delay']

    # Divisions by zero above produce +/-inf; report them as missing.
    return df.replace([np.inf, -np.inf], np.nan)


allFiles = glob.glob('.' + "/results/*.csv")
if allFiles:
    # The per-route CSVs are written here; create it on a fresh checkout.
    os.makedirs('fast_output', exist_ok=True)

for file_ in allFiles:
    print(file_)
    df = compute_peak_stop_metrics(pd.read_csv(file_))

    # Per-route report table, enriched with stop names from the inventory.
    df_csv = df[columns_to_keep].rename(columns=report_column_names)
    df_csv = pd.merge(df_csv,geo_df[['trapeze_id','stopname', 'st_loc', 'routes_listed']],left_on=['stop id'],right_on=['trapeze_id'], how='left')

    # Output name reuses the text after the last underscore of the input path.
    df_csv.sort_values(by=['time period','direction name','sort order']).to_csv("fast_output/fast_output_" + file_.split('_')[-1], index=False)
    frames.append(df_csv.copy())

    # Stop-level table with geometry; the default inner merge keeps only rows
    # whose trapeze_id is present in the inventory.
    df_stops_geojson = pd.merge(df_csv[stops_columns_to_keep],geo_df[['trapeze_id','rtiid', 'geometry']],left_on=['trapeze_id'],right_on=['trapeze_id'])
    stops_frames.append(df_stops_geojson)

    # Disabled diagnostic: collect rows that have no matching route shape.
#     missing_row = pd.merge(df, routes_shapes, left_on=['route_id','direction_id','STOP_ID'],right_on=['routeShortName','directionId','stopId'], how='left', indicator=True).query("_merge!='both'")
#     missing_rows.append(missing_row)

# Fail with a clear message instead of pandas' generic concat error when the
# loop above processed no files.
if not frames:
    raise ValueError("No route CSVs were processed; nothing to combine (expected files under ./results/).")

os.makedirs('fast_output', exist_ok=True)

# All routes in one table; concatenated once and reused for the CSV export
# and for the route-shape join further down.
df_all = pd.concat(frames, ignore_index=True)
df_all.to_csv("fast_output/full_routes.csv", index=False)

# Stop-level table: replace spaces in the column names with underscores, then
# promote it to a GeoDataFrame. The geometry column was merged in from geo_df
# inside the loop, so it is labelled with geo_df's CRS rather than the
# route-shape CRS.
stops_df = pd.concat(stops_frames, ignore_index=True)
stops_df = stops_df.rename(columns=lambda x: x.replace(' ','_'))
stops_df = gpd.GeoDataFrame(stops_df, geometry='geometry', crs=geo_df.crs)

# Remove the results of a previous export before writing new ones.
# Files: any OSError (including "file not found") is ignored.
for stale_file in ('fast_geospatial_output/stops_df.geojson',
                   'fast_geospatial_output/shapes_data.geojson'):
    try:
        os.remove(stale_file)
    except OSError:
        pass

# Shapefile directories: only a missing directory is ignored; any other
# failure still propagates.
for stale_dir in ('fast_geospatial_output/stops_df/',
                  'fast_geospatial_output/shapes_data/'):
    try:
        shutil.rmtree(stale_dir)
    except FileNotFoundError:
        pass


# Make sure the export directory exists (it is absent on a fresh checkout).
os.makedirs('fast_geospatial_output', exist_ok=True)

# Stop-level export, as GeoJSON and as an ESRI Shapefile.
stops_df.to_file('fast_geospatial_output/stops_df.geojson',driver='GeoJSON')
stops_df.to_file('fast_geospatial_output/stops_df',driver='ESRI Shapefile')

# Attach the route-shape geometry to every (route, direction, stop) row.
shapes_data = pd.merge(df_all, routes_shapes, left_on=['route_id','direction_id','stop id'],right_on=['routeShortName','directionId','stopId'], how='left')
shapes_data = shapes_data.rename(columns=lambda x: x.replace(' ','_'))
# Rows without a matching shape have no geometry and are dropped.
shapes_data = shapes_data.loc[shapes_data['geometry'].notnull()]
# The geometry comes from routes_shapes, so its CRS (`crs`) applies.
shapes_data = gpd.GeoDataFrame(shapes_data, geometry='geometry', crs=crs)
shapes_data.to_file('fast_geospatial_output/shapes_data.geojson',driver='GeoJSON')
shapes_data.to_file('fast_geospatial_output/shapes_data',driver='ESRI Shapefile')

# pd.concat(missing_rows).to_csv('missing_data_rows.csv',index=False)

./results/bus_stop_data_analysis_dwell_304.csv
./results/bus_stop_data_analysis_dwell_34.csv
./results/bus_stop_data_analysis_dwell_35.csv
./results/bus_stop_data_analysis_dwell_37.csv
./results/bus_stop_data_analysis_dwell_23.csv
./results/bus_stop_data_analysis_dwell_22.csv
./results/bus_stop_data_analysis_dwell_104.csv
./results/bus_stop_data_analysis_dwell_32.csv
./results/bus_stop_data_analysis_dwell_26.csv
./results/bus_stop_data_analysis_dwell_27.csv
./results/bus_stop_data_analysis_dwell_101.csv
./results/bus_stop_data_analysis_dwell_103.csv
./results/bus_stop_data_analysis_dwell_25.csv
./results/bus_stop_data_analysis_dwell_31.csv
./results/bus_stop_data_analysis_dwell_19.csv
./results/bus_stop_data_analysis_dwell_18.csv
./results/bus_stop_data_analysis_dwell_328.csv
./results/bus_stop_data_analysis_dwell_102.csv
./results/bus_stop_data_analysis_dwell_57.csv
./results/bus_stop_data_analysis_dwell_81.csv
./results/bus_stop_data_analysis_dwell_42.csv
./results/bus_stop_data_anal