In [1]:
import partridge as ptg
import peartree as pt
%matplotlib inline
import requests
from urllib.request import urlopen
from zipfile import ZipFile
from io import BytesIO
import pandas as pd
import numpy as np
import geopandas as gpd
from shapely.geometry import Point, LineString
import networkx as nx
from shapely import wkt
from scipy.spatial import cKDTree
import osmnx as ox
from dbfread import DBF

In [2]:
# Root data folder: the extracted GTFS feed folders are read from here and the
# 'output/' text files and shapefiles are written below it by the functions in this notebook.
cd = "C:/Users/wangs1/Documents/Met_Council/gtfs_transit_network_builder/data/"
# Feed locations. The download code in get_representative_feed_from_gtfs is commented out,
# so these strings are only used to tell the two feeds apart (substring test on 'mvta').
mvta_url = "http://wiki.mvta.com/files/Home/google_transit.zip"
metrotransit_url = "ftp://ftp.gisdata.mn.gov/pub/gdrs/data/pub/us_mn_state_metc/trans_transit_schedule_google_fd/csv_trans_transit_schedule_google_fd.zip"

In [3]:
# Roadway links; the code below reads the 'A', 'B' and 'LINK_ID' columns of this layer.
roadway_network_file = "C:/Users/wangs1/Documents/Met_Council/osm_roadway_network_builder/drive_link.shp"
roadway_gdf = gpd.read_file(roadway_network_file)
# Roadway nodes; the code below reads the 'N', 'OSMID', 'X' and 'Y' columns of this layer.
node_file = "C:/Users/wangs1/Documents/Met_Council/centroid_connector_builder/drive_node.shp"
node_gdf = gpd.read_file(node_file)

In [4]:
# Graph used by nx.shortest_path in match_bus_trip_shape_to_osm_link; stops are routed
# between the OSMID values of their snapped nodes, so graph node ids must match node_gdf.OSMID.
G = ox.save_load.load_graphml("C:/Users/wangs1/Documents/Met_Council/data/network-shape_MetCouncil_full/osm_networktype_drive/drive.graphml")
#G = nx.from_pandas_edgelist(roadway_gdf, 'A', 'B', 'DISTANCE',  create_using=nx.MultiGraph())

In [5]:
# Route type reference: attribute table of the TransitRoutes layer read record by record
# into a DataFrame. prepare_df_for_cube joins its 'route' and 'routetype' columns to the trips.
bus_routetype_df = pd.DataFrame(iter(DBF("C:/Users/wangs1/Documents/Met_Council/GIS/shp_trans_transit_routes/TransitRoutes.dbf")))

In [6]:
def main(cd, url, roadway, node, G):
    """Run the whole GTFS-to-network pipeline for one feed and write the transit standard files.

    Parameters
    ----------
    cd : folder prefix handed to get_representative_feed_from_gtfs
    url : feed url, used by the feed loader to pick the feed folder
    roadway : roadway link GeoDataFrame (copied, not modified)
    node : roadway node GeoDataFrame (copied, not modified)
    G : graph the bus trips are routed on

    Returns
    -------
    tuple of (feed, trip_df, stop_df, chained_stop_to_node_df, broken_shape_trip, shape_df,
    roadway_and_rail_link_gdf, roadway_and_rail_node_gdf, rail_node_df, freq_df).
    rail_node_df is the boolean False when the feed has no route with route_type other than 3.
    """
    # private copies of the network layers
    link_gdf = roadway.copy()
    nodes_gdf = node.copy()

    # feed for the busiest day
    feed = get_representative_feed_from_gtfs(cd, url)

    # one representative trip per route / direction / time of day
    trip_df = get_representative_trip_for_route(feed)

    # stop reference: every GTFS stop with the node it was snapped to
    stop_to_node_df = snap_stop_to_node(feed, nodes_gdf)
    snapped_cols = stop_to_node_df[['N','OSMID', 'stop_id']]
    stop_df = pd.merge(feed.stops, snapped_cols, how = 'left', on = 'stop_id')

    # stop sequences of the representative trips, with their snapped nodes
    chained_stop_to_node_df = representative_chained_stop_snap_node(feed, trip_df, stop_to_node_df)

    # route the buses over the roadway graph to get the links each trip traverses
    bus_shape_df, broken_shape_trip = match_bus_trip_shape_to_osm_link(
        link_gdf, nodes_gdf, G, feed, trip_df, chained_stop_to_node_df)

    has_non_bus_route = (feed.routes.route_type != 3).any()

    if has_non_bus_route:
        # build rail links between consecutive stops
        rail_shape_df, rail_stop_node_df = non_bus_shape(feed, trip_df, chained_stop_to_node_df)

        # merge bus and rail shapes, and add the rail links and nodes to the roadway layers
        combined = combine_bus_and_rail_shape(
            bus_shape_df, rail_shape_df, rail_stop_node_df, link_gdf, nodes_gdf)
        shape_df, roadway_and_rail_link_gdf, roadway_and_rail_node_gdf, rail_node_df = combined
    else:
        # False is the "no rail nodes" marker the writer functions test for
        rail_node_df = False
        shape_df = bus_shape_df.copy()
        roadway_and_rail_link_gdf = link_gdf.copy()
        roadway_and_rail_node_gdf = nodes_gdf.copy()

    # frequency reference
    freq_df = create_freq_table(trip_df)

    # transit standard text files
    write_out_transit_standard(trip_df, stop_df, shape_df, broken_shape_trip, freq_df, feed, rail_node_df)

    # shape files
    #write_out_transit_embedded_roadway_network(roadway_and_rail_link_gdf, roadway_and_rail_node_gdf)

    return (feed, trip_df, stop_df, chained_stop_to_node_df, broken_shape_trip, shape_df,
            roadway_and_rail_link_gdf, roadway_and_rail_node_gdf, rail_node_df, freq_df)

In [7]:
def get_representative_feed_from_gtfs(work_dir, in_url):
    """Load the representative GTFS feed from an already extracted feed folder.

    Parameters
    ----------
    work_dir : str
        Folder prefix (with trailing separator) that contains the extracted feed folders.
    in_url : str
        Feed url. It is not downloaded (that code is disabled); it only selects the folder:
        'google_transit_mvta' when the url contains 'mvta', otherwise
        'csv_trans_transit_schedule_google_fd'.

    Returns
    -------
    The object returned by peartree's get_representative_feed for that folder.
    """
    print('getting representative feed...')

    # downloading and unzipping the feed is disabled; the folder must already exist
    #resp = urlopen(in_url)
    #zipfile = ZipFile(BytesIO(resp.read()))
    #zipfile.extractall(file_loc)

    # build the path from the work_dir argument rather than the notebook-level `cd`,
    # so the function honours the folder its caller asked for
    if 'mvta' in in_url:
        file_loc = work_dir + "google_transit_mvta"
    else:
        file_loc = work_dir + "csv_trans_transit_schedule_google_fd"

    # feed for the busiest day
    feed = pt.get_representative_feed(file_loc)

    return feed

In [8]:
# pick one representative trip for each route, by direction and time of day:
# a trip on the shape that carries the largest number of trips
def get_representative_trip_for_route(feed):
    """Return one representative trip per (route_id, tod, direction_id).

    Trips are classified by the arrival hour at the stop with stop_sequence == 1:
    'peak' for hours 6-8, 'offpeak' for hours 9-14, 'other' for everything else
    (including trips that have no stop with stop_sequence == 1). 'other' trips are ignored.
    Trips that share a shape_id are treated as the same service pattern.

    Parameters
    ----------
    feed : object with `stop_times` and `trips` DataFrames; arrival_time / departure_time
        are interpreted as seconds (they are passed to pd.to_datetime with unit='s').

    Returns
    -------
    DataFrame with the columns of trips merged with the first-stop columns, plus
    'tod' and 'trip_sum' (number of trips of the route/direction/time of day, over all shapes).
    One row per (route_id, direction_id, tod): the first trip on the most frequent shape.
    """
    print('getting representative trip...')

    # hour and minute of arrival / departure; each timestamp column is converted once
    stop_times_df = feed.stop_times.copy()
    arrival = pd.to_datetime(stop_times_df['arrival_time'], unit = 's')
    departure = pd.to_datetime(stop_times_df['departure_time'], unit = 's')
    stop_times_df['arrival_h'] = arrival.dt.hour
    stop_times_df['arrival_m'] = arrival.dt.minute
    stop_times_df['departure_h'] = departure.dt.hour
    stop_times_df['departure_m'] = departure.dt.minute
    first_stop_df = stop_times_df[stop_times_df['stop_sequence'] == 1]

    ## identify peak, offpeak trips, based on the arrival time of first stop
    trip_df = pd.merge(feed.trips.copy(), first_stop_df, how = 'left', on = 'trip_id')
    ## peak: 6-9am, offpeak: 9am-3pm
    is_peak = (trip_df['arrival_h'] >= 6) & (trip_df['arrival_h'] < 9)
    is_offpeak = (trip_df['arrival_h'] >= 9) & (trip_df['arrival_h'] < 15)
    trip_df['tod'] = np.where(is_peak, 'peak', np.where(is_offpeak, 'offpeak', 'other'))

    group_cols = ['route_id', 'tod', 'direction_id']

    ## trip count per shape_id. 'other' trips are filtered out BEFORE grouping: dropping the
    ## 'other' label from the grouped index raises KeyError when no trip falls in that period.
    shape_count_df = (trip_df[trip_df['tod'] != 'other']
                      .groupby(group_cols + ['shape_id'])['trip_id']
                      .count()
                      .reset_index())

    ## total number of trips per route / time of day / direction
    trip_sum_df = (shape_count_df.groupby(group_cols)['trip_id']
                   .sum()
                   .rename('trip_sum')
                   .reset_index())

    ## most frequent shape per group; the stable sort keeps the first shape_id on ties
    busiest_shape_df = (shape_count_df
                        .sort_values('trip_id', ascending = False, kind = 'mergesort')
                        .drop_duplicates(group_cols)[group_cols + ['shape_id']])

    trip_freq_df = pd.merge(trip_sum_df, busiest_shape_df, how = 'inner', on = group_cols)

    # retain the complete trip info of the representative trip only
    trip_rep_df = pd.merge(trip_df, trip_freq_df,
                           how = 'inner',
                           on = group_cols + ['shape_id']).drop_duplicates(['route_id', 'direction_id', 'tod'])

    return trip_rep_df

In [9]:
def snap_stop_to_node(feed, node_gdf):
    """Attach every GTFS stop to its nearest roadway node.

    Only nodes with N > 3061 are candidates (presumably lower ids are centroid nodes --
    TODO confirm against the node numbering scheme). Stops are built from stop_lon/stop_lat
    as EPSG:4326 points and reprojected to the CRS of the node layer; the nearest node is
    found by straight-line distance to the node 'X'/'Y' columns with a KD-tree.

    Parameters
    ----------
    feed : object with a `stops` DataFrame (stop_id, stop_lon, stop_lat)
    node_gdf : GeoDataFrame of nodes with 'N', 'X', 'Y' columns, in a projected CRS
        whose coordinates match 'X'/'Y' (assumed, not checked)

    Returns
    -------
    GeoDataFrame with one row per stop, in the order of feed.stops: all columns of the
    matched node plus 'stop_id'. Empty (node columns + 'stop_id') when there are no stops.
    """
    print('snapping gtfs stops to roadway nodes...')

    node_non_c_gdf = node_gdf[node_gdf['N'] > 3061].copy().reset_index(drop = True)
    tree = cKDTree(node_non_c_gdf[['X', 'Y']].values)

    stop_df = feed.stops.copy()
    stop_df['geometry'] = [Point(xy) for xy in zip(stop_df['stop_lon'], stop_df['stop_lat'])]
    stop_df = gpd.GeoDataFrame(stop_df)
    stop_df.crs = {'init' : 'epsg:4326'}
    stop_df = stop_df.to_crs(node_non_c_gdf.crs)

    # projected stop coordinates as an (n_stops, 2) array; reshape keeps the shape for 0 stops
    stop_xy = np.array([[p.x, p.y] for p in stop_df['geometry']], dtype = float).reshape(-1, 2)

    # one vectorised nearest-neighbour query instead of a query and a frame append per stop
    dd, ii = tree.query(stop_xy, k = 1)
    nearest = np.atleast_1d(ii)

    # positional take keeps the node column dtypes (the row-by-row build produced object columns)
    stop_to_node_gdf = node_non_c_gdf.iloc[nearest].reset_index(drop = True)
    stop_to_node_gdf['stop_id'] = stop_df['stop_id'].values

    return stop_to_node_gdf

In [10]:
def representative_chained_stop_snap_node(feed, trip_df, stop_to_node_df):
    """Return the stop_times rows of the selected trips, joined to their snapped nodes.

    Keeps the rows of feed.stop_times whose trip_id appears in trip_df, in their original
    order, and left-joins stop_to_node_df on 'stop_id'.
    """
    print('getting representative chained stops...')

    selected_trip_ids = trip_df.trip_id.tolist()
    all_stop_times = feed.stop_times.copy()
    on_selected_trip = all_stop_times['trip_id'].isin(selected_trip_ids)

    return all_stop_times[on_selected_trip].merge(stop_to_node_df, how = 'left', on = 'stop_id')

In [11]:
def match_bus_trip_shape_to_osm_link(roadway_gdf, node_gdf, G, feed, trip, chained_stop_to_node):
    """Route each representative bus trip over the graph and list the links it traverses.

    Only trips of routes with route_type == 3 are routed, and only one trip per shape_id.
    For every pair of consecutive stops (in the row order of chained_stop_to_node) the
    shortest path between their snapped OSMID nodes is computed with nx.shortest_path;
    path nodes are translated to 'N' ids via node_gdf and joined to roadway_gdf on ('A', 'B').

    Parameters
    ----------
    roadway_gdf : link table with 'A', 'B', 'LINK_ID'
    node_gdf : node table with 'OSMID', 'N'
    G : networkx graph whose node ids are the OSMID values
    feed : object with a `routes` DataFrame (route_id, route_type)
    trip : representative trips (trip_id, route_id, shape_id)
    chained_stop_to_node : stop sequences with 'trip_id' and 'OSMID'

    Returns
    -------
    (trip_shape_df, broken_shape_trip_list)
        trip_shape_df has columns A, B, LINK_ID, trip_id, shape_id; LINK_ID is null where a
        path step has no matching link. broken_shape_trip_list holds each trip_id for which
        at least one stop pair could not be routed (each id listed once). Segments that did
        route are still kept for such trips.
    """
    print('routing bus on roadway network...')

    osm_node_dict = dict(zip(node_gdf.OSMID, node_gdf.N))
    link_lookup_df = roadway_gdf[['A', 'B', 'LINK_ID']]

    trip_df = pd.merge(trip.copy(), feed.routes, how = 'left', on = 'route_id')
    bus_trip_df = trip_df[trip_df['route_type'] == 3]

    # one trip per shape: trips sharing a shape follow the same links
    non_duplicate_busshape_trip_list = bus_trip_df.groupby('shape_id')['trip_id'].first().tolist()

    chained_stop_to_node_df = chained_stop_to_node[
        chained_stop_to_node.trip_id.isin(non_duplicate_busshape_trip_list)]

    broken_shape_trip_list = []
    # per-segment frames are collected and concatenated once after the loop; this also
    # means a failure on the very first segment no longer leaves the result undefined
    segment_frames = []

    for trip_id in chained_stop_to_node_df.trip_id.unique():
        trip_stop_df = chained_stop_to_node_df[chained_stop_to_node_df['trip_id'] == trip_id]
        stop_osmid_list = trip_stop_df.OSMID.tolist()
        for s in range(len(stop_osmid_list) - 1):
            try:
                node_osmid_list = nx.shortest_path(G, stop_osmid_list[s], stop_osmid_list[s + 1])
                # dict membership is a hash lookup; path nodes missing from node_gdf are skipped
                node_N_list = [osm_node_dict[x] for x in node_osmid_list if x in osm_node_dict]
                if len(node_N_list) < 2:
                    # both stops snapped to the same node: no link is traversed
                    continue
                osm_link_gdf = pd.merge(pd.DataFrame({'A' : node_N_list[:-1],
                                                      'B' : node_N_list[1:]}),
                                        link_lookup_df,
                                        how = 'left',
                                        on = ['A', 'B'])
                osm_link_gdf['trip_id'] = trip_id
                segment_frames.append(osm_link_gdf)
            except Exception:
                # best effort: a stop pair that cannot be routed (no path, node not in graph, ...)
                # flags the trip as broken and routing continues with the next pair.
                # `Exception` rather than a bare except so Ctrl-C still interrupts the loop.
                if trip_id not in broken_shape_trip_list:
                    broken_shape_trip_list.append(trip_id)
                print('  warning: cannot route bus: ' + str(trip_id))
                continue

    if segment_frames:
        trip_shape_df = pd.concat(segment_frames, ignore_index = True, sort = False)
    else:
        trip_shape_df = pd.DataFrame(columns = ['A', 'B', 'LINK_ID', 'trip_id'])

    trip_shape_df = pd.merge(trip_shape_df, trip_df[['trip_id', 'shape_id']], how = 'left', on = 'trip_id')

    return trip_shape_df, broken_shape_trip_list
#bus_shape_df = pd.merge(bus_shape_df, trip_df[['trip_id', 'shape_id']], how = 'left', on = 'trip_id')
#n_list = chained_stop_to_node_df[chained_stop_to_node_df.trip_id == '14447646-MAR19-MVS-BUS-Weekday-01'].N.tolist()

In [12]:
# create rail links
def non_bus_shape(feed, trip, chained_stop_to_node):
    """Cut the GTFS shapes of non-bus trips into stop-to-stop links.

    For every shape used by a representative trip whose route_type is not 3, each stop of
    the trip is matched to the nearest shape point (straight-line distance in lon/lat
    degrees) and flagged as a stop point. Consecutive stop points along the shape then
    delimit one link whose geometry is the run of shape points between them.
    Shape points are used in the row order of feed.shapes (assumed to already follow
    shape_pt_sequence -- TODO confirm for feeds not loaded through partridge).

    Parameters
    ----------
    feed : object with `routes`, `shapes` and `stops` DataFrames
    trip : representative trips (trip_id, route_id, shape_id)
    chained_stop_to_node : stop sequences of the representative trips ('trip_id', 'stop_id')

    Returns
    -------
    (linestring_df, rail_node_df)
        linestring_df: columns shape_id, A, B, geometry, A_stop_id, B_stop_id, where A and B
        are row numbers in the concatenated shape-point table.
        rail_node_df: the flagged shape points with their row number in 'node_id'
        (the same numbering as A and B).
    """
    print('generating rail links...')

    trip_df = pd.merge(trip.copy(), feed.routes[['route_id', 'route_type']], how = 'left', on = 'route_id')
    non_bus_trip_df = trip_df[trip_df.route_type != 3]

    non_bus_chained_stop_df = chained_stop_to_node[
        chained_stop_to_node['trip_id'].isin(non_bus_trip_df.trip_id.tolist())]

    non_bus_shape_df = feed.shapes[feed.shapes['shape_id'].isin(non_bus_trip_df.shape_id.tolist())]

    shape_trip_dict = dict(zip(non_bus_trip_df.shape_id, non_bus_trip_df.trip_id))

    flagged_shape_frames = []
    for shape_id in non_bus_shape_df.shape_id.unique():
        trip_id = shape_trip_dict[shape_id]

        stop_df = non_bus_chained_stop_df[non_bus_chained_stop_df.trip_id == trip_id]
        stop_df = pd.merge(stop_df, feed.stops, how = 'left', on = 'stop_id')

        shape_df = non_bus_shape_df[non_bus_shape_df.shape_id == shape_id].copy()
        shape_df['is_stop'] = 0   # plain int: np.int was removed from NumPy
        # object column so stop ids of any type can be stored next to the missing marker
        shape_df['stop_id'] = pd.Series(np.nan, index = shape_df.index, dtype = object)
        is_stop_col = shape_df.columns.get_loc('is_stop')
        stop_id_col = shape_df.columns.get_loc('stop_id')

        tree = cKDTree(shape_df[['shape_pt_lon', 'shape_pt_lat']].values)
        for stop_lon, stop_lat, stop_id in zip(stop_df['stop_lon'], stop_df['stop_lat'], stop_df['stop_id']):
            dd, ii = tree.query([stop_lon, stop_lat], k = 1)
            # single positional assignment; chained `df.col.iloc[i] = v` may write to a copy
            shape_df.iloc[ii, is_stop_col] = 1
            shape_df.iloc[ii, stop_id_col] = stop_id
        flagged_shape_frames.append(shape_df)

    if flagged_shape_frames:
        # ignore_index gives a 0..n-1 index, so index labels equal row positions below,
        # also when there is only a single shape
        shape_flag_df = pd.concat(flagged_shape_frames, ignore_index = True, sort = False)
    else:
        # no non-bus shape: empty table with the same columns
        shape_flag_df = non_bus_shape_df.assign(is_stop = 0, stop_id = np.nan).reset_index(drop = True)

    link_records = []
    for shape_id in shape_flag_df.shape_id.unique():
        shape_route_df = shape_flag_df[shape_flag_df.shape_id == shape_id]
        stop_point_df = shape_route_df[shape_route_df.is_stop == 1]
        break_list = stop_point_df.index.tolist()
        stop_id_list = stop_point_df['stop_id'].tolist()
        for j in range(len(break_list) - 1):
            # shape points from one stop point to the next, both ends included
            segment_df = shape_flag_df.iloc[break_list[j]:break_list[j + 1] + 1]
            linestring = LineString(list(zip(segment_df.shape_pt_lon, segment_df.shape_pt_lat)))
            link_records.append({'shape_id' : shape_id,
                                 'A' : break_list[j],
                                 'B' : break_list[j + 1],
                                 'geometry' : linestring,
                                 'A_stop_id' : stop_id_list[j],
                                 'B_stop_id' : stop_id_list[j + 1]})

    linestring_df = pd.DataFrame(link_records,
                                 columns = ['shape_id', 'A', 'B', 'geometry', 'A_stop_id', 'B_stop_id'])

    rail_node_df = shape_flag_df[shape_flag_df.is_stop == 1].rename_axis('node_id').reset_index()

    return linestring_df, rail_node_df

In [13]:
def combine_bus_and_rail_shape(bus_shape, rail_shape, rail_node, roadway_gdf, node_gdf):
    """Number the rail nodes and links after the roadway ones and merge everything.

    Rail nodes that share the same (shape_pt_lat, shape_pt_lon) get one node id, starting
    at max(node_gdf.N) + 1. Rail links with the same end nodes are kept once and numbered
    from max(roadway_gdf.LINK_ID) + 1. Rail geometries are declared as EPSG:4326 and
    reprojected to the CRS of the corresponding roadway layer.

    Parameters
    ----------
    bus_shape : bus link sequences (shape_id, A, B, LINK_ID)
    rail_shape : rail links from non_bus_shape (shape_id, A, B, geometry, ...)
    rail_node : rail stop points from non_bus_shape (node_id, shape_pt_lat, shape_pt_lon, ...)
    roadway_gdf : roadway link GeoDataFrame
    node_gdf : roadway node GeoDataFrame

    Returns
    -------
    (shape_df, roadway_and_rail_link_gdf, roadway_and_rail_node_gdf, rail_node_gdf)
        shape_df: bus rows followed by the unique rail links (shape_id, A, B, LINK_ID);
        the two layer tables: the roadway layer with the rail features appended;
        rail_node_gdf: rail_node with its new node id in 'N'.
    """
    print('indexing rail links and nodes...')

    roadway_node_end_id = node_gdf.N.max()
    roadway_link_end_id = roadway_gdf.LINK_ID.max()

    # add unique rail nodes to roadway node dataframe
    rail_node_gdf = rail_node.copy()

    unique_rail_node_df = rail_node_gdf.drop_duplicates(['shape_pt_lat', 'shape_pt_lon']).copy()
    unique_rail_node_df['N'] = range(roadway_node_end_id + 1, roadway_node_end_id + 1 + len(unique_rail_node_df))

    rail_node_gdf = pd.merge(rail_node_gdf,
                             unique_rail_node_df[['shape_pt_lat', 'shape_pt_lon', 'N']],
                             how = 'left',
                             on = ['shape_pt_lat', 'shape_pt_lon'])

    rail_shape_df = rail_shape.copy()
    rail_shape_df['A'] = rail_shape_df['A'].astype(int)
    rail_shape_df['B'] = rail_shape_df['B'].astype(int)

    # translate the link ends from shape-point row numbers to node ids;
    # the two merges produce the columns N_x (for A) and N_y (for B)
    rail_shape_df = pd.merge(rail_shape_df, rail_node_gdf[['node_id', 'N']], how = 'left', left_on = 'A', right_on='node_id')
    rail_shape_df = pd.merge(rail_shape_df, rail_node_gdf[['node_id', 'N']], how = 'left', left_on = 'B', right_on='node_id')

    unique_rail_node_df['geometry'] = [Point(xy) for xy in zip(unique_rail_node_df.shape_pt_lon,
                                                               unique_rail_node_df.shape_pt_lat)]
    unique_rail_node_df = gpd.GeoDataFrame(unique_rail_node_df)
    unique_rail_node_df.crs = {'init' : 'epsg:4326'}
    unique_rail_node_df = unique_rail_node_df.to_crs(node_gdf.crs)
    unique_rail_node_df['X'] = unique_rail_node_df['geometry'].apply(lambda p: p.x)
    unique_rail_node_df['Y'] = unique_rail_node_df['geometry'].apply(lambda p: p.y)
    unique_rail_node_df['OSMID'] = 0   # rail nodes get OSMID 0 (plain int: np.int was removed from NumPy)

    # pd.concat replaces DataFrame.append, which no longer exists in pandas 2
    roadway_and_rail_node_gdf = pd.concat([node_gdf, unique_rail_node_df[node_gdf.columns.values]],
                                          ignore_index = True,
                                          sort = False)

    # add unique rail links to roadway link dataframe
    unique_rail_shape_gdf = (rail_shape_df
                             .drop_duplicates(['N_x', 'N_y'])
                             .drop(['A', 'B'], axis = 1)
                             .rename(columns = {'N_x' : 'A', 'N_y' : 'B'}))

    unique_rail_shape_gdf['LINK_ID'] = range(roadway_link_end_id + 1, roadway_link_end_id + 1 + len(unique_rail_shape_gdf))
    unique_rail_shape_gdf = gpd.GeoDataFrame(unique_rail_shape_gdf)
    unique_rail_shape_gdf.crs = {'init' : 'epsg:4326'}
    unique_rail_shape_gdf = unique_rail_shape_gdf.to_crs(roadway_gdf.crs)

    roadway_and_rail_link_gdf = pd.concat([roadway_gdf, unique_rail_shape_gdf[['A', 'B', 'LINK_ID', 'geometry']]],
                                          ignore_index = True,
                                          sort = False)

    print('combining bus and rail shapes...')

    shape_cols = ['shape_id', 'A', 'B', 'LINK_ID']
    # the original index values are kept (no ignore_index), as before
    shape_df = pd.concat([bus_shape.copy()[shape_cols], unique_rail_shape_gdf[shape_cols]], sort = False)

    return shape_df, roadway_and_rail_link_gdf, roadway_and_rail_node_gdf, rail_node_gdf

In [14]:
def create_freq_table(trip_df):
    """Build the headway table of the representative trips.

    Returns a copy of the trip_id, tod, direction_id and trip_sum columns plus
    'headway_secs': the length of the time window divided by trip_sum, truncated to an
    integer. Rows with tod == 'peak' use a 3-hour window, all other rows a 6-hour window.
    """
    print('creating frequency reference...')

    peak_window_secs = 3*60*60
    other_window_secs = 6*60*60

    freq_df = trip_df[['trip_id', 'tod', 'direction_id', 'trip_sum']].copy()
    window_secs = np.where(freq_df.tod == 'peak', peak_window_secs, other_window_secs)
    freq_df['headway_secs'] = (window_secs/freq_df.trip_sum).astype(int)

    return freq_df

In [15]:
def write_out_transit_standard(trip, stop, shape, broken_trip, freq, feed, rail_node = False):
    """Write routes, shapes, trips, frequencies and stops as text files.

    Shapes that contain a link without LINK_ID, or that belong to a trip listed in
    broken_trip, are dropped together with their trips. Link and node ids are written
    with the prefixes 'driveLink' and 'driveNode'. Stops that appear in rail_node get
    the rail node id instead of their snapped roadway node.

    Files are written to `cd + 'output/'` (routes.txt, shapes.txt, trips.txt,
    frequencies.txt, stops.txt); `cd` is the notebook-level data folder.

    Parameters
    ----------
    trip, stop, shape, freq : DataFrames produced by main (copied, not modified)
    broken_trip : list of trip_ids that could not be routed
    feed : object with `routes` and `trips` DataFrames
    rail_node : rail node DataFrame ('stop_id', 'N'), or the boolean False when there is none
    """
    shape_df = shape.copy()
    trip_df = trip.copy()

    broken_shape_list = shape_df[shape_df.LINK_ID.isnull()].shape_id.tolist()
    broken_shape_list = broken_shape_list + trip_df[trip_df.trip_id.isin(broken_trip)].shape_id.tolist()

    # `~` is the boolean negation; unary minus on a boolean mask is rejected by current NumPy/pandas.
    # The copy makes the column assignments below act on an independent frame.
    shape_df = shape_df[~shape_df.shape_id.isin(broken_shape_list)].copy()
    shape_df['LINK_ID'] = ['driveLink' + str(v) for v in shape_df['LINK_ID'].astype(int)]
    shape_df['A'] = ['driveNode' + str(v) for v in shape_df['A']]
    shape_df['B'] = ['driveNode' + str(v) for v in shape_df['B']]

    is_broken_trip = trip_df.shape_id.isin(broken_shape_list) | trip_df.trip_id.isin(broken_trip)
    trip_df = trip_df[~is_broken_trip]

    final_trip_list = trip_df.trip_id.unique().tolist()

    freq_df = freq.copy()
    freq_df = freq_df[freq_df.trip_id.isin(final_trip_list)]

    stop_df = stop.copy()

    if type(rail_node) != bool:
        # one dict lookup per stop instead of a scan of the rail stop list per stop
        rail_node_dict = dict(zip(rail_node.stop_id, rail_node.N))
        stop_df['N'] = [rail_node_dict.get(stop_id, n) for stop_id, n in zip(stop_df['stop_id'], stop_df['N'])]

    stop_df['N'] = ['driveNode' + str(n) for n in stop_df['N']]

    route_df = feed.routes.copy()
    route_df = route_df[route_df.route_id.isin(trip_df.route_id.tolist())]

    route_df.to_csv(cd + 'output/routes.txt', index = False, sep = ',')
    shape_df.to_csv(cd + 'output/shapes.txt', index = False, sep = ',')
    trip_df[feed.trips.columns.values].to_csv(cd + 'output/trips.txt', index = False, sep = ',')
    freq_df[['trip_id', 'headway_secs']].to_csv(cd + 'output/frequencies.txt', index = False, sep = ',')
    stop_df.to_csv(cd + 'output/stops.txt', index = False, sep = ',')

In [16]:
def write_out_transit_embedded_roadway_network(link, node):
    """Save the link and node layers (roadway plus rail) as shapefiles under cd + 'output/'."""
    targets = (
        (link, cd + 'output/drive_link_with_rail.shp'),
        (node, cd + 'output/drive_node_with_rail.shp'),
    )
    for layer, path in targets:
        layer.to_file(path)

In [48]:
def combine_metro_mvta_txt():
    """Stack the per-agency transit standard files into one file per table.

    For each table in routes, trips, stops, frequencies and shapes, reads
    `<table>_metro.txt` and `<table>_mvta.txt` from `cd + 'output/'` and writes their
    concatenation (metro rows first) to `<table>.txt` in the same folder.
    NOTE(review): nothing in this notebook writes the `_metro` / `_mvta` suffixed files;
    presumably the files written by write_out_transit_standard are renamed by hand
    after each run -- confirm.

    MVTA agency ids in the routes table are recoded through `mvta_agency`
    (1 -> 3, 2 -> 0, 3 -> 15; ids not listed become missing). Only the columns
    shape_id, A, B and LINK_ID of the shapes tables are kept.
    """
    name_list = ['metro', 'mvta']
    file_list = ['routes', 'trips', 'stops', 'frequencies', 'shapes']
    mvta_agency = {1:3, 2:0, 3:15}
    for f in file_list:
        parts = []
        for n in name_list:
            sub_df = pd.read_csv(cd + 'output/' + f + '_' + n + '.txt')
            if (f == 'routes') and (n == 'mvta'):
                sub_df['agency_id'] = sub_df['agency_id'].map(mvta_agency)
            if f == 'shapes':
                sub_df = sub_df[['shape_id', 'A', 'B', 'LINK_ID']]
            parts.append(sub_df)
        # concatenate once and write once per table (DataFrame.append no longer exists in pandas 2)
        all_df = pd.concat(parts, ignore_index = True, sort = False)
        all_df.to_csv(cd + 'output/' + f + '.txt', index = False, sep = ',')

In [17]:
# Run the pipeline for the feed selected by metrotransit_url (the non-'mvta' feed folder)
# on the original roadway layers; the ten results of main() are kept under metro_* names.
metro_feed, metro_trip_df, metro_stop_df, metro_chained_stop_to_node_df, metro_broken_shape_trip, metro_shape_df, \
        metro_roadway_and_rail_link_gdf, metro_roadway_and_rail_node_gdf, \
        metro_rail_node_df, metro_freq_df = main(cd, 
                                               metrotransit_url, 
                                               roadway_gdf,#roadway_and_rail_link_gdf, 
                                               node_gdf,#roadway_and_rail_node_gdf,
                                              G)

getting representative feed...
getting representative trip...
snapping gtfs stops to roadway nodes...
getting representative chained stops...
routing bus on roadway network...
generating rail links...


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)


indexing rail links and nodes...
combining bus and rail shapes...
creating frequency reference...


In [18]:
# Run the pipeline for the MVTA feed; results are kept under mvta_* names.
# NOTE(review): main() writes output/routes.txt, shapes.txt, ... without a feed suffix, so this
# call overwrites the text files written by the previous main() call -- presumably they are
# renamed to *_metro.txt / *_mvta.txt by hand for combine_metro_mvta_txt; confirm.
mvta_feed, mvta_trip_df, mvta_stop_df, mvta_chained_stop_to_node_df, mvta_broken_shape_trip, mvta_shape_df, \
        mvta_roadway_and_rail_link_gdf, mvta_roadway_and_rail_node_gdf, \
        mvta_rail_node_df, mvta_freq_df = main(cd, 
                                               mvta_url, 
                                               roadway_gdf,#roadway_and_rail_link_gdf, 
                                               node_gdf,#roadway_and_rail_node_gdf,
                                              G)

getting representative feed...
getting representative trip...
snapping gtfs stops to roadway nodes...
getting representative chained stops...
routing bus on roadway network...
creating frequency reference...


In [49]:
# Merge the per-agency text files in cd + 'output/' into one file per table.
combine_metro_mvta_txt()

In [19]:
def prepare_df_for_cube(feed, trip, shape, chained_stop_to_node, freq, broken_trip, bus_routetype, url):
    """Build the table of transit lines (one row per representative trip) for the Cube line file.

    Trips on a shape with a missing LINK_ID, or listed in broken_trip, are dropped. The
    remaining trips are joined to feed.routes, to freq (on trip_id) and to bus_routetype
    (route_short_name == route), and these columns are added:

    NAME      agency_id, route_id, route_short_name, tod and 'dir' + direction_id joined by '_'
    LONGNAME  route_long_name
    HEADWAY   headway_secs in whole minutes
    MODE      bus routes (route_type 3): 5 'Urb Loc', 6 'Sub Loc', 7 'Express';
              other routes: 8 for route_type 0, 9 for route_type 2; 5 when nothing matches
    ONEWAY    'T'
    OPERATOR  agency_id recoded with the MVTA table when url contains 'mvta',
              otherwise with the Metro Transit table (missing when the id is not listed)

    Because trip and freq share the columns tod, direction_id and trip_sum, the result
    carries them with the merge suffixes _x (from trip) and _y (from freq).

    Parameters
    ----------
    feed : object with a `routes` DataFrame
    trip, shape, freq, bus_routetype : DataFrames (not modified)
    chained_stop_to_node : unused
    broken_trip : list of trip_ids that could not be routed
    url : feed url, only tested for the substring 'mvta'

    Returns
    -------
    DataFrame with the merged columns and the columns listed above.
    """
    mode_dict = {0:8, 2:9}
    bus_mode_dict = {'Urb Loc':5, 'Sub Loc':6, 'Express':7}
    metro_operator_dict = {'0':3, '1':3, '2':3, '3':4, '4':2, '5':5, '6':8, '7':1, '8':1, '9':10, '10':3,
                           '11':9, '12':3, '13':4, '14':4, '15':3}
    mvta_operator_dict = {'1':4, '2':3, '3':3}

    routetype_df = bus_routetype.copy()

    shape_df = shape.copy()
    trip_df = trip.copy()

    broken_shape_list = shape_df[shape_df.LINK_ID.isnull()].shape_id.tolist()
    broken_shape_list = broken_shape_list + trip_df[trip_df.trip_id.isin(broken_trip)].shape_id.tolist()

    # `~` is the boolean negation; unary minus on a boolean mask is rejected by current NumPy/pandas
    is_broken_trip = trip_df.shape_id.isin(broken_shape_list) | trip_df.trip_id.isin(broken_trip)
    trip_df = trip_df[~is_broken_trip]

    trip_df = pd.merge(trip_df, feed.routes, how = 'left', on = 'route_id')
    trip_df = pd.merge(trip_df, freq, how = 'left', on = 'trip_id')
    trip_df = pd.merge(trip_df, routetype_df[['route', 'routetype']],
                       how = 'left', left_on = 'route_short_name', right_on = 'route')

    trip_df['NAME'] = trip_df.apply(lambda x: x.agency_id + '_' + x.route_id + '_' + x.route_short_name + \
                                    '_' + x.tod_x + '_' + 'dir' + str(x.direction_id_x),
                                    axis = 1)
    trip_df['LONGNAME'] = trip_df['route_long_name']
    trip_df['HEADWAY'] = (trip_df['headway_secs']/60).astype(int)
    trip_df['MODE'] = np.where(trip_df.route_type == 3,
                               trip_df['routetype'].map(bus_mode_dict),
                               trip_df['route_type'].map(mode_dict))
    # assign the filled column back: an in-place fillna on a column selection is not
    # guaranteed to reach the frame
    trip_df['MODE'] = trip_df['MODE'].fillna(5).astype(int)

    trip_df['ONEWAY'] = 'T'

    if 'mvta' in url:
        trip_df['OPERATOR'] = trip_df['agency_id'].map(mvta_operator_dict)
    else:
        trip_df['OPERATOR'] = trip_df['agency_id'].map(metro_operator_dict)

    return trip_df

In [20]:
# Line table for the Metro Transit feed (one row per representative trip that could be routed).
metro_cube = prepare_df_for_cube(metro_feed, 
                                metro_trip_df, 
                                metro_shape_df, 
                                metro_chained_stop_to_node_df, 
                                metro_freq_df, 
                                metro_broken_shape_trip, 
                                bus_routetype_df,
                                metrotransit_url)

In [21]:
# (number of lines, number of columns) of the Metro Transit line table
metro_cube.shape

(392, 40)

In [22]:
# Line table for the MVTA feed; the 'mvta' substring in the url selects the MVTA operator codes.
mvta_cube = prepare_df_for_cube(mvta_feed, 
                                mvta_trip_df, 
                                mvta_shape_df, 
                                mvta_chained_stop_to_node_df, 
                                mvta_freq_df, 
                                mvta_broken_shape_trip, 
                                bus_routetype_df,
                                mvta_url)

In [23]:
# (number of lines, number of columns) of the MVTA line table
mvta_cube.shape

(66, 47)

In [30]:
# Sanity check: rows without a MODE. prepare_df_for_cube fills missing modes with 5,
# so this is expected to be empty.
metro_cube[metro_cube.MODE.isnull()]

Unnamed: 0,route_id,service_id,trip_id,trip_headsign,direction_id_x,block_id,shape_id,wheelchair_accessible,arrival_time,departure_time,...,trip_sum_y,headway_secs,route,routetype,NAME,LONGNAME,HEADWAY,MODE,ONEWAY,OPERATOR


In [25]:
# Sanity checks on the MVTA line table. Only the last expression of a cell is displayed,
# so the routetype check on the first line is evaluated but its result is not shown.
mvta_cube[mvta_cube.routetype.isnull()]
mvta_cube[mvta_cube.MODE.isnull()]
#temp.groupby(['MODE', 'route_short_name']).count()

Unnamed: 0,route_id,service_id,trip_id,trip_headsign,trip_destination,trip_short_name,trip_route_name,direction_id_x,pub_dir_id,block_id,...,trip_sum_y,headway_secs,route,routetype,NAME,LONGNAME,HEADWAY,MODE,ONEWAY,OPERATOR


In [26]:
def node_list(x, rail_node_df, chained_stop_to_node_df, stop_df, shape_df):
    """Format one transit line as a Cube LINE record and append it to the global `lines` list.

    The record lists NAME, LONGNAME, the headway (HEADWAY for tod_y == 'peak', HEADWAY[2]
    otherwise), MODE, ONEWAY, OPERATOR and the node sequence of the line's shape: the 'A'
    node of every shape row followed by the 'B' node of the last row. Nodes that are not
    a stop of the trip are written with a leading '-'.

    Parameters
    ----------
    x : row of the table returned by prepare_df_for_cube
    rail_node_df : rail node DataFrame ('stop_id', 'N'), or the boolean False when there is none
    chained_stop_to_node_df : stop sequences with 'trip_id' and 'stop_id'
    stop_df : stops with 'stop_id' and the snapped node 'N' (read only; not modified)
    shape_df : link sequences with 'shape_id', 'A', 'B'

    Returns
    -------
    None; the record is appended to the module-level list `lines`, which must exist.

    Raises
    ------
    IndexError if shape_df has no row for x.shape_id.
    """
    # stops served by this trip
    trip_stop_ids = chained_stop_to_node_df[chained_stop_to_node_df.trip_id == x.trip_id]['stop_id'].tolist()
    trip_stop_df = stop_df[stop_df['stop_id'].isin(trip_stop_ids)]

    # rail stops use their rail node id instead of the snapped roadway node. The mapping is
    # applied to a local set for this trip only, so the caller's stop_df is left untouched
    # (previously the whole 'N' column was rewritten on every call).
    if type(rail_node_df) != bool:
        rail_node_dict = dict(zip(rail_node_df.stop_id, rail_node_df.N))
    else:
        rail_node_dict = {}
    stop_node_set = {rail_node_dict.get(stop_id, n)
                     for stop_id, n in zip(trip_stop_df['stop_id'], trip_stop_df['N'])}

    # node sequence along the shape
    line_shape_df = shape_df[shape_df['shape_id'] == x.shape_id]
    node_seq = line_shape_df['A'].tolist() + [line_shape_df['B'].iloc[-1]]

    s = '\nLINE NAME=\"%s\",' % (x.NAME,)

    # line attributes
    s += '\n LONGNAME=\"%s",' % (x.LONGNAME,)
    if x.tod_y == 'peak':
        s += '\n HEADWAY=%s,' % (x.HEADWAY,)
    else:
        s += '\n HEADWAY[2]=%s,' % (x.HEADWAY,)
    s += '\n MODE=%s,' % (x.MODE,)
    s += '\n ONEWAY=%s,' % (x.ONEWAY,)
    s += '\n OPERATOR=%s,' % (x.OPERATOR,)
    s += '\nNODES='

    # node list: comma separated, no comma after the last node
    node_entries = [('\n %s' if n in stop_node_set else '\n -%s') % (n,) for n in node_seq]
    s += ','.join(node_entries)

    lines.append(s)


In [27]:
# `lines` is the module-level list that node_list() appends to; it starts with the
# header line of the line file.
lines = [';;<<PT>><<LINE>>;;']

# one LINE record per Metro Transit line ...
metro_cube.apply(lambda x: node_list(x, 
                                    metro_rail_node_df, 
                                    metro_chained_stop_to_node_df, 
                                    metro_stop_df, 
                                    metro_shape_df), 
                axis=1)

# ... followed by one per MVTA line
mvta_cube.apply(lambda x: node_list(x, 
                                    mvta_rail_node_df, 
                                    mvta_chained_stop_to_node_df, 
                                    mvta_stop_df, 
                                    mvta_shape_df), 
                axis=1)

# combined line file (header + metro + mvta records)
with open('C:/Users/wangs1/Documents/Met_Council/Network Standard/transit.txt', 'w') as f:
    f.write("\n".join(map(str, lines)))

#node_list(temp.iloc[-1])
#temp.trip_id
#print(lines)
# start over with a fresh list to write a second file that holds the MVTA lines only
lines = [';;<<PT>><<LINE>>;;']

mvta_cube.apply(lambda x: node_list(x, 
                                    mvta_rail_node_df, 
                                    mvta_chained_stop_to_node_df, 
                                    mvta_stop_df, 
                                    mvta_shape_df), 
                axis=1)

with open('C:/Users/wangs1/Documents/Met_Council/Network Standard/mvta_transit_new3.txt', 'w') as f:
    f.write("\n".join(map(str, lines)))

# NOTE(review): drive_link_with_rail.shp is written by write_out_transit_embedded_roadway_network,
# whose call inside main() is commented out -- presumably the file comes from an earlier run; confirm.
roadway_with_rail_link_gdf = gpd.read_file(cd + 'output/drive_link_with_rail.shp')
#roadway_with_rail_node_gdf = gpd.read_file(cd + 'output/drive_node_with_rail.shp')

# (the same file is read a second time here)
roadway_with_rail_link_gdf = gpd.read_file(cd + 'output/drive_link_with_rail.shp')
# replace ASGNGRP 0 by 1 and RC_NUM 0 by 20
roadway_with_rail_link_gdf['ASGNGRP'] = np.where(roadway_with_rail_link_gdf['ASGNGRP'] == 0,
                                                1, roadway_with_rail_link_gdf['ASGNGRP'])

roadway_with_rail_link_gdf['RC_NUM'] = np.where(roadway_with_rail_link_gdf['RC_NUM'] == 0,
                                                20, roadway_with_rail_link_gdf['RC_NUM'])

# missing values become 0, then every float64 column is cast to int
roadway_with_rail_link_gdf.fillna(0, inplace = True)
float_list = roadway_with_rail_link_gdf.columns[(roadway_with_rail_link_gdf.dtypes.values == np.dtype('float64'))]
for x in float_list:
    roadway_with_rail_link_gdf[x] = roadway_with_rail_link_gdf[x].astype(int)
    
# DISTANCE and T_MANTIME are left out of this file; shp_float_fields() adds them
# to the written shapefile as real-valued fields.
roadway_with_rail_link_gdf.drop(['DISTANCE', 'T_MANTIME'], axis = 1).to_file(cd + 'output/drive_and_rail_link.shp')

from osgeo import ogr

def shp_float_fields(network_type = 'drive'):
    """Add real-valued fields to cd + 'output/drive_and_rail_link.shp' and fill them in place.

    Creates the field DISTANCE (width 10, precision 5) when it is missing and, for
    network_type 'drive', the field T_MANTIME with the same definition. Every feature then
    gets DISTANCE = geometry length / 1609.344 and, for 'drive', T_MANTIME = 0.
    (The divisor is metres per mile, so the layer coordinates are presumably in metres --
    TODO confirm.) Prints the number of fields found before any field is added.
    """
    is_drive = network_type == 'drive'

    datasource = ogr.Open(cd + 'output/drive_and_rail_link.shp', update=True)
    layer = datasource.GetLayer()
    defn = layer.GetLayerDefn()

    existing_fields = []
    for idx in range(defn.GetFieldCount()):
        existing_fields.append(defn.GetFieldDefn(idx).GetName())
    print(len(existing_fields))

    # DISTANCE field
    distance_defn = ogr.FieldDefn('DISTANCE', ogr.OFTReal)
    distance_defn.SetWidth(10)
    distance_defn.SetPrecision(5)
    if 'DISTANCE' not in existing_fields:
        layer.CreateField(distance_defn)

    # T_MANTIME field, drive networks only
    if is_drive:
        mantime_defn = ogr.FieldDefn('T_MANTIME', ogr.OFTReal)
        mantime_defn.SetWidth(10)
        mantime_defn.SetPrecision(5)
        if 'T_MANTIME' not in existing_fields:
            layer.CreateField(mantime_defn)

    # fill the fields feature by feature
    for feature in layer:
        length = (feature.GetGeometryRef().Length())/1609.344
        feature.SetField("DISTANCE", length)
        if is_drive:
            feature.SetField("T_MANTIME", 0)
        layer.SetFeature(feature)

    # dropping the reference closes the shapefile
    datasource = None


# add DISTANCE / T_MANTIME to output/drive_and_rail_link.shp
shp_float_fields()

# create subset for transit st paul
# NOTE(review): these two paths are on drive W:, all other paths in the notebook on C: -- confirm.
poly_gdf = gpd.read_file("W:/Users/wangs1/Documents/Met_Council/Network Standard/poly2.shp")
poly_gdf = poly_gdf.to_crs(roadway_gdf.crs)

bus_routetype_gdf = gpd.read_file("W:/Users/wangs1/Documents/Met_Council/GIS/shp_trans_transit_routes/TransitRoutes.shp")

# routes whose geometry intersects the first polygon of poly_gdf.
# NOTE(review): poly_gdf was reprojected to the roadway CRS but bus_routetype_gdf was not;
# the test is only meaningful if both layers share a CRS -- confirm.
st_transit = bus_routetype_gdf[bus_routetype_gdf.intersects(poly_gdf.geometry[0])]

def write_out_transit_standard_subset(trip, stop, shape, broken_trip, freq, feed, st_tran, rail_node = False):
    """Write the transit standard text files for a subset of routes.

    Same output as write_out_transit_standard, restricted to the routes whose
    route_short_name appears in st_tran.route, and written to `cd + 'output/'` with the
    prefix 'st_' (st_routes.txt, st_shapes.txt, st_trips.txt, st_frequencies.txt,
    st_stops.txt). Trips on a shape with a missing LINK_ID or listed in broken_trip are
    dropped. The stops file is not restricted to the subset.

    Parameters
    ----------
    trip, stop, shape, freq : DataFrames produced by main (copied, not modified)
    broken_trip : list of trip_ids that could not be routed
    feed : object with `routes` and `trips` DataFrames
    st_tran : table with a 'route' column listing the route short names to keep
    rail_node : rail node DataFrame ('stop_id', 'N'), or the boolean False when there is none
    """
    shape_df = shape.copy()
    broken_shape_list = shape_df[shape_df.LINK_ID.isnull()].shape_id.tolist()

    subset_route_names = st_tran.route.tolist()

    trip_df = trip.copy()
    # `~` is the boolean negation; unary minus on a boolean mask is rejected by current NumPy/pandas
    is_broken_trip = trip_df.shape_id.isin(broken_shape_list) | trip_df.trip_id.isin(broken_trip)
    trip_df = trip_df[~is_broken_trip]
    trip_df = pd.merge(trip_df, feed.routes[['route_id', 'route_short_name']], how = 'left', on = 'route_id')
    trip_df = trip_df[trip_df.route_short_name.isin(subset_route_names)]

    final_trip_list = trip_df.trip_id.unique().tolist()
    final_shape_list = trip_df.shape_id.unique().tolist()

    # the copy makes the column assignments below act on an independent frame
    shape_df = shape_df[shape_df.shape_id.isin(final_shape_list)].copy()
    shape_df['LINK_ID'] = ['driveLink' + str(v) for v in shape_df['LINK_ID'].astype(int)]
    shape_df['A'] = ['driveNode' + str(v) for v in shape_df['A']]
    shape_df['B'] = ['driveNode' + str(v) for v in shape_df['B']]

    freq_df = freq.copy()
    freq_df = freq_df[freq_df.trip_id.isin(final_trip_list)]

    stop_df = stop.copy()

    if type(rail_node) != bool:
        # one dict lookup per stop instead of a scan of the rail stop list per stop
        rail_node_dict = dict(zip(rail_node.stop_id, rail_node.N))
        stop_df['N'] = [rail_node_dict.get(stop_id, n) for stop_id, n in zip(stop_df['stop_id'], stop_df['N'])]

    stop_df['N'] = ['driveNode' + str(n) for n in stop_df['N']]

    route_df = feed.routes.copy()
    route_df = route_df[route_df.route_short_name.isin(subset_route_names)]

    route_df.to_csv(cd + 'output/st_routes.txt', index = False, sep = ',')
    shape_df.to_csv(cd + 'output/st_shapes.txt', index = False, sep = ',')
    trip_df[feed.trips.columns.values].to_csv(cd + 'output/st_trips.txt', index = False, sep = ',')
    freq_df[['trip_id', 'headway_secs']].to_csv(cd + 'output/st_frequencies.txt', index = False, sep = ',')
    stop_df.to_csv(cd + 'output/st_stops.txt', index = False, sep = ',')

# write the st_*.txt files for the Metro Transit routes listed in st_transit
write_out_transit_standard_subset(metro_trip_df, metro_stop_df, metro_shape_df, 
                                  metro_broken_shape_trip, metro_freq_df, metro_feed, st_transit, metro_rail_node_df)