In [None]:
import html
import os
from datetime import datetime

import folium as fl
import folium.plugins as fp
import geopandas as gpd
import gtfs_kit as gk
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from IPython.display import display, HTML

In [None]:
def highlight_greater(row, col_a=None, col_b=None):
    """Colour a whole row by whether the two feeds report the same value.

    Meant to be used with ``Styler.apply(..., axis=1)``. Despite the name,
    the check is a plain inequality, not a "greater than" comparison.

    Parameters
    ----------
    row : mapping-like (for example a ``pandas.Series``)
        One row of the comparison table.
    col_a, col_b : hashable, optional
        Labels of the two values to compare. When omitted they fall back to
        the notebook-level ``GTFS_A_name`` and ``GTFS_B_name``.

    Returns
    -------
    list of str
        One ``background-color`` CSS declaration per entry of ``row``:
        red (``#d65f5f``) when the values differ, green (``#5fba7d``) otherwise.
    """
    # Resolve the globals lazily so callers may pass the labels explicitly.
    if col_a is None:
        col_a = GTFS_A_name
    if col_b is None:
        col_b = GTFS_B_name

    # A two-way branch guarantees `color` is always bound.
    color = '#d65f5f' if row[col_a] != row[col_b] else '#5fba7d'

    return ['background-color: {}'.format(color) for _ in row]

In [None]:

def highlight_file_modified(row):
    """Return one CSS background colour per entry of ``row``.

    The row is painted red (``#d65f5f``) when its ``file_modified_GTFS_A``
    and ``file_modified_GTFS_B`` entries differ, and green (``#5fba7d``)
    when they are equal.
    """
    stamp_a = row['file_modified_GTFS_A']
    stamp_b = row['file_modified_GTFS_B']

    if stamp_a != stamp_b:
        shade = '#d65f5f'
    elif stamp_b == stamp_a:
        shade = '#5fba7d'

    return [f'background-color: {shade}' for _ in row]

In [None]:
def highlight_file_size(row):
    """Return one CSS background colour per entry of ``row``.

    Red (``#d65f5f``) marks rows whose ``file_size_GTFS_A`` and
    ``file_size_GTFS_B`` entries differ; green (``#5fba7d``) marks rows
    where they are equal.
    """
    left, right = row['file_size_GTFS_A'], row['file_size_GTFS_B']

    if left != right:
        css = 'background-color: #d65f5f'
    elif left == right:
        css = 'background-color: #5fba7d'

    return [css for _ in row]

In [None]:
def percentage_compare(tableA, tableB, value_check):
    """Compare the distinct values of one column across two tables.

    Parameters
    ----------
    tableA, tableB : pandas.DataFrame
        Tables that both contain the column ``value_check``.
    value_check : str
        Name of the column whose distinct values are compared.

    Returns
    -------
    dict
        ``{value_check: [n_A, n_B, n_overlap, overlap_pct,
        n_only_A, only_A_pct, n_only_B, only_B_pct]}`` where

        * ``overlap_pct`` is the overlap relative to the union of both sets,
        * ``only_A_pct`` / ``only_B_pct`` are relative to A's / B's own set.

        A percentage whose denominator would be zero (an empty column) is
        reported as ``0.0`` instead of raising ``ZeroDivisionError``.

    Raises
    ------
    KeyError
        If ``value_check`` is missing from either table.
    """
    setA = set(tableA[value_check].to_list())
    setB = set(tableB[value_check].to_list())

    overlap_n = len(setA & setB)
    universe_n = len(setA | setB)

    # Guard every division: an empty table used to abort the comparison.
    OnlyInA_p = 100 - float(overlap_n) / len(setA) * 100 if setA else 0.0
    OnlyInA_n = len(setA) - overlap_n
    OnlyInB_p = 100 - float(overlap_n) / len(setB) * 100 if setB else 0.0
    OnlyInB_n = len(setB) - overlap_n
    intersection = float(overlap_n) / universe_n * 100 if universe_n else 0.0

    return {
        value_check: [
            len(setA),
            len(setB),
            overlap_n,
            intersection,
            OnlyInA_n,
            OnlyInA_p,
            OnlyInB_n,
            OnlyInB_p,
        ]
    }


In [None]:
from difflib import SequenceMatcher

def similar(a, b):
    """Return the ``difflib.SequenceMatcher`` similarity ratio of ``a`` and ``b``.

    The ratio is a float in ``[0, 1]``; ``1.0`` means the sequences are identical.
    """
    matcher = SequenceMatcher(isjunk=None, a=a, b=b)
    return matcher.ratio()

In [None]:
# Locations of the two GTFS feeds to compare; fill these in before running
# the rest of the notebook. Each value is handed to gk.read_feed(), and the
# part of its base name before ".zip" is used as the feed's display label.
GTFS_A = ""
GTFS_B = ""


In [None]:
# Display labels for the two feeds: the file name up to ".zip".
GTFS_A_name = os.path.basename(GTFS_A).split(".zip")[0]
GTFS_B_name = os.path.basename(GTFS_B).split(".zip")[0]

# The labels are later used as column names of the comparison table. Two feeds
# with the same file name (e.g. old/gtfs.zip vs new/gtfs.zip) would produce
# duplicate columns and break the row-wise comparison, so tell them apart.
if GTFS_A_name == GTFS_B_name:
    GTFS_A_name, GTFS_B_name = f"{GTFS_A_name} (A)", f"{GTFS_B_name} (B)"

In [None]:
# Read both feeds, A first and then B, with 'km' as the distance unit.
feed_a, feed_b = (
    gk.read_feed(feed_path, dist_units='km') for feed_path in (GTFS_A, GTFS_B)
)

In [None]:
# Capture the current moment once so both strings describe the same instant.
now = datetime.now()
# Human-readable timestamp, e.g. "31/12/2024, 23:59:59".
date_time = f"{now:%d/%m/%Y, %H:%M:%S}"
# Compact YYYYMMDD form of today's date.
TODAY_STR = f"{now:%Y%m%d}"

## GTFS high-level comparison

In [None]:
# Per-feed summary (indicator/value table) computed for today's date.
GTFS_A_des = feed_a.describe(TODAY_STR)
GTFS_B_des = feed_b.describe(TODAY_STR)

# Put both summaries side by side, one row per indicator, labelled by feed name.
df = pd.merge(GTFS_A_des, GTFS_B_des, on=["indicator"], suffixes=('_GTFS_A', '_GTFS_B'))
df = df.rename(columns={'value_GTFS_A': GTFS_A_name, 'value_GTFS_B': GTFS_B_name})

# The 'agencies' value is a sized collection; show its length so the two feeds
# can be compared as numbers. `.iloc[0]` picks the first matching row by
# position instead of assuming that row carries the index label 0.
df.loc[df.indicator == 'agencies', [GTFS_A_name, GTFS_B_name]] = (
    len(GTFS_A_des.loc[GTFS_A_des.indicator == 'agencies', 'value'].iloc[0]),
    len(GTFS_B_des.loc[GTFS_B_des.indicator == 'agencies', 'value'].iloc[0]),
)

df = df.reset_index(drop=True)
summary_style = df.style.apply(highlight_greater, axis=1).set_table_attributes('style="font-size: 24px"')
# Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0 in favour
# of Styler.hide(axis="index"); support both so the cell runs on either version.
summary_style.hide(axis="index") if hasattr(summary_style, "hide") else summary_style.hide_index()


In [None]:
# Every (table of feed A, table of feed B, column) combination to compare.
parm = [
    [feed_a.agency, feed_b.agency, "agency_id"],
    [feed_a.agency, feed_b.agency, "agency_name"],
    [feed_a.routes, feed_b.routes, "route_id"],
    [feed_a.routes, feed_b.routes, "route_short_name"],
    [feed_a.routes, feed_b.routes, "route_long_name"],
    [feed_a.routes, feed_b.routes, "route_color"],
    [feed_a.trips, feed_b.trips, "trip_id"],
    [feed_a.trips, feed_b.trips, "trip_headsign"],
    [feed_a.stops, feed_b.stops, "stop_id"],
    [feed_a.stops, feed_b.stops, "stop_name"],
]

copm_list = {}
for table_a, table_b, column in parm:
    try:
        copm_list.update(percentage_compare(table_a, table_b, column))
    except Exception as err:
        # Best effort: a column that cannot be compared (e.g. it is missing
        # from one feed) is skipped, but the reason is reported. Catching
        # Exception rather than everything lets KeyboardInterrupt through.
        print(f"{column}: skipped ({type(err).__name__}: {err})")

df = pd.DataFrame.from_dict(
    copm_list,
    orient='index',
    columns=["GTFS_A", "GTFS_B", "Overlap", "overlap (%)", "Only_A", "Only A (%)", "Only_B", "Only B (%)"],
)
format_dict = {
    'overlap (%)': '{0:,.2f}',
    'Only A (%)': '{0:,.2f}',
    'Only B (%)': '{0:,.2f}',
}
# `color` is passed as a plain string: Styler.bar documents a string or a pair
# of strings, not a one-element list.
df.style.bar(
    subset=['overlap (%)', 'Only A (%)', 'Only B (%)'], align='zero', color='#5fba7d'
).set_table_attributes('style="font-size: 18px"').format(format_dict).set_caption('Overlaps')


In [None]:
# Long route names of each feed, as lists and as sets of distinct values.
listA = feed_a.routes["route_long_name"].tolist()
listB = feed_b.routes["route_long_name"].tolist()
setA, setB = set(listA), set(listB)


In [None]:
# Print every pair of names (one from each set) that is more than 90% similar.
# Non-string entries (missing names show up as NaN) are skipped because
# difflib cannot compare them, and the names are sorted so the printed order
# is the same on every run.
names_a = sorted(name for name in setA if isinstance(name, str))
names_b = sorted(name for name in setB if isinstance(name, str))

for x in names_a:
    for y in names_b:
        score = similar(x, y)
        if score > 0.9:
            print(x, "|", y, "|", score)

## Basic shapes map

In [None]:


# Draw the route geometries of both feeds next to each other.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(23, 23))
ax1, ax2 = axes
rg1 = feed_a.geometrize_routes()
rg2 = feed_b.geometrize_routes()
rg1.plot(ax=ax1)
rg2.plot(ax=ax2)
# Label each panel so the figure can be read on its own.
ax1.set_title(GTFS_A_name)
ax2.set_title(GTFS_B_name)

A = feed_a.compute_bounds()
B = feed_b.compute_bounds()

# np.allclose is used with its default tolerances (rtol=1e-05, atol=1e-08),
# so the bounds must be almost identical to count as similar.
if np.allclose(A, B):
    display(HTML("<h1 style='color:blue;'> The area of two GTFS are pretty similar </h1>"))
else:
    display(HTML("<h1 style='color:red;'>there are significant difference between GTFS area</h1>"))
    

## Stops Map

In [None]:
# Stop IDs of each feed plus the circle-marker options used to draw them
# (feed A in red, feed B in green). The option dicts are later interpolated
# into a JavaScript callback, hence "fill" being the string "true".
stop_ids_a = feed_a.stops.stop_id.tolist()
stop_style_a = dict(radius=8, fill="true", color='red', weight=1, fillOpacity=0.75)

stop_ids_b = feed_b.stops.stop_id.tolist()
stop_style_b = dict(radius=8, fill="true", color='green', weight=1, fillOpacity=0.75)

In [None]:
my_map = fl.Map(tiles="cartodbpositron")
# NOTE: `group` is only referenced by the commented-out Search control below;
# it is never added to the map.
group = fl.FeatureGroup(name=f"Stops {GTFS_A_name}")
# Stops without coordinates cannot be drawn. Drop them *before* fillna(), which
# would otherwise put "n/a" strings into the latitude/longitude columns and
# break both the markers and the min()/max() bounds computed below.
stops_a = (
    feed_a.stops
    .loc[lambda x: x.stop_id.isin(stop_ids_a)]
    .dropna(subset=["stop_lat", "stop_lon"])
    .fillna("n/a")
)


# Add stops with clustering. Each data row is [lat, lon, name, id].
# `marker` is declared with `var` so it stays local to the callback instead of
# leaking as an implicit global.
callback = f"""\
function (row) {{
    var marker = L.circleMarker(new L.LatLng(row[0], row[1]),
        {stop_style_a}
    );
    marker.bindPopup(
        '<b>Stop name</b>: ' + row[2] + '<br>' +
        '<b>Stop ID</b>: ' + row[3] + '<br>'
    );
    return marker;
}};
"""

fp.FastMarkerCluster(
    data=stops_a[
        ["stop_lat", "stop_lon", "stop_name",  "stop_id"]
    ].values.tolist(),
    callback=callback,
    disableClusteringAtZoom=14,
    name=f"Stops {GTFS_A_name}"
).add_to(my_map)

fl.LayerControl().add_to(my_map)
#fp.Search(group,position='topright',geom_type="Point",search_label="stop_name").add_to(my_map)
fp.MiniMap().add_to(my_map)
fp.Fullscreen().add_to(my_map)
# Zoom the map to the south-west / north-east corners of the plotted stops.
bounds = [(stops_a.stop_lat.min(), stops_a.stop_lon.min()),(stops_a.stop_lat.max(), stops_a.stop_lon.max()),]
my_map.fit_bounds(bounds, padding=[1, 1])



In [None]:
my_map_b = fl.Map(tiles="cartodbpositron")
# NOTE: `groupb` is created but never added to the map in this cell.
groupb = fl.FeatureGroup(name=f"Stops {GTFS_B_name}")
# Stops without coordinates cannot be drawn. Drop them *before* fillna(), which
# would otherwise put "n/a" strings into the latitude/longitude columns and
# break both the markers and the min()/max() bounds computed below.
stops_b = (
    feed_b.stops
    .loc[lambda x: x.stop_id.isin(stop_ids_b)]
    .dropna(subset=["stop_lat", "stop_lon"])
    .fillna("n/a")
)
# Each data row is [lat, lon, name, id]. `marker` is declared with `var` so it
# stays local to the callback instead of leaking as an implicit global.
callback_b = f"""\
function (row) {{
    var marker = L.circleMarker(new L.LatLng(row[0], row[1]),
        {stop_style_b}
    );
    marker.bindPopup(
        '<b>Stop name</b>: ' + row[2] + '<br>' +
        '<b>Stop ID</b>: ' + row[3] + '<br>'
    );
    return marker;
}};
"""
fp.FastMarkerCluster(
    data=stops_b[
        ["stop_lat", "stop_lon", "stop_name",  "stop_id"]
    ].values.tolist(),
    callback=callback_b,
    disableClusteringAtZoom=14,
    name=f"Stops {GTFS_B_name}"
).add_to(my_map_b)

fp.MiniMap().add_to(my_map_b)
fp.Fullscreen().add_to(my_map_b)
fl.LayerControl().add_to(my_map_b)

# Zoom the map to the south-west / north-east corners of the plotted stops.
bounds = [(stops_b.stop_lat.min(), stops_b.stop_lon.min()),(stops_b.stop_lat.max(), stops_b.stop_lon.max()),]
my_map_b.fit_bounds(bounds, padding=[1, 1])


In [None]:
# Show both stop maps side by side, each inside its own iframe.
# The rendered page is embedded in the `srcdoc` attribute, so it must be fully
# attribute-escaped: html.escape() handles &, <, > and both quote characters,
# whereas replacing only '"' lets entities in the page be decoded too early.
# (In the style string the later `width: 49%` overrides the pixel width.)
htmlmap = HTML('<iframe srcdoc="{}" style="float:left; width: {}px; height: {}px; display:inline-block; width: 49%; margin: 0 auto; border: 2px solid black"></iframe>'
           '<iframe srcdoc="{}" style="float:right; width: {}px; height: {}px; display:inline-block; width: 49%; margin: 0 auto; border: 2px solid black"></iframe>'
           .format(html.escape(my_map.get_root().render(), quote=True),500,500,
                   html.escape(my_map_b.get_root().render(), quote=True),500,500))
display(htmlmap)

In [None]:
# Route IDs of each feed, and the IDs that occur in both.
listA = feed_a.routes.route_id.tolist()
listB = feed_b.routes.route_id.tolist()
setA, setB = set(listA), set(listB)

overlap = list(setA & setB)

In [None]:
# Display the route IDs that occur in both feeds.
overlap

In [None]:
# Map the routes shared by both feeds (with their stops), one map per feed.
route_map_a=feed_a.map_routes(overlap, include_stops=True)
route_map_b=feed_b.map_routes(overlap, include_stops=True)

# The rendered page is embedded in the `srcdoc` attribute, so it must be fully
# attribute-escaped: html.escape() handles &, <, > and both quote characters,
# whereas replacing only '"' lets entities in the page be decoded too early.
htmlmap = HTML('<iframe srcdoc="{}" style="float:left; width: {}px; height: {}px; display:inline-block; width: 49%; margin: 0 auto; border: 2px solid black"></iframe>'
           '<iframe srcdoc="{}" style="float:right; width: {}px; height: {}px; display:inline-block; width: 49%; margin: 0 auto; border: 2px solid black"></iframe>'
           .format(html.escape(route_map_a.get_root().render(), quote=True),500,500,
                   html.escape(route_map_b.get_root().render(), quote=True),500,500))
display(htmlmap)