In [75]:
import pandas as pd

import os
import configparser
import datetime as dt
from datetime import datetime
from os.path import exists
import time

import folium
from random import randint

import shapely
from shapely.geometry import Point, Polygon, LineString, GeometryCollection
import numpy as np

In [76]:
# Method to read config file settings
def read_config(Config_File):
    config = configparser.ConfigParser()
    config.read(Config_File)
    return config

configurations = read_config("configurations.ini")

In [77]:
# Function to merge csv feed files
def merge_transitview_rt(start_date_time_str, stop_date_time_str):
    start_date_time = datetime.strptime(start_date_time_str, '%m/%d/%Y %H:%M')
    stop_date_time = datetime.strptime(stop_date_time_str, '%m/%d/%Y %H:%M')
    
    start = time.time()
    f_count = 0
    
    # delta time
    delta = dt.timedelta(minutes=1)
    
    csv_list = []
    print('reading in CSVs')
    # iterate over range of date / time
    while (start_date_time <= stop_date_time):

        #generate the file name to open
        folder = configurations['common_settings']['transitviewall_history_data_root'] + "\\" \
            + start_date_time.strftime("%#m") + '\\' \
            + start_date_time.strftime("%#d") + '\\' \
            + start_date_time.strftime("%H") + '\\' \
            + start_date_time.strftime("%M") + '\\' 
        
        file = folder + 'feed.csv'

        if exists(file):
            csv_list.append(pd.read_csv(file))
            
        f_count +=1
        end = time.time()
        print(str(f_count) + '. ' + str(round(end-start,0)) + 'sec --- file: ' + str(start_date_time), flush=True, end='\r')
            
        # increemnt datetime by 1 minute
        start_date_time += delta
    
    print('\nmerging into single data frame')
    csv_merged = pd.concat(csv_list, ignore_index=True)
    print(str(round(time.time()-start,0))+'sec')
    
    indexDrop = csv_merged[(csv_merged['late'] == 998) | (csv_merged['late'] == 999)].index
    csv_merged.drop(indexDrop , inplace=True)
    
    print('exporting to single CSV file')
    csv_merged.to_csv('data\kh_transitview.csv', index=False)
    print(str(round(time.time()-start,0))+'sec')

In [72]:
merge_transitview_rt("7/26/2022 00:00", "7/31/2022 23:59")

reading in CSVs
8640. 195.0sec --- file: 2022-07-31 23:59:00
merging into single data frame
217.0sec
exporting to single CSV file
341.0sec


In [3]:
# LOAD NEEDED DATA

In [4]:
# Load RT data (gtfs_data)

In [78]:
gtfs_data = pd.read_csv('data\kh_transitview.csv')
gtfs_data['file'] = pd.to_datetime(gtfs_data['file'])
gtfs_data['timestamp'] = pd.to_datetime(gtfs_data['timestamp'])
gtfs_data.head(5)

Unnamed: 0,BlockID,Direction,Offset,Offset_sec,VehicleID,destination,estimated_seat_availability,file,heading,label,lat,late,lng,next_stop_id,next_stop_name,next_stop_sequence,original_late,route,timestamp,trip
0,1076,NorthBound,1,19,7452,Neshaminy Mall,NOT_AVAILABLE,2022-07-26,360,7452,40.024891,0,-75.077209,21962.0,Frankford Transportation Center-14-BLVDDIR-MFO,1.0,0,14,2022-07-26 04:00:04+00:00,6644
1,1077,SouthBound,1,13,7444,Frankford Transportation Center,NOT_AVAILABLE,2022-07-26,225,7444,40.112644,8,-74.990776,22765.0,Roosevelt Blvd & Southampton - FS,54.0,8,14,2022-07-26 04:00:04+00:00,6756
2,1078,NorthBound,1,8,7447,Neshaminy Mall,NOT_AVAILABLE,2022-07-26,45,7447,40.110142,2,-74.993507,22537.0,Roosevelt Blvd & Haldeman Av,40.0,2,14,2022-07-26 04:00:04+00:00,6643
3,1120,SouthBound,1,33,8664,Frankford Transportation Center,NOT_AVAILABLE,2022-07-26,195,8664,40.040073,2,-75.05957,22.0,Roosevelt Blvd & Cottman Av,9.0,2,BLVDDIR,2022-07-26 04:00:04+00:00,7920
4,1221,EastBound,1,28,8184,Torresdale-Cottman,NOT_AVAILABLE,2022-07-26,120,8184,40.054508,0,-75.071701,21268.0,Cottman Av & Whitaker Av,46.0,1,70,2022-07-26 04:00:04+00:00,7573


In [79]:
# Load Schedule Trips Data (trips_data)

In [80]:
sched_folder = configurations['common_settings']['schedule_root']
trips_f = sched_folder + '\\' + 'trips.txt'
trips_data = pd.read_csv(trips_f)
trips_data.head()

Unnamed: 0,route_id,service_id,trip_id,trip_headsign,block_id,direction_id,shape_id
0,1,10,6545,Decatur-Drummond,1003,0,272448
1,1,10,6546,Parx Casino,2151,0,272446
2,1,10,6547,Parx Casino via Decatur-Drummond,2156,0,272453
3,1,10,6548,Decatur-Drummond,1426,0,272452
4,1,10,6549,Parx Casino via Decatur-Drummond,2154,0,272453


In [81]:
# Load Schedule Shapes Data (shapes_data)

In [82]:
shapes_f = sched_folder + '\\' + 'shapes.txt'
shapes_data = pd.read_csv(shapes_f)
shapes_data.head()

Unnamed: 0,shape_id,shape_pt_lat,shape_pt_lon,shape_pt_sequence
0,272882,39.932162,-75.149143,707
1,272882,39.928007,-75.150762,727
2,272882,39.914772,-75.155033,759
3,272882,39.912738,-75.139676,790
4,272880,39.98699,-75.13713,358


In [83]:
###### MAPPING

In [84]:
## Plot Schedule shapes

In [85]:
shape_1 = shapes_data.loc[shapes_data['shape_id'] == 272882]
shape_1 = shape_1.sort_values(by=['shape_pt_sequence'])

shape_2 = shapes_data.loc[shapes_data['shape_id'] == 272880]
shape_2 = shape_2.sort_values(by=['shape_pt_sequence'])

In [86]:
s1 = list(zip(shape_1.shape_pt_lat, shape_1.shape_pt_lon))
s2 = list(zip(shape_2.shape_pt_lat, shape_2.shape_pt_lon))

In [87]:
map = folium.Map(location=[40,-75.154839], zoom_start= 11)

folium.PolyLine(s1, color='red', weight=10, opacity=0.8).add_to(map)
folium.PolyLine(s2, color='blue', weight=10, opacity=0.8).add_to(map)

map

In [88]:
# Plot all trip schedule shapes for a given route

In [89]:
route_47_trips = trips_data.loc[trips_data['route_id'] == '47']
route_47_trips = route_47_trips['shape_id'].unique().tolist()
route_47_trips.sort()

In [90]:
map = folium.Map(location=[40,-75.154839], zoom_start= 11)

n = len(route_47_trips)
color = []
# create an n long random list of colors
for i in range(n):
    color.append('#%06X' % randint(0, 0xFFFFFF))

for shape_id, c in zip(route_47_trips, color):
    shape = shapes_data.loc[shapes_data['shape_id'] == shape_id]
    shape = shape.sort_values(by=['shape_pt_sequence'])
    shape = list(zip(shape.shape_pt_lat, shape.shape_pt_lon))
    
    folium.PolyLine(shape, color = c, popup = shape_id, weight=7, opacity=0.6).add_to(map) 

map

In [91]:
# Function to return schedule shape of a trip_id
def get_shape(trip):
    shape_id = trips_data.loc[trips_data['trip_id'] == trip]
    
    shape = shapes_data.loc[shapes_data['shape_id'] == shape_id.iloc[0]['shape_id']]
    shape = shape.sort_values(by=['shape_pt_sequence'])
    shape_coord = list(zip(shape.shape_pt_lat, shape.shape_pt_lon))
    
    return shape_coord

In [92]:
# compare RT to scheduled route
gtfs_rt_47 = gtfs_data.loc[(gtfs_data['route'] == '47') & (gtfs_data['file'] >= '2022-07-26 00:00:00') & (gtfs_data['file'] <= '2022-07-26 23:59:00')]

In [93]:
rt_47_trips = gtfs_rt_47['trip'].unique().tolist()
rt_47_trips.sort()

In [94]:
map = folium.Map(location=[40,-75.154839], zoom_start= 11)

#select only 1 trip_id of those for route 47
trip = rt_47_trips[0]

polyl = gtfs_rt_47.loc[gtfs_rt_47['trip'] == trip]
polyl = polyl.sort_values(by=['file'])
polyl = list(zip(polyl.lat, polyl.lng))
  
folium.PolyLine(polyl, color = 'red', popup = 'RT', weight=7, opacity=0.6).add_to(map) 

shape = get_shape(trip)
  
folium.PolyLine(shape, color = 'blue', popup = 'Sched', weight=7, opacity=0.6).add_to(map) 

map

In [95]:
# going through all RT points and finding Max deviation (maximum min distance)
sch = LineString(shape)

max_dist = 0

for pol in polyl:
    pt = Point(pol)
    dist = pt.distance(sch)
    if dist > max_dist:
        max_dist = dist
 
print(max_dist)

0.007551171871767615


In [96]:
#Find next bus and track it and determine if it went on a detour

In [97]:
# Input
now_date = '7/26/2022'
now_time = '07:30'
now_file = datetime.strptime(now_date + ' ' + now_time, '%m/%d/%Y %H:%M')
route = '47'
direction = 'SouthBound'
my_stop_seq = 40

In [98]:
# All current buses (in current feed file) that have not yet passed my stop
gtfs_rt = gtfs_data.loc[(gtfs_data['file'] == now_file) & (gtfs_data['route'] == route) &\
                        (gtfs_data['Direction'] == direction) & (gtfs_data['next_stop_sequence'] <= my_stop_seq)]

gtfs_rt = gtfs_rt.sort_values(by = ['next_stop_sequence'])

In [99]:
# return location of a trip at a given time (feed file name)
def now_loc(now, trip):
    data = gtfs_data.loc[(gtfs_data['file'] == now) & (gtfs_data['trip'] == trip)]
    if data.empty:
        return 'NOT FOUND'
    else:
        return (data['lat'].iat[0], data['lng'].iat[0])

In [100]:
# Pick closest bus and track it
next_trip_id = gtfs_rt['trip'].iat[-1]
next_trip_shape = get_shape(next_trip_id)

now = now_file

ln = LineString(next_trip_shape)
location = now_loc(now, next_trip_id)
delta = dt.timedelta(minutes=1)

In [101]:
# each time this block is run the bus moves 1 minute

# check if there is a previous detour
max_prev_dist = 0
for pt in rt_shape:
    pt = Point(pt)
    dist = pt.distance(ln)
    if dist > max_prev_dist:
        max_prev_dist = dist
        

if max_prev_dist >= 0.003:
    print('BUS HAS DETOURED --- Max deviation distance = ' + str(max_prev_dist))
else: 
    print('BUS NEVER DETOURED --- Max deviation distance = '  + str(max_prev_dist))

      
pt = Point(location)
now_dist = pt.distance(ln)

if now_dist >= 0.003:
    print('BUS STILL ON DETOUR --- Current deviation distance = ' + str(dist))
else: 
    print('BUS CURRENTLY ON ROUTE --- Current distance from route = '  + str(dist))
    
rt_shape = gtfs_rt_47.loc[(gtfs_rt_47['trip'] == next_trip_id) & (gtfs_rt_47['file'] <= now)]
rt_shape = rt_shape.sort_values(by=['file'])
rt_shape = list(zip(rt_shape.lat, rt_shape.lng))


map = folium.Map(location=[40,-75.154839], zoom_start= 12)

folium.PolyLine(rt_shape, color = 'blue', weight=7, opacity=0.6).add_to(map) 
folium.PolyLine(next_trip_shape, color = 'red', weight=7, opacity=0.6).add_to(map) 
folium.Marker(location, icon=folium.Icon(color='blue', icon='bus', prefix='fa'), popup = 'Trip_ID = '+ str(next_trip_id)).add_to(map)

now += delta
location = now_loc(now, next_trip_id)

map

BUS HAS DETOURED --- Max deviation distance = 0.007370346057017579
BUS CURRENTLY ON ROUTE --- Current distance from route = 3.160023513856936e-05
