In [21]:
import os, sys
from geopy.geocoders import GoogleV3, Nominatim
import geopandas as gpd
import pandas as pd
from SCanalyzer.busSim import BusSim
from SCanalyzer.busSim.manager import LocalManager
from shapely.geometry import Point
from matplotlib.lines import Line2D
from math import sin, cos, asin, sqrt, pi, ceil, floor
from datetime import timedelta

In [22]:
DATA_PATH = "../../data/minneapolis_gtfs.zip"
OUT_PATH = "/tmp/output"
DAY = "monday"
START_TIME = "12:00:00"
ELAPSE_TIME = "00:30:00"
AVG_WALKING_SPEED = 1.4 # 1.4 meters per second
MAX_WALKING_MIN = 10
MAX_WALKING_DISTANCE = MAX_WALKING_MIN * 60.0 * AVG_WALKING_SPEED

# ccan do geocoding or reverse geocoding
geolocator = Nominatim(user_agent="wisc_bazarr_demo")
# something needs to be passed in for the borders parameter in LocalManager even though it isn't used
manager = LocalManager(DATA_PATH, OUT_PATH, [])

In [23]:
stops_df = manager.read_gtfs("stops.txt")
trips_df = manager.read_gtfs("trips.txt")
stopTimes_df = manager.read_gtfs("stop_times.txt")
calendar_df = manager.read_gtfs("calendar.txt")

In [24]:
def is_day_valid(day):
    # return the valid calender in a specific day
    return (day == 1)

In [25]:
def get_valid_stopTime(df, start_time, elapse_time):
    start_time = pd.to_timedelta(start_time)
    end_time = start_time + pd.to_timedelta(elapse_time)
    return df[(df['arrival_time'] > start_time) & (df['arrival_time'] < end_time)]

In [26]:
# get valid service_ids
calendar_df['start_date'] = pd.to_datetime(
calendar_df['start_date'], format='%Y%m%d')
calendar_df['end_date'] = pd.to_datetime(
calendar_df['end_date'], format='%Y%m%d')

calendar_filtered_df = calendar_df[is_day_valid(calendar_df[DAY])]

service_ids = calendar_filtered_df["service_id"].tolist()

# get valid trips
trips_df = trips_df[trips_df["service_id"].isin(service_ids)]

In [27]:
# get valid stop_times
stopTimes_filtered_df = trips_df.merge(stopTimes_df, on="trip_id")
stopTimes_merged_df = (stopTimes_filtered_df.merge(stops_df, on="stop_id")[
["service_id", "trip_id", "route_id", "stop_id", "stop_sequence", "arrival_time", "stop_lon", "stop_lat"]].rename(columns={"stop_lon": "stop_x", "stop_lat": "stop_y"}))

# get stop_times within the time frame
stopTimes_merged_df['arrival_time'] = pd.to_timedelta(
stopTimes_merged_df['arrival_time'])

# add trip_delays
for (trip_id, delay) in []:
    stopTimes_merged_df.loc[stopTimes_merged_df["trip_id"] == trip_id, "arrival_time"] += pd.to_timedelta(delay)

stopTimes_final_df = get_valid_stopTime(stopTimes_merged_df, START_TIME, ELAPSE_TIME).sort_values(by="arrival_time")

In [28]:
stopTimes_final_df

modified_final = stopTimes_final_df[:1000]

In [29]:
from SCanalyzer.busSim.graph import Node, NodeCostPair
from collections import defaultdict

In [30]:
map_grid = []
min_x = stopTimes_final_df.stop_x.min()
max_x = stopTimes_final_df.stop_x.max()
min_y = stopTimes_final_df.stop_y.min()
max_y = stopTimes_final_df.stop_y.max()

x_num = ceil((max_x - min_x) / MAX_WALKING_DISTANCE)
y_num = ceil((max_y - min_y) / MAX_WALKING_DISTANCE)

# improvement? ok for now
for i in range(x_num):
    x_list = []
    for j in range(y_num):
        x_list.append([])
    map_grid.append(x_list)

In [31]:
MAX_WALKING_DISTANCE

840.0

In [32]:
curr_nodes = []
# gen nodes
trip_node_dict = defaultdict(list)
stop_node_dict = defaultdict(list)

for index, row in stopTimes_final_df.iterrows():
    node = Node(row["trip_id"], row["route_id"], row["stop_sequence"], row["stop_id"], row["stop_x"],
                row["stop_y"], row["arrival_time"], MAX_WALKING_DISTANCE, index)
    curr_nodes.append(node)
    trip_node_dict[row["trip_id"]].append(node)
    stop_node_dict[row["stop_id"]].append(node)

    # finding where the node is on the map_grid
    x_bucket = floor((row["stop_x"] - min_x) /
                     MAX_WALKING_DISTANCE)
    y_bucket = floor((row["stop_y"] - min_y) /
                     MAX_WALKING_DISTANCE)
    map_grid[x_bucket][y_bucket].append(node)

In [40]:
print(curr_nodes)

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [36]:
map_grid[0][0]

[(18270502-MAR21-MVS-BUS-Weekday-02, 21, 14, 16909, 0 days 12:01:00, 840.0, -93.23141, 44.948464)
 Children:,
 (18368873-MAR21-MVS-BUS-Weekday-02, 68, 13, 18096, 0 days 12:01:00, 840.0, -93.093528, 44.988666)
 Children:,
 (18271486-MAR21-MVS-BUS-Weekday-03, 10, 2, 19277, 0 days 12:01:00, 840.0, -93.271337, 44.97138)
 Children:,
 (18271486-MAR21-MVS-BUS-Weekday-03, 10, 1, 19337, 0 days 12:01:00, 840.0, -93.271211, 44.971967)
 Children:,
 (18276056-MAR21-MVS-BUS-Weekday-03, 2, 13, 13211, 0 days 12:01:00, 840.0, -93.243642, 44.972534)
 Children:,
 (18369842-MAR21-MVS-BUS-Weekday-02, 6, 47, 6216, 0 days 12:01:00, 840.0, -93.318921, 44.905269)
 Children:,
 (18273352-MAR21-MVS-BUS-Weekday-02, 227, 24, 4086, 0 days 12:01:00, 840.0, -93.139281, 45.022952)
 Children:,
 (18273352-MAR21-MVS-BUS-Weekday-02, 227, 25, 45338, 0 days 12:01:00, 840.0, -93.139915, 45.022763)
 Children:,
 (18276532-MAR21-MVS-BUS-Weekday-03, 615, 19, 4783, 0 days 12:01:00, 840.0, -93.375464, 44.938762)
 Children:,
 (18276

In [37]:
trip_node_dict

defaultdict(list,
            {'18270502-MAR21-MVS-BUS-Weekday-02': [(18270502-MAR21-MVS-BUS-Weekday-02, 21, 14, 16909, 0 days 12:01:00, 840.0, -93.23141, 44.948464)
              Children:,
              (18270502-MAR21-MVS-BUS-Weekday-02, 21, 16, 16900, 0 days 12:02:00, 840.0, -93.235763, 44.948483)
              Children:,
              (18270502-MAR21-MVS-BUS-Weekday-02, 21, 15, 16904, 0 days 12:02:00, 840.0, -93.234037, 44.948467)
              Children:,
              (18270502-MAR21-MVS-BUS-Weekday-02, 21, 17, 40954, 0 days 12:04:00, 840.0, -93.239741, 44.948505)
              Children:,
              (18270502-MAR21-MVS-BUS-Weekday-02, 21, 18, 16894, 0 days 12:05:00, 840.0, -93.242101, 44.948446)
              Children:,
              (18270502-MAR21-MVS-BUS-Weekday-02, 21, 19, 16888, 0 days 12:06:00, 840.0, -93.247176, 44.948437)
              Children:,
              (18270502-MAR21-MVS-BUS-Weekday-02, 21, 20, 16887, 0 days 12:07:00, 840.0, -93.249795, 44.948424)
            

In [39]:
# gen edges
# direct sequence
# forms connection in each trip, with start being the first.
for trip_id, nodes in trip_node_dict.items():
    for i in range(len(nodes)-1):
        start = nodes[i]
        end = nodes[i+1]
        nodeCostPair = NodeCostPair(end, start.harversine_distance(end))
        start.children.append(nodeCostPair)
        start.children_ids.add(end.id)

In [41]:
stop_node_dict

defaultdict(list,
            {16909: [(18270502-MAR21-MVS-BUS-Weekday-02, 21, 14, 16909, 0 days 12:01:00, 840.0, -93.23141, 44.948464)
              Children:
                cost: 483.69457451525574 (18270502-MAR21-MVS-BUS-Weekday-02, 21, 16, 16900, 0 days 12:02:00, 840.0, -93.235763, 44.948483),
              (18270502-MAR21-MVS-BUS-Weekday-03, 21, 14, 16909, 0 days 12:01:00, 840.0, -93.23141, 44.948464)
              Children:
                cost: 483.69457451525574 (18270502-MAR21-MVS-BUS-Weekday-03, 21, 16, 16900, 0 days 12:02:00, 840.0, -93.235763, 44.948483),
              (18270567-MAR21-MVS-BUS-Weekday-02, 21, 60, 16909, 0 days 12:08:00, 840.0, -93.23141, 44.948464)
              Children:
                cost: 483.69457451525574 (18270567-MAR21-MVS-BUS-Weekday-02, 21, 62, 16900, 0 days 12:09:00, 840.0, -93.235763, 44.948483),
              (18270567-MAR21-MVS-BUS-Weekday-03, 21, 60, 16909, 0 days 12:08:00, 840.0, -93.23141, 44.948464)
              Children:
               

In [42]:
# wait on stop: new version (by Charles)
#time0= time()
for stop_id, nodes in stop_node_dict.items():
    for node in nodes:
        node.children.extend([NodeCostPair(n, 0) for n in filter(lambda n: n.arrival_time>node.arrival_time, nodes)])
# print(f'new time for reducted triple for loop {time() - time0}')

In [55]:
from math import radians, cos

def get_bounding_box(lat, lon, distance):
    # Earth's radius in meters
    R = 6371000  
    
    # Convert distance from meters to degrees (approximate)
    lat_delta = distance / R * (180 / 3.14159)
    lon_delta = distance / (R * cos(radians(lat))) * (180 / 3.14159)
    
    # Calculate the bounding box
    min_lat = lat - lat_delta
    max_lat = lat + lat_delta
    min_lon = lon - lon_delta
    max_lon = lon + lon_delta

    print(min_lat, max_lat, min_lon, max_lon)
    return (min_lat, max_lat, min_lon, max_lon)

In [56]:
def filter_stops_in_bounding_box(stops, lat, lon, distance):
    # Get bounding box coordinates
    min_lat, max_lat, min_lon, max_lon = get_bounding_box(lat, lon, distance)
    
    # Filter stops within the bounding box
    return [stop for stop in stops if min_lat <= stop.stop_y <= max_lat and min_lon <= stop.stop_x <= max_lon]

In [57]:
from math import radians, sin, cos, sqrt, atan2

def haversine(lat1, lon1, lat2, lon2):
    # Radius of Earth in meters
    R = 6371000
    
    # Convert latitude and longitude from degrees to radians
    dlat = radians(lat2 - lat1)
    dlon = radians(lon2 - lon1)
    lat1 = radians(lat1)
    lat2 = radians(lat2)
    
    # Haversine formula
    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    
    # Distance in meters
    return R * c

def get_walkable_stops(stops, curr_stop, max_distance, avg_walking_speed):
    # Filter stops inside the bounding box first
    candidate_stops = filter_stops_in_bounding_box(stops, curr_stop.stop_y, curr_stop.stop_x, max_distance)
    
    # Filter based on the exact distance using Haversine formula
    walkable_stops = []
    print(len(candidate_stops))
    for stop in candidate_stops:
        distance = haversine(curr_stop.stop_y, curr_stop.stop_x, stop.stop_y, stop.stop_x)
        # make sure person can walk to the stop in time (assuming the latest we leave is when the bus at the stop we're currently at arrives) 
        time_delta = distance / AVG_WALKING_SPEED
        time_delta = timedelta(seconds=time_delta)
        if distance <= max_distance and curr_stop.arrival_time + time_delta <= stop.arrival_time:
            walkable_stops.append((stop, distance))
            curr_stop.children.append(NodeCostPair(stop, distance))
            curr_stop.children_ids.add(stop.id)
            
    
    return walkable_stops


In [58]:
point_coords = [(c.stop_y, c.stop_x) for c in curr_nodes]

In [59]:
print(point_coords)

[(44.948464, -93.23141), (44.988666, -93.093528), (44.97138, -93.271337), (44.971967, -93.271211), (44.972534, -93.243642), (44.905269, -93.318921), (45.022952, -93.139281), (45.022763, -93.139915), (44.938762, -93.375464), (44.938821, -93.372682), (45.022791, -93.141488), (45.022808, -93.144067), (44.97138, -93.271337), (44.971967, -93.271211), (45.015446, -93.019756), (45.015446, -93.019756), (44.977454, -93.04744), (45.006086, -93.295651), (45.007864, -93.295639), (44.934071, -93.244645), (44.934062, -93.242389), (44.903512, -93.318903), (45.022952, -93.139281), (45.022763, -93.139915), (44.975123, -93.273807), (44.962647, -93.256364), (44.865531, -93.039559), (44.998734, -93.176867), (44.918006, -93.19677), (44.872656, -93.328713), (44.998734, -93.176867), (45.008142, -92.999633), (45.006686, -93.002274), (45.004996, -93.005378), (44.935833, -93.277827), (44.968815, -93.016497), (44.968073, -93.019037), (44.962821, -93.237595), (44.968307, -93.459655), (45.022791, -93.141488), (44.

In [60]:
MAX_WALKING_DISTANCE

840.0

In [None]:
for coord in curr_nodes:
    pos_stops = get_walkable_stops(curr_nodes, coord, MAX_WALKING_DISTANCE, AVG_WALKING_SPEED)

44.94090969212943 44.95601830787057 -93.24208380816302 -93.22073619183698
120
44.98111169212943 44.99622030787057 -93.10420929192755 -93.08284670807247
158
44.96382569212943 44.978934307870574 -93.28201507213896 -93.26065892786104
946
44.96441269212943 44.97952130787057 -93.28188918142912 -93.26053281857087
1007
44.96497969212943 44.980088307870574 -93.2543202869988 -93.23296371300118
281
44.897714692129426 44.91282330787057 -93.3295867847654 -93.30825521523461
152
45.015397692129426 45.03050630787057 -93.1499686868611 -93.1285933131389
22
45.01520869212943 45.03031730787057 -93.15060265157784 -93.12922734842216
24
44.931207692129426 44.94631630787057 -93.3861360044538 -93.36479199554618
62
44.93126669212943 44.94637530787057 -93.3833540154198 -93.3620099845802
48
45.01523669212943 45.03034530787057 -93.15217565680496 -93.13080034319503
26
45.01525369212943 45.03036230787057 -93.1547546599786 -93.13337934002142
30
44.96382569212943 44.978934307870574 -93.28201507213896 -93.260658927861

In [61]:
for i in range(len(point_coords)):
    if i != 0:
        print(haversine(coords[0][0], coords[0][1], coords[i][0], coords[i][1]))

11732.17161557709
4045.0213559230897
4078.824396936318
2844.241376659803
8398.470829048461
11004.6202907639
10956.00632689837
11388.83920707918
11170.251135178005
10877.51923702506
10748.207779096369
4045.0213559230897
4078.824396936318
18237.11444545282
18237.11444545282
14828.906461937886
8160.059188514607
8315.576812785079
1909.5786021389863
1819.6953550252933
8510.649368004586
11004.6202907639
10956.00632689837
4462.580366706509
2518.4891650371624
17700.964171393945
7046.5591837238435
4348.066774553538
11391.683342804243
7046.5591837238435
19400.90754501826
19150.71625387308
18857.893920315524
3913.976902843308
17060.82759042058
16851.991918591102
1668.9620515934778
18094.204157697248
10877.51923702506
17838.046293045834
7394.814364513214
5947.449993705973
6101.212012341965
15094.05740691562
14121.777090052572
2844.241376659803
7493.583614517498
15269.475648281665
15438.54726265841
7590.685535835911
17838.046293045834
17700.964171393945
3846.215543821156
10748.207779096369
7794.764

In [31]:
!pip install folium

Collecting folium
  Downloading folium-0.17.0-py2.py3-none-any.whl.metadata (3.8 kB)
Collecting branca>=0.6.0 (from folium)
  Downloading branca-0.7.2-py3-none-any.whl.metadata (1.5 kB)
Collecting xyzservices (from folium)
  Downloading xyzservices-2024.9.0-py3-none-any.whl.metadata (4.1 kB)
Downloading folium-0.17.0-py2.py3-none-any.whl (108 kB)
Downloading branca-0.7.2-py3-none-any.whl (25 kB)
Downloading xyzservices-2024.9.0-py3-none-any.whl (85 kB)
Installing collected packages: xyzservices, branca, folium
Successfully installed branca-0.7.2 folium-0.17.0 xyzservices-2024.9.0


In [58]:
coords = [(c.stop_y, c.stop_x) for c in curr_nodes]

In [59]:
walk_vals = []

for i in range(len(coords)):
    if i != 0:
        walk_vals.append(haversine(coords[0][0], coords[0][1], coords[i][0], coords[i][1]))

In [60]:
sorted(walk_vals)

[0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 0.0,
 32.953565402926706,
 32.953565402926706,
 32.953565402926706,
 32.953565402926706,
 200.64810125686998,
 200.64810125686998,
 206.7382742018804,
 206.7382742018804,
 206.7382742018804,
 206.7382742018804,
 206.7382742018804,
 206.7382742018804,
 206.7382742018804,
 206.7382742018804,
 242.41981218445517,
 242.41981218445517,
 242.41981218445517,
 242.41981218445517,
 254.60029729155428,
 254.60029729155428,
 261.7970555711671,
 261.7970555711671,
 308.8061397456981,
 308.8061397456981,
 327.6109129842631,
 327.6109129842631,
 327.6109129842631,
 327.6109129842631,
 336.8666577141038,
 336.8666577141038,
 337.71282902294485,
 337.71282902294485,
 342.57614186870586,
 342.57614186870586,
 342.57614186870586,
 342.57614186870586,
 342.57614186870586,
 342.57614186870586,
 342.57614186870586,
 342.57614186870586,
 353.9072069959337,
 353.9072069959337,
 353.9072069959337,
 353.9072069959337,
 353.9072069959337,
 353.9072069959337,
 372.69412026157

In [38]:
import folium

#coords = [(c.stop_y, c.stop_x) for c in curr_nodes]

# Create a folium map centered around the average latitude and longitude
map_center = [sum([c[0] for c in coords]) / len(coords),
              sum([c[1] for c in coords]) / len(coords)]

# Initialize the map
my_map = folium.Map(location=map_center, zoom_start=4)

# Add each coordinate as a circle marker
for coord in coords:
    folium.CircleMarker(
        location=coord,
        radius=5,  # Radius of the circle
        color='blue',  # Circle border color
        fill=True,  # Fill the circle
        fill_color='blue',  # Fill color
        fill_opacity=0.6  # Transparency of the circle
    ).add_to(my_map)

# Save the map to an HTML file
my_map.save("bus_stops_map_circle.html")


In [None]:
for x in range(x_num):
    for y in range(y_num):
        start_bucket = map_grid[x][y]
        end_buckets = []
        # following should be linear time, since x range and y range is fixed to most at 3
        for x_end in range(max(0, x-1), min(x_num, x+2)):
            for y_end in range(max(0, y-1), min(y_num, y+2)):
                end_buckets.append(map_grid[x_end][y_end])

        for start in start_bucket:
            for end_bucket in end_buckets:
                for end in end_bucket:
                    if start.arrival_time >= end.arrival_time or start.stop_id == end.stop_id:
                        continue

                    # walk
                    distance = start.distance(end)
                    time_delta = distance / AVG_WALKING_SPEED
                    time_delta = timedelta(seconds=time_delta)
                    if distance < MAX_WALKING_DISTANCE and start.arrival_time + time_delta < end.arrival_time:
                        nodeCostPair = NodeCostPair(end, distance)
                        start.children.append(nodeCostPair)
                        start.children_ids.add(end.id)

In [None]:
end_buckets

In [None]:
#curr_nodes[0].children[0]

for see_node in curr_nodes[0]:
    see_node.children

In [None]:
# walk (Charles's version)
#time0 = time()
curr_nodes.sort(key=lambda node: node.stop_x)
for index, node in enumerate(nodes):
    node.index = index
#time1 = time()

In [None]:
nodes

In [None]:
x_num = 1000000
# y_num = 10
# grid = []

In [21]:
li2 = [[] for x in range(x_num)]

In [22]:
li = []
for x in range(x_num):
    li.append([])

In [28]:
class A:
    def __init__(self):
        self.id = 0
        self.count = 1
        self.id2 = int()
    
    def __repr__(self):
        return '{}, {}, {}'.format(self.id, self.count, self.id2)

In [5]:
li = [[1,2],[2,1],[3,9],[4,0]]
li.sort(key=lambda l:l[1])

In [8]:
for index, x in enumerate([1,2,3,4]):
    print(index,x)

0 1
1 2
2 3
3 4


In [35]:
a2 = A()
a2

0, 1, 0

In [9]:
if None:
    print('none')

In [11]:
a = None
if a.b and a:
    print('a')

AttributeError: 'NoneType' object has no attribute 'b'

In [7]:
times = []
for day in ["monday", "tuesday","wednesday","thursday","friday","saturday","sunday"]:
    for time in range(5,24):
        times.append('{} {:02}:{:02}:{:02}'.format(day, time, 0, 0))

In [12]:
a, b = times[0].split(' ')

In [13]:
a

'monday'

In [14]:
b

'05:00:00'

In [17]:
from datetime import datetime
datetime.strptime('Monday 05:00:00  2', "%A %H:%M:%S  %d")

datetime.datetime(1900, 1, 2, 5, 0)