In [1]:
import os
import pathlib
import warnings

import geopandas as gpd
import gtfstools as gtfs
import networkx as nx
import numpy as np
import osmnx as ox

import pandas as pd


%matplotlib inline
%config InlineBackend.figure_format='retina'

# USER INPUTS

In [2]:
# Output and database roots are supplied through environment variables.
# Indexing os.environ directly raises a KeyError that names the missing
# variable; the previous os.environ.get(...) returned None when the variable
# was unset, which then failed inside pathlib.Path with an opaque TypeError.
out_folder = pathlib.Path(os.environ['OUT_FOLDER']) / 'B'

db_folder = pathlib.Path(os.environ['DB_FOLDER'])

In [3]:
# Location of the 2017 GTFS zip archive underneath the database folder.
path_to_gtfs = db_folder.joinpath(
    'beaga', 'GTFS', '2017', 'GTFS_BH_2017.01.05.zip'
)

In [4]:
# Numeric identifiers for the study area.
# NOTE(review): presumably IBGE state / municipality codes — confirm.
state = 31
cities = 3106200

# Boundaries used to split the day into time windows.
# NOTE(review): presumably hours of the day (0-24) — confirm.
cutoffs = [0, 5, 9, 12, 14, 17, 19, 24]

# PROCESSING

In [5]:
# Load the feed from the zip archive referenced by path_to_gtfs.
feed = gtfs.load_feed(path_to_gtfs)

This is a frequency-based GTFS.
Feed contains less shapes than routes


In [17]:
# Peek at the first two coordinate pairs of the first shape's geometry.
feed.shapes.iloc[0].geometry.coords[:2]

[(-44.029928, -19.938481), (-44.028, -19.940009)]

In [20]:
# Route id of the first row of the trips table.
feed.trips.iloc[0].route_id

'101   01'

In [21]:
# NOTE(review): scratch cell — it raises unconditionally and `route_id` is not
# assigned in any visible cell, so a fresh top-to-bottom run stops here.
# Adjacent string literals inside the parentheses concatenate on their own,
# so no line-continuation backslashes are needed.
raise Exception(
    "There's only one (apparently) valid transit stop in "
    f"route {route_id}, which doesn't really make sense."
)

whatever


In [6]:
# Pull the individual tables out of the feed. FutureWarnings raised while
# reading the attributes are silenced; everything after the `with` block runs
# with the normal warning filters.
with warnings.catch_warnings():
    warnings.filterwarnings('ignore', category=FutureWarning)

    routes = feed.routes
    trips = feed.trips
    stop_times = feed.stop_times
    stops = feed.stops
    shapes = feed.shapes

if shapes.empty:
    print('Feed data does not contain route shapes')

# Join keys are spelled out: without `on=` pandas joins on every column the
# two frames happen to share, so an extra shared column in a feed would
# silently change which rows match.
trips = (
    trips.merge(routes, how='left', on='route_id')
         # Keep only the trip-level columns needed downstream; reindex
         # creates any column that is missing and fills it with NaN.
         .reindex(columns=['trip_id',
                           'route_id',
                           'service_id',
                           'direction_id',
                           'shape_id'])
)

# Attach trip attributes and then stop attributes to every stop_times row.
stop_times = (
    stop_times.merge(trips, how='left', on='trip_id')
              .merge(stops, how='left', on='stop_id')
)

In [7]:
# Display stop_times after it has been joined with the trips and stops tables.
stop_times

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,route_id,service_id,direction_id,shape_id,stop_name,stop_desc,location_type,parent_station,geometry
0,METRO 0110800108,18000.0,18000.0,00110998800035,100010,,0,0,,METRO 01,08,1,EL2,Estação METRO Vilarinho,Ave Vilarinho em frente ao numero 35,,,POINT (-43.94675 -19.82149)
1,METRO 0110800108,18900.0,18900.0,00101865210411,200010,,0,0,,METRO 01,08,1,EL2,Estação METRO Floramar,Ave Cristiano Machado em frente ao numero 10411,,,POINT (-43.94044 -19.83390)
2,METRO 0110800108,19080.0,19080.0,00101865209184,300010,,0,0,,METRO 01,08,1,EL2,Estação METRO Waldomiro Lobo,Ave Cristiano Machado em frente ao numero 9184,,,POINT (-43.93273 -19.84780)
3,METRO 0110800108,19260.0,19260.0,00101865209040,400010,,0,0,,METRO 01,08,1,EL2,Estação METRO Primeiro De Maio,Ave Cristiano Machado em frente ao numero 9040,,,POINT (-43.93420 -19.85870)
4,METRO 0110800108,19440.0,19440.0,00101865205602,500010,,0,0,,METRO 01,08,1,EL2,Estação METRO Sao Gabriel,Ave Cristiano Machado em frente ao numero 5602,,,POINT (-43.92651 -19.86350)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
142762,9850 0111400114,21349.0,21349.0,00101260201200,100032,,0,0,,9850 01,14,0,,Ave Jose Candido Da Silveira em frente ao nume...,Serpro - Abg1039 (Entre Rua Alto Da Mata E R...,,,POINT (-43.91825 -19.88797)
142763,9850 0111400114,21450.0,21450.0,00101260201648,100033,,0,0,,9850 01,14,0,,Ave Jose Candido Da Silveira em frente ao nume...,Defronte No.1647 - Sobenca (Entre Rua Alto Da ...,,,POINT (-43.91699 -19.88545)
142764,9850 0111400114,21535.0,21535.0,00101260202258,100034,,0,0,,9850 01,14,0,,Ave Jose Candido Da Silveira em frente ao nume...,Abg1044 (Entre Rua Alto Da Mata E Rua Gustavo ...,,,POINT (-43.91535 -19.88344)
142765,9850 0111400114,21574.0,21574.0,00103261701825,100035,,0,0,,9850 01,14,0,,Rua Gustavo Da Silveira em frente ao numero 1825,Oposto Estacao Jose Candido Da Silveira (Entr...,,,POINT (-43.91344 -19.88363)


In [8]:
# One row per distinct (stop_id, stop_name, stop_sequence, shape_id)
# combination, restricted to the columns that describe a stop's position
# along a route.
stop_seq = stop_times.drop_duplicates(
    subset=['stop_id', 'stop_name', 'stop_sequence', 'shape_id'],
).reindex(
    columns=[
        'route_id',
        'direction_id',
        'stop_id',
        'stop_name',
        'stop_sequence',
        'shape_id',
    ],
)

In [10]:
# Count the stop_id entries for every (route_id, direction_id, shape_id)
# group and show the result as a flat table.
(
    stop_seq
    .pivot_table(
        values='stop_id',
        index=['route_id', 'direction_id', 'shape_id'],
        aggfunc='count',
    )
    .reset_index()
)

Unnamed: 0,route_id,direction_id,shape_id,stop_id
0,1030 05,0,T1030,22
1,614 01,0,T614,58
2,614 02,0,T614,55
3,614 03,0,T614,78
4,614 04,0,T614,69
5,METRO 01,0,EL1,19
6,METRO 01,1,EL2,19


In [None]:
# Trips that carry a shape_id (rows where shape_id is missing are left out).
feed.trips[feed.trips['shape_id'].notna()]

In [None]:
# NOTE(review): `bh` is not assigned in any visible cell — presumably a road
# graph left over from an earlier session; this fails on a fresh kernel.
bh.nodes

In [None]:
# Shortest paths between paired origin/destination node ids, weighted by
# edge length.
# NOTE(review): `bh` is not assigned in any visible cell.
ox.shortest_path(
    bh,
    orig=[8795213893, 7694712699],
    dest=[8795214055, 8795213762],
    weight='length',
)

In [None]:
# Look up a single node of `bh` by id.
# NOTE(review): `bh` is not assigned in any visible cell.
bh.nodes[8795213893]

In [None]:
# NOTE(review): looks like a leftover scratch cell — `what` is not among the
# trip columns used anywhere else here; presumably raises AttributeError.
# Candidate for deletion.
feed.trips.what

In [None]:
# Number of rows in the feed's stop_times table.
len(feed.stop_times)

In [None]:
# Columns that identify a unique stop occurrence.
subset = ['stop_id', 'stop_name', 'stop_sequence', 'shape_id']

# Columns kept in the result, in display order.
col_order = ['route_id', 'direction_id', 'shape_id', 'stop_id',
             'stop_name', 'stop_sequence', 'geometry']

# Ordering applied to the result.
sort_on = ['route_id', 'direction_id', 'shape_id', 'stop_sequence']

# De-duplicate, wrap the frame as a GeoDataFrame declared in EPSG:4326, sort
# it and reproject to EPSG:5641.
# NOTE(review): `operations` is not assigned in any visible cell — presumably
# the merged stop_times table; confirm before relying on this cell.
stop_sequence = gpd.GeoDataFrame(
    operations.drop_duplicates(subset=subset).reindex(columns=col_order),
    crs='EPSG:4326',
    geometry='geometry',
)
stop_sequence = stop_sequence.sort_values(sort_on).to_crs(epsg=5641)

In [None]:
# Display the feed's trips table.
feed.trips

In [None]:
# total_bounds is ordered (minx, miny, maxx, maxy), i.e. west, south, east,
# north once the stops are expressed in EPSG:4326.
west, south, east, north = feed.stops.to_crs(epsg=4326).total_bounds

# Download the drivable road network inside the stops' bounding box.
# NOTE(review): newer OSMnx releases reportedly take a single `bbox` tuple
# instead of four separate bounds — confirm against the installed version.
roads = ox.graph_from_bbox(
    north=north,
    south=south,
    east=east,
    west=west,
    network_type='drive',
)

In [None]:
# Split the road graph into a nodes table and an edges table.
nodes, edges = ox.graph_to_gdfs(roads)

In [None]:
# Coordinates of the first edge's geometry, materialised as a list.
list(edges.iloc[0].geometry.coords)

In [None]:
# Rows whose index equals the first edge's index, with the index moved back
# into ordinary columns.
edges.loc[edges.index==edges.index[0]].reset_index()

In [None]:
# NOTE(review): `cu` is only assigned in a later cell, where it is the result
# of nx.relabel_nodes (a graph), so this DataFrame-style column assignment
# presumably fails both on a fresh run and after that cell. Confirm the
# intended target or delete this cell.
cu[['osmid', 'oneway']] = [np.nan, np.nan]


In [None]:
# Index the road graph by two node ids and then 0 — presumably the attribute
# dict of the edge between those nodes with key 0; verify.
roads[27461710][28384475][0]

In [None]:
# Relabel the road graph's nodes with consecutive integers 1..N.
# enumerate(..., start=1) gives every node a label. The previous
# zip(nodes, range(1, len(roads))) produced only N-1 labels, so zip stopped
# one node early and the last node silently kept its original id
# (relabel_nodes accepts partial mappings, so no error was raised).
mapping = {old: new for new, old in enumerate(roads.nodes, start=1)}

cu = nx.relabel_nodes(roads, mapping)
           

In [None]:
# Rebuild the nodes and edges tables from the relabelled graph `cu`; this
# overwrites any `nodes` / `edges` created earlier from `roads`.
nodes, edges = ox.graph_to_gdfs(cu)

In [None]:
# Inspect the index of the current nodes table.
nodes.index

In [None]:
# Quick visual check of the stops table.
stops.plot()

In [None]:
# Cut the route shapes using the merged stop_times table; the call returns
# two values, kept here as the cut routes and the flagged anomalies.
cut_routes_df, anomalies = gtfs.cut_routes(
    stop_times=stop_times,
    route_shapes=shapes,
    flag_outliers=True,
    threshold=2.5,
)

Route shapefiles cannot be properly built because the GTFS data is incomplete. I'll have to make do with data from other sources.

In [None]:
# Build the map for the 05:00 - 09:00 window and display it.
# NOTE(review): `plot_gtfs_data` and `route_summary` are not defined in any
# visible cell — confirm where they come from.
map_ = plot_gtfs_data(
    gtfs_data=[route_summary, stop_times, shapes],
    variable='trips',
    window='05:00 - 09:00',
    direction=None,
    method='NaturalBreaks',
    k=5,
    cmap='magma',
    linear=False,
    tiles='cartodbpositron',
)

map_

In [None]:
import os
import pathlib
import re
import warnings

import geopandas as gpd
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import gtfstools as gtfs


%matplotlib inline
%config InlineBackend.figure_format='retina'

In [None]:
# Output and database roots are supplied through environment variables.
# Indexing os.environ directly raises a KeyError that names the missing
# variable; the previous os.environ.get(...) returned None when the variable
# was unset, which then failed inside pathlib.Path with an opaque TypeError.
out_folder = pathlib.Path(os.environ['OUT_FOLDER']) / 'B'

db_folder = pathlib.Path(os.environ['DB_FOLDER'])

In [None]:
# Location of the 2021 GTFS zip archive underneath the database folder.
path = db_folder.joinpath(
    'beaga', 'GTFS', '2021', 'GTFS_BH_convencional_2021.01.11.zip'
)

In [None]:
# NOTE(review): overrides the `path` built from db_folder in the previous cell
# with a file name relative to the working directory — confirm which of the
# two feeds is meant to be loaded and drop the other assignment.
path = 'GTFS_2017.zip'

In [None]:
# Load the feed from whichever value `path` currently holds.
feed = gtfs.load_feed(path)

In [None]:
# Summarise trips per route_id across the listed cutoffs.
# NOTE(review): these cutoffs are spelled out inline and differ from the
# `cutoffs` list defined near the top of the notebook (6 here vs 5 there).
summary = gtfs.summarize_trips(
    feed,
    summ_by='route_id',
    cutoffs=[0, 6, 9, 12, 14, 17, 19, 24],
)

In [None]:
# Box plot of headway per time window.
f, ax = plt.subplots(figsize=(15, 7.5))

# Only rows with a headway below 300 minutes are plotted.
data = summary.loc[summary.headway_minutes.lt(300)]

sns.boxplot(x='window', y='headway_minutes', data=data, ax=ax)

In [None]:
# Number of summary rows per route_id among rows whose stop_sequence is 1.
summary.loc[summary.stop_sequence==1, 'route_id'].value_counts()

In [None]:
# NOTE(review): `stop_times` is not assigned after the second import cell; this
# display relies on the variable created by the earlier cells still being alive.
stop_times

In [None]:
# Frequencies table ordered by trip_id and then start_time.
feed.frequencies.sort_values(['trip_id', 'start_time'])

In [None]:
# Scratch arithmetic — presumably 3540 seconds expressed as a fraction of an
# hour; confirm the intent or delete the cell.
3540/3600

In [None]:
# Display the feed's raw stop_times table.
feed.stop_times