In [1]:
import logging
from pathlib import Path

from network_wrangler.models._base.records import DBModel, RecordModel
from network_wrangler.models.gtfs.tables import (
    StopsTable,
    RoutesTable,
    TripsTable,
    StopTimesTable,
    ShapesTable,
    FrequenciesTable,
)
from network_wrangler.transit.io import _read_table_from_file

log = logging.getLogger()

%load_ext autoreload
%autoreload 2

pyogrio not installed, falling back to default engine (fiona)


agency_id='1' agency_name='Transit Agency' agency_url=Url('https://example.com/') agency_timezone='America/New_York' agency_lang='en' agency_phone='123-456-7890' agency_fare_url=Url('https://example.com/fares') agency_email='info@example.com'





In [2]:
from pydantic import BaseModel


class GtfsModel(BaseModel):
    """
    Wrapper class around standard GTFS feed.

    Attributes:
        table_names: list of table names in GTFS feed.
        tables: list tables as dataframes.
        stop_times: stop_times dataframe with roadway node_ids
        stops: stops dataframe
        shapes: shapes dataframe
        trips: trips dataframe
        frequencies: frequencies dataframe
        routes: route dataframe
    """

    stops: StopsTable
    routes: RoutesTable
    trips: TripsTable
    stop_times: StopTimesTable
    shapes: ShapesTable
    frequencies: FrequenciesTable

In [3]:
stpaul_gtfs = Path(
    r"/Users/elizabeth/Documents/urbanlabs/MetCouncil/working/network_wrangler/examples/stpaul"
)
FEED_TABLE_READ = ["frequencies", "routes", "shapes", "stop_times", "stops", "trips"]

feed_path = stpaul_gtfs

In [4]:
for table in FEED_TABLE_READ:
    if not any(feed_path.glob(f"*{table}.txt")):
        raise FileNotFoundError(
            f"Required GTFS Feed table {table} not found in {feed_path}"
        )
feed_files = {table: next(feed_path.glob(f"*{table}.txt")) for table in FEED_TABLE_READ}

In [5]:
feed_dfs = {
    table: _read_table_from_file(table, file) for table, file in feed_files.items()
}

In [6]:
freq_df = feed_dfs["frequencies"]
freq_df

Unnamed: 0,trip_id,headway_secs,start_time,end_time
0,14940701-JUN19-MVS-BUS-Weekday-01,3600,06:00:00,09:00:00
1,14941148-JUN19-MVS-BUS-Weekday-01,830,06:00:00,09:00:00
2,14941151-JUN19-MVS-BUS-Weekday-01,540,06:00:00,09:00:00
3,14941153-JUN19-MVS-BUS-Weekday-01,696,09:00:00,15:00:00
4,14941163-JUN19-MVS-BUS-Weekday-01,830,09:00:00,15:00:00
...,...,...,...,...
57,14969944-JUN19-RAIL-Weekday-01,600,06:00:00,09:00:00
58,14969962-JUN19-RAIL-Weekday-01,600,09:00:00,15:00:00
59,121-RL-484.0N-0632-20190216-Weekday-04,1800,06:00:00,09:00:00
60,031-CT-480.1N-0601-20190216-Weekday-04,981,06:00:00,09:00:00


In [7]:
import pandera as pa
from pandera.typing import Series
from network_wrangler.models._base.time import TimeString
from pandas import Timestamp


class FrequenciesTable2(pa.DataFrameModel):
    """
    Represents the Agency table in the GTFS dataset.

    Configurations:
    - dtype: PydanticModel(FrequencyRecord)
    - uniqueness: "trip_id","start_time"]
    """

    trip_id: Series[str] = pa.Field(nullable=False, coerce=True)
    start_time: Series[Timestamp] = pa.Field(nullable=False, coerce=True)
    end_time: Series[Timestamp] = pa.Field(nullable=False, coerce=True)
    headway_secs: Series[int] = pa.Field(
        coerce=True,
        ge=1,
        nullable=False,
    )

    class Config:
        coerce = True
        _pk = ["trip_id", "start_time"]
        _fk = {"trip_id": ["routes", "trip_id"]}
        uniqueness = {"cols": _pk}

In [8]:
freq_df2 = FrequenciesTable2(freq_df)

  col = to_datetime_fn(col, **self.to_datetime_kwargs)
  col = to_datetime_fn(col, **self.to_datetime_kwargs)


In [9]:
freq_df3 = freq_df2.copy()
freq_df3.loc[[0, 1], "trip_id"] = "foo"
freq_df3.loc[[0, 1], "start_time"] = "6:00:00"
freq_df4 = FrequenciesTable2(freq_df3)

Non-Unique values found in columns: ['trip_id', 'start_time']:
  trip_id          start_time
1     foo 2024-03-28 06:00:00


SchemaError: DataFrameSchema 'FrequenciesTable2' failed series or dataframe validator 0: <Check uniqueness>