In [None]:
import polars as pl
import gtfs_delay_analysis as da

In [None]:
# Delay records from parquet; trip_id is cast to Int64
# (presumably to match the dtype of trips.trip_id for the joins below — confirm).
ddelay = (
    pl.read_parquet('all_ddelay.parquet')
    .with_columns(pl.col('trip_id').cast(pl.Int64))
)
# Trip and shape tables come from the project's own loader helpers.
trips = da.trips.load_trips_without_shapes_df()
shapes = da.trips.load_parsed_shapes_df()

In [None]:
# Attach the shape_id of each trip to the delay rows (inner join on trip_id).
with_shape_id = ddelay.join(
    trips.select(['trip_id', 'shape_id']),
    on='trip_id',
    how='inner',
)

In [None]:
# One row per distinct (trip_id, shape_id) pair, carrying the shape's columns
# minus 'line_length'.
shapes_with_trip_id = (
    shapes
    .join(
        trips.select(['trip_id', 'shape_id']).unique(),
        on='shape_id',
        how='inner',
    )
    .drop('line_length')
)

In [None]:
# Sum of the 'geometry_line' list lengths over all rows of shapes_with_trip_id.
shapes_with_trip_id.get_column('geometry_line').list.len().sum()

In [None]:
# Let Polars print up to 62 rows when a frame is displayed.
pl.Config.set_tbl_rows(62)

# Shape under inspection and the matching filter predicate.
selected_shape_id = "004-171-East"
selected_shape = pl.col('shape_id') == selected_shape_id

# One row per list element of 'geometry_line' for the selected shape; repeated
# (trip_id, geometry_line) pairs are dropped, keeping the first in original order.
exploded = (
    shapes_with_trip_id
    .filter(selected_shape)
    .explode('geometry_line')
    .unique(
        subset=['trip_id', 'geometry_line'],
        keep='first',
        maintain_order=True,
    )
)

In [None]:
# First row whose shape_id starts with '004', exploded to one row per
# 'geometry_line' element.
dff = (
    shapes_with_trip_id
    .filter(pl.col('shape_id').str.starts_with('004'))
    .head(1)
    .explode('geometry_line')
)

In [None]:
# The stops don't match the shape points exactly. This join is an exact match on
# (lon, lat, trip_id, shape_id); how='left' keeps every shape point and leaves
# the stop columns null where nothing matches. A closest-point search is still
# needed for the unmatched stops.
df = exploded.join(
    with_shape_id.filter(selected_shape),
    left_on=[
        pl.col('geometry_line').struct.field('lon'),
        pl.col('geometry_line').struct.field('lat'),
        'trip_id',
        'shape_id',
    ],
    right_on=['a_lon', 'a_lat', 'trip_id', 'shape_id'],
    how='left',
)

In [None]:
# Output directory for the CSV exports.
write_path = "~/Downloads/GTFS"

# Export the lon/lat of the shape points of one trip, named after the selected shape.
(
    df
    .filter(pl.col('trip_id') == 25536739)
    .select(
        pl.col('geometry_line').struct.field('lon'),
        pl.col('geometry_line').struct.field('lat'),
    )
    .write_csv(f'{write_path}/{selected_shape_id}.csv')
)

In [None]:
# Export the lon/lat points of `dff` to the working directory.
# NOTE(review): the file name says 508-33-west, but `dff` is filtered on shape
# ids starting with '004' — confirm which is intended.
(
    dff
    .select(
        pl.col('geometry_line').struct.field('lon'),
        pl.col('geometry_line').struct.field('lat'),
    )
    .write_csv('508-33-west.csv')
)

In [None]:
# Display the delay rows joined with their shape_id for visual inspection.
with_shape_id

In [None]:
# Export the 'a', 'a_lon', 'a_lat' columns (presumably stop id and stop
# coordinates — confirm) for one trip.
(
    with_shape_id
    .filter(pl.col('trip_id') == 25536739)
    .select(['a', 'a_lon', 'a_lat'])
    .write_csv(f'{write_path}/004-capilano-stops.csv')
)

In [None]:
import math
from geopy.distance import geodesic
# Restrict the distance computation below to a single trip.
trip_id = 25536564
selected_trip = pl.col('trip_id') == trip_id


def haversine(lon1: str, lat1: str, lon2: str, lat2: str) -> "pl.Expr":
    """Build a Polars expression for the great-circle distance in metres.

    Implements the haversine formula in its cosine form::

        a = 0.5 - cos(dlat) / 2 + cos(lat1) * cos(lat2) * (1 - cos(dlon)) / 2
        d = 2 * r * asin(sqrt(a))

    with an Earth radius of 6371 km.

    Args:
        lon1: Name of the column holding the first point's longitude, in degrees.
        lat1: Name of the column holding the first point's latitude, in degrees.
        lon2: Name of the column holding the second point's longitude, in degrees.
        lat2: Name of the column holding the second point's latitude, in degrees.

    Returns:
        An unevaluated ``pl.Expr``; use it inside ``select`` / ``with_columns``
        on a frame that contains the four named columns.
    """
    earth_radius_m = 6371 * 1000  # Radius of the earth in m
    deg_to_rad = math.pi / 180

    lat1_rad = pl.col(lat1) * deg_to_rad
    lat2_rad = pl.col(lat2) * deg_to_rad
    dlat = (pl.col(lat2) - pl.col(lat1)) * deg_to_rad
    dlon = (pl.col(lon2) - pl.col(lon1)) * deg_to_rad

    a = (
        0.5 - dlat.cos() / 2
        + lat1_rad.cos() * lat2_rad.cos() * (1.0 - dlon.cos()) / 2
    )
    # Clamp to [0, 1] so floating-point rounding (e.g. near-antipodal points)
    # cannot push sqrt/arcsin outside their domain and yield NaN.
    return 2 * earth_radius_m * a.clip(0.0, 1.0).sqrt().arcsin()

# For the selected trip, pair every shape point with every row of the delay
# table and keep, for each value of 'a' (presumably the stop id — confirm),
# the shape point at the smallest geodesic distance.
(
    exploded
    # Promote the struct fields to top-level 'lon' / 'lat' columns.
    .with_columns(
        pl.col('geometry_line').struct.field('lon'),
        pl.col('geometry_line').struct.field('lat'),
    )
    .drop('geometry_line')
    .filter(selected_trip)
    # Joining on trip_id alone yields every (shape point, delay row) combination.
    .join(
        with_shape_id.filter(selected_trip).select('a', 'a_lon', 'a_lat', 'trip_id'),
        on='trip_id',
    )
    .with_columns(
        pl.struct(
            lon1='lon',
            lat1='lat',
            lon2='a_lon',
            lat2='a_lat',
        )
        .map_elements(
            # geodesic takes (lat, lon) pairs; .m is the distance in metres.
            lambda x: geodesic((x['lat1'], x['lon1']), (x['lat2'], x['lon2'])).m,
            # Declare the output dtype so Polars does not have to infer it.
            return_dtype=pl.Float64,
        )
        .alias('distance')
    )
    # Keep only the row(s) with the minimum distance within each 'a' group.
    .filter(pl.col('distance').eq(pl.col('distance').min().over('a')))
    # Largest remaining distance first, so the worst matches are on top.
    .sort('distance', descending=True)
)

In [None]:
# Display the exploded shape points of the selected shape for visual inspection.
exploded

In [None]:
import geopandas as gpd

# Read the shapefile export with geopandas; the path is relative to the working directory.
gdf = gpd.read_file('data/geo_export_fcffcbc3-f913-4eea-b630-e36759d5fb03.shp')

In [None]:
# Annotation-only statement: declares a type for `gdf` without assigning or
# changing its value (presumably for editor autocompletion).
gdf: gpd.GeoDataFrame

In [None]:
# Distinct 'trip_heads' values among the rows whose route_id is "508".
gdf.loc[gdf['route_id'] == "508", 'trip_heads'].unique()

In [None]:
# Two-column subset (trip_id, shape_id) of the shapefile rows.
bruh = gdf[['trip_id', 'shape_id']]

In [None]:
from shapely import Point


# pl.from_pandas(bruh)



# Shapefile rows for one trip; trip_id is compared as a string here.
# NOTE(review): this rebinds `df`, which held a Polars frame in an earlier cell.
df = gdf.loc[gdf['trip_id'] == "25536564"].copy()
# Break each geometry into one shapely Point per coordinate.
df['points'] = df['geometry'].apply(lambda line: list(map(Point, line.coords)))

# One row per point; index_parts=True adds the position within the original row to the index.
df.explode('points', index_parts=True)

In [None]:
# Distinct (shape_id, geometry) combinations in the shapefile.
gdf[['shape_id', 'geometry']].drop_duplicates()

In [None]:
from geopy.distance import geodesic
# Scratch call: geodesic() is built with no points.
# NOTE(review): presumably a zero-length distance, so `.m` would be 0.0 —
# confirm, or delete this cell.
geodesic().m