# Laden der VBN GTFS-R Daten

In [None]:
import pandas as pd
import os
import duckdb
from google.transit import gtfs_realtime_pb2
import requests


In [None]:
# Endpoint from which the GTFS-Realtime protobuf feed is downloaded.
# NOTE(review): plain HTTP — switch to HTTPS if the server offers it.
url = "http://gtfsr.vbn.de/gtfsr_connect.bin"

In [None]:
# In-memory DuckDB database shared by all following cells; nothing is
# written to disk.
duck = duckdb.connect(":memory:")

## Einlesen GTFS 

In [None]:
# Directory holding the static GTFS text files (a local path, despite the
# variable name).
base_url = '/home/zvbn/python/gtfs/gtfs_top'

# One path per GTFS table that is loaded below: <base_url>/<table>.txt
gtfs_stops, gtfs_stop_times, gtfs_trips, gtfs_routes, gtfs_agency = (
    os.path.join(base_url, f"{table}.txt")
    for table in ("stops", "stop_times", "trips", "routes", "agency")
)

In [None]:
# Load the static GTFS text files into DuckDB tables (stops, agency,
# stop_times, trips, routes), replacing any tables left over from an earlier
# run. All statements are sent in a single call.
# The doubled braces are f-string escapes for the literal {...} struct that is
# passed as read_csv's `columns` argument; trips is read with an explicit
# all-VARCHAR schema, while the other files rely on read_csv's defaults.
# NOTE(review): presumably the VARCHAR schema keeps the IDs as text for the
# joins further down — confirm. No explicit header option is given for trips;
# verify that the header row is not loaded as a data row.
duck.sql(f"""
CREATE or replace TABLE stops AS select * from read_csv('{gtfs_stops}');
create or replace table agency as select * from read_csv('{gtfs_agency}'); 

CREATE or replace TABLE stop_times AS select * from read_csv('{gtfs_stop_times}');
CREATE or replace TABLE trips AS select * from read_csv('{gtfs_trips}', delim=',',columns = {{ 
        'route_id' : 'VARCHAR',       
        'service_id' : 'VARCHAR',       
            
        'trip_id' : 'VARCHAR',       
        'trip_headsign' : 'VARCHAR',       
        'trip_short_name': 'VARCHAR',        
        'direction_id': 'VARCHAR',        
        'block_id': 'VARCHAR',        
        'shape_id': 'VARCHAR'  ,      
        'wheelchair_accessible': 'VARCHAR' ,       
        'bikes_allowed': 'VARCHAR'        
    }});
CREATE or replace TABLE routes AS select * from read_csv('{gtfs_routes}');
         """)

In [None]:
# Sanity check of the import: show ten rows of the stops table as a DataFrame.
stops_preview = duck.sql("from stops limit 10")
stops_preview.df()

## Einlesen Protobuf

In [None]:
# Download the current GTFS-Realtime feed and flatten its trip updates into
# two row lists:
#   trip_updates      - one row per trip update
#                       [trip_id, route_id, start_time, start_date,
#                        schedule_relationship]
#   stop_time_updates - one row per stop time update
#                       [trip_id, stop_sequence, stop_id,
#                        departure delay, arrival delay]
feed = gtfs_realtime_pb2.FeedMessage()

# Fail fast instead of hanging forever on an unresponsive server, and refuse
# to hand an HTTP error page to the protobuf parser.
response = requests.get(url, timeout=30)
response.raise_for_status()
feed.ParseFromString(response.content)

trip_updates = []
stop_time_updates = []
for entity in feed.entity:
    # Only trip updates are collected; entities without one (e.g. alerts)
    # are skipped.
    if not entity.HasField('trip_update'):
        continue

    trip = entity.trip_update.trip
    trip_updates.append([
        trip.trip_id,
        trip.route_id,
        trip.start_time,
        trip.start_date,
        trip.schedule_relationship,
    ])

    # NOTE(review): an unset departure/arrival reads as delay 0 (protobuf
    # default value), so "no prediction" and "on time" look identical in
    # these rows — confirm that this is acceptable for the evaluation.
    stop_time_updates.extend(
        [
            trip.trip_id,
            stu.stop_sequence,
            stu.stop_id,
            stu.departure.delay,
            stu.arrival.delay,
        ]
        for stu in entity.trip_update.stop_time_update
    )
    


In [None]:
# Turn the collected trip update rows into a DataFrame and display it.
trip_update_columns = [
    'trip_id',
    'route_id',
    'start_time',
    'start_date',
    'schedule_relationship',
]
trip_updates_df = pd.DataFrame(data=trip_updates, columns=trip_update_columns)
trip_updates_df

In [None]:
# Turn the collected stop time update rows into a DataFrame and display it.
# The 'departure' and 'arrival' columns hold the delay values taken from
# the feed.
stop_time_update_columns = [
    'trip_id',
    'stop_sequence',
    'stop_id',
    'departure',
    'arrival',
]
stop_time_updates_df = pd.DataFrame(
    data=stop_time_updates,
    columns=stop_time_update_columns,
)
stop_time_updates_df

In [None]:
# Copy both realtime DataFrames into DuckDB tables so they can be joined with
# the static GTFS tables. The SQL refers to the DataFrames by their Python
# variable names.
for create_statement in (
    """create or replace table trip_updates as select * from trip_updates_df;""",
    """create or replace table stop_times_updates as select * from stop_time_updates_df;""",
):
    duck.sql(create_statement)

In [None]:
# Realtime delays per stop for routes 630 and 670 of agency 124, enriched
# with the static GTFS names (agency, route, trip headsign, stop name).
# The row filters live in a WHERE clause rather than in the last join's ON
# condition; with inner joins the result is the same, but the intent is
# explicit. A plain string is used because nothing is interpolated.
duck.sql("""select t.trip_id, a.agency_name, r.route_short_name, t.trip_short_name,
                    t.trip_headsign, tu.start_time, a.agency_id,
                    stu.stop_sequence, stu.arrival, stu.departure, s.stop_name
             from trips t
             join routes r on t.route_id = r.route_id
             join agency a on r.agency_id = a.agency_id
             join trip_updates tu on t.trip_id = tu.trip_id
             join stop_times_updates stu on tu.trip_id = stu.trip_id
             join stops s on stu.stop_id = s.stop_id
            where r.route_short_name in ('630', '670')
              and a.agency_id in (124)
            order by t.trip_short_name, stu.stop_sequence""")