# Laden der VBN GTFS-R Daten

In [1]:
import pandas as pd
import geopandas as gpd
import os
import duckdb
from google.transit import gtfs_realtime_pb2
import requests
import datetime as dt
import locale
from dotenv import load_dotenv


In [2]:
load_dotenv()

True

In [3]:
locale.setlocale(locale.LC_ALL, 'en_GB.UTF-8') # damit Wochentag in englisch

'en_GB.UTF-8'

In [4]:
url = "http://gtfsr.vbn.de/gtfsr_connect.bin"

## Starten der DuckDB

In [5]:
duck = duckdb.connect(database=':memory:')

In [6]:
# Weitere Version
sql = f"""
INSTALL postgres;
LOAD postgres;
ATTACH 'dbname=zvbn_postgis user={os.environ.get('POSTGRES_USER')} password={os.environ.get('POSTGRES_PW')} host=127.0.0.1 port=5432' AS dm (TYPE POSTGRES, READ_ONLY);
SHOW ALL TABLES;
"""
duck.sql(sql) #Herstellen der Verbindung


┌──────────┬──────────┬────────────────────┬───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬───────────┐
│ database │  schema  │        name        │                                                                                                                                   column_names                                                                                                                                    │                                                                                                           

In [7]:
df_linien = duck.sql("select nummer from dm.basis.linien where aktiv = True and ebene in ('1', '1+') order by nummer").df() # Abfrage
df_linien

Unnamed: 0,nummer
0,101
1,102
2,120
3,121
4,123
5,133
6,137
7,138
8,150
9,170


In [8]:
duck.sql("""install spatial;""")
duck.sql("""load spatial;""")

In [9]:
duck.sql("""create or replace table vbn as select * from st_read('/home/zvbn/python/gtfs/grenzen/vbn_01082018.shp') as vbn""")

## Einlesen GTFS 

In [10]:
base_url = '/home/zvbn/python/gtfs/gtfs_top'
gtfs_stops = os.path.join(base_url, 'stops.txt')
gtfs_stop_times = os.path.join(base_url,'stop_times.txt')
gtfs_trips = os.path.join(base_url,'trips.txt')
gtfs_routes = os.path.join(base_url,'routes.txt')
gtfs_agency = os.path.join(base_url,'agency.txt')
gtfs_calendar = os.path.join(base_url,'calendar.txt')
gtfs_calendar_dates = os.path.join(base_url,'calendar_dates.txt')

In [11]:
duck.sql(f"""
CREATE or replace TABLE stops AS select * from read_csv('{gtfs_stops}');
create or replace table agency as select * from read_csv('{gtfs_agency}'); 

CREATE or replace TABLE stop_times AS select * from read_csv('{gtfs_stop_times}');
CREATE or replace TABLE trips AS select * from read_csv('{gtfs_trips}', delim=',',columns = {{ 
        'route_id' : 'VARCHAR',       
        'service_id' : 'VARCHAR',       
            
        'trip_id' : 'VARCHAR',       
        'trip_headsign' : 'VARCHAR',       
        'trip_short_name': 'VARCHAR',        
        'direction_id': 'VARCHAR',        
        'block_id': 'VARCHAR',        
        'shape_id': 'VARCHAR'  ,      
        'wheelchair_accessible': 'VARCHAR' ,       
        'bikes_allowed': 'VARCHAR'        
    }});
CREATE or replace TABLE routes AS select * from read_csv('{gtfs_routes}');
CREATE or replace TABLE calendar AS select * from read_csv('{gtfs_calendar}');
CREATE or replace TABLE calendar_dates AS select * from read_csv('{gtfs_calendar_dates}');
         """)

In [12]:
weekday_today_name = dt.datetime.now().strftime('%A').lower()
weekday_today_name

'tuesday'

In [13]:
dt.datetime.now().strftime('%Y%m%d')

'20250708'

In [14]:
duck.sql(f"""create or replace table verkehrt_heute as select * from
         (select distinct t.service_id, cd.verkehrt_cd, c.verkehrt_c
         from trips t
         left join (select service_id, date, exception_type as verkehrt_cd from calendar_dates 
         where date =  {dt.datetime.now().strftime('%Y%m%d')} and exception_type = 1) as cd 
         on t.service_id = cd.service_id
         left join  (select service_id, {weekday_today_name} as verkehrt_c
         from calendar
         where {weekday_today_name} = 1) as c
         on t.service_id = c.service_id
         where c.verkehrt_c = 1 or cd.verkehrt_cd = 1)""")

In [15]:
duck.sql("""from verkehrt_heute""") 

┌────────────┬─────────────┬────────────┐
│ service_id │ verkehrt_cd │ verkehrt_c │
│  varchar   │    int64    │   int64    │
├────────────┼─────────────┼────────────┤
│ 48         │           1 │       NULL │
│ 2966       │           1 │       NULL │
│ 2992       │        NULL │          1 │
│ 3078       │           1 │       NULL │
│ 3116       │           1 │       NULL │
│ 3140       │           1 │       NULL │
│ 3021       │        NULL │          1 │
│ 3084       │        NULL │          1 │
│ 3125       │        NULL │          1 │
│ 3130       │        NULL │          1 │
│  ·         │           · │          · │
│  ·         │           · │          · │
│  ·         │           · │          · │
│ 2216       │           1 │       NULL │
│ 2265       │           1 │       NULL │
│ 2266       │           1 │       NULL │
│ 452        │           1 │       NULL │
│ 2289       │           1 │       NULL │
│ 2332       │           1 │       NULL │
│ 2357       │           1 │      

In [16]:
duck.sql(""" create or replace table lin_vbn as
         (select r.agency_id, r.route_id from
         (select s.stop_id
         from stops s, vbn
         
         where st_contains(vbn.geom, st_point(s.stop_lon, s.stop_lat)) 
         group by all) as s 
         join stop_times st on s.stop_id = st.stop_id
         join trips t on st.trip_id = t.trip_id
         join routes r on t.route_id = r.route_id
         group by all)""")

In [17]:
duck.sql("""select a.agency_name, a.agency_id, r.route_short_name,
         from routes r
         join agency a on r.agency_id = a.agency_id
         where lower(r.route_short_name) like lower('%680%') """).df()

Unnamed: 0,agency_name,agency_id,route_short_name
0,Weser-Ems-Bus Betrieb Osnabrück,2402,680
1,Weser-Ems-Bus Betrieb Bremen,2401,680
2,Verkehrsgemeinschaft Osnabrück,615,680


In [18]:
duck.sql("from agency where lower(agency_name) like lower('%Weser%') ").df()

Unnamed: 0,agency_id,agency_name,agency_url,agency_timezone,agency_lang,agency_phone
0,124,Eisenbahnen und Verkehrsbetriebe Elbe-Weser GmbH,http://www.evb-elbe-weser.de/,Europe/Berlin,de,+49 4281 9440
1,219,Verkehrsbetriebe Wesermarsch GmbH,http://www.vbw-wesermarsch.jimdo.com/,Europe/Berlin,de,+49 4731 8640
2,266,Weserfähre GmbH,http://www.weserfaehre.de/,Europe/Berlin,de,+49 471 3003600
3,1655,Eisenbahnen und Verkehrsbetriebe Elbe-Weser Gm...,http://www.evb-elbe-weser.de/startseite/,Europe/Berlin,de,+49 4761 9931-0
4,2365,Weserfähre Bremen,https://www.faehren-bremen.de/,Europe/Berlin,de,+49421 - 69 03 69
5,2400,Weser-Ems-Bus Betrieb Ostfriesland,http://www.weser-ems-bus.de/,Europe/Berlin,de,+4949192536-0
6,2401,Weser-Ems-Bus Betrieb Bremen,http://www.weser-ems-bus.de/,Europe/Berlin,de,+49 421 89777603
7,2402,Weser-Ems-Bus Betrieb Osnabrück,http://www.weser-ems-bus.de/,Europe/Berlin,de,+49 54197061650
8,2403,Weser-Ems-Bus Auftragnehmerleistungen,http://www.weser-ems-bus.de/,Europe/Berlin,de,+49 421 89777603
9,3030,MEW Mobilitätszentrale Elbe-Weser,https://mew-mobility.de,Europe/Berlin,,+49 (0)4778 2179980


## Einlesen Protobuf

In [19]:
feed = gtfs_realtime_pb2.FeedMessage()
response = requests.get(url)
feed.ParseFromString(response.content)
trip_updates = []
stop_time_updates = []
is_deleted = []
for entity in feed.entity:
    #print(entity)
    id = entity.id
    #print(id)

    if entity.HasField('trip_update'):
    #     print(entity.trip_update)
    #     #print(entity.trip.trip_id)
        # print('tu: ',entity.trip_update.trip.trip_id, 
        #       entity.trip_update.trip.route_id, 
        #        entity.trip_update.trip.start_time, 
        #        entity.trip_update.trip.start_date,
        #        entity.trip_update.trip.direction_id,
        #        entity.trip_update.trip.schedule_relationship,
        #        )
        trip_updates.append([entity.trip_update.trip.trip_id, entity.trip_update.trip.route_id, 
                             entity.trip_update.trip.start_time, entity.trip_update.trip.start_date, 
                             entity.trip_update.trip.schedule_relationship])
        
        if entity.trip_update.stop_time_update:
            for stu in entity.trip_update.stop_time_update:
                #print('stu: ', stu.stop_sequence, stu.stop_id, 'dep: ',stu.departure, ' arr:', stu.arrival)
                stop_time_updates.append([entity.trip_update.trip.trip_id, 
                                  stu.stop_sequence,
                                  stu.stop_id,
                                  stu.departure.delay,
                                  stu.arrival.delay])
               

    if entity.HasField('alert'):
        print(entity.alert)

    if entity.HasField('is_deleted'):
        print(entity.is_deleted)
    


In [20]:
trip_updates_df = pd.DataFrame(trip_updates, columns=['trip_id', 'route_id', 'start_time', 'start_date', 'schedule_relationship'])
trip_updates_df

Unnamed: 0,trip_id,route_id,start_time,start_date,schedule_relationship
0,425058392,76094_3,16:46:00,20250708,0
1,429160284,14246_3,16:16:00,20250708,0
2,400157395,65695_3,18:39:00,20250708,0
3,429158533,14224_3,17:48:00,20250708,0
4,429095647,35755_0,18:41:00,20250708,0
...,...,...,...,...,...
6945,##VDV##COMPOSED;JI~DOO;7163641#Bus#!ADD!#DDS-S...,63220_2,19:12:00,20250708,1
6946,##VDV##COMPOSED;JI~DOO;2949-80-8000152-143600#...,44911_2,14:36:00,20250708,1
6947,##VDV##COMPOSED;JI~DOO;3944006-00006-2#!ADD!#B...,35727_3,17:34:00,20250708,1
6948,##VDV##COMPOSED;JI~DOO;3945006-00006-2#!ADD!#B...,35715_3,18:06:00,20250708,1


In [21]:
stop_time_updates_df = pd.DataFrame(stop_time_updates, columns=['trip_id', 'stop_sequence', 'stop_id', 'departure', 'arrival'])
stop_time_updates_df

Unnamed: 0,trip_id,stop_sequence,stop_id,departure,arrival
0,425058392,0,000000708566,0,0
1,425058392,2,000000704978,0,-77
2,429160284,0,000000905001,197,0
3,429160284,1,000000905011,223,223
4,429160284,2,000000905063,279,272
...,...,...,...,...,...
39170,##VDV##COMPOSED;JI~DOO;2927-80-8011102-091200#...,12,8000096,1020,1020
39171,##VDV##COMPOSED;JI~DOO;2927-80-8011102-091200#...,13,8000170,1200,1140
39172,##VDV##COMPOSED;JI~DOO;2927-80-8011102-091200#...,14,8000013,1500,1440
39173,##VDV##COMPOSED;JI~DOO;2927-80-8011102-091200#...,15,8004158,1380,1380


In [22]:
duck.sql("""create or replace table trip_updates as select * from trip_updates_df;""")
duck.sql("""create or replace table stop_times_updates as select * from stop_time_updates_df;""")

In [23]:
duck.sql("""create or replace table trip_updates_vbn as
         (select t.trip_id, a.agency_name, r.route_short_name, t.trip_short_name,t.trip_headsign,tu.start_time ,a.agency_id,  
         stu.stop_sequence,stu.arrival, stu.departure, s.stop_name
             from trips t
             join routes r on t.route_id = r.route_id
             join agency a on r.agency_id = a.agency_id             
             join trip_updates tu on t.trip_id = tu.trip_id
             join stop_times_updates stu on tu.trip_id = stu.trip_id
             join stops s on stu.stop_id = s.stop_id
             join lin_vbn lv on r.agency_id = lv.agency_id and r.route_id = lv.route_id
           order by r.route_short_name, t.trip_short_name, stu.stop_sequence)""")

In [24]:
duck.sql("""select * from trip_updates_vbn""")

┌───────────┬───────────────────────────────┬──────────────────┬─────────────────┬───────────────────────────────┬────────────┬───────────┬───────────────┬─────────┬───────────┬───────────────────────────────────────────┐
│  trip_id  │          agency_name          │ route_short_name │ trip_short_name │         trip_headsign         │ start_time │ agency_id │ stop_sequence │ arrival │ departure │                 stop_name                 │
│  varchar  │            varchar            │     varchar      │     varchar     │            varchar            │  varchar   │   int64   │     int64     │  int64  │   int64   │                  varchar                  │
├───────────┼───────────────────────────────┼──────────────────┼─────────────────┼───────────────────────────────┼────────────┼───────────┼───────────────┼─────────┼───────────┼───────────────────────────────────────────┤
│ 425725545 │ Bremer Straßenbahn AG         │ 1                │ 00257           │ Bf Mahndorf                  

In [25]:
duck.sql("""select agency_name, route_short_name 
         from trip_updates_vbn 
         group by all
         order by agency_name, route_short_name""").df()

Unnamed: 0,agency_name,route_short_name
0,AM Bus,330
1,AM Bus,340
2,AM Bus,370
3,AllerBus,510
4,AllerBus,701
...,...,...
207,Weser-Ems-Bus Betrieb Bremen,880
208,Weser-Ems-Bus Betrieb Bremen,888
209,Weser-Ems-Bus Betrieb Ostfriesland,623
210,Weser-Ems-Bus Betrieb Ostfriesland,625


## Verknüpfung Fahrten heute mit Verlauf

In [48]:
duck.sql("""select departure_time::time, current_time, datediff('HOUR', departure_time::time, current_time::time), 
         current_time::time from stop_times
        """)

┌──────────────────────────────┬─────────────────────┬────────────────────────────────────────────────────────────────────────────┬────────────────────────────┐
│ CAST(departure_time AS TIME) │    current_time     │ datediff('HOUR', CAST(departure_time AS TIME), CAST(current_time AS TIME)) │ CAST(current_time AS TIME) │
│             time             │ time with time zone │                                   int64                                    │            time            │
├──────────────────────────────┼─────────────────────┼────────────────────────────────────────────────────────────────────────────┼────────────────────────────┤
│ 05:51:00                     │ 17:09:31.428+02     │                                                                         12 │ 17:09:31.428               │
│ 05:54:00                     │ 17:09:31.428+02     │                                                                         12 │ 17:09:31.428               │
│ 06:00:00                     │ 1

In [50]:
duck.sql("""select r.route_short_name, t.trip_short_name, t.trip_headsign, st.stop_sequence, 
         s.stop_name, st.departure_time, tu.departure, tu.arrival, max(departure) over (partition by t.trip_short_name) as max_departure,
         from stop_times st
         join trips t on st.trip_id = t.trip_id
         join verkehrt_heute vh on t.service_id = vh.service_id
         join stops s on st.stop_id = s.stop_id
         join routes r on t.route_id = r.route_id
         join lin_vbn lv on r.agency_id = lv.agency_id and r.route_id = lv.route_id
         join trip_updates_vbn tu on t.trip_id = tu.trip_id and st.stop_sequence = tu.stop_sequence

         where r.route_short_name in (select nummer from dm.basis.linien where aktiv = True and ebene in ('1', '1+') order by nummer) 
         -- and abs(datediff('HOUR', departure_time::time, current_time::time)) < 2

         qualify max_departure = departure

         order by r.route_short_name
                 
         
         """).df()

Unnamed: 0,route_short_name,trip_short_name,trip_headsign,stop_sequence,stop_name,departure_time,departure,arrival,max_departure
0,101,1101037,"Bassum, Bahnhof",9,Bremen Huckelriede,16:43:00,702,702,702
1,101,1101020,"Bremen, Hauptbahnhof",0,Fahrenhorst(Stuhr) Kreuzung,17:35:00,0,0,0
2,101,1101015,"Bassum, Bahnhof",0,Bremen Hauptbahnhof,17:25:00,0,0,0
3,101,1101024,"Bremen, Hauptbahnhof",0,Bassum Bahnhof,19:15:00,0,0,0
4,101,1101019,"Bassum, Bahnhof",0,Bremen Hauptbahnhof,18:25:00,0,0,0
...,...,...,...,...,...,...,...,...,...
178,S35,6352514,Oldenburg(Oldb) ZOB,0,Westerstede Hössenbad,17:05:00,0,0,0
179,S35,6352512,Oldenburg(Oldb) ZOB,10,Oldenburg(Oldb) Kath. Friedhof/BBS Haarentor,16:39:00,531,531,531
180,S60,1600022,Schwanewede Ostlandstraße (Heideschule),0,Bremen Hauptbahnhof,16:40:00,711,0,711
181,S60,1600023,Bremen Hauptbahnhof,3,Eggestedt(Schwanewede) Betonstraße,16:42:00,393,393,393
