# Auswertung der Parquet-Dateien aus dem Echtzeitarchiv V14

## Import der Module und Setzen der Parameter

In [1]:
import duckdb
import pandas as pd
import seaborn as sns
import sys
from dotenv import load_dotenv
import os
import openpyxl

import datetime as dt
import importlib

from dotenv import load_dotenv, dotenv_values
from redmine import delete_upload_dmsf
import logging
import glob

from openpyxl.styles import NamedStyle

In [None]:
# Target of the run log (relative to the working directory).
log_file = "log/log_rt.txt"

# logging.basicConfig(filename=...) opens the log file right away and raises
# FileNotFoundError when the target directory is missing, so create it first.
os.makedirs(os.path.dirname(log_file), exist_ok=True)

logging.basicConfig(
    filename=log_file,
    level=logging.DEBUG,
    style="{",
    format="{asctime} [{levelname:8}] {message}",
    datefmt="%d.%m.%Y %H:%M:%S",
)

# Load the variables from the .env file into the process environment.
load_dotenv()

True

In [3]:
# Extend the module search path; presumably this is where class_rt_duck lives.
# NOTE(review): absolute, machine-specific path - confirm before running elsewhere.
sys.path.append('/home/zvbn/python/rt2')

In [4]:
from class_rt_duck import RtDuck

In [5]:
# Mark the start of the evaluation run in the log.
logging.info("Auswertung RT aus parquet gestartet")

In [6]:
# Read the entries of the .env file into `config` (no use of `config` is visible in this part of the notebook).
config = dotenv_values(".env")
#config

In [7]:
# Show up to 100 columns when a DataFrame is displayed.
pd.options.display.max_columns = 100

In [8]:
# Read the clock exactly once so that every derived value refers to the same
# moment; separate now()/today() calls could straddle midnight.
zeitpunkt = dt.datetime.now()
heute = zeitpunkt.date()

# Timestamp of this run and reference dates as ISO strings (YYYY-MM-DD).
jetzt = zeitpunkt.strftime('%Y-%m-%d %H:%M:%S')
gestern = (heute - dt.timedelta(days=1)).strftime('%Y-%m-%d')
letzte07tage = (heute - dt.timedelta(days=7)).strftime('%Y-%m-%d')
letzte14tage = (heute - dt.timedelta(days=14)).strftime('%Y-%m-%d')
letzte21tage = (heute - dt.timedelta(days=21)).strftime('%Y-%m-%d')

print(jetzt, letzte21tage)

2025-01-14 12:59:15 2024-12-24


## Funktionen

In [9]:
def replace_german_special_characters(text) -> str:
    """Return *text* with German umlauts and sharp s transliterated to ASCII.

    ä/ö/ü become ae/oe/ue, Ä/Ö/Ü become Ae/Oe/Ue and ß becomes ss; every
    other character is left unchanged.
    """
    # One translation table, applied in a single pass over the string.
    umlaut_table = str.maketrans({
        'ä': 'ae',
        'ö': 'oe',
        'ü': 'ue',
        'Ä': 'Ae',
        'Ö': 'Oe',
        'Ü': 'Ue',
        'ß': 'ss',
    })
    return text.translate(umlaut_table)

In [10]:
# Helpers for formatting values in the HTML output
def func_proz(s):
    """Render ``1 - s`` as a percentage string, truncated to one decimal.

    Values whose integer part is -1 are rendered as '-'.
    """
    if int(s) == -1:
        return '-'
    promille = int((1 - s) * 1000)
    return str(promille / 10) + '%'


def func_date(s):
    """Format a datetime Series as MM/DD/YYYY strings."""
    return s.dt.strftime('%m/%d/%Y')

## CSS Styles

In [11]:
# CSS rules for the styled HTML tables (one dict per rule: selector + props)

# Highlight the cell under the mouse pointer; for row hover use <tr> instead of <td>.
cell_hover = dict(
    selector='td:hover',
    props=[('background-color', '#ffffb3')],
)

# Index name cells: italic, grey, normal weight.
index_names = dict(
    selector='.index_name',
    props='font-style: italic; color: darkgrey; font-weight:normal; font-family: sans-serif;',
)

# All header cells except the index name: black on white.
headers = dict(
    selector='th:not(.index_name)',
    props='background-color: #FFFFFF; color: #000000; font-family: sans-serif;',
)

# Data cells: right-aligned.
td = dict(selector='td', props='text-align:right; font-family: sans-serif')

## Testen der Klasse RtDuck

In [12]:
# Create the RtDuck helper object; every query below goes through `rt`.
rt = RtDuck()
rt

<class_rt_duck.RtDuck at 0x7f6fcb60d310>

In [13]:
#Schließen der Verbindung
#rt.verbindung_schliessen()

In [14]:
# Build the 'fahrten' table for the 'prod' server (the call reports "Table 'fahrten' created.").
rt.create_table_fahrten(server = 'prod')

Table 'fahrten' created.


In [15]:
# Build the remaining tables (zusatz, verlauf, matrix) for the 'prod' server.
rt.create_table_zusatz(server = 'prod')
rt.create_table_verlauf(server = 'prod')
rt.create_table_matrix(server = 'prod')

Table 'zusatz' created.
Table 'verlauf' created.
Table 'matrix' created.


In [17]:
# Ad-hoc check: rows of 'fahrten' whose lineid starts with 'de:VBN:740' on the
# hard-coded day 2025-01-12; lineid and lineid_short are listed first, then all columns.
rt.cursor.sql("select lineid, lineid_short, * from fahrten where datum::date = '2025-01-12' and lineid like 'de:VBN:740%'")

┌──────────────┬──────────────┬───┬─────────────────────┬──────────────────────┬──────────────────────┐
│    lineid    │ lineid_short │ … │ fahrtendstationdhid │ realtimeHasEverBee…  │       filename       │
│   varchar    │   varchar    │   │       varchar       │       boolean        │       varchar        │
├──────────────┼──────────────┼───┼─────────────────────┼──────────────────────┼──────────────────────┤
│ de:VBN:740:1 │ de:VBN:740   │ … │ de:04011:13925::H   │ true                 │ out/parquet/prod/f…  │
│ de:VBN:740:1 │ de:VBN:740   │ … │ de:03361:93709::5   │ true                 │ out/parquet/prod/f…  │
│ de:VBN:740:1 │ de:VBN:740   │ … │ de:04011:13925::H   │ true                 │ out/parquet/prod/f…  │
│ de:VBN:740:1 │ de:VBN:740   │ … │ de:03361:93709::5   │ true                 │ out/parquet/prod/f…  │
├──────────────┴──────────────┴───┴─────────────────────┴──────────────────────┴──────────────────────┤
│ 4 rows                                                        

In [18]:
# Per (lineid_short, clientid) pair seen in the last 7 days: the latest datum.
# NOTE(review): `distinct` is redundant together with `group by all`.
df_line_clientid = rt.cursor.sql("""select distinct lineid_short, clientid, max(datum) as max_datum
                                 from fahrten 
                                 where datum >= (current_date - interval 7 day) 
                                 group by all
                                 order by  clientid, lineid_short""").df()

In [19]:
# Per line and bundle on the hard-coded day 2025-01-12: percentage of trips
# (one decimal) for which realtimeHasEverBeenReported is true.
rt.cursor.sql( """select f.lineid_short, datum::date as datum, buendel, 
                round((count (*) filter (realtimeHasEverBeenReported = true) / count(*) ) * 100, 1) as anteil_ez  
              from fahrten f 
                join linien l on f.lineid_short = l.dlid 
              
              where datum::date = '2025-01-12'
              group by all""")

┌──────────────┬────────────┬───────────┬───────────┐
│ lineid_short │   datum    │  buendel  │ anteil_ez │
│   varchar    │    date    │  varchar  │  double   │
├──────────────┼────────────┼───────────┼───────────┤
│ de:VBN:330   │ 2025-01-12 │ AM Ost    │      92.1 │
│ de:VBN:313   │ 2025-01-12 │ OL Stadt  │     100.0 │
│ de:VBN:4     │ 2025-01-12 │ HB Tram   │     100.0 │
│ de:VBN:504   │ 2025-01-12 │ BHV       │       0.0 │
│ de:VBN:680   │ 2025-01-12 │ OHZ Mitte │     100.0 │
│ de:VBN:502   │ 2025-01-12 │ BHV       │      88.9 │
│ de:VBN:N1    │ 2025-01-12 │ HB Tram   │     100.0 │
│ de:VBN:N6    │ 2025-01-12 │ HB Bus    │     100.0 │
│ de:VBN:98    │ 2025-01-12 │ HB Bus    │      96.2 │
│ de:VBN:660   │ 2025-01-12 │ OHZ Mitte │     100.0 │
│     ·        │     ·      │    ·      │       ·   │
│     ·        │     ·      │    ·      │       ·   │
│     ·        │     ·      │    ·      │       ·   │
│ de:VBN:309   │ 2025-01-12 │ OL Stadt  │     100.0 │
│ de:VBN:517   │ 2025-01-12 

## Erstellen der Statistiken log_12_pivot und log_3_pivot

In [20]:
def sql_ebenen(ebenen):
    """Build the SQL for the daily realtime share per bundle.

    The query returns, for the last 60 days, one row per day and bundle with
    the percentage of trips (one decimal) for which
    realtimeHasEverBeenReported is true, restricted to the given levels.

    Args:
        ebenen: Either an iterable of level names (e.g. ``('1+', '1')`` or
            ``['3']``) or an already formatted SQL list such as ``"('3')"``,
            which is inserted unchanged.

    Returns:
        The SQL statement as a string.

    Raises:
        ValueError: If an iterable without any value is passed.
    """
    if isinstance(ebenen, str):
        # Backward compatible: callers may pass a ready-made "(...)" list.
        in_list = ebenen
    else:
        # Build the list explicitly instead of relying on the tuple repr,
        # which yields "('3',)" for one element and brackets for a list.
        werte = [str(e).replace("'", "''") for e in ebenen]
        if not werte:
            raise ValueError("ebenen must contain at least one value")
        in_list = "(" + ", ".join(f"'{w}'" for w in werte) + ")"

    sql = f""" 
              select datum::date::text as datum, buendel, 
                round((count (*) filter (realtimeHasEverBeenReported = true) / count(*) ) * 100, 1) as anteil_ez  
              from fahrten f 
                join linien l on f.lineid_short = l.dlid 
              where 
                f.datum > (current_date - interval 60 day)
              and ebene in {in_list}
              group by all"""

    return sql


### HTML Stil

In [21]:
# CSS rules for the pivot tables written to HTML (re-declares the table styles)

# Highlight the cell under the mouse pointer; for row hover use <tr> instead of <td>.
cell_hover = dict(
    selector='td:hover',
    props=[('background-color', '#ffffb3')],
)

# Index name cells: italic, grey, normal weight.
index_names = dict(
    selector='.index_name',
    props='font-style: italic; color: darkgrey; font-weight:normal; font-family: sans-serif;',
)

# All header cells except the index name: black on white.
headers = dict(
    selector='th:not(.index_name)',
    props='background-color: #FFFFFF; color: #000000; font-family: sans-serif;',
)

# Data cells: right-aligned.
td = dict(selector='td', props='text-align:right; font-family: sans-serif')

In [22]:
# Daily realtime share per bundle for the levels 1+, 1, 2, Stadt and Nacht,
# pivoted to days x bundles and written as a colour-coded HTML table.
ebenen = ('1+', '1', '2', 'Stadt', 'Nacht')
df = rt.cursor.sql(sql_ebenen(ebenen=ebenen)).df()
df_pivot = df.pivot(index='datum', columns='buendel', values='anteil_ez')
(
    df_pivot.sort_values('datum', ascending=False)  # newest day first
    .style
    .background_gradient(cmap="RdYlGn", axis=None, vmin=0.0, vmax=95)
    .highlight_null(color='white')
    .format(formatter='{:.1f}%', precision=1, na_rep='-', thousands=" ", decimal=',')
    .set_table_styles([index_names, headers, td])
    .to_html('/var/www/rt_archiv/log_12_pivot.html', encoding='LATIN1')
)

In [23]:
# Same pivot table for level 3 only; the level list is passed as a ready-made SQL list.
ebenen = "('3')"
df = rt.cursor.sql(sql_ebenen(ebenen=ebenen)).df()
df_pivot = df.pivot(index='datum', columns='buendel', values='anteil_ez')
(
    df_pivot.sort_values('datum', ascending=False)  # newest day first
    .style
    .background_gradient(cmap="RdYlGn", axis=None, vmin=0.0, vmax=95)
    .highlight_null(color='white')
    .format(formatter='{:.1f}%', precision=1, na_rep='-', thousands=" ", decimal=',')
    .set_table_styles([index_names, headers, td])
    .to_html('/var/www/rt_archiv/log_3_pivot.html', encoding='LATIN1')
)

In [24]:
# Display the per-operator (vu) trip counts and realtime shares returned by RtDuck.
rt.anzahl_fahrten_betreiber()

Unnamed: 0,vu,count_ges,count_rt,heute_minus_1_ges,heute_minus_1_rt,anteil_heute_minus_1,heute_minus_2_ges,heute_minus_2_rt,anteil_heute_minus_2,heute_minus_3_ges,heute_minus_3_rt,anteil_heute_minus_3
0,Bremer Straßenbahn AG,676263,305147,5831,5725,98.2%,2835,2762,97.4%,4382,4266,97.3%
1,Verkehr und Wasser GmbH (VWG),244150,101948,2107,2077,98.6%,916,916,100.0%,1629,1628,99.9%
2,BREMERHAVEN BUS,160621,44632,1338,866,64.7%,764,442,57.9%,1007,582,57.8%
3,KVG Stade GmbH & Co. KG,137387,5171,1072,136,12.7%,861,10,1.2%,968,30,3.1%
4,Verkehrsbetriebe Oldenburger Land,129038,49325,1451,1233,85.0%,167,160,95.8%,376,353,93.9%
5,Eisenbahnen und Verkehrsbetriebe Elbe-Weser GmbH,70086,11439,780,290,37.2%,73,49,67.1%,123,58,47.1%
6,NordWestBahn,57206,25519,442,440,99.5%,353,350,99.2%,417,410,98.3%
7,Delbus GmbH & Co. KG,54783,23050,453,405,89.4%,222,222,100.0%,379,360,95.0%
8,Reisedienst von Rahden GmbH & Co. KG,47652,19435,568,523,92.1%,24,24,100.0%,144,144,100.0%
9,Weser-Ems-Bus Betrieb Bremen,45863,12134,547,329,60.1%,38,32,84.2%,96,61,63.5%


In [25]:
# Publish the per-operator table as HTML.
# NOTE(review): this calls anzahl_fahrten_betreiber() a second time instead of reusing a stored result.
rt.anzahl_fahrten_betreiber().to_html('/var/www/rt_archiv/anzahl_fahrten_betreiber.html', encoding='LATIN1')

## Ermitteln der Fahrten, die nur 0 Min senden bzw. bei denen im Verlauf nur 0 gespeichert wurde

In [26]:
# Trips (ex_lineid, fnr) on lines matching 'de:VBN:6__:%' whose average departure
# delay over an operating day was exactly 0 although has_rt is true.
# Innermost query: average dep_del per operating day and trip.
# Middle query: keep day/trip rows of the last 28 days with average 0.
# Outer queries: count such days per trip and list trips with more than 3 of them.
rt.cursor.sql(""" select * from
                (select ex_lineid, fnr,min(operday) as start, max(operday) as ende ,count(*) as count from (
                    select * from 
                        (select operday, ex_lineid, fnr, avg(dep_del) as avg_del
                        from verlauf 
                        where dep_del is not null
                        and has_rt = true
                        group by all)
                    where avg_del = 0 and ex_lineid like 'de:VBN:6__:%' and operday > (current_date - interval 28 day)
                    order by ex_lineid)               
                
                group by all
                order by count desc)
              where count > 3
              """).df()

Unnamed: 0,ex_lineid,fnr,start,ende,count
0,de:VBN:63S:2,1006307,2024-12-23,2025-01-13,11
1,de:VBN:63S:1,8263,2024-12-18,2025-01-10,11
2,de:VBN:63S:2,1006305,2024-12-18,2025-01-10,11
3,de:VBN:640:,1640023,2024-12-18,2025-01-13,10
4,de:VBN:63S:1,8264,2024-12-18,2025-01-10,9
...,...,...,...,...,...
117,de:VBN:617:,1617002,2024-12-18,2025-01-09,4
118,de:VBN:63S:2,1006314,2024-12-20,2025-01-03,4
119,de:VBN:640:,1640014,2024-12-18,2025-01-07,4
120,de:VBN:640:,1640020,2025-01-02,2025-01-09,4


In [27]:
# Export all 'verlauf' rows of the hard-coded trip number 1630018 to an Excel file.
rt.cursor.sql("select * from verlauf where fnr = '1630018'").df().to_excel('out/verlauf_1630018.xlsx', index=False)

In [28]:
# Selection of lines (regex alternatives) and the suffix used in the output file name.
suffix = 'mitte'
#auswahl_linien = '680|660|N68'
auswahl_linien = '630|670|N63|N67'

# Per line and trip number over the last 28 days: first/last day, number of
# trips, number of trips without realtime and their share. Only trips that
# were without realtime more than once are kept, highest share first.
df_auswahl_ohne_rt = rt.cursor.sql(f"""select * from
              (select lineshort, min(datum)::date as min_datum, max(datum)::date as max_datum, fnr, 
              count(* ) as anzahl, 
              count(* ) filter (hasRealtime  = false ) as anzahl_ohne_rt, 
              anzahl_ohne_rt / count(* ) as proz_ohne_rt
              from fahrten  
              where lineid  SIMILAR TO '.*({auswahl_linien}).*' 
              -- and hasRealtime  = false 
              and datum  >= (current_date() - interval 28 days)
              group by all
              )
              where anzahl_ohne_rt > 1
              order by proz_ohne_rt desc

              -- limit 10""").df()

# All rows of 'zusatz' for the selected lines (no date restriction).
df_zusatz = rt.cursor.sql(f"""select * from zusatz where lineid  SIMILAR TO 'de:VBN:.*({auswahl_linien}).*' """).df()

ohne_rt_xl = f"out/rt_ohne_realtime_{suffix}.xlsx"
sn01 = 'ohne_rt'
with pd.ExcelWriter(ohne_rt_xl, engine='openpyxl') as writer:
    df_auswahl_ohne_rt.to_excel(writer, sheet_name=sn01, index=False)
    worksheet = writer.sheets[sn01]

    # Keep the header row visible and put a filter on the whole used range.
    worksheet.freeze_panes = 'a2'
    worksheet.auto_filter.ref = worksheet.dimensions

    # Widen the two date columns.
    for column_letter in ('B', 'C'):
        worksheet.column_dimensions[column_letter].width = 15

    # B = min_datum and C = max_datum are shown as dates, G = proz_ohne_rt as
    # a percentage; row 1 is the header and keeps its format.
    column_formats = (('B', 'YYYY-MM-DD'), ('C', 'YYYY-MM-DD'), ('G', '0.0%'))
    for column_letter, number_format in column_formats:
        for cell in worksheet[column_letter]:
            if cell.row != 1:
                cell.number_format = number_format

df_zusatz

Unnamed: 0,datum,fnr,destination,hasRealtime,vu,lineid,lineid_short,lineshort,reported_cancelled,journey_cancelled,ts_reported_cancelled,cancelled_kum,deviceid,clientid,journeyrttype,fahrtstarttime,fahrtstartstationname,fahrtstartstationdhid,fahrtendtime,fahrtendstationname,fahrtendstationdhid,realtimeHasEverBeenReported,filename
0,2024-09-01,1630226,Zeven Bahnhof Süd,True,Eisenbahnen und Verkehrsbetriebe Elbe-Weser GmbH,de:VBN:630:,de:VBN:630,630,False,False,,False,0901-1630226#!ADD!#IVU-Regio#,IVU-Regio,DEVIATION_OF_SCHEDULED,2024-09-01T20:00:00+02:00,Bremen Hauptbahnhof,de:04011:13925::G,2024-09-01T21:29:00+02:00,Zeven Bahnhof Süd,de:03357:102760::7,,out/parquet/prod/zusatz_2024_09_01.parquet
1,2024-09-02,1630054,Zeven Bahnhof Süd,True,Eisenbahnen und Verkehrsbetriebe Elbe-Weser GmbH,de:VBN:630:,de:VBN:630,630,False,False,,False,0902-1630054#!ADD!#IVU-Regio#,IVU-Regio,DEVIATION_OF_SCHEDULED,2024-09-02T18:20:00+02:00,Bremen Hauptbahnhof,de:04011:13925::G,2024-09-02T19:56:00+02:00,Zeven Bahnhof Süd,de:03357:102760::7,,out/parquet/prod/zusatz_2024_09_02.parquet
2,2024-09-02,1630058,Zeven Bahnhof Süd,True,Eisenbahnen und Verkehrsbetriebe Elbe-Weser GmbH,de:VBN:630:,de:VBN:630,630,False,False,,False,0902-1630058#!ADD!#IVU-Regio#,IVU-Regio,DEVIATION_OF_SCHEDULED,2024-09-02T20:20:00+02:00,Bremen Hauptbahnhof,de:04011:13925::G,2024-09-02T21:43:00+02:00,Zeven Bahnhof Süd,de:03357:102760::7,,out/parquet/prod/zusatz_2024_09_02.parquet
3,2024-09-09,1630054,Zeven Bahnhof Süd,True,Eisenbahnen und Verkehrsbetriebe Elbe-Weser GmbH,de:VBN:630:,de:VBN:630,630,False,False,,False,0909-1630054#!ADD!#IVU-Regio#,IVU-Regio,DEVIATION_OF_SCHEDULED,2024-09-09T18:20:00+02:00,Bremen Hauptbahnhof,de:04011:13925::G,2024-09-09T19:56:00+02:00,Zeven Bahnhof Süd,de:03357:102760::7,False,out/parquet/prod/zusatz_2024_09_09.parquet
4,2024-09-09,1630058,Zeven Bahnhof Süd,True,Eisenbahnen und Verkehrsbetriebe Elbe-Weser GmbH,de:VBN:630:,de:VBN:630,630,False,False,,False,0909-1630058#!ADD!#IVU-Regio#,IVU-Regio,DEVIATION_OF_SCHEDULED,2024-09-09T20:20:00+02:00,Bremen Hauptbahnhof,de:04011:13925::G,2024-09-09T21:43:00+02:00,Zeven Bahnhof Süd,de:03357:102760::7,False,out/parquet/prod/zusatz_2024_09_09.parquet
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
283,2025-01-06,1670018,Osterholz-Scharmbeck Bahnhof (Bus),True,Eisenbahnen und Verkehrsbetriebe Elbe-Weser GmbH,de:VBN:670:,de:VBN:670,670,False,False,,False,0106-1670018#!ADD!#IVU-Regio#,IVU-Regio,DEVIATION_OF_SCHEDULED,2025-01-06T11:40:00+01:00,Bremen Hauptbahnhof,de:04011:13925::G,2025-01-06T12:45:00+01:00,Osterholz-Scharmbeck Bahnhof (Bus),de:03356:72063::1,True,out/parquet/prod/zusatz_2025_01_06.parquet
284,2025-01-07,1630014,Zeven Bahnhof Süd,False,Eisenbahnen und Verkehrsbetriebe Elbe-Weser GmbH,de:VBN:630:,de:VBN:630,630,True,True,2025-01-07T09:26:15.065+01:00,True,0107-1630014#!ADD!#IVU-Regio#,IVU-Regio,DEVIATION_OF_SCHEDULED,2025-01-07T07:50:00+01:00,Bremen Hauptbahnhof,de:04011:13925::G,2025-01-07T09:20:00+01:00,Zeven Bahnhof Süd,de:03357:102760::7,False,out/parquet/prod/zusatz_2025_01_07.parquet
285,2025-01-12,1630226,Zeven Bahnhof Süd,True,Eisenbahnen und Verkehrsbetriebe Elbe-Weser GmbH,de:VBN:630:,de:VBN:630,630,False,False,,False,0112-1630226#!ADD!#IVU-Regio#,IVU-Regio,DEVIATION_OF_SCHEDULED,2025-01-12T20:20:00+01:00,Bremen Hauptbahnhof,de:04011:13925::G,2025-01-12T21:53:00+01:00,Zeven Bahnhof Süd,de:03357:102760::7,True,out/parquet/prod/zusatz_2025_01_12.parquet
286,2025-01-13,1630050,Zeven Bahnhof Süd,True,Eisenbahnen und Verkehrsbetriebe Elbe-Weser GmbH,de:VBN:630:,de:VBN:630,630,False,False,,False,0113-1630050#!ADD!#IVU-Regio#,IVU-Regio,DEVIATION_OF_SCHEDULED,2025-01-13T18:20:00+01:00,Bremen Hauptbahnhof,de:04011:13925::G,2025-01-13T19:59:00+01:00,Zeven Bahnhof Süd,de:03357:102760::7,True,out/parquet/prod/zusatz_2025_01_13.parquet


In [29]:
# Per line (lineshort) over the last 28 days: first/last day, number of trips,
# number of trips without realtime and their share. Only lines with more than
# one trip without realtime are kept, highest share first.
df_linien_quote_rt = rt.cursor.sql("""
            select * from
              ( select lineshort, min(datum)::date as min_datum, max(datum)::date as max_datum,  
                    count(* ) as anzahl, 
                    count(* ) filter (hasRealtime  = false ) as anzahl_ohne_rt, 
                    anzahl_ohne_rt / count(* ) as proz_ohne_rt
                from fahrten  
                where 
                -- and hasRealtime  = false 
                    datum  >= (current_date() - interval 28 days)
                group by all
              )
              where anzahl_ohne_rt > 1
              order by proz_ohne_rt desc

              -- limit 10""").df()

df_linien_quote_rt

Unnamed: 0,lineshort,min_datum,max_datum,anzahl,anzahl_ohne_rt,proz_ohne_rt
0,48,2024-12-17,2025-01-13,994,994,1.000000
1,377,2024-12-17,2025-01-13,30,30,1.000000
2,519,2024-12-17,2025-01-13,690,690,1.000000
3,394,2024-12-17,2025-01-13,270,270,1.000000
4,116,2024-12-17,2025-01-13,240,240,1.000000
...,...,...,...,...,...,...
537,5,2024-12-17,2025-01-13,1661,2,0.001204
538,28,2024-12-17,2025-01-13,2056,2,0.000973
539,21,2024-12-17,2025-01-13,3110,3,0.000965
540,27,2024-12-17,2025-01-13,3002,2,0.000666


In [30]:
# Display the current df_zusatz frame.
df_zusatz

Unnamed: 0,datum,fnr,destination,hasRealtime,vu,lineid,lineid_short,lineshort,reported_cancelled,journey_cancelled,ts_reported_cancelled,cancelled_kum,deviceid,clientid,journeyrttype,fahrtstarttime,fahrtstartstationname,fahrtstartstationdhid,fahrtendtime,fahrtendstationname,fahrtendstationdhid,realtimeHasEverBeenReported,filename
0,2024-09-01,1630226,Zeven Bahnhof Süd,True,Eisenbahnen und Verkehrsbetriebe Elbe-Weser GmbH,de:VBN:630:,de:VBN:630,630,False,False,,False,0901-1630226#!ADD!#IVU-Regio#,IVU-Regio,DEVIATION_OF_SCHEDULED,2024-09-01T20:00:00+02:00,Bremen Hauptbahnhof,de:04011:13925::G,2024-09-01T21:29:00+02:00,Zeven Bahnhof Süd,de:03357:102760::7,,out/parquet/prod/zusatz_2024_09_01.parquet
1,2024-09-02,1630054,Zeven Bahnhof Süd,True,Eisenbahnen und Verkehrsbetriebe Elbe-Weser GmbH,de:VBN:630:,de:VBN:630,630,False,False,,False,0902-1630054#!ADD!#IVU-Regio#,IVU-Regio,DEVIATION_OF_SCHEDULED,2024-09-02T18:20:00+02:00,Bremen Hauptbahnhof,de:04011:13925::G,2024-09-02T19:56:00+02:00,Zeven Bahnhof Süd,de:03357:102760::7,,out/parquet/prod/zusatz_2024_09_02.parquet
2,2024-09-02,1630058,Zeven Bahnhof Süd,True,Eisenbahnen und Verkehrsbetriebe Elbe-Weser GmbH,de:VBN:630:,de:VBN:630,630,False,False,,False,0902-1630058#!ADD!#IVU-Regio#,IVU-Regio,DEVIATION_OF_SCHEDULED,2024-09-02T20:20:00+02:00,Bremen Hauptbahnhof,de:04011:13925::G,2024-09-02T21:43:00+02:00,Zeven Bahnhof Süd,de:03357:102760::7,,out/parquet/prod/zusatz_2024_09_02.parquet
3,2024-09-09,1630054,Zeven Bahnhof Süd,True,Eisenbahnen und Verkehrsbetriebe Elbe-Weser GmbH,de:VBN:630:,de:VBN:630,630,False,False,,False,0909-1630054#!ADD!#IVU-Regio#,IVU-Regio,DEVIATION_OF_SCHEDULED,2024-09-09T18:20:00+02:00,Bremen Hauptbahnhof,de:04011:13925::G,2024-09-09T19:56:00+02:00,Zeven Bahnhof Süd,de:03357:102760::7,False,out/parquet/prod/zusatz_2024_09_09.parquet
4,2024-09-09,1630058,Zeven Bahnhof Süd,True,Eisenbahnen und Verkehrsbetriebe Elbe-Weser GmbH,de:VBN:630:,de:VBN:630,630,False,False,,False,0909-1630058#!ADD!#IVU-Regio#,IVU-Regio,DEVIATION_OF_SCHEDULED,2024-09-09T20:20:00+02:00,Bremen Hauptbahnhof,de:04011:13925::G,2024-09-09T21:43:00+02:00,Zeven Bahnhof Süd,de:03357:102760::7,False,out/parquet/prod/zusatz_2024_09_09.parquet
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
283,2025-01-06,1670018,Osterholz-Scharmbeck Bahnhof (Bus),True,Eisenbahnen und Verkehrsbetriebe Elbe-Weser GmbH,de:VBN:670:,de:VBN:670,670,False,False,,False,0106-1670018#!ADD!#IVU-Regio#,IVU-Regio,DEVIATION_OF_SCHEDULED,2025-01-06T11:40:00+01:00,Bremen Hauptbahnhof,de:04011:13925::G,2025-01-06T12:45:00+01:00,Osterholz-Scharmbeck Bahnhof (Bus),de:03356:72063::1,True,out/parquet/prod/zusatz_2025_01_06.parquet
284,2025-01-07,1630014,Zeven Bahnhof Süd,False,Eisenbahnen und Verkehrsbetriebe Elbe-Weser GmbH,de:VBN:630:,de:VBN:630,630,True,True,2025-01-07T09:26:15.065+01:00,True,0107-1630014#!ADD!#IVU-Regio#,IVU-Regio,DEVIATION_OF_SCHEDULED,2025-01-07T07:50:00+01:00,Bremen Hauptbahnhof,de:04011:13925::G,2025-01-07T09:20:00+01:00,Zeven Bahnhof Süd,de:03357:102760::7,False,out/parquet/prod/zusatz_2025_01_07.parquet
285,2025-01-12,1630226,Zeven Bahnhof Süd,True,Eisenbahnen und Verkehrsbetriebe Elbe-Weser GmbH,de:VBN:630:,de:VBN:630,630,False,False,,False,0112-1630226#!ADD!#IVU-Regio#,IVU-Regio,DEVIATION_OF_SCHEDULED,2025-01-12T20:20:00+01:00,Bremen Hauptbahnhof,de:04011:13925::G,2025-01-12T21:53:00+01:00,Zeven Bahnhof Süd,de:03357:102760::7,True,out/parquet/prod/zusatz_2025_01_12.parquet
286,2025-01-13,1630050,Zeven Bahnhof Süd,True,Eisenbahnen und Verkehrsbetriebe Elbe-Weser GmbH,de:VBN:630:,de:VBN:630,630,False,False,,False,0113-1630050#!ADD!#IVU-Regio#,IVU-Regio,DEVIATION_OF_SCHEDULED,2025-01-13T18:20:00+01:00,Bremen Hauptbahnhof,de:04011:13925::G,2025-01-13T19:59:00+01:00,Zeven Bahnhof Süd,de:03357:102760::7,True,out/parquet/prod/zusatz_2025_01_13.parquet


### Auswertung Matrix nach Verlauf Zeitpunkt der Meldung

In [31]:
# Matrix rows of line 'RS3' (stop_cancelled = false), left-joined with 'verlauf'
# on operating day, line, trip and station name, so that each reported delay with
# its timestamp sits next to the delay stored in the trip history (arr_del, dep_del).
# NOTE(review): df_matrix is not used again in the visible part of the notebook.
df_matrix = rt.cursor.sql("""select m.operatingDay::date, m.lineShortName, m.journeyId, v.index, 
                          m.stationName, m.scheduleDeparture,m.delay_minutes_arrival, m.delay_minutes_departure, m.timestamp, v.arr_del, v.dep_del
                from matrix m
                left join verlauf v on 
                          m.operatingDay = v.operday and 
                          m.lineShortName = v.lineshortname and 
                          m.journeyId = v.fnr and 
                          m.stationName = v.station_name
                where stop_cancelled = false
              and m.lineShortName = 'RS3'
              order by  m.operatingDay, m.externalLineId, m.journeyId, v.index,  m.timestamp 
              
              -- limit 20""").df()

In [32]:
# Distinct (day, line, trip, operator) combinations from 'zusatz' for the selected
# lines during the last 30 days; `group by all` without aggregates removes duplicates.
# NOTE: this overwrites both auswahl_linien and df_zusatz from the earlier cell;
# later cells that use df_zusatz see this five-column frame.
auswahl_linien = '630|670|N68|N63|N67'
df_zusatz = rt.cursor.sql(f"""
                select datum::date as datum, lineshort,lineid ,fnr,  vu 
                from zusatz 
                where                       

                    lineid SIMILAR TO 'de:VBN:.*({auswahl_linien}).*' and 
                    -- and vu like 'Reisedienst von Rahden%' 
                    datum::date >= (current_date - interval 30 day)
                group by all 
                order by lineshort, fnr """).df()

df_zusatz

#rt.cursor.sql(f"""select * from zusatz where lineid  SIMILAR TO 'de:VBN:.*({auswahl_linien}).*' and datum::date >= (current_date - interval 30 day)""").df()

Unnamed: 0,datum,lineshort,lineid,fnr,vu
0,2025-01-07,630,de:VBN:630:,1630014,Eisenbahnen und Verkehrsbetriebe Elbe-Weser GmbH
1,2024-12-23,630,de:VBN:630:,1630050,Eisenbahnen und Verkehrsbetriebe Elbe-Weser GmbH
2,2024-12-20,630,de:VBN:630:,1630050,Eisenbahnen und Verkehrsbetriebe Elbe-Weser GmbH
3,2025-01-13,630,de:VBN:630:,1630050,Eisenbahnen und Verkehrsbetriebe Elbe-Weser GmbH
4,2024-12-18,630,de:VBN:630:,1630050,Eisenbahnen und Verkehrsbetriebe Elbe-Weser GmbH
...,...,...,...,...,...
231,2024-12-22,670,de:VBN:670:,1670228,Eisenbahnen und Verkehrsbetriebe Elbe-Weser GmbH
232,2024-12-29,670,de:VBN:670:,1670228,Eisenbahnen und Verkehrsbetriebe Elbe-Weser GmbH
233,2024-12-29,670,de:VBN:670:,1670230,Eisenbahnen und Verkehrsbetriebe Elbe-Weser GmbH
234,2024-12-22,670,de:VBN:670:,1670230,Eisenbahnen und Verkehrsbetriebe Elbe-Weser GmbH


In [33]:
# Date range and row count of the 'fahrten' table.
# NOTE(review): the alias `amx_date` looks like a typo for `max_date`.
rt.cursor.sql("select min(datum )::date as min_date, max(datum)::date as amx_date, count(*) as anzahl from fahrten")

┌────────────┬────────────┬─────────┐
│  min_date  │  amx_date  │ anzahl  │
│    date    │    date    │  int64  │
├────────────┼────────────┼─────────┤
│ 2024-08-29 │ 2025-01-13 │ 1961946 │
└────────────┴────────────┴─────────┘

In [34]:
# Create the vw_buendel view for 'TN 5 CUX' (presumably a bundle name - confirm in RtDuck).
rt.create_vw_buendel('TN 5 CUX')

In [35]:
# Display the full content of the vw_buendel view.
rt.cursor.sql("select * from vw_buendel").df()

Unnamed: 0,datum,ebene,vu,fnr,fahrtstartstationname,fahrtendstationname,lineshort,lineid_short,hasRealtime,journey_cancelled,reported_cancelled,ts_reported_cancelled,realtimeHasEverBeenReported
0,2024-11-13,,Reisedienst von Rahden GmbH & Co. KG,1553109,Wehden Feuerwehr,Spaden Grundschule,553,de:VBN:553,True,False,False,,True
1,2024-11-13,,MEW Mobilitätszentrale Elbe-Weser,1542113,Geestenseth(Schiffdorf) Bahnhof,Ringstedt Keilstraße,542,de:VBN:542,True,False,False,,True
2,2024-11-13,,MEW Mobilitätszentrale Elbe-Weser,1529103,Bad Bederkesa Seminarstraße GS/NIG,Schiffdorf Schulzentrum,529,de:VBN:529,False,False,False,,False
3,2024-11-13,,Reisedienst von Rahden GmbH & Co. KG,1542104,Bad Bederkesa Moor-Therme,Ringstedt Keilstraße,542,de:VBN:542,True,False,False,,True
4,2024-11-13,,MEW Mobilitätszentrale Elbe-Weser,1542106,Bad Bederkesa Moor-Therme,Ringstedt Keilstraße,542,de:VBN:542,True,False,False,,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6286,2025-01-12,,MEW Mobilitätszentrale Elbe-Weser,1525706,Bremerhaven Bahnhof Lehe,Bad Bederkesa Moor-Therme,525,de:VBN:525,True,False,False,,True
6287,2025-01-12,,MEW Mobilitätszentrale Elbe-Weser,1525707,Bad Bederkesa Moor-Therme,Bremerhaven Bahnhof Lehe,525,de:VBN:525,True,False,False,,True
6288,2025-01-12,,MEW Mobilitätszentrale Elbe-Weser,1525708,Bremerhaven Bahnhof Lehe,Bad Bederkesa Moor-Therme,525,de:VBN:525,True,False,False,,True
6289,2025-01-12,,MEW Mobilitätszentrale Elbe-Weser,1525709,Bad Bederkesa Moor-Therme,Bremerhaven Bahnhof Lehe,525,de:VBN:525,True,False,False,,True


### Häufung von Fahrten ohne Echtzeit

In [36]:
# Trips in vw_buendel without realtime during the last 30 days, one row per
# day, level, line and trip number.
df_fahrten_ohne_ez = rt.cursor.sql("""
              
                select datum::date as datum, ebene, lineshort , fnr, hasrealtime
               
                from vw_buendel 
                where datum >= (current_date - interval 30 day) and hasrealtime = false
                group by all
                order by ebene, lineshort, fnr
    
              """).df()

# Attach the extra-trip information for the same day and trip number.
# df_zusatz comes from an earlier cell; the 'lineshort_x' column used below
# only exists because df_zusatz carries a 'lineshort' column as well.
df_fahrten_ohne_ez_zusatz = df_fahrten_ohne_ez.merge(df_zusatz, on=['datum', 'fnr'], how='left')

# Per line and trip number: first day, last day and number of days without
# realtime, most frequent first, written to Excel.
(
    df_fahrten_ohne_ez_zusatz[['lineshort_x', 'datum', 'fnr']]
    .groupby(['lineshort_x', 'fnr'], as_index=False)
    .agg(datum_min=('datum', 'min'), datum_max=('datum', 'max'), count=('datum', 'count'))
    .sort_values('count', ascending=False)
    .to_excel('out/rt_fahrten_ohne_ez_zusatz.xlsx', index=False)
)

In [37]:
# Rows of the merged frame where 'vu' is not null.
df_fahrten_ohne_ez_zusatz.query("~vu.isnull()")

Unnamed: 0,datum,ebene,lineshort_x,fnr,hasRealtime,lineshort_y,lineid,vu


In [38]:
# Length of the evaluation window in days.
interval_auswertung = 21
# Per level, line and trip number in vw_buendel within the window: number of
# trips, trips with realtime, trips without realtime, realtime quota (two decimals)
# and the last day with realtime. Kept are trips of the levels 1+, 1 and 2 that
# were without realtime more than once, ordered by that count descending.
df_fahrten_mit_nicht_vollstaendiger_echtzeit = rt.cursor.sql(f"""
            select * from 
                (select ebene, lineshort , fnr, count(*) as anz, count(*) filter (hasRealtime) as anz_rt, 
                    (anz - anz_rt) as f_ohne_rt ,round(anz_rt/anz,2) as quote,
                    max(datum::date) filter (hasRealtime) as letzte_lieferung_echtzeit
                from vw_buendel 
                where datum >= (current_date - interval {interval_auswertung} day)
                group by all
                order by ebene, lineshort, fnr)
            where f_ohne_rt > 1 and ebene in ('1+','1', '2') 
            order by f_ohne_rt desc                                                             
            """).df()

df_fahrten_mit_nicht_vollstaendiger_echtzeit

Unnamed: 0,ebene,lineshort,fnr,anz,anz_rt,f_ohne_rt,quote,letzte_lieferung_echtzeit


In [39]:
# Workbook with three sheets: trips with incomplete realtime, extra trips and
# the merge of trips without realtime with the extra trips.
xl = 'out/nicht_vollstaendig.xlsx'
sn01 = '01 fahrten_rt_kl_100_roz'
sn02 = '02 zusatzfahrten'
sn03 = '03 ohne ez merge zusatz'

blaetter = (
    (sn01, df_fahrten_mit_nicht_vollstaendiger_echtzeit),
    (sn02, df_zusatz),
    (sn03, df_fahrten_ohne_ez_zusatz),
)

with pd.ExcelWriter(xl, engine='openpyxl') as writer:
    for blattname, frame in blaetter:
        frame.to_excel(writer, index=False, sheet_name=blattname)
        worksheet = writer.sheets[blattname]
        # Keep the header row visible while scrolling.
        worksheet.freeze_panes = 'A2'
        # Filter over the range actually used by this sheet instead of a fixed
        # 'A:H', which does not match sheets with a different number of columns.
        worksheet.auto_filter.ref = worksheet.dimensions


In [40]:
# Per day and line in vw_buendel for the previous calendar month: number of trips,
# trips with realtime, realtime share (two decimals) and latest datum with realtime.
# The date bounds are the first and the last day of the month before the current one.
q = rt.cursor.sql("""
                   (select 
                    datum::date as datum, ebene, lineshort, lineid_short, count(*) anz,
                    count(*) filter (hasRealtime) anz_rt, round(anz_rt/ anz,2) anteil_rt, 
                    max(datum) filter (hasRealtime) letzte_lieferung
                    from vw_buendel 
                    where datum >= date_trunc('month', (date_trunc('month',current_date) - interval 1 day)::date)
                    and datum <= (date_trunc('month',current_date) - interval 1 day)::date
                  
                    group by all

                    order by datum::date)
                  """)
#q.filter("lineshort in ('S35', '350')") # filter() allows simple follow-up queries on the relation

q

┌────────────┬─────────┬───────────┬──────────────┬───────┬────────┬───────────┬─────────────────────┐
│   datum    │  ebene  │ lineshort │ lineid_short │  anz  │ anz_rt │ anteil_rt │  letzte_lieferung   │
│    date    │ varchar │  varchar  │   varchar    │ int64 │ int64  │  double   │      timestamp      │
├────────────┼─────────┼───────────┼──────────────┼───────┼────────┼───────────┼─────────────────────┤
│ 2024-12-02 │ NULL    │ 525       │ de:VBN:525   │     2 │      0 │       0.0 │ NULL                │
│ 2024-12-02 │ NULL    │ 542       │ de:VBN:542   │    22 │      7 │      0.32 │ 2024-12-02 00:00:00 │
│ 2024-12-02 │ NULL    │ 545       │ de:VBN:545   │     8 │      3 │      0.38 │ 2024-12-02 00:00:00 │
│ 2024-12-02 │ NULL    │ 555       │ de:VBN:555   │     7 │      6 │      0.86 │ 2024-12-02 00:00:00 │
│ 2024-12-02 │ NULL    │ 529       │ de:VBN:529   │    11 │      6 │      0.55 │ 2024-12-02 00:00:00 │
│ 2024-12-02 │ NULL    │ 553       │ de:VBN:553   │    12 │      0 │     

In [41]:
# Query for the previous calendar month: the daily realtime share per line,
# pivoted so that every day becomes a column (rows: lineshort, ebene).
q_pivot_lm = rt.cursor.sql("""
                    pivot (select 
                            datum::date as datum, ebene, lineshort, lineid_short, count(*) anz,
                            count(*) filter (hasRealtime) anz_rt, round(anz_rt/ anz,2) anteil_rt
                        from vw_buendel 
                        where datum >= date_trunc('month', (date_trunc('month',current_date) - interval 1 day)::date)
                            and datum <= (date_trunc('month',current_date) - interval 1 day)::date
                        group by all
                        )
                    on datum
                    using sum(anteil_rt)
                    group by lineshort, ebene
                    order by ebene, lineshort""")

# Missing values are shown as '-'.
q_pivot_lm.df().fillna('-')

Unnamed: 0,lineshort,ebene,2024-12-02,2024-12-03,2024-12-04,2024-12-05,2024-12-06,2024-12-08,2024-12-09,2024-12-10,2024-12-11,2024-12-12,2024-12-13,2024-12-16,2024-12-17,2024-12-18,2024-12-19,2024-12-20,2024-12-23,2024-12-27,2024-12-30
0,525,-,0.0,0.0,0.0,0.0,0.0,-,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,529,-,0.55,0.55,0.55,0.55,0.44,-,0.67,0.64,0.64,0.64,0.56,-,-,-,-,-,-,-,-
2,542,-,0.32,0.23,0.23,0.23,0.33,0.0,0.39,0.39,0.22,0.22,0.18,0.33,0.25,0.25,0.25,0.25,0.0,0.0,0.0
3,545,-,0.38,0.38,0.38,0.38,0.4,-,0.4,0.4,0.4,0.4,0.43,-,-,-,-,-,-,-,-
4,553,-,0.0,0.0,0.0,0.0,0.0,-,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-,-,-
5,555,-,0.86,0.86,0.86,0.86,0.83,-,0.86,0.86,0.86,0.86,0.83,0.0,0.0,0.0,0.0,0.0,-,-,-


## Ausgabe je Bündel als html / xlsx

### Erstellen der sortierten Bündelliste

In [42]:
# Sorted list of the distinct bundle names in 'linien', excluding 'nahsh'.
list_buendel = sorted(rt.cursor.sql("select distinct buendel from linien where buendel not in ('nahsh')").df()['buendel'].to_list())

In [43]:
# CSS rules for the styled HTML tables (selector + properties per rule).
# For a row hover, target <tr> instead of <td>.
cell_hover = dict(
    selector='td:hover',
    props=[('background-color', '#ffffb3')],
)
index_names = dict(
    selector='.index_name',
    props='font-style: italic; color: darkgrey; font-weight:normal; font-family: sans-serif; font-size: 15px;',
)
headers = dict(
    selector='th:not(.index_name)',
    props='background-color: #FFFFFF; color: #000000; font-family: sans-serif; font-size: 15px;text-orientation: upright;',
)

# Plain data cells: right-aligned, slightly smaller font.
td = dict(selector='td', props='text-align:right; font-family: sans-serif; font-size: 14px;')

In [None]:
# Show the column schema of the `fahrten` table.
rt.cursor.sql("""describe fahrten""")

┌─────────────────────────────┬─────────────┬─────────┬─────────┬─────────┬─────────┐
│         column_name         │ column_type │  null   │   key   │ default │  extra  │
│           varchar           │   varchar   │ varchar │ varchar │ varchar │ varchar │
├─────────────────────────────┼─────────────┼─────────┼─────────┼─────────┼─────────┤
│ datum                       │ TIMESTAMP   │ YES     │ NULL    │ NULL    │ NULL    │
│ fnr                         │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
│ destination                 │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
│ hasRealtime                 │ BOOLEAN     │ YES     │ NULL    │ NULL    │ NULL    │
│ vu                          │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
│ lineid                      │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
│ lineid_short                │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
│ lineshort                   │ VARCHAR     │ YES     

In [45]:
# Preview, for the single bundle at index 8, the raw name, the naive slug and
# the slug with German special characters transliterated.
for b in list_buendel[8:9]:
    print(b, b.replace(' ', '_').lower(), replace_german_special_characters(b).replace(' ', '_').lower())

DH Südwest dh_südwest dh_suedwest


In [46]:
# Formatting helpers for HTML output. They are not used inside this cell but
# are kept so that interactive cells relying on them keep working.
func_proz = lambda s: str(int((1-s) * 1000)/10) + '%' if str(int(s)) != '-1' else '-'
func_date = lambda s: s.dt.strftime('%m/%d/%Y')

# Length of the evaluation window in days (used by every query below).
interval_auswertung = 21

date_style = NamedStyle(name="date_style", number_format="YYYY-MM-DD")


def _format_sheet(ws, filter_ref, widths=None, date_column=False):
    """Apply the common layout to one exported worksheet.

    Freezes the header row, sets the auto-filter range to ``filter_ref``,
    applies the column widths given as ``{column_letter: width}`` and, if
    ``date_column`` is true, assigns ``date_style`` to column A from row 2 on.
    """
    ws.freeze_panes = 'A2'
    ws.auto_filter.ref = filter_ref
    for column_letter, width in (widths or {}).items():
        ws.column_dimensions[column_letter].width = width
    if date_column:
        for (cell,) in ws.iter_rows(min_row=2, min_col=1, max_col=1):
            cell.style = date_style


# The page template is the same for every bundle, so it is read only once.
# Encoding is explicit because the pages contain umlauts.
with open('html/template.html', 'r', encoding='utf-8') as file:
    html_template = file.read()

for b in list_buendel:
    # File-name friendly bundle key, e.g. 'DH Südwest' -> 'dh_suedwest'.
    b_clean = replace_german_special_characters(b).replace(' ', '_').lower()
    print(b, b.replace(' ', '_').lower(), b_clean)

    rt.create_vw_buendel(b)
    rt.create_vw_buendel_verlauf(buendel=b)

    # Pivot of the realtime share per line (rows) and day (columns) for the
    # last `interval_auswertung` days.
    q_pivot_lm = rt.cursor.sql(f"""
                        pivot (select
                                datum::date as datum, ebene, lineshort, lineid_short, count(*) anz,
                                count(*) filter (realtimeHasEverBeenReported ) anz_rt, round(anz_rt/ anz,2) anteil_rt
                            from vw_buendel
                            where datum >= (current_date - interval {interval_auswertung} day)
                            group by all
                            )
                        on datum
                        using sum(anteil_rt)
                        group by lineshort, ebene
                        order by ebene, lineshort""")
    # Materialise once; the frame is used for both the HTML and the Excel output.
    df_pivot = q_pivot_lm.df()

    # Trips (per ebene / line / fnr) that ran without realtime more than once.
    df_fahrten_mit_nicht_vollstaendiger_echtzeit = rt.cursor.sql(f"""
                select * from
                    (select ebene, lineshort , fnr, count(*) as anz, count(*) filter (hasRealtime) as anz_ez,
                    (anz - anz_ez) as fahrten_ohne_ez ,round(anz_ez/anz,2) as quote,
                    max(datum::date) filter (realtimeHasEverBeenReported ) as letzte_lieferung_echtzeit
                    from vw_buendel
                    where datum >= (current_date - interval {interval_auswertung} day)
                    group by all
                    order by ebene, lineshort, fnr)
                where fahrten_ohne_ez > 1 and ebene in ('1+','1', '2','Nacht')
                    order by fahrten_ohne_ez desc
                """).df()

    # Individual trips for which realtime was never reported.
    df_fahrten_ohne_ez = rt.cursor.sql(f"""
                select datum::date as datum, ebene, lineshort , fnr, hasrealtime
                from vw_buendel
                where datum >= (current_date - interval {interval_auswertung} day) and realtimeHasEverBeenReported  = false
                group by all
                order by ebene, lineshort, fnr
              """).df()

    # All trips of the bundle inside the evaluation window.
    df_fahrten_gesamt = rt.cursor.sql(f"""
                select *
                from vw_buendel
                where datum >= (current_date - interval {interval_auswertung} day)
                order by ebene, lineshort, fnr
              """).df()

    # Trips without realtime that have a match in df_zusatz on day and trip
    # number; rows without a match (vu is null) are dropped.
    html_zusatz_table = 'html/pre_zusatz.html'
    df_fahrten_ohne_ez_zusatz = df_fahrten_ohne_ez.merge(df_zusatz, on=['datum', 'fnr'], how='left')
    df_zusatz_treffer = df_fahrten_ohne_ez_zusatz.query("~vu.isnull()")
    df_zusatz_treffer.to_html(html_zusatz_table, index=False)

    html_pre_table = 'html/pre_table.html'
    df_fahrten_mit_nicht_vollstaendiger_echtzeit.to_html(html_pre_table, index=False)

    # Render the colour-coded pivot to a file ...
    html_pre_pivot_path = 'html/pre_pivot.html'
    df_pivot.style.background_gradient(cmap="RdYlGn", axis=None, vmin=0.5, vmax=1)\
        .format(precision=2, na_rep='-', thousands=" ")\
        .highlight_null(color='white')\
        .set_table_styles([index_names, headers, td])\
        .to_html(html_pre_pivot_path)

    # ... and read the rendered table back for insertion into the template.
    with open(html_pre_pivot_path, 'r', encoding='utf-8') as file:
        html_pre_pivot = file.read()

    # Fill the placeholders of the page template.
    title = f"Echtzeitquote Bündel {b} je Linie erstellt: {dt.datetime.now().strftime('%d.%m.%Y %H:%M')}"
    html_page = html_template.replace('{{ html_pivot }}', html_pre_pivot).replace('{{ html_title }}', title)

    if df_fahrten_mit_nicht_vollstaendiger_echtzeit.shape[0] > 0:
        html_table = df_fahrten_mit_nicht_vollstaendiger_echtzeit.to_html(index=False)
    else:
        html_table = "Keine Häufung Fahrten ohne Echtzeit"
    html_page = html_page.replace('{{ html_table }}', html_table)

    if df_zusatz_treffer.shape[0] > 0:
        html_table_zusatz = df_zusatz_treffer.to_html(index=False)
    else:
        html_table_zusatz = "Keine Zusatzfahrten mit gleicher Fahrtnummer"
    html_page = html_page.replace('{{ html_table_zusatz }}', html_table_zusatz)

    # Save the combined HTML page.
    html_combined = f"/var/www/rt_archiv/buendel/rt_{b_clean}.html"
    with open(html_combined, 'w', encoding='utf-8') as file:
        file.write(html_page)

    # Export the key results as an Excel workbook.
    xl = f"buendel_stat/{b_clean}_stat.xlsx"

    sn00 = '01 hilfe'
    sn01 = '02 statistik ebene'
    sn02 = '03 statistik pivot'
    sn03 = '04 fahrten gesamt'
    sn04 = '05 verlauf'  # not written for the city bundles listed below

    with pd.ExcelWriter(xl, engine='openpyxl') as writer:
        writer.book.add_named_style(date_style)

        # The help sheet is created first so that it is the first sheet of
        # the workbook; no second load/save round trip is needed.
        sheet = writer.book.create_sheet(sn00)
        sheet['A1'] = f"Erstellt: {dt.datetime.now().strftime('%Y-%m-%d %H:%M')}"
        sheet['A2'] = "Erläuterung der Werte in der Tabelle"
        sheet['A3'] = f"Blatt {sn01} enthält die Echtzeitquote der Ebenen des Bündels {b} für die letzten {interval_auswertung} Tage"
        sheet['A4'] = f"Blatt {sn02} enthält die Echtzeitquote der Linien des Bündels {b} für die letzten {interval_auswertung} Tage"
        sheet['A5'] = f"Blatt {sn03} filterbare Liste der Fahrten {b} für die letzten {interval_auswertung} Tage"

        df_vorfaelle = rt.df_vorfaelle_echtzeit()
        df_vorfaelle.to_excel(writer, index=False, sheet_name=sn01)
        ws_ebene = writer.book[sn01]
        _format_sheet(ws_ebene, 'A:F', widths={'A': 15}, date_column=True)
        # Filter-aware sum of column F, placed two rows below the data.
        n_vorfaelle = df_vorfaelle.shape[0]
        ws_ebene[f"F{n_vorfaelle + 3}"] = f"=SUBTOTAL(9, F2:F{n_vorfaelle + 1})"

        df_pivot.to_excel(writer, index=True, sheet_name=sn02)
        _format_sheet(writer.book[sn02], 'A:H')

        df_fahrten_gesamt.to_excel(writer, index=False, sheet_name=sn03)
        _format_sheet(writer.book[sn03], 'A:N', widths={'A': 15}, date_column=True)

        # The stop-level history sheet is skipped for these bundles.
        if b not in ('HB Bus', 'HB Tram', 'BHV', 'DEL', 'OL Stadt'):
            rt.cursor.sql("""from vw_buendel_verlauf""").df().to_excel(writer, index=False, sheet_name=sn04)
            _format_sheet(writer.book[sn04], 'A:J', widths={'A': 15, 'F': 30}, date_column=True)

DH Südwest dh_südwest dh_suedwest


In [49]:
# Show the column schema of the `verlauf` table.
rt.cursor.sql("describe verlauf")

┌───────────────────────┬─────────────┬─────────┬─────────┬─────────┬─────────┐
│      column_name      │ column_type │  null   │   key   │ default │  extra  │
│        varchar        │   varchar   │ varchar │ varchar │ varchar │ varchar │
├───────────────────────┼─────────────┼─────────┼─────────┼─────────┼─────────┤
│ operday               │ TIMESTAMP   │ YES     │ NULL    │ NULL    │ NULL    │
│ journeyOperator       │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
│ deviceid              │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
│ lineshortname         │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
│ ex_lineid             │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
│ fnr                   │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
│ index                 │ INTEGER     │ YES     │ NULL    │ NULL    │ NULL    │
│ has_rt                │ BOOLEAN     │ YES     │ NULL    │ NULL    │ NULL    │
│ dschedtime            │ TIMESTAMP   │ 

In [50]:
# Sample of at most 100 `verlauf` rows from the last 35 operating days.
rt.cursor.sql("select * from verlauf where operday::date > (current_date - interval 35 day) limit 100").df()

Unnamed: 0,operday,journeyOperator,deviceid,lineshortname,ex_lineid,fnr,index,has_rt,dschedtime,aschedtime,dep_del,arr_del,station_nr,station_name,lat,lon,canc,additional,ts_reported_cancelled,reported_cancelled,filename,lineid_short
0,2024-12-11,Weser-Ems-Bus Betrieb Bremen,1211-1886003-8016306900000#!ADD!#DBRB#,886,de:VBN:886:1,1886003,1,True,2024-12-11 11:15:00,NaT,3.0,,110344,Brockel Am Sportplatz,53.101456,9.517051,False,False,,False,out/parquet/prod/verlauf_2024_12_11.parquet,de:VBN:886
1,2024-12-11,Weser-Ems-Bus Betrieb Bremen,1211-1886003-8016306900000#!ADD!#DBRB#,886,de:VBN:886:1,1886003,2,True,2024-12-11 11:17:00,2024-12-11 11:17:00,3.0,3.0,1101241,Wensebrock Bösenkampweg,53.101636,9.501499,False,False,,False,out/parquet/prod/verlauf_2024_12_11.parquet,de:VBN:886
2,2024-12-11,Weser-Ems-Bus Betrieb Bremen,1211-1886003-8016306900000#!ADD!#DBRB#,886,de:VBN:886:1,1886003,3,True,2024-12-11 11:18:00,2024-12-11 11:18:00,3.0,3.0,107979,Wensebrock Rotenburger Weg,53.101690,9.496070,False,False,,False,out/parquet/prod/verlauf_2024_12_11.parquet,de:VBN:886
3,2024-12-11,Weser-Ems-Bus Betrieb Bremen,1211-1886003-8016306900000#!ADD!#DBRB#,886,de:VBN:886:1,1886003,4,True,2024-12-11 11:19:00,2024-12-11 11:19:00,4.0,3.0,107978,Wensebrock Hauptstraße,53.100971,9.498128,False,False,,False,out/parquet/prod/verlauf_2024_12_11.parquet,de:VBN:886
4,2024-12-11,Weser-Ems-Bus Betrieb Bremen,1211-1886003-8016306900000#!ADD!#DBRB#,886,de:VBN:886:1,1886003,5,True,2024-12-11 11:20:00,2024-12-11 11:20:00,4.0,3.0,107980,Brockel B 71,53.096189,9.513266,False,False,,False,out/parquet/prod/verlauf_2024_12_11.parquet,de:VBN:886
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,2024-12-11,Bremer Straßenbahn AG,1211-1835026-00026-1#!ADD!#BSAG#,3,de:VBN:3:,1019,1,True,2024-12-11 11:13:00,NaT,0.0,,675231,Bremen Gröpelingen,53.120630,8.752922,False,False,,False,out/parquet/prod/verlauf_2024_12_11.parquet,de:VBN:3
96,2024-12-11,Bremer Straßenbahn AG,1211-1835026-00026-1#!ADD!#BSAG#,3,de:VBN:3:,1019,2,True,2024-12-11 11:14:00,2024-12-11 11:14:00,0.0,0.0,606320,Bremen Kap-Horn-Straße,53.115713,8.752482,False,False,,False,out/parquet/prod/verlauf_2024_12_11.parquet,de:VBN:3
97,2024-12-11,Bremer Straßenbahn AG,1211-1835026-00026-1#!ADD!#BSAG#,3,de:VBN:3:,1019,3,True,2024-12-11 11:16:00,2024-12-11 11:16:00,0.0,0.0,606004,Bremen Waterfront (Use Akschen),53.112459,8.752419,False,False,,False,out/parquet/prod/verlauf_2024_12_11.parquet,de:VBN:3
98,2024-12-11,Bremer Straßenbahn AG,1211-1835026-00026-1#!ADD!#BSAG#,3,de:VBN:3:,1019,4,True,2024-12-11 11:17:00,2024-12-11 11:17:00,0.0,0.0,606225,Bremen Goosestraße,53.110418,8.757372,False,False,,False,out/parquet/prod/verlauf_2024_12_11.parquet,de:VBN:3


### Upload nach Redmine

In [47]:
# Upload the per-bundle workbooks to their Redmine DMSF folders.
# input/folder_vms.csv maps the bundle key (`buen`) to `project_url` and
# `folder_id`; it is parsed once and indexed by bundle key.
list_excel = glob.glob('buendel_stat/*.xlsx')
df_redmine = pd.read_csv('input/folder_vms.csv', sep=';', quotechar="'")
list_redmine = df_redmine['buen'].to_list()
# Keep the first row per bundle key, which is the row the lookup should use.
redmine_ziele = df_redmine.drop_duplicates('buen').set_index('buen')

for b in list_buendel:
    b_clean = replace_german_special_characters(b).replace(' ', '_').lower()
    file_name = f"{b_clean}_stat.xlsx"
    folder_file_name = os.path.join('buendel_stat', file_name)

    # Bundles without a configured target folder are reported and skipped.
    if b_clean not in redmine_ziele.index:
        print(f"not in {b_clean}")
        continue

    print(f"{b} in {b_clean}")
    project_url = redmine_ziele.at[b_clean, 'project_url']
    folder_id = redmine_ziele.at[b_clean, 'folder_id']

    delete_upload_dmsf(project_url=project_url, folder_id=folder_id, file_name=file_name, folder_file_name=folder_file_name)
        


AM Ost in am_ost
https://vms.zvbn.de/projects/steuerung-ammerland-ost_nn/dmsf.xml?folder_id=2718
id 30409
<Response [200]>
https://vms.zvbn.de/projects/steuerung-ammerland-ost_nn/dmsf/upload.xml?filename=am_ost_stat.xlsx
token 81017.8fd23fa8f8fe1ea136f062690b9852cf5c47e85633237b94bd49bd422bf08619
 <?xml version="1.0" encoding="utf-8" ?>
    <attachments>
      <folder_id>2718</folder_id>
      <uploaded_file>
        <name>am_ost_stat.xlsx</name>
        <title>am_ost_stat.xlsx</title>
        <description>REST API</description>
        <version>3</version> <!-- It must be 3 (Custom version) -->
        <custom_version_major>1</custom_version_major> <!-- Major version -->
        <custom_version_minor>0</custom_version_minor>
        <comment>From API</comment>
        <!-- For an automatic version: -->
        <version/>
        <token>81017.8fd23fa8f8fe1ea136f062690b9852cf5c47e85633237b94bd49bd422bf08619</token>
      </uploaded_file>
    </attachments>
https://vms.zvbn.de/projects/s

In [None]:
# Display the most recently built pivot relation as a DataFrame.
q_pivot_lm.df()

In [None]:
# Number of (ebene, line, fnr) groups in the current vw_buendel that ran
# without realtime more than once within the evaluation window.
rt.cursor.sql(f"""
                select * from 
                    (select ebene, lineshort , fnr, count(*) as anz, count(*) filter (hasRealtime) as anz_ez, 
                    (anz - anz_ez) as fahrten_ohne_ez ,round(anz_ez/anz,2) as quote,
                    max(datum::date) filter (hasRealtime) as letzte_lieferung_echtzeit
                    from vw_buendel 
                    where datum >= (current_date - interval {interval_auswertung} day)
                    group by all
                    order by ebene, lineshort, fnr)
                where fahrten_ohne_ez > 1 and ebene in ('1+','1', '2','Nacht') 
                    order by fahrten_ohne_ez desc                                                             
                
                """).df().shape[0]

In [None]:
# Display the raw text of the HTML page template.
html_template

In [None]:
# Display the result of RtDuck.anzahl_fahrten_betreiber() as a DataFrame.
rt.anzahl_fahrten_betreiber().df()

In [34]:
# Write the value returned by rt.anzahl_fahrten() to the log file.
logging.info(f"Anzahl Fahrten gesamt {rt.anzahl_fahrten()}")

## Ohne class

In [35]:
# Stand-alone DuckDB connection for the class-free part of the notebook
# (no database file is given).
con = duckdb.connect()

In [36]:
# Install and load DuckDB's postgres extension and attach the database
# `zvbn_postgis` read-only under the alias `db_dm`. User and password are
# taken from the .env values held in `config`.
con.sql(f"""INSTALL postgres;
LOAD postgres;
ATTACH 'dbname=zvbn_postgis user={config['POSTGRES_USER']} host=127.0.0.1 password={config['POSTGRES_PW']}' AS db_dm (TYPE POSTGRES, READ_ONLY);""")

In [None]:
# Copy basis.lin_buendel from the attached Postgres database into a local
# DuckDB table and display it.
con.sql("create or replace table lin_buendel as select * from db_dm.basis.lin_buendel")
con.sql("select * from lin_buendel")

In [None]:
# Build the local `linien` table from basis.linien: only active lines that
# are assigned to a bundle, with `nummer` exposed as `linie`.
sql_lin = """
        Create or replace table linien as 
        SELECT nummer AS linie, buendel, ebene, dlid, id 
        FROM db_dm.basis.linien 
        WHERE buendel IS NOT NULL AND aktiv IS TRUE 
        ORDER BY buendel, ebene, nummer """
con.sql(sql_lin)
# Display the freshly created table.
con.sql("select * from linien")

### Abruf der Parquet Files (Tagespakete)

In [None]:
# Load the parquet files of the selected server into one table per file
# family (fahrten / verlauf / zusatz); the source file name is kept as a column.
server = 'prod'
for tabelle in ('fahrten', 'verlauf', 'zusatz'):
    parquet_glob = f"out/parquet/{server}/{tabelle}*.parquet"
    con.sql(
        f"create or replace table {tabelle} as select * "
        f"from read_parquet('{parquet_glob}',  union_by_name = true, filename = true)"
    )

### Ermitteln und Löschen von nicht gewollten Betreibern

In [40]:
#con.sql("select distinct vu from fahrten where vu like '%Weser%'")

In [None]:
# Show the column schema of the loaded `fahrten` table.
con.sql("describe fahrten")

In [None]:
# Number of trips per day, ordered by day.
con.sql("select count(*), datum from fahrten group by datum order by datum")

In [43]:
# Optional clean-up: keep only the two Weser-Ems-Bus operators in `fahrten`.
if False: # manual True / False switch; False lets the whole notebook run through without deleting
    print('Löschen von Betreibern')
    con.sql("delete from fahrten where vu not in ('Weser-Ems-Bus Betrieb Bremen', 'Weser-Ems-Bus Auftragnehmerleistungen')")
    #con.sql("delete from verlauf where vu not in ('Weser-Ems-Bus Betrieb Bremen', 'Weser-Ems-Bus Auftragnehmerleistungen')")
    #con.sql("delete from zusatz where vu not in ('Weser-Ems-Bus Betrieb Bremen', 'Weser-Ems-Bus Auftragnehmerleistungen')")

In [None]:
# Number of trips in the last 100 days, unpacked from the 1x1 result frame.
con.sql("select count(*) from fahrten where datum >= (current_date - interval 100 days)").df().values.tolist()[0][0]

In [None]:
# Trip count since `letzte14tage` and total number of `verlauf` rows.
anzahl_fahrten = con.sql(f"select count(*) from fahrten where datum >= '{letzte14tage}'").df().values.tolist()[0][0]
laenge_verlauf = con.sql("select count(*) from verlauf").df().values.tolist()[0][0]
print(f"""Anzahl Fahrten: {anzahl_fahrten},  Länge Verlauf: {laenge_verlauf}    """)

In [None]:
# Ad-hoc inspection of trips on 2024-10-29 whose deviceid matches
# '%680%DBRB%': start/end stop and time plus two values derived from the
# '-'-separated parts of deviceid (fnr = 2nd part, m2 = numeric 3rd part
# minus 8000000000000, divided by 1000).
con.sql("""select 
            datum, 
            fahrtstartstationname, 
           strftime( cast(fahrtstarttime as TIMESTAMPTZ), '%H:%M') as fahrtstart,
           fahrtendstationname,
           strftime( cast(fahrtendtime as TIMESTAMPTZ), '%H:%M') as fahrtende,
            
            deviceid, 
            split_part(deviceid, '-', 2) as fnr, 
            cast(((cast(split_part(split_part(deviceid, '-', 3), '#', 1) as int64) - 8000000000000) / 1000) as int64) as m2, 
        from fahrten 
        where deviceid like '%680%DBRB%' and datum = '2024-10-29'
        order by datum, fahrtstarttime
        
        """).df()
#.to_excel('out/web.xlsx', index=False)

### Anzahl der Fahrten je Betreiber

In [None]:
# Number of `verlauf` rows per journeyOperator, ascending by count.
con.sql("select journeyOperator, count(journeyOperator) as count from verlauf group by journeyOperator order by count")

### Fahrten mit hohen Verspätungen

In [None]:
# Device ids that have at least one `verlauf` row with dep_del above 100.
con.sql("select distinct deviceid from verlauf where dep_del > 100").df()

In [None]:
# Show the column schema of `fahrten` again before adding lineid_short.
con.sql("describe fahrten")

### Verkürzung der DLID
- Zum Teil werden bei mehreren Betreibern einer Linie TLID mit vierteiliger DLID geliefert 
- Verkürzung ermöglicht die Verknüpfung mit Liste aus DM

In [None]:
# Add the column lineid_short (if missing) and fill it with the first three
# ':'-separated parts of lineid, so it can be joined to linien.dlid.
con.sql("alter table fahrten add column if not exists lineid_short VARCHAR")
con.sql("""update fahrten 
        set lineid_short = concat_ws(':', split_part(lineid,':', 1), split_part(lineid,':', 2), split_part(lineid,':', 3))""")
# Show each distinct lineid next to its shortened form as a check.
con.sql("""select distinct lineid, 
        concat_ws(':', split_part(lineid,':', 1), split_part(lineid,':', 2), split_part(lineid,':', 3)) 
        from fahrten""")

### Über HIM gemeldete Ausfälle (ts_reported_cancelled gefüllt)

In [51]:
# Trips since `letzte14tage` whose ts_reported_cancelled is not the empty string.
df_fahrten_ausfall_him = con.sql(f"""
                              select vu, fnr, ts_reported_cancelled, journey_cancelled 
                              from fahrten f 
                              where ts_reported_cancelled != '' and f.datum >= '{letzte14tage}'""").df()

### Echtzeitquote

#### nach Linie und Betreiber

In [None]:
# Realtime share (percent) per bundle, level, day, operator and line since
# `letzte14tage`.
df_ez_quote_betreiber = con.sql(f"""
        select l.buendel, l.ebene,f.datum, f.vu, f.lineshort,f.lineid_short, count(f.hasRealtime) filter (f.hasRealtime = True) ez_true, count(f.*) count, 
        round(ez_true / count * 100, 1) anteil_ez
        from fahrten f
        left outer join linien l on f.lineid_short = l.dlid
        where f.datum >= '{letzte14tage}'              
        group by f.lineid_short, f.vu, f.datum, f.lineshort, f.lineid_short, l.buendel, l.ebene
        order by f.vu, f.lineid_short
        """).df()

# Lines without a match in `linien` get '-' so they are not dropped from the
# pivot index.
df_ez_quote_betreiber = df_ez_quote_betreiber.fillna({'buendel': '-', 'ebene': '-'})

# One row per bundle / level / operator / line, one column per day.
anteil_ez_pivot_betreiber = pd.pivot_table(
    df_ez_quote_betreiber,
    values='anteil_ez',
    index=['buendel', 'ebene', 'vu', 'lineshort'],
    columns='datum',
).reset_index()
anteil_ez_pivot_betreiber

#### nach Linie (ohne Betreiber)

In [None]:
# Realtime share (percent) per bundle, level, day and line since
# `letzte14tage`, without splitting by operator.
df_ez_quote_o_betreiber = con.sql(f"""
        select l.buendel, l.ebene,f.datum, f.lineshort,f.lineid_short, count(f.hasRealtime) filter (f.hasRealtime = True) ez_true, count(f.*) count, 
        round(ez_true / count * 100, 1) anteil_ez
        from fahrten f        
        left outer join linien l on f.lineid_short = l.dlid      
        where f.datum >= '{letzte14tage}'        
        group by f.lineid_short, f.datum, f.lineshort, f.lineid_short, l.buendel, l.ebene
        order by f.lineid_short
        """).df()

# Lines without a match in `linien` get '-' so they are not dropped from the
# pivot index.
df_ez_quote_o_betreiber = df_ez_quote_o_betreiber.fillna({'buendel': '-', 'ebene': '-'})

# One row per bundle / level / line, one column per day.
anteil_ez_pivot_o_betreiber = pd.pivot_table(
    df_ez_quote_o_betreiber,
    values='anteil_ez',
    index=['buendel', 'ebene', 'lineshort'],
    columns='datum',
).reset_index()
anteil_ez_pivot_o_betreiber

### Fahrten ohne Echtzeit Ebene 1/1+ und 2

In [54]:
# Trips without realtime on lines of level 1, 1+ and 2 since `letzte14tage`.
# Level '2' was missing from the filter although the variable name, the
# section heading, the target sheet name and the sibling cancellation query
# all refer to levels 1, 1+ and 2.
df_fahrten_ohne_ez_ebenen_1_1p_2 = con.sql(f"""
        select f.datum, l.buendel, l.ebene, f.vu, f.fnr, f.lineshort,f.lineid_short, f.hasrealtime, f.journey_cancelled, f.reported_cancelled, f.ts_reported_cancelled
        from fahrten f
        left outer join linien l on f.lineid_short = l.dlid
        where l.ebene in ('1', '1+', '2') and f.hasrealtime = False and f.datum >= '{letzte14tage}'
        order by f.datum, f.lineid_short
        """).df()

In [55]:
# Trips on lines of level 1, 1+ and 2 since `letzte14tage` that are flagged
# as cancelled (journey_cancelled or reported_cancelled).
df_fahrten_ausfall_1_1p_2 = con.sql(f"""
        select f.datum, l.buendel, l.ebene, f.vu, f.fnr, f.lineshort,f.lineid_short, f.hasrealtime, f.journey_cancelled, f.reported_cancelled, f.ts_reported_cancelled
        
        from fahrten f
                                    
        left outer join linien l on f.lineid_short = l.dlid              
        where l.ebene in ('1', '1+', '2') and (journey_cancelled = True or f.reported_cancelled = True) and 
        f.datum >= '{letzte14tage}'                            
        order by f.datum, f.lineid_short
        """).df()

# Ausgabe xlsx EZ Statistiken

In [56]:
# Workbook with the network-wide realtime statistics.
xlsx = "/var/www/rt_archiv/anteil_echtzeit_linien_vbn.xlsx"
sn00 = '00 Hilfe'
sn01 = '01 pivot alle Linien betreiber'
sn02 = '02 pivot alle Linien'
sn03 = '03 fahrten ohne EZ 1 1+ 2'
sn04 = '04 fahrten ohne EZ 1 1+ 3 grup'
sn06 = '05 fahrten ausfall'
sn07 = '06 fahrten ausfall über HIM'


def _format_pivot_sheet(ws, freeze_cell):
    """Lay out a pivot sheet whose header row carries the day columns.

    Freezes at ``freeze_cell``, sets the auto filter to A:H, formats the
    header row as dates, sets column C to width 22 and every existing column
    from D onwards to width 16.
    """
    ws.freeze_panes = freeze_cell
    ws.auto_filter.ref = 'A:H'
    for position, cell in enumerate(ws["1:1"], start=1):
        cell.number_format = 'YYYY-MM-DD'
        if position >= 4:
            ws.column_dimensions[cell.column_letter].width = 16
    ws.column_dimensions['C'].width = 22


def _format_list_sheet(ws, filter_ref, width_a, date_column=True):
    """Lay out a plain list sheet.

    Freezes the header row, sets the auto filter to ``filter_ref``, sets the
    width of column A and, if ``date_column`` is true, formats column A as date.
    """
    ws.freeze_panes = 'A2'
    ws.auto_filter.ref = filter_ref
    if date_column:
        for cell in ws["A"]:
            cell.number_format = 'YYYY-MM-DD'
    ws.column_dimensions['A'].width = width_a


with pd.ExcelWriter(xlsx, engine="openpyxl") as writer:
    # Help sheet with creation time, period and table of contents.
    sheet = writer.book.create_sheet(sn00)
    sheet['A1'] = f"Erstellt: {dt.datetime.now().strftime('%Y-%m-%d %H:%M')} Zeitraum: {letzte14tage} bis {gestern}"

    sheet['A3'] = "Inhalt"
    sheet['B4'] = f"Blatt {sn01}: Pivot Echtzeitquote inkl. Betreiberkennung"
    sheet['B5'] = f"Blatt {sn02}: Pivot Echtzeitquote ohne Betreiberkennung"
    sheet['B6'] = f"Blatt {sn03}: Fahrten ohne Echtzeit"
    sheet['B7'] = f"Blatt {sn04}: Fahrten ohne Echtzeit mit Anzahl"
    sheet['B8'] = f"Blatt {sn06}: Fahrten Ausfall"
    sheet['B9'] = f"Blatt {sn07}: Fahrten Ausfall über HIM"

    # Realtime share with operator column (four index columns, frozen before E).
    anteil_ez_pivot_betreiber.to_excel(writer, sheet_name=sn01, index=False)
    _format_pivot_sheet(writer.book[sn01], 'E2')

    # Realtime share without operator column (three index columns, frozen before D).
    anteil_ez_pivot_o_betreiber.to_excel(writer, sheet_name=sn02, index=False)
    _format_pivot_sheet(writer.book[sn02], 'D2')

    # Individual trips without realtime.
    df_fahrten_ohne_ez_ebenen_1_1p_2.to_excel(writer, sheet_name=sn03, index=False)
    _format_list_sheet(writer.book[sn03], 'A:M', 18)

    # The same trips grouped by operator and trip number, with their count.
    df_fahrten_ohne_ez_ebenen_1_1p_2[['vu', 'fnr']].value_counts().reset_index()\
        .sort_values(['count', 'vu'], ascending=False)\
        .to_excel(writer, sheet_name=sn04, index=False)
    _format_list_sheet(writer.book[sn04], 'A:H', 22, date_column=False)

    # Cancelled trips of level 1, 1+ and 2.
    df_fahrten_ausfall_1_1p_2.to_excel(writer, sheet_name=sn06, index=False)
    _format_list_sheet(writer.book[sn06], 'A:M', 18)

    # Cancellations reported via HIM.
    df_fahrten_ausfall_him.to_excel(writer, sheet_name=sn07, index=False)
    _format_list_sheet(writer.book[sn07], 'A:M', 18)

In [None]:
# Per operator (vu), for the last 3 days: total number of trips and how many
# of them carry each realtime / cancellation flag.
df_stat_rt_canc = con.sql("""
        select 
            vu, 
            count(*) as anzahl, 
            count(*) filter (hasRealtime) as hasRealtime, 
            count(*) filter (realtimeHasEverBeenReported) as realtimeHasEverBeenReported,
            count(*) filter (realtimehaseverbeenreported or hasrealtime) as rt_combined,
            count(*) filter (journey_cancelled) as journey_cancelled,
            count(*) filter (reported_cancelled) as reported_cancelled
        from fahrten
        where datum >= (current_date - interval 3 days)
        group by all
        order by vu""").df()
# Display the statistics frame.
df_stat_rt_canc

In [58]:
# Export the operator statistics to Excel with a filter-aware total row.
output_file = 'reports/df_stat_rt_canc.xlsx'
sheet_name = 'Stat RT Canc'
df_stat_rt_canc = df_stat_rt_canc.sort_values(by='vu')
with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
    df_stat_rt_canc.to_excel(writer, index=False, sheet_name=sheet_name)
    worksheet = writer.book[sheet_name]
    # Filter range is taken before the total row is added, so it covers the data only.
    worksheet.auto_filter.ref = worksheet.dimensions
    worksheet.column_dimensions['A'].width = 30
    worksheet.freeze_panes = 'A2'  # keep the header row visible

    # `n_rows` instead of `len`, so the builtin is not shadowed for later cells.
    n_rows, n_cols = df_stat_rt_canc.shape
    # SUBTOTAL(9, ...) sums only the rows left visible by the filter. Every
    # column after `vu` gets one, two rows below the data.
    for col_idx in range(2, n_cols + 1):
        col = worksheet.cell(row=1, column=col_idx).column_letter
        worksheet[f'{col}{n_rows + 3}'] = f'=subtotal(9,{col}2:{col}{n_rows + 1})'
