# Auswertung der Parquet Dateien aus dem Echtzeitarchiv V14

## Import der Module und Setzen Parameter

In [None]:
import duckdb
import pandas as pd

import datetime as dt

from dotenv import load_dotenv, dotenv_values
import logging
# Send every log record (DEBUG and above) to the log file; the format uses
# "{"-style placeholders.
log_file = "log/log_rt.txt"
logging.basicConfig(
    filename=log_file,
    level=logging.DEBUG,
    style="{",
    format="{asctime} [{levelname:8}] {message}",
    datefmt="%d.%m.%Y %H:%M:%S",
)

# Read the .env file (python-dotenv).
load_dotenv()

In [None]:
# Mark the start of the evaluation run in the log file.
logging.info("Auswertung parquet gestartet")

In [None]:
# Values from the .env file; the keys POSTGRES_USER and POSTGRES_PW are read
# from this mapping when the PostgreSQL database is attached.
config = dotenv_values(".env")
# config  # uncomment to inspect the values (they include the password)

In [None]:
# Let pandas display up to 100 columns.
pd.options.display.max_columns = 100

In [None]:
# Take a single timestamp and derive every cutoff from it, so that all values
# refer to the same calendar day even when the cell runs across midnight.
_now = dt.datetime.now()
_heute = _now.date()

jetzt = _now.strftime('%Y-%m-%d %H:%M:%S')  # current time
gestern = (_heute - dt.timedelta(days=1)).strftime('%Y-%m-%d')  # yesterday
# Cutoff dates 7, 14 and 21 days before today, as ISO date strings.
letzte07tage = (_heute - dt.timedelta(days=7)).strftime('%Y-%m-%d')
letzte14tage = (_heute - dt.timedelta(days=14)).strftime('%Y-%m-%d')
letzte21tage = (_heute - dt.timedelta(days=21)).strftime('%Y-%m-%d')

print(jetzt, letzte21tage)

## Aufbau der class

In [None]:
class rt_duck:
    """DuckDB helper for evaluating the parquet files of the real-time archive.

    On construction the class connects to a DuckDB database, attaches the
    PostgreSQL database ``zvbn_postgis`` read-only as ``db_dm`` and copies the
    reference tables ``lin_buendel`` and ``linien`` into DuckDB. The
    ``create_*`` methods build the working tables and the bundle view.

    The PostgreSQL credentials are read from the module-level ``config``
    mapping (keys ``POSTGRES_USER`` and ``POSTGRES_PW``).
    """

    # db_name = ':memory:'  # alternative: in-memory database
    db_name = 'db/rt_archiv.db'

    def __init__(self, db_name=db_name):
        """Open the connection and load the reference tables.

        Args:
            db_name: Database passed to ``duckdb.connect``; defaults to the
                class attribute ``db_name``.
        """
        self.conn = duckdb.connect(database=db_name)
        self.cursor = self.conn.cursor()
        self.cursor.sql(f"""INSTALL postgres;
                            LOAD postgres;
                            ATTACH 'dbname=zvbn_postgis user={config['POSTGRES_USER']} host=127.0.0.1 password={config['POSTGRES_PW']}' AS db_dm (TYPE POSTGRES, READ_ONLY);"""
                        )
        self.cursor.sql("create or replace table lin_buendel as select * from db_dm.basis.lin_buendel")
        # Only active lines that are assigned to a bundle.
        sql_lin = """
        Create or replace table linien as 
        SELECT nummer AS linie, buendel, ebene, dlid, id 
        FROM db_dm.basis.linien 
        WHERE buendel IS NOT NULL AND aktiv IS TRUE 
        ORDER BY buendel, ebene, nummer """
        self.cursor.sql(sql_lin)

    @staticmethod
    def _quote(value):
        """Return *value* as text that is safe inside a single-quoted SQL literal."""
        return str(value).replace("'", "''")

    def _create_table_from_parquet(self, table, server):
        """(Re)create *table* from all ``out/parquet/<server>/<table>*.parquet`` files."""
        pattern = self._quote(f"out/parquet/{server}/{table}*.parquet")
        self.cursor.execute(
            f"create or replace table {table} as select * "
            f"from read_parquet('{pattern}',  union_by_name = true, filename = true)"
        )

    def create_table_fahrten(self, server):
        """Create table ``fahrten`` from the ``fahrten*.parquet`` files of *server*.

        Also adds the column ``lineid_short`` with the first three
        ':'-separated parts of ``lineid``; ``create_vw_buendel`` joins this
        column against ``linien.dlid``.
        """
        self._create_table_from_parquet('fahrten', server)
        self.cursor.sql("alter table fahrten add column if not exists lineid_short VARCHAR")
        self.cursor.sql("""update fahrten 
                set lineid_short = concat_ws(':', split_part(lineid,':', 1), split_part(lineid,':', 2), split_part(lineid,':', 3))""")
        print("Table 'fahrten' created.")

    def create_table_zusatz(self, server):
        """Create table ``zusatz`` from the ``zusatz*.parquet`` files of *server*."""
        self._create_table_from_parquet('zusatz', server)
        print("Table 'zusatz' created.")

    def create_table_verlauf(self, server):
        """Create table ``verlauf`` from the ``verlauf*.parquet`` files of *server*."""
        self._create_table_from_parquet('verlauf', server)
        print("Table 'verlauf' created.")

    def create_vw_buendel(self, buendel):
        """Create the view ``vw_buendel`` for one line bundle.

        Args:
            buendel: Text that ``linien.buendel`` must contain (substring
                match via ``LIKE``). Single quotes are escaped before the
                value is placed into the view definition.
        """
        pattern = self._quote(buendel)
        sql_buendel = f"""create or replace view vw_buendel as
                                (select f.datum, l.buendel, l.ebene, f.vu, f.fnr, f.lineshort,f.lineid_short, f.hasrealtime, 
                                f.journey_cancelled, f.reported_cancelled, f.ts_reported_cancelled
        
                                from fahrten f                                         
                                left outer join linien l on f.lineid_short = l.dlid 
                                 where buendel like '%{pattern}%') """
        self.cursor.execute(sql_buendel)

    def anzahl_fahrten(self):
        """Return the number of rows in table ``fahrten``."""
        return self.cursor.sql("from fahrten").shape[0]

    def anzahl_fahrten_betreiber(self):
        """Return a relation with the trip count per operator (``vu``), largest first."""
        return self.cursor.sql("select vu, count(vu) as count from fahrten group by vu order by count desc")

    def verbindung_schließen(self):
        """Close the cursor and the database connection."""
        # The cursor was opened separately in __init__, so close it as well.
        self.cursor.close()
        self.conn.close()
        print("Verbindung zur DB geschlossen")

### Testen der class

In [None]:
# Create the helper with its default database; the constructor also attaches
# PostgreSQL and loads the reference tables.
rt = rt_duck()
rt

In [None]:
#Schließen der Verbindung
#rt.verbindung_schließen()

In [None]:
# Build table fahrten from the parquet files below out/parquet/prod.
rt.create_table_fahrten(server = 'prod')

In [None]:
# Build tables zusatz and verlauf from the parquet files below out/parquet/prod.
rt.create_table_zusatz(server = 'prod')
rt.create_table_verlauf(server = 'prod')

In [None]:
# Preview five rows of the linien reference table.
rt.cursor.sql("""from linien limit 5""")

In [None]:
# Distinct rows (date, line, fnr, operator) from table zusatz for lines 630 and
# 670 within the last 30 days; "group by all" de-duplicates the selected columns.
df_zusatz = rt.cursor.sql("""select datum::date as datum, lineshort, fnr,  vu 
              from zusatz 
              where 
              lineshort in ('630' , '670')
              -- and vu like 'Reisedienst von Rahden%' 
            and datum::date >= (current_date - interval 30 day)
              group by all order 
              by lineshort, fnr """).df()

df_zusatz

In [None]:
# Earliest date, latest date and row count of table fahrten.
# NOTE(review): the alias "amx_date" looks like a typo for "max_date".
rt.cursor.sql("select min(datum )::date as min_date, max(datum)::date as amx_date, count(*) as anzahl from fahrten")

In [None]:
# (Re)create view vw_buendel for bundles whose name contains 'OHZ Ost'; the
# queries in the following cells read from this view.
rt.create_vw_buendel('OHZ Ost')

In [None]:
# Sample of up to ten fahrten rows dated 2024-11-01.
rt.cursor.sql("from fahrten where datum = '2024-11-01' limit 10")

### Häufung von Fahrten ohne Echtzeit

In [None]:
# Distinct trips of the current bundle view that had no real-time data in the
# last 30 days.
df_fahrten_ohne_ez = rt.cursor.sql("""
              
                select datum::date as datum, ebene, lineshort , fnr, hasrealtime
               
                from vw_buendel 
                where datum >= (current_date - interval 30 day) and hasrealtime = false
                group by all
                order by ebene, lineshort, fnr
    
              """).df()

# Left-join the rows from df_zusatz that share date and fnr.
# NOTE(review): lineshort is not part of the join key - confirm that fnr is
# unique across lines on a given day.
df_fahrten_ohne_ez_zusatz = pd.merge(
    df_fahrten_ohne_ez,
    df_zusatz,
    on=['datum', 'fnr'],
    how='left',
)
df_fahrten_ohne_ez_zusatz

In [None]:
# Per trip (ebene, lineshort, fnr) over the last 30 days: number of rows (anz),
# rows with real-time data (anz_rt), their ratio (quote) and the latest date
# with real-time data. Only trips with quote < 1 are kept.
df_fahrten_mit_nicht_vollstaendiger_echtzeit = rt.cursor.sql("""
              select * from 
                (select ebene, lineshort , fnr, count(*) as anz, count(*) filter (hasRealtime) as anz_rt, round(anz_rt/anz,2) as quote,
                max(datum::date) filter (hasRealtime) as letzte_lieferung
                from vw_buendel 
                where datum >= (current_date - interval 30 day)
                group by all
                order by ebene, lineshort, fnr)
              where quote < 1
              """).df()

df_fahrten_mit_nicht_vollstaendiger_echtzeit

In [None]:
# Target workbook and sheet titles of the "incomplete real-time" report.
xl = 'out/nicht_vollstaendig.xlsx'
sn01 = '01 fahrten_rt_kl_100_roz'
sn02 = '02 zusatzfahrten'
sn03 = '03 ohne ez merge zusatz'

sheets = (
    (sn01, df_fahrten_mit_nicht_vollstaendiger_echtzeit),
    (sn02, df_zusatz),
    (sn03, df_fahrten_ohne_ez_zusatz),
)

# Each frame goes to its own sheet with a frozen header row and an auto filter.
with pd.ExcelWriter(xl, engine='openpyxl') as writer:
    for title, frame in sheets:
        frame.to_excel(writer, index=False, sheet_name=title)
        ws = writer.book[title]
        ws.freeze_panes = 'A2'
        ws.auto_filter.ref = 'A:H'


In [None]:
# Real-time share per day and line for the previous calendar month: the date
# filter runs from the first to the last day of the month before the current one.
q = rt.cursor.sql("""
                   (select 
                    datum::date as datum, ebene, lineshort, lineid_short, count(*) anz,
                    count(*) filter (hasRealtime) anz_rt, round(anz_rt/ anz,2) anteil_rt, 
                    max(datum) filter (hasRealtime) letzte_lieferung
                    from vw_buendel 
                    where datum >= date_trunc('month', (date_trunc('month',current_date) - interval 1 day)::date)
                    and datum <= (date_trunc('month',current_date) - interval 1 day)::date
                  
                    group by all

                    order by datum::date)
                  """)
#q.filter("lineshort in ('S35', '350')") # .filter() allows simple follow-up queries on the relation

q

In [None]:
#Abfrage für den letzten Monat
q_pivot_lm = rt.cursor.sql("""
                    pivot (select 
                            datum::date as datum, ebene, lineshort, lineid_short, count(*) anz,
                            count(*) filter (hasRealtime) anz_rt, round(anz_rt/ anz,2) anteil_rt
                        from vw_buendel 
                        where datum >= date_trunc('month', (date_trunc('month',current_date) - interval 1 day)::date)
                            and datum <= (date_trunc('month',current_date) - interval 1 day)::date
                        group by all
                        )
                    on datum
                    using sum(anteil_rt)
                    group by lineshort, ebene
                    order by ebene, lineshort""")

q_pivot_lm.df().fillna('-')

In [None]:
#Abfrage für die letzten 30 Tage
q_pivot_lm = rt.cursor.sql("""
                    pivot (select 
                            datum::date as datum, ebene, lineshort, lineid_short, count(*) anz,
                            count(*) filter (hasRealtime) anz_rt, round(anz_rt/ anz,2) anteil_rt
                        from vw_buendel 
                        where datum >= (current_date - interval 30 day)
                        group by all
                        )
                    on datum
                    using sum(anteil_rt)
                    group by lineshort, ebene
                    order by ebene, lineshort""")

q_pivot_lm.df().style.background_gradient(cmap="RdYlGn", axis = None,  vmin=0.5, vmax=1).highlight_null(color='white').format(precision=2)

In [None]:
# Trip count per operator as a DataFrame.
rt.anzahl_fahrten_betreiber().df()

In [None]:
# Write the total number of rows in fahrten to the log.
logging.info(f"Anzahl Fahrten gesamt {rt.anzahl_fahrten()}")

## Ohne class

In [None]:
# Separate DuckDB connection opened without a database path; the remaining
# cells work on this connection instead of the rt_duck helper.
con = duckdb.connect()

In [None]:
# Install and load the postgres extension, then attach the PostgreSQL database
# zvbn_postgis read-only under the name db_dm (credentials come from config).
con.sql(f"""INSTALL postgres;
LOAD postgres;
ATTACH 'dbname=zvbn_postgis user={config['POSTGRES_USER']} host=127.0.0.1 password={config['POSTGRES_PW']}' AS db_dm (TYPE POSTGRES, READ_ONLY);""")

In [None]:
# Copy the bundle table from PostgreSQL into DuckDB and show it.
con.sql("create or replace table lin_buendel as select * from db_dm.basis.lin_buendel")
con.sql("select * from lin_buendel")

In [None]:
# Copy the active lines that are assigned to a bundle into the local table
# linien and show it.
sql_lin = """
        Create or replace table linien as 
        SELECT nummer AS linie, buendel, ebene, dlid, id 
        FROM db_dm.basis.linien 
        WHERE buendel IS NOT NULL AND aktiv IS TRUE 
        ORDER BY buendel, ebene, nummer """
con.sql(sql_lin)
con.sql("select * from linien")

### Abruf der Parquet Files (Tagespakete)

In [None]:
# Load the parquet files of the chosen server into the tables fahrten, verlauf
# and zusatz; every table also records the source file name (filename = true).
server = 'prod'
con.sql(f"create or replace table fahrten as select * from read_parquet('out/parquet/{server}/fahrten*.parquet',  union_by_name = true, filename = true)")
con.sql(f"create or replace table verlauf as select * from read_parquet('out/parquet/{server}/verlauf*.parquet',  union_by_name = true, filename = true)")
con.sql(f"create or replace table zusatz as select * from read_parquet('out/parquet/{server}/zusatz*.parquet',  union_by_name = true, filename = true)")

### Ermitteln und Löschen von nicht gewollten Betreibern

In [None]:
#con.sql("select distinct vu from fahrten where vu like '%Weser%'")

In [None]:
# Show the column layout of table fahrten.
con.sql("describe fahrten")

In [None]:
# Number of fahrten rows per date.
con.sql("select count(*), datum from fahrten group by datum order by datum")

In [None]:
# Optional clean-up that keeps only the two listed operators in fahrten. It is
# disabled by the constant condition; the matching deletes for verlauf and
# zusatz are commented out.
if False: # set to True to run; False lets the whole notebook keep running through
    print('Löschen von Betreibern')
    con.sql("delete from fahrten where vu not in ('Weser-Ems-Bus Betrieb Bremen', 'Weser-Ems-Bus Auftragnehmerleistungen')")
    #con.sql("delete from verlauf where vu not in ('Weser-Ems-Bus Betrieb Bremen', 'Weser-Ems-Bus Auftragnehmerleistungen')")
    #con.sql("delete from zusatz where vu not in ('Weser-Ems-Bus Betrieb Bremen', 'Weser-Ems-Bus Auftragnehmerleistungen')")

In [None]:
# Number of fahrten rows within the last 100 days, read straight from the result row.
con.sql("select count(*) from fahrten where datum >= (current_date - interval 100 days)").fetchone()[0]

In [None]:
# Rows in fahrten since the 14-day cutoff and total rows in verlauf.
anzahl_fahrten = con.sql(f"select count(*) from fahrten where datum >= '{letzte14tage}'").fetchone()[0]
laenge_verlauf = con.sql("select count(*) from verlauf").fetchone()[0]
print(f"Anzahl Fahrten: {anzahl_fahrten},  Länge Verlauf: {laenge_verlauf}    ")

In [None]:
# Trips on 2024-10-29 whose deviceid matches '%680%DBRB%', with start and end
# formatted as HH:MM. fnr is the second '-'-separated part of deviceid; m2 takes
# the third part up to '#', subtracts 8000000000000 and divides by 1000.
con.sql("""select 
            datum, 
            fahrtstartstationname, 
           strftime( cast(fahrtstarttime as TIMESTAMPTZ), '%H:%M') as fahrtstart,
           fahrtendstationname,
           strftime( cast(fahrtendtime as TIMESTAMPTZ), '%H:%M') as fahrtende,
            
            deviceid, 
            split_part(deviceid, '-', 2) as fnr, 
            cast(((cast(split_part(split_part(deviceid, '-', 3), '#', 1) as int64) - 8000000000000) / 1000) as int64) as m2, 
        from fahrten 
        where deviceid like '%680%DBRB%' and datum = '2024-10-29'
        order by datum, fahrtstarttime
        
        """).df()
# Optional Excel export of the result:
#.to_excel('out/web.xlsx', index=False)

### Anzahl der Fahrten je Betreiber

In [None]:
# Number of verlauf rows per journeyOperator, smallest count first.
con.sql("select journeyOperator, count(journeyOperator) as count from verlauf group by journeyOperator order by count")

### Fahrten mit hohen Verspätungen

In [None]:
# Distinct deviceids with at least one verlauf row where dep_del exceeds 100.
# NOTE(review): the unit of dep_del is not visible here - confirm.
con.sql("select distinct deviceid from verlauf where dep_del > 100").df()

In [None]:
# Show the column layout of table fahrten again.
con.sql("describe fahrten")

### Verkürzung der DLID
- Zum Teil werden bei mehreren Betreibern einer Linie TLID mit vierteiliger DLID geliefert 
- Verkürzung ermöglicht die Verknüpfung mit Liste aus DM

In [None]:
# Add column lineid_short and fill it with the first three ':'-separated parts
# of lineid; the later queries join this column against linien.dlid.
con.sql("alter table fahrten add column if not exists lineid_short VARCHAR")
con.sql("""update fahrten 
        set lineid_short = concat_ws(':', split_part(lineid,':', 1), split_part(lineid,':', 2), split_part(lineid,':', 3))""")
# Show each distinct lineid next to its shortened form.
con.sql("""select distinct lineid, 
        concat_ws(':', split_part(lineid,':', 1), split_part(lineid,':', 2), split_part(lineid,':', 3)) 
        from fahrten""")

### Über HIM gemeldete Ausfälle (ts_reported_cancelled gefüllt)

In [None]:
# Trips since the 14-day cutoff whose ts_reported_cancelled is not the empty string.
df_fahrten_ausfall_him = con.sql(f"""
                              select vu, fnr, ts_reported_cancelled, journey_cancelled 
                              from fahrten f 
                              where ts_reported_cancelled != '' and f.datum >= '{letzte14tage}'""").df()

### Echtzeitquote

#### nach Linie und Betreiber

In [None]:
# Real-time share in percent per day, operator and line since the 14-day cutoff.
df_ez_quote_betreiber = con.sql(f"""
        select l.buendel, l.ebene,f.datum, f.vu, f.lineshort,f.lineid_short, count(f.hasRealtime) filter (f.hasRealtime = True) ez_true, count(f.*) count, 
        round(ez_true / count * 100, 1) anteil_ez
        from fahrten f
        left outer join linien l on f.lineid_short = l.dlid
        where f.datum >= '{letzte14tage}'              
        group by f.lineid_short, f.vu, f.datum, f.lineshort, f.lineid_short, l.buendel, l.ebene
        order by f.vu, f.lineid_short
        """).df()

# Lines without a match in linien get '-' so they are not dropped as pivot index values.
df_ez_quote_betreiber[['buendel', 'ebene']] = df_ez_quote_betreiber[['buendel', 'ebene']].fillna('-')

# One row per bundle/level/operator/line, one column per date.
anteil_ez_pivot_betreiber = df_ez_quote_betreiber.pivot_table(
    index=['buendel', 'ebene', 'vu', 'lineshort'],
    columns='datum',
    values='anteil_ez',
).reset_index()
anteil_ez_pivot_betreiber

#### nach Linie (ohne Betreiber)

In [None]:
# Real-time share in percent per day and line (operators combined) since the 14-day cutoff.
df_ez_quote_o_betreiber = con.sql(f"""
        select l.buendel, l.ebene,f.datum, f.lineshort,f.lineid_short, count(f.hasRealtime) filter (f.hasRealtime = True) ez_true, count(f.*) count, 
        round(ez_true / count * 100, 1) anteil_ez
        from fahrten f        
        left outer join linien l on f.lineid_short = l.dlid      
        where f.datum >= '{letzte14tage}'        
        group by f.lineid_short, f.datum, f.lineshort, f.lineid_short, l.buendel, l.ebene
        order by f.lineid_short
        """).df()

# Lines without a match in linien get '-' so they are not dropped as pivot index values.
df_ez_quote_o_betreiber[['buendel', 'ebene']] = df_ez_quote_o_betreiber[['buendel', 'ebene']].fillna('-')

# One row per bundle/level/line, one column per date.
anteil_ez_pivot_o_betreiber = df_ez_quote_o_betreiber.pivot_table(
    index=['buendel', 'ebene', 'lineshort'],
    columns='datum',
    values='anteil_ez',
).reset_index()
anteil_ez_pivot_o_betreiber

### Fahrten ohne Echtzeit Ebene 1/1+ und 2

In [None]:
# Trips without real-time data on lines of level 1, 1+ and 2 since the 14-day
# cutoff. Level '2' is part of the filter so that the result matches the
# variable name, the section heading and the cancellation query, which all
# cover levels 1, 1+ and 2.
df_fahrten_ohne_ez_ebenen_1_1p_2 = con.sql(f"""
        select f.datum, l.buendel, l.ebene, f.vu, f.fnr, f.lineshort,f.lineid_short, f.hasrealtime, f.journey_cancelled, f.reported_cancelled, f.ts_reported_cancelled
        
        from fahrten f
                                           
        left outer join linien l on f.lineid_short = l.dlid              
        where l.ebene in ('1', '1+', '2') and f.hasrealtime = False and f.datum >= '{letzte14tage}'
                                           
        order by f.datum, f.lineid_short
        """).df()

In [None]:
# Trips on lines of level 1, 1+ and 2 since the 14-day cutoff that are flagged
# as cancelled (journey_cancelled or reported_cancelled).
df_fahrten_ausfall_1_1p_2 = con.sql(f"""
        select f.datum, l.buendel, l.ebene, f.vu, f.fnr, f.lineshort,f.lineid_short, f.hasrealtime, f.journey_cancelled, f.reported_cancelled, f.ts_reported_cancelled
        
        from fahrten f
                                    
        left outer join linien l on f.lineid_short = l.dlid              
        where l.ebene in ('1', '1+', '2') and (journey_cancelled = True or f.reported_cancelled = True) and 
        f.datum >= '{letzte14tage}'                            
        order by f.datum, f.lineid_short
        """).df()

# Ausgabe xlsx EZ Statistiken

In [None]:
# Target workbook and sheet titles of the real-time statistics report.
# NOTE(review): sn04 says "1 1+ 3" although the data covers levels 1, 1+ and 2,
# and the variables sn06/sn07 hold the sheets numbered 05/06 - confirm before renaming.
# NOTE(review): the help sheet states the period as ending yesterday, while the
# queries only apply a lower bound (>= letzte14tage).
xlsx = "/var/www/rt_archiv/anteil_echtzeit_linien_vbn.xlsx"
sn00 = '00 Hilfe'
sn01 = '01 pivot alle Linien betreiber'
sn02 = '02 pivot alle Linien'
sn03 = '03 fahrten ohne EZ 1 1+ 2'
sn04 = '04 fahrten ohne EZ 1 1+ 3 grup'
sn06 = '05 fahrten ausfall'
sn07 = '06 fahrten ausfall über HIM'


def _format_sheet(ws, freeze, filter_ref, date_cells=None, widths=None):
    """Set freeze pane, auto filter, date number format and column widths on *ws*.

    *date_cells* is a worksheet key such as "A" or "1:1" whose cells receive
    the 'YYYY-MM-DD' number format; *widths* maps column letters to widths.
    """
    ws.freeze_panes = freeze
    ws.auto_filter.ref = filter_ref
    if date_cells is not None:
        for cell in ws[date_cells]:
            cell.number_format = 'YYYY-MM-DD'
    for column, width in (widths or {}).items():
        ws.column_dimensions[column].width = width


def _pivot_widths(ws, n_columns):
    """Return the column widths for a pivot sheet built from *n_columns* frame columns.

    Columns C to H start at 22; columns 4 to n_columns + 4 are then set to 16,
    which overrides the earlier value wherever the two ranges overlap.
    """
    widths = dict.fromkeys('CDEFGH', 22)
    for idx in range(4, n_columns + 5):
        widths[ws.cell(row=1, column=idx).column_letter] = 16
    return widths


with pd.ExcelWriter(xlsx, engine="openpyxl") as writer:
    # Help sheet: creation time, reporting period and table of contents.
    sheet = writer.book.create_sheet(sn00)
    sheet['A1'] = f"Erstellt: {dt.datetime.now().strftime('%Y-%m-%d %H:%M')} Zeitraum: {letzte14tage} bis {gestern}"

    sheet['A3'] = "Inhalt"
    sheet['B4'] = f"Blatt {sn01}: Pivot Echtzeitquote inkl. Betreiberkennung"
    sheet['B5'] = f"Blatt {sn02}: Pivot Echtzeitquote ohne Betreiberkennung"
    sheet['B6'] = f"Blatt {sn03}: Fahrten ohne Echtzeit"
    sheet['B7'] = f"Blatt {sn04}: Fahrten ohne Echtzeit mit Anzahl"
    sheet['B8'] = f"Blatt {sn06}: Fahrten Ausfall"
    sheet['B9'] = f"Blatt {sn07}: Fahrten Ausfall über HIM"

    # Real-time share with operator; the header row holds the dates.
    anteil_ez_pivot_betreiber.to_excel(writer, sheet_name=sn01, index=False)
    ws = writer.book[sn01]
    _format_sheet(ws, 'E2', 'A:H', date_cells="1:1",
                  widths=_pivot_widths(ws, anteil_ez_pivot_betreiber.shape[1]))

    # Real-time share without operator.
    anteil_ez_pivot_o_betreiber.to_excel(writer, sheet_name=sn02, index=False)
    ws = writer.book[sn02]
    _format_sheet(ws, 'D2', 'A:H', date_cells="1:1",
                  widths=_pivot_widths(ws, anteil_ez_pivot_o_betreiber.shape[1]))

    # Trips without real-time data, one row per trip and day.
    df_fahrten_ohne_ez_ebenen_1_1p_2.to_excel(writer, sheet_name=sn03, index=False)
    _format_sheet(writer.book[sn03], 'A2', 'A:M', date_cells="A", widths={'A': 18})

    # Trips without real-time data, counted per operator and fnr. Naming the
    # counter column explicitly keeps the sort independent of the pandas version.
    (
        df_fahrten_ohne_ez_ebenen_1_1p_2[['vu', 'fnr']]
        .value_counts()
        .reset_index(name='count')
        .sort_values(['count', 'vu'], ascending=False)
        .to_excel(writer, sheet_name=sn04, index=False)
    )
    _format_sheet(writer.book[sn04], 'A2', 'A:H', widths={'A': 22})

    # Cancelled trips.
    df_fahrten_ausfall_1_1p_2.to_excel(writer, sheet_name=sn06, index=False)
    _format_sheet(writer.book[sn06], 'A2', 'A:M', date_cells="A", widths={'A': 18})

    # Trips with a filled ts_reported_cancelled (reported via HIM).
    df_fahrten_ausfall_him.to_excel(writer, sheet_name=sn07, index=False)
    _format_sheet(writer.book[sn07], 'A2', 'A:M', date_cells="A", widths={'A': 18})

In [None]:
# Per operator over the last 3 days: number of trips, trips with real-time data,
# trips for which real-time data was ever reported, trips with either of the
# two, and trips flagged journey_cancelled / reported_cancelled.
df_stat_rt_canc = con.sql("""
        select 
            vu, 
            count(*) as anzahl, 
            count(*) filter (hasRealtime) as hasRealtime, 
            count(*) filter (realtimeHasEverBeenReported) as realtimeHasEverBeenReported,
            count(*) filter (realtimehaseverbeenreported or hasrealtime) as rt_combined,
            count(*) filter (journey_cancelled) as journey_cancelled,
            count(*) filter (reported_cancelled) as reported_cancelled
        from fahrten
        where datum >= (current_date - interval 3 days)
        group by all
        order by vu""").df()
df_stat_rt_canc

In [None]:
# Write the per-operator statistics to Excel and add a subtotal row.
output_file = 'reports/df_stat_rt_canc.xlsx'
sheet_name = 'Stat RT Canc'
df_stat_rt_canc = df_stat_rt_canc.sort_values(by='vu')
with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
    df_stat_rt_canc.to_excel(writer, index=False, sheet_name=sheet_name)
    worksheet = writer.book[sheet_name]
    worksheet.auto_filter.ref = worksheet.dimensions
    worksheet.column_dimensions['A'].width = 30  # operator names are long
    worksheet.freeze_panes = 'A2'  # keep the header row visible
    # Do not name this "len": that would shadow the builtin for every later cell.
    n_rows = df_stat_rt_canc.shape[0]
    # One subtotal per column after the operator column, two rows below the
    # data. Iterating over the header cells covers every counter column,
    # including the last one (reported_cancelled).
    for header_cell in worksheet[1][1:]:
        col = header_cell.column_letter
        worksheet[f'{col}{n_rows + 3}'] = f'=subtotal(9,{col}2:{col}{n_rows + 1})'
