# GTFS

## GTFS VINTRA

In [178]:
import numpy as np
import pandas as pd
from itables import init_notebook_mode
import os
import subprocess

# Switch on itables for the whole notebook; all_interactive=True asks it to
# render every displayed DataFrame as an interactive table.
init_notebook_mode(all_interactive=True)


<IPython.core.display.Javascript object>

In [179]:
# The validator jar is run with `working_directory` as its current directory;
# the feed archives are read from its `gtfs-files/` sub-directory.
working_directory = f'{os.getcwd()}/../../data/gtfs/vintra/'
gtfs_files_directory = f'{working_directory}/gtfs-files/'

# One dict per feed archive. The frame is built once after the loop:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call.
gtfs_file_stats_records = []

for file in sorted(os.listdir(gtfs_files_directory)):
    if not file.endswith('.zip'):
        continue
    filename, _, _ = file.partition('.zip')

    # Pass an argument list and no shell, so archive names containing spaces
    # or shell metacharacters reach the validator unchanged. subprocess.run
    # also kills the child process if the 60 s timeout expires (the previous
    # Popen/communicate pair left it running).
    completed = subprocess.run(
        [
            'java', '-jar', 'gtfs-validator-301.jar',
            '-i', f'gtfs-files/{file}',
            '-o', 'reports',
            '-v', f'{filename}_report.json',
            '-e', f'{filename}_system_errors.json',
            '-n',
            '-c', 'lt',
        ],
        cwd=working_directory,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        timeout=60,
    )

    # Only the text after the first 'seconds\n' in stdout is parsed; each of
    # its lines is expected to look like '<name>\t<value>'.
    _, _, gtfs_files_txt = completed.stdout.decode('utf-8').partition('seconds\n')

    gtfs_files_dict = {'failas': filename}
    for gtfs_file_rep in gtfs_files_txt.splitlines():
        gtfs_file, tab, c = gtfs_file_rep.partition('\t')
        if not tab:
            # Blank or non-tabular line: skip it instead of failing to unpack.
            continue
        # 'MISSING_FILE' is stored as None so the later fillna calls mark it.
        gtfs_files_dict[gtfs_file] = c if c != 'MISSING_FILE' else None

    gtfs_file_stats_records.append(gtfs_files_dict)

gtfs_file_stats_df = pd.DataFrame(gtfs_file_stats_records)

# Files whose absence is displayed as a cross.
cross_marked_files = [
    'agency.txt',
    'calendar.txt',
    'calendar_dates.txt',
    'routes.txt',
    'shapes.txt',
    'stop_times.txt',
    'stops.txt',
    'trips.txt',
    'fare_attributes.txt',
    'fare_rules.txt',
]

# Files whose absence is displayed as a warning sign.
warning_marked_files = [
    'attributions.txt',
    'feed_info.txt',
    'frequencies.txt',
    'levels.txt',
    'pathways.txt',
    'transfers.txt',
    'translations.txt',
]

# Fix the column order (columns missing from the data are created as NaN) and
# index the table by feed name.
gtfs_file_stats_df = (
    gtfs_file_stats_df
    .reindex(columns=['failas', *cross_marked_files, *warning_marked_files])
    .set_index('failas')
)

gtfs_file_stats_df[cross_marked_files] = gtfs_file_stats_df[cross_marked_files].fillna('❌')

# Whatever is still missing belongs to the warning-marked group.
gtfs_file_stats_df = gtfs_file_stats_df.fillna('⚠️')

# The former `gtfs_file_stats_df.style.set_sticky(axis="index")` statement was
# dropped: it built a Styler and discarded it, so it never affected the
# DataFrame displayed below.
gtfs_file_stats_df

Unnamed: 0,agency.txt,calendar.txt,calendar_dates.txt,routes.txt,shapes.txt,stop_times.txt,stops.txt,trips.txt,fare_attributes.txt,fare_rules.txt,attributions.txt,feed_info.txt,frequencies.txt,levels.txt,pathways.txt,transfers.txt,translations.txt
Loading... (need help?),,,,,,,,,,,,,,,,,


In [180]:
import json

reports_dir = f'{working_directory}/reports/'

# Explicit column list so the frame has these columns even when no report
# contains a notice (otherwise the 'viso' conversion below raises KeyError).
notice_columns = ['failas', 'klaida', 'sunkumas', 'viso']

# Rows are collected in a list and turned into a frame once: DataFrame.append
# was removed in pandas 2.0 and copied the whole frame on every call.
gtfs_notice_records = []

for file in sorted(os.listdir(reports_dir)):
    # Match the same suffix that is stripped below to obtain the feed name.
    if not file.endswith('_report.json'):
        continue
    gtfs_filename, _, _ = file.partition('_report.json')

    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(os.path.join(reports_dir, file), encoding='utf-8') as fp:
        data = json.load(fp)

    for notice in data['notices']:
        gtfs_notice_records.append({
            'failas': gtfs_filename,
            'klaida': notice['code'],
            'sunkumas': notice['severity'],
            'viso': notice['totalNotices'],
        })

gtfs_notices_df = pd.DataFrame(gtfs_notice_records, columns=notice_columns)
gtfs_notices_df['viso'] = pd.to_numeric(gtfs_notices_df['viso'], downcast='integer')

In [181]:
### GTFS patikrinimo klaidos

In [182]:
def show_notices_table_by_severity(severity: str) -> pd.DataFrame:
    gtfs_errors_df = gtfs_notices_df[gtfs_notices_df['sunkumas'] == severity].drop(columns=['sunkumas'])

    gtfs_errors_df = gtfs_errors_df.pivot_table(index='failas', columns='klaida', values='viso', aggfunc='sum', margins=True, fill_value=0)


    gtfs_errors_df.style.set_sticky(axis="index")
    gtfs_errors_df = gtfs_errors_df.style.apply(lambda x: ["background: orange" if v >0 else '' for v in x], axis = 1)

    return gtfs_errors_df

# Display the notices whose severity is 'ERROR', one row per feed and one column per notice code.
show_notices_table_by_severity('ERROR')

klaida,decreasing_or_equal_stop_time_distance,duplicate_fare_rule_zone_id_fields,equal_shape_distance_diff_coordinates,missing_required_file,All
failas,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AnyksciuR,0,0,1,0,1
Birstono,0,0,0,5,5
BirzuR,0,0,0,4,4
Druskininku,0,0,0,4,4
IgnalinosR,0,0,5,0,5
JonavosR,0,0,1,0,1
JoniskioR,0,0,1,0,1
JurbarkoR,0,0,2,0,2
Kalvarijos,0,0,0,4,4
KaunoM,0,0,0,4,4


### GTFS patikrinimo įspėjimai

In [183]:
# Display the notices whose severity is 'WARNING', one row per feed and one column per notice code.
show_notices_table_by_severity('WARNING')

klaida,duplicate_route_name,equal_shape_distance_same_coordinates,fast_travel_between_consecutive_stops,fast_travel_between_far_stops,leading_or_trailing_whitespaces,missing_timepoint_column,missing_timepoint_value,same_route_and_agency_url,stop_too_far_from_shape,stop_too_far_from_shape_using_user_distance,stops_match_shape_out_of_order,unexpected_enum_value,All
failas,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
AlytausM,0,0,0,0,0,0,5744,0,0,0,0,0,5744
AlytausR,0,3,0,0,0,0,2885,0,0,0,0,0,2888
AnyksciuR,0,0,0,0,0,0,2627,0,2,0,0,0,2629
Birstono,0,0,0,0,0,1,0,0,0,0,0,0,1
BirzuR,0,0,0,0,0,1,0,0,0,0,0,0,1
Druskininku,0,0,0,0,0,1,0,0,0,0,0,0,1
Elektrenu,0,0,0,0,0,0,434,0,0,0,0,0,434
IgnalinosR,0,0,0,0,0,0,1608,0,5,0,0,0,1613
JonavosR,0,0,0,0,1,0,6866,0,3,0,0,0,6870
JoniskioR,0,4,0,0,0,0,880,0,0,0,0,0,884
