# GTFS

## GTFS VINTRA

In [155]:
import numpy as np
import pandas as pd
from itables import init_notebook_mode
import os
import subprocess

# Configure itables for this notebook session.
# NOTE(review): all_interactive=True is presumably what makes the plain DataFrame
# outputs further down render as interactive tables - confirm against the itables docs.
init_notebook_mode(all_interactive=True)


<IPython.core.display.Javascript object>

In [156]:
# Directory the validator is run from; the jar, the "gtfs-files" input folder and
# the "reports" output folder are all addressed relative to it.
working_directory = f'{os.getcwd()}/../../data/gtfs/vintra/'
gtfs_files_directory = f'{working_directory}/gtfs-files/'

# One dict per feed archive. The DataFrame is built once after the loop:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame per row.
gtfs_file_stat_records = []

for file in sorted(os.listdir(gtfs_files_directory)):
    if not file.endswith('.zip'):
        continue

    # Feed name = archive name up to the first ".zip"; also used for report names.
    filename, _, _ = file.partition('.zip')

    # Pass the command as an argument list without a shell, so archive names
    # containing spaces or shell metacharacters are handed over verbatim.
    validator_command = [
        'java', '-jar', 'gtfs-validator-301.jar',
        '-i', f'gtfs-files/{file}',
        '-o', 'reports',
        '-v', f'{filename}_report.json',
        '-e', f'{filename}_system_errors.json',
        '-n',
        '-c', 'lt',
    ]

    # subprocess.run kills the child before re-raising TimeoutExpired, so a hung
    # validator does not keep running in the background after the 60 s limit.
    # NOTE(review): the exit status is not inspected; a run whose stdout lacks the
    # 'seconds\n' marker simply yields a row with only 'failas' set.
    completed = subprocess.run(
        validator_command,
        cwd=working_directory,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        timeout=60,
    )

    # Everything after the first 'seconds\n' in stdout is read as
    # "<name>\t<value>" lines, one per GTFS file.
    _, _, gtfs_files_txt = completed.stdout.decode('utf-8').partition('seconds\n')

    gtfs_files_dict = {'failas': filename}
    for gtfs_file_rep in gtfs_files_txt.splitlines():
        gtfs_file, separator, c = gtfs_file_rep.partition('\t')
        if not separator:
            # Blank or non-tabular line: nothing to record (previously a ValueError).
            continue
        gtfs_files_dict[gtfs_file] = c if c != 'MISSING_FILE' else None

    gtfs_file_stat_records.append(gtfs_files_dict)

gtfs_file_stats_df = pd.DataFrame(gtfs_file_stat_records)

# Fixed column layout of the overview table: the feed-name column first, then
# the GTFS file names in display order. reindex() adds any column that is
# absent from the collected data (as missing values) and drops all others.
gtfs_overview_columns = [
    'failas',
    'agency.txt',
    'calendar.txt',
    'calendar_dates.txt',
    'routes.txt',
    'shapes.txt',
    'stop_times.txt',
    'stops.txt',
    'trips.txt',
    'fare_attributes.txt',
    'fare_rules.txt',
    'attributions.txt',
    'feed_info.txt',
    'frequencies.txt',
    'levels.txt',
    'pathways.txt',
    'transfers.txt',
    'translations.txt',
]

gtfs_file_stats_df = gtfs_file_stats_df.reindex(columns=gtfs_overview_columns)
# Use the feed name as the row label.
gtfs_file_stats_df = gtfs_file_stats_df.set_index('failas')


# Columns whose missing entries are flagged with a cross; a missing entry in
# any other column gets a warning sign instead. The list is defined once so the
# selection on both sides of the assignment cannot drift apart.
cross_marked_columns = [
    'agency.txt',
    'calendar.txt',
    'calendar_dates.txt',
    'routes.txt',
    'shapes.txt',
    'stop_times.txt',
    'stops.txt',
    'trips.txt',
    'fare_attributes.txt',
    'fare_rules.txt',
]

gtfs_file_stats_df[cross_marked_columns] = (
    gtfs_file_stats_df[cross_marked_columns].fillna('❌')
)

# Whatever is still missing belongs to the remaining columns.
gtfs_file_stats_df = gtfs_file_stats_df.fillna('⚠️')

# The former `gtfs_file_stats_df.style.set_sticky(axis="index")` statement was
# dropped: it built a Styler and discarded it, so it never affected the
# DataFrame displayed below.
gtfs_file_stats_df

Unnamed: 0,agency.txt,calendar.txt,calendar_dates.txt,routes.txt,shapes.txt,stop_times.txt,stops.txt,trips.txt,fare_attributes.txt,fare_rules.txt,attributions.txt,feed_info.txt,frequencies.txt,levels.txt,pathways.txt,transfers.txt,translations.txt
Loading... (need help?),,,,,,,,,,,,,,,,,


In [157]:
import json

# Folder the validator run wrote its "<feed>_report.json" files to.
reports_dir = f'{working_directory}/reports/'

# One record per notice entry across all report files. The DataFrame is built
# once after the loop: DataFrame.append was removed in pandas 2.0 and copied
# the whole frame for every appended row.
gtfs_notice_records = []

for file in sorted(os.listdir(reports_dir)):
    # NOTE(review): the filter accepts any name ending in 'report.json', while
    # the feed name is cut at '_report.json'; a file without the underscore
    # would keep its full file name as feed name - confirm this is intended.
    if not file.endswith('report.json'):
        continue

    gtfs_filename, _, _ = file.partition('_report.json')

    # Read as UTF-8 explicitly instead of relying on the platform default encoding.
    with open(os.path.join(reports_dir, file), encoding='utf-8') as fp:
        data = json.load(fp)

    gtfs_notice_records.extend(
        {
            'failas': gtfs_filename,
            'klaida': notice['code'],
            'sunkumas': notice['severity'],
            'viso': notice['totalNotices'],
        }
        for notice in data['notices']
    )

# Explicit columns keep the frame usable (and 'viso' / 'sunkumas' addressable)
# even when no report contained any notice.
gtfs_notices_df = pd.DataFrame(
    gtfs_notice_records,
    columns=['failas', 'klaida', 'sunkumas', 'viso'],
)

gtfs_notices_df['viso'] = pd.to_numeric(gtfs_notices_df['viso'], downcast='integer')

# Show only the notices with severity ERROR.
gtfs_notices_df[gtfs_notices_df['sunkumas'] == 'ERROR']

Unnamed: 0,failas,klaida,sunkumas,viso
Loading... (need help?),,,,


In [158]:
import numpy as np
# Feed x notice-code matrix built from the ERROR-severity notices; each cell is
# the pivot_table aggregate (default: mean) of 'viso' for that pair, and pairs
# that never occur stay NaN.
gtfs_error_counts = (
    gtfs_notices_df[gtfs_notices_df['sunkumas'] == 'ERROR']
    .drop(columns=['sunkumas'])
    .pivot_table(index='failas', columns='klaida', values='viso')
)

# The previous per-column astype('int64', errors='ignore') silently did nothing
# for any column containing NaN, so counts were rendered as "1.0", "5.0", ...
# Formatting in the Styler shows whole numbers and blanks for missing cells
# while the underlying data stays numeric.
gtfs_errors_df = (
    gtfs_error_counts.style
    .format('{:.0f}', na_rep='')
    # Highlight every cell that actually carries a count.
    .apply(
        lambda row: ['background: orange' if pd.notna(value) else '' for value in row],
        axis=1,
    )
)

gtfs_errors_df

klaida,decreasing_or_equal_stop_time_distance,duplicate_fare_rule_zone_id_fields,equal_shape_distance_diff_coordinates,missing_required_file
failas,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AnyksciuR,,,1.0,
Birstono,,,,5.0
BirzuR,,,,4.0
Druskininku,,,,4.0
IgnalinosR,,,5.0,
JonavosR,,,1.0,
JoniskioR,,,1.0,
JurbarkoR,,,2.0,
Kalvarijos,,,,4.0
KaunoM,,,,4.0
