# VINTRA



In [None]:
import numpy as np
import pandas
import pandas as pd
from itables import init_notebook_mode
import os
import subprocess

# Switch on itables for this notebook. all_interactive=True asks itables to
# render every displayed DataFrame as an interactive table (per the itables
# documentation -- confirm against the installed version).
init_notebook_mode(all_interactive=True)


In [None]:
# Locations of the feed archives and of the validator jar (the validator is
# started with `working_directory` as its current directory).
working_directory = f'{os.getcwd()}/../data/gtfs/vintra/'
gtfs_files_directory = f'{working_directory}/gtfs-files/'

# One dict per feed archive: {'failas': <archive name>, '<file>.txt': <value>}.
# Rows are collected in a list and turned into a DataFrame once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the frame every time.
gtfs_file_stats_rows = []

for file in sorted(os.listdir(gtfs_files_directory)):
    if not file.endswith('.zip'):
        continue

    filename, _, _ = file.partition('.zip')

    # Arguments are passed as a list (no shell), so archive names containing
    # spaces or shell metacharacters are handed to the validator verbatim.
    # subprocess.run kills the child process if the timeout expires.
    completed = subprocess.run(
        [
            'java', '-jar', 'gtfs-validator-301.jar',
            '-i', f'gtfs-files/{file}',
            '-o', 'reports',
            '-v', f'{filename}_report.json',
            '-e', f'{filename}_system_errors.json',
            '-n',
            '-c', 'lt',
        ],
        cwd=working_directory,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        timeout=60,
    )
    # Kept as notebook-level names for ad-hoc inspection after the cell runs.
    out, err = completed.stdout, completed.stderr
    errcode = completed.returncode

    # Everything printed after the first 'seconds\n' marker is parsed as a
    # tab-separated "<file name>\t<value>" listing, one entry per line.
    _, _, gtfs_files_txt = out.decode("utf-8").partition('seconds\n')

    gtfs_files_dict = {'failas': filename}
    for gtfs_file_rep in gtfs_files_txt.splitlines():
        gtfs_file, tab, c = gtfs_file_rep.partition('\t')
        if not tab:
            # Blank or non-tabular line: not part of the listing, skip it
            # instead of failing on tuple unpacking.
            continue
        # 'MISSING_FILE' is stored as None so it can be filled in later.
        gtfs_files_dict[gtfs_file] = c if c != 'MISSING_FILE' else None

    gtfs_file_stats_rows.append(gtfs_files_dict)

gtfs_file_stats_df = pd.DataFrame(gtfs_file_stats_rows)

# Columns whose missing values are shown as '❌'. Defined once so the column
# order and the fill step cannot drift apart.
cross_marked_columns = [
    'agency.txt',
    'calendar.txt',
    'calendar_dates.txt',
    'routes.txt',
    'shapes.txt',
    'stop_times.txt',
    'stops.txt',
    'trips.txt',
    'fare_attributes.txt',
    'fare_rules.txt',
]

# Remaining columns; their missing values are shown as '⚠️'.
warning_marked_columns = [
    'attributions.txt',
    'feed_info.txt',
    'frequencies.txt',
    'levels.txt',
    'pathways.txt',
    'transfers.txt',
    'translations.txt',
]

# Fix the column order (columns absent from the data are created empty) and
# index the table by archive name.
gtfs_file_stats_df = gtfs_file_stats_df.reindex(
    columns=['failas', *cross_marked_columns, *warning_marked_columns]
).set_index('failas')

# Fill the first group before the catch-all fill below, otherwise every gap
# would receive the warning marker.
gtfs_file_stats_df[cross_marked_columns] = (
    gtfs_file_stats_df[cross_marked_columns].fillna('❌')
)
gtfs_file_stats_df = gtfs_file_stats_df.fillna('⚠️')

# NOTE: the previous `gtfs_file_stats_df.style.set_sticky(axis="index")` line
# built a Styler and discarded it, so it had no effect on the table displayed
# below; it was dropped rather than changing what the cell renders.

gtfs_file_stats_df

In [None]:
import json

# Directory the validator writes its `<archive>_report.json` files into.
reports_dir = f'{working_directory}/reports/'

# One row per notice entry found in the report files. Rows are accumulated in
# a list and converted once: DataFrame.append was removed in pandas 2.0 and
# copied the whole frame on every call.
notice_rows = []

for file in sorted(os.listdir(reports_dir)):
    if not file.endswith('report.json'):
        continue

    gtfs_filename, _, _ = file.partition('_report.json')

    # Only the parsing needs the file handle; close it before building rows.
    with open(os.path.join(reports_dir, file), encoding='utf-8') as fp:
        data = json.load(fp)

    notice_rows.extend(
        {
            'failas': gtfs_filename,
            'klaida': notice['code'],
            'sunkumas': notice['severity'],
            'viso': notice['totalNotices'],
        }
        for notice in data['notices']
    )

# Naming the columns explicitly keeps them present even when no notice was
# found, so the conversion below (and later filtering) does not hit KeyError.
gtfs_notices_df = pd.DataFrame(
    notice_rows, columns=['failas', 'klaida', 'sunkumas', 'viso']
)

gtfs_notices_df['viso'] = pd.to_numeric(gtfs_notices_df['viso'], downcast='integer')

## GTFS patikrinimas
### GTFS patikrinimo klaidos

In [None]:
def show_notices_table_by_severity(severity: str) -> "pd.io.formats.style.Styler":
    """Build a styled per-file summary of the notices with the given severity.

    Reads the notebook-level ``gtfs_notices_df`` frame, keeps the rows whose
    ``'sunkumas'`` value equals ``severity`` and pivots them so that rows are
    archives (``'failas'``), columns are notice codes (``'klaida'``) and cells
    hold the summed ``'viso'`` counts, with margin totals and 0 for gaps.

    Args:
        severity: Value to match against the ``'sunkumas'`` column, e.g.
            ``'ERROR'`` or ``'WARNING'``.

    Returns:
        A pandas ``Styler`` (not a plain DataFrame) in which every cell
        greater than zero has an orange background and the index is sticky.
    """
    matching_notices = gtfs_notices_df[
        gtfs_notices_df['sunkumas'] == severity
    ].drop(columns=['sunkumas'])

    totals_by_file = matching_notices.pivot_table(
        index='failas',
        columns='klaida',
        values='viso',
        aggfunc='sum',
        margins=True,
        fill_value=0,
    )

    def highlight_nonzero(row):
        # One CSS declaration per cell of the row.
        return ["background: orange" if value > 0 else '' for value in row]

    # set_sticky must be chained onto the Styler that is returned; calling it
    # on a separate, discarded `.style` object (as before) had no effect.
    return (
        totals_by_file.style
        .apply(highlight_nonzero, axis=1)
        .set_sticky(axis="index")
    )

# Cell output: styled pivot of the notices whose severity is 'ERROR'.
show_notices_table_by_severity('ERROR')

### GTFS patikrinimo įspėjimai

In [None]:
# Cell output: styled pivot of the notices whose severity is 'WARNING'.
show_notices_table_by_severity('WARNING')

### Stotelės

In [None]:
from zipfile import ZipFile
import gtfs_functions as gtfs
import plotly.express as px

# Collect stops.txt from every feed archive except the combined
# 'gtfs_all.zip', tagging each row with the archive it came from.
stops_frames = []
for file in sorted(os.listdir(gtfs_files_directory)):
    if not file.endswith('.zip') or file == 'gtfs_all.zip':
        continue

    filename, _, _ = file.partition('.zip')

    with ZipFile(os.path.join(gtfs_files_directory, file)) as gtfs_zip:
        if "stops.txt" not in gtfs_zip.namelist():
            continue

        # Read while the archive is still open and close the member handle
        # afterwards; previously it was read after the archive's `with`
        # block had exited and was never closed.
        with gtfs_zip.open("stops.txt") as stops_csv:
            stops_df = pd.read_csv(stops_csv)

    stops_df['failas'] = filename
    stops_frames.append(stops_df)

# Concatenate once instead of re-copying the growing frame in every loop
# iteration; pd.concat rejects an empty list, so fall back to an empty frame.
all_stops = pd.concat(stops_frames) if stops_frames else pd.DataFrame()

# Close the token file and drop surrounding whitespace (e.g. the trailing
# newline most editors add), which is not part of the token.
with open("../.mapbox_token") as token_file:
    mapbox_access_token = token_file.read().strip()
px.set_mapbox_access_token(mapbox_access_token)

# One marker per stop, coloured by source archive.
fig = px.scatter_mapbox(
    data_frame=all_stops,
    lat='stop_lat',
    lon='stop_lon',
    mapbox_style="light",
    zoom=6,
    title='Stotelės',
    hover_name='stop_name',
    color='failas',
)

# Data-source attribution shown on the map.
fig.update_layout(
    mapbox_layers=[
        {
            "sourceattribution": '© <a href="https://judumas.vycius.lt" target="_blank">Karolis Vyčius</a> © <a href="https://www.visimarsrutai.lt/gtfs/" target="_blank">Visimarsrutai.lt</a>'
        }
    ])
fig.update_layout(margin={"r":0,"l":0,"b":0})
fig.show()

In [None]:
# Load stops.txt from the 'google_transit.zip' archive. Both the archive and
# the member handle are closed as soon as the CSV has been parsed; the
# plotting below only needs the resulting DataFrame.
with ZipFile(os.path.join(gtfs_files_directory, 'google_transit.zip')) as gtfs_zip:
    with gtfs_zip.open("stops.txt") as stops_csv:
        google_transit_vintra_stops_df = pd.read_csv(stops_csv)

# One marker per stop from that single feed.
fig = px.scatter_mapbox(
    data_frame=google_transit_vintra_stops_df,
    lat='stop_lat',
    lon='stop_lon',
    mapbox_style="light",
    zoom=6,
    title='Google Maps stotelės iš Vintra',
    hover_name='stop_name',
)

# Data-source attribution shown on the map.
fig.update_layout(
    mapbox_layers=[
        {
            "sourceattribution": '© <a href="https://judumas.vycius.lt" target="_blank">Karolis Vyčius</a> © <a href="https://www.visimarsrutai.lt/gtfs/" target="_blank">Visimarsrutai.lt</a>'
        }
    ])
fig.update_layout(margin={"r":0,"l":0,"b":0})
fig.show()
