<a href="https://colab.research.google.com/github/vshalisko/python_at_JetBrainsAcademy/blob/main/Project_EasyRideCompany/project_easy_ride_company_stage_5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [29]:
# Sample bus-schedule payload: a JSON array of stop records, each carrying the
# keys bus_id, stop_id, stop_name, next_stop, stop_type and a_time. It is
# parsed by the Stage 1 & 2 cell and the parsed result feeds the later stages.
# NOTE: the payload is not clean. The last record of bus 256 stores next_stop
# as the string "0" rather than a number, and the a_time values of buses 128
# and 256 are not strictly increasing from one stop to the next.
input_json = ''' [
    {
        "bus_id": 128,
        "stop_id": 1,
        "stop_name": "Prospekt Avenue",
        "next_stop": 3,
        "stop_type": "S",
        "a_time": "08:12"
    },
    {
        "bus_id": 128,
        "stop_id": 3,
        "stop_name": "Elm Street",
        "next_stop": 5,
        "stop_type": "",
        "a_time": "08:19"
    },
    {
        "bus_id": 128,
        "stop_id": 5,
        "stop_name": "Fifth Avenue",
        "next_stop": 7,
        "stop_type": "O",
        "a_time": "08:17"
    },
    {
        "bus_id": 128,
        "stop_id": 7,
        "stop_name": "Sesame Street",
        "next_stop": 0,
        "stop_type": "F",
        "a_time": "08:07"
    },
    {
        "bus_id": 256,
        "stop_id": 2,
        "stop_name": "Pilotow Street",
        "next_stop": 3,
        "stop_type": "S",
        "a_time": "09:20"
    },
    {
        "bus_id": 256,
        "stop_id": 3,
        "stop_name": "Elm Street",
        "next_stop": 6,
        "stop_type": "",
        "a_time": "09:45"
    },
    {
        "bus_id": 256,
        "stop_id": 6,
        "stop_name": "Sunset Boulevard",
        "next_stop": 7,
        "stop_type": "",
        "a_time": "09:44"
    },
    {
        "bus_id": 256,
        "stop_id": 7,
        "stop_name": "Sesame Street",
        "next_stop": "0",
        "stop_type": "F",
        "a_time": "10:12"
    },
    {
        "bus_id": 512,
        "stop_id": 4,
        "stop_name": "Bourbon Street",
        "next_stop": 6,
        "stop_type": "S",
        "a_time": "08:13"
    },
    {
        "bus_id": 512,
        "stop_id": 6,
        "stop_name": "Sunset Boulevard",
        "next_stop": 0,
        "stop_type": "F",
        "a_time": "08:16"
    }
] '''

### Stages 1 & 2

In [35]:
import json
import re

# Stages 1 & 2: check every record for missing required fields and for
# type/format violations, tallying one error per offending field in `errors`.
data = json.loads(input_json)

# Per-field error counters plus the grand total.
errors = {
    "total": 0,
    "bus_id": 0,
    "stop_id": 0,
    "stop_name": 0,
    "next_stop": 0,
    "stop_type": 0,
    "a_time": 0,
}

# stop_type is the only field that may be absent or empty.
required_fields = ["bus_id", "stop_id", "stop_name", "next_stop", "a_time"]
fields_in_order = ["bus_id", "stop_id", "stop_name", "next_stop", "stop_type", "a_time"]

field_types = {
    "bus_id": int,
    "stop_id": int,
    "stop_name": str,
    "next_stop": int,
    "stop_type": str,
    "a_time": str,
}


def _field_is_valid(field, value):
    """Return True when a present, non-empty `value` is acceptable for `field`.

    The type is checked first so the format checks below only ever see
    strings; a wrongly typed stop_name or a_time is reported as invalid
    instead of raising TypeError inside the regex engine.
    """
    expected_type = field_types[field]
    if not isinstance(value, expected_type):
        return False
    # bool is a subclass of int in Python, so JSON true/false would otherwise
    # slip through as a valid integer id.
    if expected_type is int and isinstance(value, bool):
        return False

    if field == "stop_type":
        return value in ("S", "O", "F")
    # fullmatch (rather than match) rejects a trailing newline, which `$`
    # alone would tolerate.
    if field == "stop_name":
        return re.fullmatch(r"^[A-Z][a-zA-Z\s]* (Street|Road|Avenue|Boulevard)$", value) is not None
    if field == "a_time":
        return re.fullmatch(r"^(0[0-9]|1[0-9]|2[0-3]):[0-5][0-9]$", value) is not None
    return True


for record in data:
    for field in fields_in_order:
        # A missing key is treated exactly like an empty string.
        value = record.get(field, "")
        if value == "":
            # Empty is an error only for required fields.
            invalid = field in required_fields
        else:
            invalid = not _field_is_valid(field, value)

        # Each field contributes at most one error per record.
        if invalid:
            errors[field] += 1
            errors["total"] += 1



### Stage 5: arrival-time order check (extends the Stage 1 & 2 error report)

In [36]:
from datetime import datetime

# Stage 5: for every bus line, verify that arrival times strictly increase
# from one stop to the next (in the order the records appear in `data`).
# Each line contributes at most one error: checking stops at the first
# out-of-order stop.

# Group the stops of each line, keeping record order.
bus_times = {}
for record in data:
    bus_id = record.get("bus_id")
    a_time = record.get("a_time")
    stop_name = record.get("stop_name")
    if bus_id is not None and a_time is not None and stop_name is not None:
        bus_times.setdefault(bus_id, []).append({"stop_name": stop_name, "a_time": a_time})


def _parse_arrival_time(value):
    """Parse an HH:MM string; return None when `value` cannot be parsed.

    Malformed strings raise ValueError and non-string values raise TypeError
    in strptime. Both are treated as "not comparable" here because format
    problems are already counted by the Stage 1 & 2 validation.
    """
    try:
        return datetime.strptime(value, '%H:%M')
    except (ValueError, TypeError):
        return None


a_time_errors = 0
for bus_id, stops in bus_times.items():
    for current_stop, next_stop in zip(stops, stops[1:]):
        current_time = _parse_arrival_time(current_stop["a_time"])
        next_time = _parse_arrival_time(next_stop["a_time"])
        if current_time is None or next_time is None:
            # Skip pairs that cannot be compared.
            continue

        if next_time <= current_time:
            a_time_errors += 1
            break  # Stop checking this bus line after the first error

# NOTE: this updates the counters created by the Stage 1 & 2 cell in place, so
# re-running this cell without re-running that one counts these errors twice.
errors["a_time"] += a_time_errors
errors["total"] += a_time_errors

print(f"Type and field validation: {errors['total']} errors")
for field in fields_in_order:
    print(f"{field}: {errors[field]}")

Type and field validation: 3 errors
bus_id: 0
stop_id: 0
stop_name: 0
next_stop: 1
stop_type: 0
a_time: 2


### Stage 3

In [33]:
# Stage 3: collect the distinct stop ids served by each bus line and report
# how many stops every line has, in ascending bus_id order.
bus_stops = {}
for entry in data:
    line_id, stop = entry.get("bus_id"), entry.get("stop_id")
    # Records lacking either identifier cannot be attributed to a line.
    if line_id is None or stop is None:
        continue
    bus_stops.setdefault(line_id, set()).add(stop)

for line_id in sorted(bus_stops):
    stop_count = len(bus_stops[line_id])
    print(f"bus_id: {line_id} stops: {stop_count}")

bus_id: 128 stops: 4
bus_id: 256 stops: 4
bus_id: 512 stops: 2


### Stage 4

In [34]:
# Stage 4: make sure every bus line has exactly one start ("S") and one
# finish ("F") stop, then list the start, transfer and finish stops.
#
# The per-line start/finish lists are built here from `data`, so the cell
# runs on a fresh kernel without names left over from earlier sessions.
line_starts = {}
line_finishes = {}
for record in data:
    bus_id = record.get("bus_id")
    if bus_id is None:
        continue
    # Register the line even when this record is neither a start nor a
    # finish, so lines missing one of them are still checked below.
    line_starts.setdefault(bus_id, [])
    line_finishes.setdefault(bus_id, [])
    stop_type = record.get("stop_type")
    if stop_type == "S":
        line_starts[bus_id].append(record.get("stop_name"))
    elif stop_type == "F":
        line_finishes[bus_id].append(record.get("stop_name"))

# Report only the first offending line (in order of first appearance in
# `data`) and skip the summary in that case.
break_flag = False
for bus_id in line_starts:
    if len(line_starts[bus_id]) != 1 or len(line_finishes[bus_id]) != 1:
        print(f"There is no start or end stop for the line: {bus_id}")
        break_flag = True
        break

if not break_flag:
    # Map each stop name to the lines that serve it.
    all_stops = {}
    for record in data:
        bus_id = record.get("bus_id")
        stop_id = record.get("stop_id")
        stop_name = record.get("stop_name")
        if bus_id is not None and stop_id is not None and stop_name is not None:
            all_stops.setdefault(stop_name, []).append(bus_id)

    start_stop_names = set()
    finish_stop_names = set()
    transfer_stop_names = set()

    for record in data:
        stop_type = record.get("stop_type")
        stop_name = record.get("stop_name")
        if stop_name and stop_type:
            if stop_type == "S":
                start_stop_names.add(stop_name)
            elif stop_type == "F":
                finish_stop_names.add(stop_name)

    # A transfer stop is one served by at least two different lines.
    for stop_name, bus_ids in all_stops.items():
        if len(set(bus_ids)) >= 2:
            transfer_stop_names.add(stop_name)

    print(f"\nStart stops: {len(start_stop_names)} {sorted(start_stop_names)}")
    print(f"Transfer stops: {len(transfer_stop_names)} {sorted(transfer_stop_names)}")
    print(f"Finish stops: {len(finish_stop_names)} {sorted(finish_stop_names)}")


Start stops: 3 ['Bourbon Street', 'Pilotow Street', 'Prospekt Avenue']
Transfer stops: 3 ['Elm Street', 'Sesame Street', 'Sunset Boulevard']
Finish stops: 2 ['Sesame Street', 'Sunset Boulevard']
