# Lab 1

## Imports

In [2]:
from collections import defaultdict

import pandas as pd
from datetime import datetime
import numpy as np
from collections import defaultdict

### Data

In [30]:
def normalize_time(time):
    h, m, s = map(int, time.split(':'))
    h = h % 24
    return datetime.strptime(f"{h:02}:{m:02}:{s:02}", "%H:%M:%S")

df = pd.read_csv('connection_graph.csv', index_col=0, dtype={'line': str})
df = df.drop(columns='company')

df['departure_time'] = df['departure_time'].apply(normalize_time)
df['arrival_time'] = df['arrival_time'].apply(normalize_time)
df = df[df.duplicated(keep=False)]
df

Unnamed: 0,line,departure_time,arrival_time,start_stop,end_stop,start_stop_lat,start_stop_lon,end_stop_lat,end_stop_lon
0,A,1900-01-01 20:52:00,1900-01-01 20:53:00,Zajezdnia Obornicka,Paprotna,51.148737,17.021069,51.147752,17.020539
1,A,1900-01-01 20:53:00,1900-01-01 20:54:00,Paprotna,Obornicka (Wołowska),51.147752,17.020539,51.144385,17.023735
2,A,1900-01-01 20:54:00,1900-01-01 20:55:00,Obornicka (Wołowska),Bezpieczna,51.144385,17.023735,51.141360,17.026376
3,A,1900-01-01 20:55:00,1900-01-01 20:57:00,Bezpieczna,Bałtycka,51.141360,17.026376,51.136632,17.030617
4,A,1900-01-01 20:57:00,1900-01-01 20:59:00,Bałtycka,Broniewskiego,51.136632,17.030617,51.135851,17.037383
...,...,...,...,...,...,...,...,...,...
996515,967,1900-01-01 18:38:00,1900-01-01 18:39:00,Smolec - Lipowa/pętla,Smolec - Dębowa (sklep),51.075005,16.884421,51.076932,16.881543
996516,967,1900-01-01 18:39:00,1900-01-01 18:41:00,Smolec - Dębowa (sklep),Krzeptów - skrzy.,51.076932,16.881543,51.088752,16.875808
996517,967,1900-01-01 18:41:00,1900-01-01 18:42:00,Krzeptów - skrzy.,Krzeptów - Boisko,51.088752,16.875808,51.090678,16.885584
996518,967,1900-01-01 18:42:00,1900-01-01 18:43:00,Krzeptów - Boisko,Krzeptów - Dolina Krzeptowa,51.090678,16.885584,51.092268,16.892523


## Exercise 1

### Variables

In [124]:
beginning_stop = "Paprotna"
final_stop = "Kasprowicza"
optimization = ""
starting_time = datetime.strptime("20:53:00", "%H:%M:%S")

In [127]:
class Node:
    def __init__(self, name):
        self.name = name
        self.eta = None
        self.nodes = defaultdict(list)
        self.previous_stop = None

    def __repr__(self):
        return f"Node({self.name}, ETA={self.eta})"

    def reproduce_path(self):
        path = [self]

        previous_stop = self.previous_stop
        while previous_stop is not None:
            path.append(previous_stop)
            previous_stop = previous_stop.previous_stop

        return path


class Graph:
    def __init__(self, data):
        start_stops = data['start_stop'].unique()
        end_stops = data['end_stop'].unique()
        stops = {stop: Node(stop) for stop in np.union1d(start_stops, end_stops)}

        for start_stop, group in data.groupby('start_stop'):
            for row in group.itertuples(index=False):
                stops[start_stop].nodes[row.end_stop].append((row.departure_time, row.arrival_time))

        for node in stops.values():
            for end_stop in node.nodes:
                node.nodes[end_stop].sort(key=lambda x: x[1])

        self.stops = stops

    def run_dijkstra(self):
        current_node: Node = self.stops.pop(beginning_stop)
        current_node.eta = pd.Timedelta(hours=0)
        not_inf_nodes = []
        last_stop_arr = starting_time

        # Updating the neighbour distance
        while current_node.name != final_stop:
            for next_stop_name, timetable in current_node.nodes.items():
                if next_stop_name in self.stops:
                    next_stop = self.stops[next_stop_name]
                    if next_stop not in not_inf_nodes:
                        not_inf_nodes.append(next_stop)

                    next_stop_eta = None
                    for connection in timetable:
                        dep_time, arr_time = connection
                        if dep_time >= last_stop_arr and next_stop_eta is None:
                            tmp_eta = arr_time - last_stop_arr
                            if tmp_eta < pd.Timedelta(hours=0):
                                tmp_eta += pd.Timedelta(hours=24)

                            next_stop_eta = tmp_eta + current_node.eta

                    if next_stop_eta is None:
                        next_stop_eta = timetable[0][1] - last_stop_arr + pd.Timedelta(hours=24)

                    if next_stop.eta is None or next_stop.eta > next_stop_eta:
                        next_stop.eta = next_stop_eta
                        next_stop.previous_stop = current_node

            not_inf_nodes.sort(key=lambda x: x.eta)
            current_node = self.stops.pop(not_inf_nodes.pop(0).name)
            last_stop_arr = starting_time + current_node.eta
        print(starting_time, current_node, last_stop_arr)
        print(current_node.reproduce_path())

y = Graph(df)
y.run_dijkstra()
# df.loc[df['end_stop'] == 'Żórawina - Niepodległości (Mostek)']

1900-01-01 20:53:00 Node(Kasprowicza, ETA=0 days 00:09:00) 1900-01-01 21:02:00 1900-01-01 21:02:00
[Node(Kasprowicza, ETA=0 days 00:09:00), Node(Syrokomli, ETA=0 days 00:08:00), Node(Pola, ETA=0 days 00:07:00), Node(Broniewskiego, ETA=0 days 00:06:00), Node(Bałtycka, ETA=0 days 00:04:00), Node(Bezpieczna, ETA=0 days 00:02:00), Node(Obornicka (Wołowska), ETA=0 days 00:01:00), Node(Paprotna, ETA=0 days 00:00:00)]
