<a href="https://colab.research.google.com/github/yair-go/DataBusMangemnet/blob/main/BusManagement.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Colab-only setup: mount Google Drive at /content/drive so that the input
# files referenced through source_directory can be read by the cells below.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import datetime
from xml.sax.saxutils import escape

import pandas as pd

In [3]:
# Folder on the mounted Drive that holds the input CSV files. The trailing
# slash matters: file names are appended with plain string concatenation.
source_directory = '/content/drive/MyDrive/BusManagement/Buses_data/'

# Create XML

In [3]:
def get_xml_data(df, element_name):
    """Serialize every row of ``df`` into a flat list of XML fragments.

    Each row becomes ``<element_name>``, followed by one
    ``<column>value</column>`` fragment per column (in column order), followed
    by ``</element_name>``. The fragments carry no newlines or indentation and
    no enclosing root element.

    Args:
        df: DataFrame to serialize. Column labels are used verbatim as tag
            names, so they must already be valid XML names.
        element_name: Tag name wrapped around each row.

    Returns:
        list[str]: The fragments for all rows, in row order. Empty when ``df``
        has no rows.
    """
    xml_data = []
    columns = list(df.columns)
    # itertuples walks rows positionally, so rows are emitted correctly even
    # when index labels repeat (label lookup would return a Series there).
    for row in df.itertuples(index=False, name=None):
        xml_data.append('<{0}>'.format(element_name))  # Opening element tag
        for column, value in zip(columns, row):
            # Escape &, < and > so text values cannot break the markup.
            xml_data.append('<{0}>{1}</{0}>'.format(column, escape(str(value))))
        xml_data.append('</{0}>'.format(element_name))  # Closing element tag
    return xml_data


def save_xml_file(xml_data, root_element):
    """Write XML fragments to ``<root_element>.xml`` wrapped in a root element.

    The file is created (or overwritten) in the current working directory. Its
    content is ``<root_element>``, then every fragment of ``xml_data``
    back-to-back with no separators, then ``</root_element>``.

    Args:
        xml_data: Iterable of XML fragment strings. It is only read, never
            modified, so the same list can be saved again without the root
            tags piling up.
        root_element: Name of the root tag; also the output file's base name.
    """
    filename = root_element + '.xml'
    # Explicit UTF-8: the data contains non-ASCII text, and UTF-8 is what an
    # XML parser assumes when the document has no encoding declaration.
    with open(filename, 'w', encoding='utf-8') as f:  # Writing in XML file
        f.write('<{0}>'.format(root_element))
        f.writelines(xml_data)
        f.write('</{0}>'.format(root_element))

# Adjacent Stations

In [8]:
# Load the stop-times table; the first row of the CSV supplies the column names.
df = pd.read_csv(source_directory + 'stop_times.csv', header=0)

In [5]:
# Preview the first rows to check that the columns were parsed as expected.
df.head()

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,pickup_type,drop_off_type,shape_dist_traveled
0,10033625_091120,07:50:00,07:50:00,14050,1,0,1,0
1,10033625_091120,07:51:55,07:51:55,29369,2,0,1,457
2,10033625_091120,07:55:00,07:55:00,14073,3,0,1,1163
3,10033625_091120,07:55:42,07:55:42,14074,4,0,1,1337
4,10033625_091120,07:57:05,07:57:05,14072,5,0,1,1682


In [None]:
# Keep only the subset of stop-times columns carried into the adjacent-stations step.
adj = df[["trip_id", "arrival_time", "stop_id", "stop_sequence", "shape_dist_traveled"]]

In [13]:
# Preview the reduced table.
adj.head()

Unnamed: 0,trip_id,arrival_time,stop_id,stop_sequence,shape_dist_traveled
0,10033625_091120,07:50:00,14050,1,0
1,10033625_091120,07:51:55,29369,2,457
2,10033625_091120,07:55:00,14073,3,1163
3,10033625_091120,07:55:42,14074,4,1337
4,10033625_091120,07:57:05,14072,5,1682


In [15]:
# One group per trip_id. Nothing here sorts the rows, so the code below assumes
# each trip's rows already appear in stop_sequence order — TODO confirm.
grouped = adj.groupby(['trip_id'])

In [None]:
def get_AverageTime(frame: pd.DataFrame):
    """Return the travel time, in seconds, between consecutive rows of ``frame``.

    Entry ``i`` of the result is ``arrival_time`` of row ``i + 1`` minus
    ``arrival_time`` of row ``i``, so the list lines up one-to-one with the
    per-pair differences produced by ``get_distance``. Measuring every stop
    from the first row instead would give times that grow along the trip and
    do not describe the pair of adjacent stations.

    Args:
        frame: Rows of a single trip, in travel order, with an
            ``arrival_time`` column parseable by ``pd.to_timedelta``
            (e.g. ``"07:51:55"``).

    Returns:
        list: ``len(frame) - 1`` values (empty for fewer than two rows). Values
        are ``int`` seconds; a pair with a missing arrival time yields NaN.
    """
    arrival = pd.to_timedelta(frame["arrival_time"])
    # diff() leaves NaT in the first position (no predecessor); drop it.
    # total_seconds() keeps the day component and the sign, unlike `.seconds`.
    seconds = arrival.diff().iloc[1:].dt.total_seconds()
    return [int(s) if pd.notna(s) else s for s in seconds]

In [None]:
def get_distance(frame):
    """Return the distance covered between each pair of consecutive rows.

    Reads the ``shape_dist_traveled`` column of ``frame`` in row order and
    returns a list one entry shorter than the frame, where entry ``i`` is the
    value of row ``i + 1`` minus the value of row ``i``. A frame with fewer
    than two rows yields an empty list.
    """
    travelled = frame["shape_dist_traveled"].tolist()
    legs = []
    for position in range(1, len(travelled)):
        legs.append(travelled[position] - travelled[position - 1])
    return legs

In [None]:
def create_AdjacentStationsList(grouped):
    """Build the adjacent-station pairs of all trips and save them as XML.

    For every group (one trip) each pair of consecutive rows becomes one
    record with the columns ``AverageTime``, ``Distance``, ``Station1``,
    ``Station2`` and ``key``. Only the first record seen for each
    ``(Station1, Station2)`` pair is kept. The result is written through
    ``save_xml_file`` to ``AdjacentStationsList.xml`` with one
    ``<AdjacentStations>`` element per record.

    Args:
        grouped: Iterable of ``(trip_id, frame)`` pairs that supports
            ``len()``, e.g. the result of ``DataFrame.groupby``. Each frame
            needs the ``stop_id`` column plus whatever ``get_AverageTime`` and
            ``get_distance`` read.

    Side effects:
        Prints a progress line every 1000 trips and writes the XML file.
    """
    num_of_groups = len(grouped)
    columns = ["AverageTime", "Distance", "Station1", "Station2", "key"]
    pair_frames = []

    for i, (_trip_id, frame) in enumerate(grouped, start=1):
        stop_ids = frame["stop_id"].to_list()
        # A trip with fewer than two stops contributes no pairs.
        if len(stop_ids) >= 2:
            station1 = stop_ids[:-1]
            station2 = stop_ids[1:]
            pair_frames.append(pd.DataFrame(
                {"AverageTime": get_AverageTime(frame),
                 "Distance": get_distance(frame),
                 "Station1": station1,
                 "Station2": station2,
                 # Kept in its original form for consumers of the XML. It is
                 # not used for de-duplication: plain concatenation is
                 # ambiguous (12 + 345 and 123 + 45 both give "12345").
                 "key": [str(s) + str(t) for s, t in zip(station1, station2)],
                 },
                columns=columns))
        if 0 == i % 1000:
            print('{0} / {1}'.format(i, num_of_groups))

    if pair_frames:
        # One concat and one de-duplication pass at the end keeps the same
        # "first occurrence wins" rows as doing both inside the loop, without
        # re-copying the accumulated frame on every trip.
        adjacent_stations = (
            pd.concat(pair_frames, ignore_index=True)
            .drop_duplicates(subset=["Station1", "Station2"])
            .reset_index(drop=True)
        )
    else:
        adjacent_stations = pd.DataFrame(columns=columns)

    xml_data = get_xml_data(adjacent_stations, "AdjacentStations")
    save_xml_file(xml_data, "AdjacentStationsList")

In [None]:
# Build the adjacent-station pairs from every trip and write AdjacentStationsList.xml
# into the current working directory.
create_AdjacentStationsList(grouped)

# Stops

In [4]:
# Load the stops table; the first row of the CSV supplies the column names.
stops = pd.read_csv(source_directory + 'stops.csv', header=0)

In [5]:
# Export every row of the stops table as a <stop> element and write them,
# wrapped in a <stops> root, to stops.xml in the current working directory.
xml_data = get_xml_data(stops, "stop")
save_xml_file(xml_data, "stops")

# Lines

In [8]:
# Load the routes table; the first row of the CSV supplies the column names.
routes = pd.read_csv(source_directory + 'routes.csv', header=0)

In [9]:
# Preview up to the first 15 routes.
routes.head(15)

Unnamed: 0,route_id,agency_id,route_short_name,route_long_name,route_desc,route_type,route_color
0,1,25,1,ת. רכבת יבנה מערב-יבנה<->ת. רכבת יבנה מזרח-יבנ...,67001-1-#,3,
1,2,25,1,ת. רכבת יבנה מזרח-יבנה<->ת. רכבת יבנה מערב-יבנ...,67001-2-#,3,
2,3,25,2,ת. רכבת יבנה מערב-יבנה<->ת. רכבת יבנה מזרח-יבנ...,56002-1-#,3,
3,5,25,2,ת. רכבת יבנה מזרח-יבנה<->ת. רכבת יבנה מערב-יבנ...,56002-2-#,3,
4,7,25,3,ת. רכבת יבנה מערב-יבנה<->ת. רכבת יבנה מזרח-יבנ...,49003-1-#,3,
5,8,25,3,ת. רכבת יבנה מזרח-יבנה<->ת. רכבת יבנה מערב-יבנ...,49003-2-#,3,
6,9,25,4,ת. רכבת יבנה מערב-יבנה<->ת. רכבת יבנה מזרח-יבנ...,47004-1-#,3,
7,10,25,4,ת. רכבת יבנה מזרח-יבנה<->ת. רכבת יבנה מערב-יבנ...,47004-2-#,3,
8,11,25,5,ת. רכבת יבנה מערב-יבנה<->הרימון/הברוש-בן זכאי-1#,41005-1-#,3,
9,12,25,5,הרימון/הברוש-בן זכאי<->ת. רכבת יבנה מערב-יבנה-2#,41005-2-#,3,


In [6]:
# Load the trips table (trips.csv) into `lines`; the first row of the CSV
# supplies the column names.
lines = pd.read_csv(source_directory + 'trips.csv', header=0)

In [14]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric
# columns of the trips table. DataFrame has no `des` method, so the call is
# spelled out as `describe`.
lines.describe()

Unnamed: 0,route_id,service_id,direction_id,shape_id
count,205637.0,205637.0,205637.0,205637.0
mean,12452.463501,6975.660937,0.455983,111107.454636
std,7871.797839,4400.811028,0.49806,10080.30877
min,1.0,1.0,0.0,51356.0
25%,6535.0,2732.0,0.0,108597.0
50%,11514.0,7355.0,0.0,114876.0
75%,17558.0,10516.0,1.0,117747.0
max,28418.0,15131.0,1.0,119528.0
