# Import des datasets SNCF pour INTERCITES, TER & TGV

#### Importer les données GTFS des TER, INTERCITES et TGV

In [None]:
import pandas as pd

# Load the five GTFS tables (stops, trips, stop_times, routes, calendar_dates)
# of each feed. The paths of a feed are read in the listed order and unpacked
# into one DataFrame per table.

# TER feed.
(stops_ter, trips_ter, stop_times_ter, routes_ter, calendar_dates_ter) = map(pd.read_csv, (
    '../data/input/ter_sncf/stops.csv',
    '../data/input/ter_sncf/trips.csv',
    '../data/input/ter_sncf/stop_times.csv',
    '../data/input/ter_sncf/routes.csv',
    '../data/input/ter_sncf/calendar_dates.csv',
))

# Intercités feed.
(stops_intercites, trips_intercites, stop_times_intercites, routes_intercites,
 calendar_dates_intercites) = map(pd.read_csv, (
    '../data/input/intercites/stops.csv',
    '../data/input/intercites/trips.csv',
    '../data/input/intercites/stop_times.csv',
    '../data/input/intercites/routes.csv',
    '../data/input/intercites/calendar_dates.csv',
))

# TGV feed (shipped as .txt files).
(stops_tgv, trips_tgv, stop_times_tgv, routes_tgv, calendar_dates_tgv) = map(pd.read_csv, (
    '../data/input/tgv_sncf/stops.txt',
    '../data/input/tgv_sncf/trips.txt',
    '../data/input/tgv_sncf/stop_times.txt',
    '../data/input/tgv_sncf/routes.txt',
    '../data/input/tgv_sncf/calendar_dates.txt',
))

# Calcul des temps de trajets en fonction des horaires et des trajets enregistrés

In [None]:
# Keep only the identifier, name and coordinates of every stop.
stop_columns = ['stop_id', 'stop_name', 'stop_lat', 'stop_lon']
stops_filtered_ter = stops_ter.loc[:, stop_columns]
stops_filtered_tgv = stops_tgv.loc[:, stop_columns]

### Trajets TER

In [None]:
# Attach the trip attributes to every TER stop time.
stop_times_trips_ter = pd.merge(stop_times_ter, trips_ter, on='trip_id')

# Parse the time columns as durations (timedeltas) so they can be subtracted.
for time_column in ('departure_time', 'arrival_time'):
    stop_times_trips_ter[time_column] = pd.to_timedelta(stop_times_trips_ter[time_column])

# GTFS defines the order of the stops of a trip through `stop_sequence`, not
# through the position of the rows in the file. Look up the successor of each
# stop on a view sorted by that column; the results are assigned back by index,
# so the row order of `stop_times_trips_ter` itself is left untouched.
ordered_ter = stop_times_trips_ter
if 'stop_sequence' in ordered_ter.columns:
    ordered_ter = ordered_ter.sort_values(['trip_id', 'stop_sequence'], kind='stable')

# The last stop of every trip has no successor and gets NaN / NaT.
next_stop_id_ter = ordered_ter.groupby('trip_id')['stop_id'].shift(-1)
next_departure_ter = ordered_ter.groupby('trip_id')['departure_time'].shift(-1)
stop_times_trips_ter['next_stop_id'] = next_stop_id_ter
stop_times_trips_ter['next_departure_time'] = next_departure_ter

# Minutes elapsed between the departure from this stop and the departure from
# the next stop of the same trip.
stop_times_trips_ter['travel_time'] = (
    stop_times_trips_ter['next_departure_time'] - stop_times_trips_ter['departure_time']
).dt.total_seconds() / 60.0

stop_times_trips_ter

### Trajets TGV

In [None]:
# Attach the trip attributes to every TGV stop time.
stop_times_trips_tgv = pd.merge(stop_times_tgv, trips_tgv, on='trip_id')

# Parse the time columns as durations (timedeltas) so they can be subtracted.
for time_column in ('departure_time', 'arrival_time'):
    stop_times_trips_tgv[time_column] = pd.to_timedelta(stop_times_trips_tgv[time_column])

# GTFS defines the order of the stops of a trip through `stop_sequence`, not
# through the position of the rows in the file. Look up the successor of each
# stop on a view sorted by that column; the results are assigned back by index,
# so the row order of `stop_times_trips_tgv` itself is left untouched.
ordered_tgv = stop_times_trips_tgv
if 'stop_sequence' in ordered_tgv.columns:
    ordered_tgv = ordered_tgv.sort_values(['trip_id', 'stop_sequence'], kind='stable')

# The last stop of every trip has no successor and gets NaN / NaT.
next_stop_id_tgv = ordered_tgv.groupby('trip_id')['stop_id'].shift(-1)
next_departure_tgv = ordered_tgv.groupby('trip_id')['departure_time'].shift(-1)
stop_times_trips_tgv['next_stop_id'] = next_stop_id_tgv
stop_times_trips_tgv['next_departure_time'] = next_departure_tgv

# Minutes elapsed between the departure from this stop and the departure from
# the next stop of the same trip.
stop_times_trips_tgv['travel_time'] = (
    stop_times_trips_tgv['next_departure_time'] - stop_times_trips_tgv['departure_time']
).dt.total_seconds() / 60.0

stop_times_trips_tgv

# Formatage des datasets et extraction du nom de la ville & positions GPS 

### Trajets TER

In [None]:
# Build the TER connection table: one row per pair of consecutive stops of a
# trip, enriched with the station names, the coordinates of both ends, the
# train type and a city name derived from each station name.

# Rows without a successor (last stop of a trip) carry NaN and are dropped.
connections_ter = (
    stop_times_trips_ter[['stop_id', 'next_stop_id', 'travel_time', 'trip_id']]
    .dropna()
    .rename(columns={'stop_id': 'stop_id_start', 'next_stop_id': 'stop_id_end'})
)

# Departure side: station name and coordinates, fetched in a single merge.
connections_ter = connections_ter.merge(
    stops_filtered_ter.rename(columns={
        'stop_id': 'stop_id_start',
        'stop_name': 'departure_station',
        'stop_lat': 'stop_lat_start',
        'stop_lon': 'stop_lon_start',
    }),
    on='stop_id_start',
)

# Arrival side: same lookup against the stops table with "_end" coordinates.
stops_df_ter_end = stops_filtered_ter.rename(columns={'stop_lat': 'stop_lat_end', 'stop_lon': 'stop_lon_end'})
connections_ter = connections_ter.merge(
    stops_df_ter_end.rename(columns={'stop_id': 'stop_id_end', 'stop_name': 'arrival_station'}),
    on='stop_id_end',
)

# Discard non-positive durations and exact duplicate rows.
connections_ter = (
    connections_ter[connections_ter['travel_time'] > 0]
    .drop_duplicates()
    .reset_index(drop=True)
)

connections_ter["trip_type"] = "TER"

# The city is the first word of the station name, unless that word is an
# article, in which case the full station name is kept. The same article list
# is applied to the departure and to the arrival column.
leading_articles = ["La", "Le", "Les"]
for station_column, city_column in (('departure_station', 'departure_city'),
                                    ('arrival_station', 'arrival_city')):
    first_word = connections_ter[station_column].str.split(" ").str[0]
    connections_ter[city_column] = first_word.where(
        ~first_word.isin(leading_articles), connections_ter[station_column]
    )

# Final column layout.
connections_ter = connections_ter[['stop_id_start', 'stop_id_end', 'travel_time', 'trip_id', 'departure_station',
         'stop_lat_start', 'stop_lon_start', 'stop_lat_end', 'stop_lon_end',
         'arrival_station', 'trip_type', 'departure_city', 'arrival_city']]

connections_ter

### Trajets TGV

In [None]:
# Build the TGV connection table: one row per pair of consecutive stops of a
# trip, enriched with the station names, the coordinates of both ends, the
# train type and a city name derived from each station name.

# Rows without a successor (last stop of a trip) carry NaN and are dropped.
connections_tgv = (
    stop_times_trips_tgv[['stop_id', 'next_stop_id', 'travel_time', 'trip_id']]
    .dropna()
    .rename(columns={'stop_id': 'stop_id_start', 'next_stop_id': 'stop_id_end'})
)

# Departure side: station name and coordinates, fetched in a single merge.
connections_tgv = connections_tgv.merge(
    stops_filtered_tgv.rename(columns={
        'stop_id': 'stop_id_start',
        'stop_name': 'departure_station',
        'stop_lat': 'stop_lat_start',
        'stop_lon': 'stop_lon_start',
    }),
    on='stop_id_start',
)

# Arrival side: same lookup against the stops table with "_end" coordinates.
stops_df_tgv_end = stops_filtered_tgv.rename(columns={'stop_lat': 'stop_lat_end', 'stop_lon': 'stop_lon_end'})
connections_tgv = connections_tgv.merge(
    stops_df_tgv_end.rename(columns={'stop_id': 'stop_id_end', 'stop_name': 'arrival_station'}),
    on='stop_id_end',
)

# Discard non-positive durations and exact duplicate rows.
connections_tgv = (
    connections_tgv[connections_tgv['travel_time'] > 0]
    .drop_duplicates()
    .reset_index(drop=True)
)

connections_tgv["trip_type"] = "TGV"

# The city is the first word of the station name, unless that word is an
# article, in which case the full station name is kept. The same article list
# is applied to the departure and to the arrival column.
leading_articles = ["La", "Le", "Les"]
for station_column, city_column in (('departure_station', 'departure_city'),
                                    ('arrival_station', 'arrival_city')):
    first_word = connections_tgv[station_column].str.split(" ").str[0]
    connections_tgv[city_column] = first_word.where(
        ~first_word.isin(leading_articles), connections_tgv[station_column]
    )

# Final column layout.
connections_tgv = connections_tgv[['stop_id_start', 'stop_id_end', 'travel_time', 'trip_id', 'departure_station',
         'stop_lat_start', 'stop_lon_start', 'stop_lat_end', 'stop_lon_end',
         'arrival_station', 'trip_type', 'departure_city', 'arrival_city']]



# Fusion des deux datasets

In [None]:
# Stack the TGV rows followed by the TER rows under a fresh 0..n-1 index.
connections = pd.concat([connections_tgv, connections_ter], ignore_index=True)

# For every (departure station, arrival station) pair keep only the row with
# the smallest travel time.
station_pair = ['departure_station', 'arrival_station']
fastest_rows = connections.groupby(station_pair)['travel_time'].idxmin()
connections = connections.loc[fastest_rows].reset_index(drop=True)

connections

# Spot check: direct connections whose derived cities are Bordeaux -> Marseille.
leaves_bordeaux = connections["departure_city"] == "Bordeaux"
reaches_marseille = connections["arrival_city"] == "Marseille"
connections[leaves_bordeaux & reaches_marseille]

# Calcul de la distance en km entre la ville de départ et la ville d'arrivée (non optimisé)

In [None]:
import geopy.distance

def calculate_distance(row):
    """Return the geodesic distance, in kilometres, between both ends of a connection.

    Args:
        row: Record exposing the keys ``stop_lat_start``, ``stop_lon_start``,
            ``stop_lat_end`` and ``stop_lon_end``.

    Returns:
        The ``kilometers`` value of ``geopy.distance.geodesic`` computed
        between the (lat, lon) pair of the start and that of the end.
    """
    lat_from, lon_from = row['stop_lat_start'], row['stop_lon_start']
    lat_to, lon_to = row['stop_lat_end'], row['stop_lon_end']
    span = geopy.distance.geodesic((lat_from, lon_from), (lat_to, lon_to))
    return span.kilometers

# Evaluate the distance row by row and store it in a new column.
connections['distance_km'] = connections.apply(calculate_distance, axis=1)

# Génération du graphe d'après le dataset formaté

In [None]:
import networkx as nx

# Directed graph with one edge per connection: departure station -> arrival
# station, carrying the travel time in the "weight" attribute.
G = nx.DiGraph()

G.add_weighted_edges_from(
    zip(
        connections['departure_station'],
        connections['arrival_station'],
        connections['travel_time'],
    ),
    weight='weight',
)

In [None]:
from networkx import shortest_path, shortest_path_length

# Route with the smallest summed "weight" between the two stations, shown
# together with that summed weight as a (path, length) tuple.
origin_station = 'Bordeaux Saint-Jean'
destination_station = 'Marseille Saint-Charles'

(
    shortest_path(G, source=origin_station, target=destination_station, weight="weight"),
    shortest_path_length(G, source=origin_station, target=destination_station, weight="weight"),
)