In [199]:
%load_ext dotenv
%dotenv

import os
import pandas as pd
from geopy.geocoders import GoogleV3
from geopy.extra.rate_limiter import RateLimiter
import numpy as np
from datetime import timedelta, datetime

import seaborn as sns
import matplotlib.pyplot as plt



The dotenv extension is already loaded. To reload it, use:
  %reload_ext dotenv


In [200]:
# Load the raw train listing; the first CSV column is used as the row index.
df = pd.read_csv('../data/exemple.csv', index_col=0)

In [201]:
# Keep only the rows flagged 'OUI' in `od_happy_card`.
# The explicit copy makes the result an independent frame, so adding columns
# to it later does not raise pandas' SettingWithCopyWarning.
df = df[df['od_happy_card'] == 'OUI'].copy()

In [202]:
def compute_travel_time(df):
    """Add departure/arrival timestamps, travel time and a night-train flag.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the string columns ``date``, ``heure_depart`` and
        ``heure_arrivee``. ``date`` is concatenated with each hour column
        and the result is parsed with ``pd.to_datetime``.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with four extra columns, in this order:
        ``datetime_depart``, ``datetime_arrivee`` (shifted by one day for
        night trains), ``temps_trajet`` (whole minutes, as float) and
        ``night_train`` (bool). The input frame is left untouched.
    """
    # Work on a copy: the caller often passes a filtered slice, and writing
    # new columns into it would mutate shared data / trigger
    # SettingWithCopyWarning.
    out = df.copy()

    depart = pd.to_datetime(out['date'] + ' ' + out['heure_depart'])
    arrivee = pd.to_datetime(out['date'] + ' ' + out['heure_arrivee'])

    # An arrival earlier than the departure on the same calendar date means
    # the train arrives the following day.
    night_train = arrivee < depart
    arrivee = arrivee.where(~night_train, arrivee + pd.Timedelta(days=1))

    out['datetime_depart'] = depart
    out['datetime_arrivee'] = arrivee
    # Travel time in whole minutes, computed once from the corrected arrival.
    out['temps_trajet'] = (arrivee - depart).dt.total_seconds() // 60
    out['night_train'] = night_train

    return out

In [203]:
# Add the timestamp / travel-time columns, then parse `date` itself.
# `date` is converted only afterwards because compute_travel_time builds its
# timestamps by concatenating `date` as a string.
df = compute_travel_time(df)
df['date'] = pd.to_datetime(df['date'])

In [204]:
# Departure city, matched as a substring of `origine` / `destination` when filtering trips.
city_depart = 'PARIS'

In [205]:
# 31-day search window starting today (UTC).
# "Today" is read once, from a timezone-aware timestamp: datetime.utcnow() is
# deprecated since Python 3.12, and calling it twice could put the two ends of
# the range on different days around midnight.
today_utc = pd.Timestamp.now(tz='UTC').date()
days = pd.date_range(today_utc, today_utc + timedelta(days = 30), freq = 'D')
# dayofweek: Monday=0 ... Saturday=5, Sunday=6.
week_ends = days[days.dayofweek >= 5]

In [206]:
# Search options.
# NOTE(review): leaving_friday is not read by any cell visible in this section — confirm whether it is still needed.
leaving_friday = False
# When True, keep trips that either start from or arrive at the departure city.
aller_retour = True
# When True, trip combinations are built per week-end.
week_ends_only = True

# select

In [207]:
# Candidate trips; a copy, so the filters applied to `poss` never alter `df`.
poss = df.copy()

In [208]:
# Restrict the candidates to trips touching the departure city:
# always those leaving from it, plus those arriving there for round trips.
from_city = poss.origine.str.contains(city_depart)
if aller_retour:
    keep = from_city | poss.destination.str.contains(city_depart)
else:
    keep = from_city
poss = poss[keep]

In [209]:
def filter_datetime(df, date, hour, kind = 'before'):
    """Select the rows of one date whose index hour is before or after ``hour``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index exposes ``.hour`` (i.e. a datetime index).
    date
        Label passed to ``df.loc`` to pick the rows of interest.
    hour : int
        Hour threshold; the comparison is strict, so rows falling exactly
        on ``hour`` are excluded in both modes.
    kind : {'before', 'after'}
        Which side of the threshold to keep.

    Returns
    -------
    pandas.DataFrame or None
        The filtered rows, or ``None`` when ``kind`` is neither 'before'
        nor 'after'.
    """
    day_rows = df.loc[date]
    if kind not in ('before', 'after'):
        return None
    hours = day_rows.index.hour
    keep = hours < hour if kind == 'before' else hours > hour
    return day_rows[keep]

In [210]:
# Outbound candidates leave from the departure city; return candidates arrive there.
# Use the configured `city_depart` rather than a hard-coded city name so that
# changing the setting actually changes the search.
poss_al = poss[poss.origine.str.contains(city_depart)]
poss_re = poss[poss.destination.str.contains(city_depart)]

In [211]:
week_ends

DatetimeIndex(['2022-04-02', '2022-04-03', '2022-04-09', '2022-04-10',
               '2022-04-16', '2022-04-17', '2022-04-23', '2022-04-24'],
              dtype='datetime64[ns]', freq=None)

In [212]:
# Build every (outbound, return) pair that fits inside a single week-end.
if not week_ends_only:
    # Only the week-end search is implemented. Without this guard the cell
    # fails at the final concat with an unrelated "No objects to concatenate".
    raise NotImplementedError('Only week-end trips are supported: set week_ends_only = True')

# Label every week-end day with the Saturday of its own week-end
# (dayofweek: Saturday=5, Sunday=6). Grouping on that label keeps Saturday and
# Sunday together even when the search window starts on a Sunday, where
# stepping through `week_ends` two by two would pair a Sunday with the
# following Saturday.
saturdays = week_ends - pd.to_timedelta(week_ends.dayofweek - 5, unit='D')

combs_per_week_end = []
for saturday in saturdays.unique():
    week_end_days = week_ends[saturdays == saturday]
    poss_al_we = poss_al[poss_al.date.isin(week_end_days)]
    poss_re_we = poss_re[poss_re.date.isin(week_end_days)]

    # Pair each outbound trip with the return trips leaving from its destination.
    trips = pd.merge(poss_al_we, poss_re_we, left_on = 'destination', right_on = 'origine', how = 'inner', suffixes = ['_al','_re'])

    trips['total_temps_trajet'] = trips['temps_trajet_al'] + trips['temps_trajet_re']
    trips['total_temps_sur_place'] = trips['datetime_depart_re'] - trips['datetime_arrivee_al']

    # Keep pairs whose return leaves after the outbound arrives. The explicit
    # copy makes `combs` an independent frame, so adding a column below does
    # not raise SettingWithCopyWarning.
    combs = trips[trips['total_temps_sur_place'].dt.total_seconds() > 0].copy()
    combs['count_nuits'] = combs['datetime_depart_re'].dt.date - combs['datetime_arrivee_al'].dt.date

    combs_per_week_end.append(combs)
    print(week_end_days)
    print(combs.shape)

all_combs = pd.concat(combs_per_week_end)


DatetimeIndex(['2022-04-02', '2022-04-03'], dtype='datetime64[ns]', freq=None)
(979, 35)
DatetimeIndex(['2022-04-09', '2022-04-10'], dtype='datetime64[ns]', freq=None)
(908, 35)
DatetimeIndex(['2022-04-16', '2022-04-17'], dtype='datetime64[ns]', freq=None)
(136, 35)
DatetimeIndex(['2022-04-23', '2022-04-24'], dtype='datetime64[ns]', freq=None)
(17, 35)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [213]:
# Minimum time to spend at the destination, in hours (going by the name).
# NOTE(review): no visible cell reads this value, so it currently filters nothing — confirm intent.
min_h_sur_place = 8

In [214]:
# Work on a copy so the filters applied to `combs` leave `all_combs` intact.
combs = all_combs.copy()

In [215]:
# Ratio of the total travel time to the time spent on site, both in whole minutes.
minutes_sur_place = combs['total_temps_sur_place'].dt.total_seconds() // 60
combs['share_trajet_sur_place'] = combs['total_temps_trajet'] / minutes_sur_place

In [216]:
# Keep only the combinations whose `share_trajet_sur_place` is strictly below 1.
combs = combs[combs['share_trajet_sur_place'] < 1]

In [218]:
# Longest stay first, so that head(1) picks, for every
# (outbound date, destination, number of nights) group, the combination
# with the most time on site.
combs = combs.sort_values('total_temps_sur_place', ascending=False)
max_temps = combs.groupby(['date_al','destination_al','count_nuits']).head(1)

# Plot

In [219]:
# Google Maps API key read from the environment; os.getenv returns None when
# MAPS_API_KEY is not set. It is passed to bokeh's gmap() in plot_circles.
gg_key = os.getenv("MAPS_API_KEY")

# Per-city coordinates; the first CSV column is used as the index and the
# `lat` / `lon` columns are merged onto the trips for plotting.
df_cities = pd.read_csv('../data/cities_loc.csv', index_col=0)

In [220]:
# Plotting frame. The city names get a ', FRANCE' suffix because they are
# merged against the index of df_cities.
# NOTE(review): presumably that index holds 'CITY, FRANCE' labels — confirm against cities_loc.csv.
df_plot = max_temps.copy()
df_plot['destination_al'] = df_plot['destination_al'] + ', FRANCE'
df_plot['origine_al'] = df_plot['origine_al'] + ', FRANCE'

In [221]:
# Attach coordinates for the destination, then for the origin. Each merge adds
# generic `lat` / `lon` columns that are renamed straight away so the second
# merge does not collide with the first.
coords = df_cities[['lat','lon']]
df_plot = (
    pd.merge(df_plot, coords, left_on='destination_al', right_index= True)
    .rename(columns={'lon':'destination_lon','lat':'destination_lat'})
)
df_plot = (
    pd.merge(df_plot, coords, left_on='origine_al', right_index= True)
    .rename(columns={'lon':'origine_lon','lat':'origine_lat'})
)

In [222]:
# Cast to strings the columns that the hover tooltips of plot_circles reference
# (durations, night count and departure timestamps).
hover_cols = ['total_temps_trajet', 'total_temps_sur_place', 'count_nuits','datetime_depart_al','datetime_depart_re']
df_plot[hover_cols] = df_plot[hover_cols].astype(str)

In [223]:
# Map centre (latitude, longitude) passed to plot_circles.
# NOTE(review): presumably the approximate centre of France — confirm.
center_lat, center_lon = 46.871826, 2.459010

from bokeh.io import show
from bokeh.plotting import gmap, output_file
from bokeh.models import ColumnDataSource, HoverTool, ColorBar, GMapOptions
from bokeh.transform import linear_cmap
from bokeh.palettes import Plasma256 as palette

# Figure size handed to gmap() as width / height in plot_circles.
bokeh_width, bokeh_height = 500,400

def plot_circles(df, lat, lng, zoom=5, map_type='roadmap', save = False):
    """Draw one circle per row of ``df`` on a Google map and display it.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide ``destination_lon`` / ``destination_lat`` for the circle
        positions, plus the columns referenced by the hover tooltips.
    lat, lng : float
        Centre of the map.
    zoom : int
        Zoom level forwarded to ``GMapOptions``.
    map_type : str
        Map type forwarded to ``GMapOptions``.
    save : bool
        When True, ``output_file('train_from_paris_map.html')`` is called
        before the figure is shown.

    Returns
    -------
    The bokeh figure created by ``gmap``.

    Notes
    -----
    Relies on the notebook-level ``gg_key``, ``bokeh_width`` and
    ``bokeh_height``.
    """
    # (label, '@column') pairs: '@name' refers to the column `name`
    # of the ColumnDataSource built from `df`.
    tooltip_fields = [
        ('ville', '@destination_al'),
        ('nombre nuit', '@count_nuits'),
        ('temps trajet', '@total_temps_trajet'),
        ('temps sur place', '@total_temps_sur_place'),
        ('train aller', '@datetime_depart_al'),
        ('train retour', '@datetime_depart_re'),
    ]
    hover = HoverTool(tooltips = tooltip_fields)

    map_options = GMapOptions(lat=lat, lng=lng,
                              map_type=map_type, zoom=zoom)
    figure = gmap(gg_key, map_options, title='France',
                  width=bokeh_width, height=bokeh_height,
                  tools=[hover, 'reset', 'wheel_zoom', 'pan'])

    # The x / y arguments are column names resolved against the data source.
    figure.circle('destination_lon', 'destination_lat', size=10, alpha=1,
                  source=ColumnDataSource(df))

    if save:
        output_file('train_from_paris_map.html')
    show(figure)
    return figure

In [224]:
# Show the map of reachable destinations without configuring an HTML output file.
plot_circles(df_plot, center_lat, center_lon, save = False)