We get the station list from the page https://www.veturilo.waw.pl/mapa-stacji/. It provides each station's address, 
geographic coordinates and the number of available bikes. We don't want to send someone to a station with 0 bikes. Bike availability is updated almost in real time. 

## Getting the stations co-ordinates

In [1]:
#importing libraries required for web scraping 

import requests
import lxml.html as lh
import pandas as pd

In [244]:
# Download the station-map page and collect every table row (<tr>) found in it.
url='https://www.veturilo.waw.pl/mapa-stacji/'
# A timeout keeps the cell from hanging indefinitely if the site does not respond.
page = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page.
page.raise_for_status()
doc = lh.fromstring(page.content)
tr_elements = doc.xpath('//tr')

In [245]:
# Sanity check: number of cells in each of the first 12 table rows
list(map(len, tr_elements[:12]))

[5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5]

In [246]:
# Build one (header text, values) pair per cell of the first table row.
# The values lists start empty; they are filled when the table body is parsed.
tr_elements = doc.xpath('//tr')
col = [(header_cell.text_content(), []) for header_cell in tr_elements[0]]

In [247]:
# Show the (header, values) pairs collected so far; the values lists are still empty
col

[('Lokalizacja', []),
 ('Dostępne rowery', []),
 ('Ilość stojaków', []),
 ('Wolne stojaki', []),
 ('Współrzędne', [])]

In [248]:
#Get the table data
# Row 0 holds the headers, so the data starts at row 1.
for row in tr_elements[1:]:
    # Stop at the first row that does not have the 5 expected cells.
    if len(row) != 5:
        break
    for i, cell in enumerate(row.iterchildren()):
        data = cell.text_content()
        # Column 0 is kept as text; the other columns become int when they parse as one.
        if i > 0:
            try:
                data = int(data)
            except ValueError:
                # Not an integer (e.g. the "lat,lng" coordinates text): keep the raw string.
                pass
        col[i][1].append(data)

In [249]:
# Check the lengths to make sure every column collected the same number of values
[len(values) for _, values in col]

[394, 394, 394, 394, 394]

In [250]:
# Build the DataFrame: each header becomes a column name, its list the column values
columns_by_title = dict(col)
Bike_Stations = pd.DataFrame(columns_by_title)

## Cleaning data

In [251]:
# Replace the Polish headers with English names (matched by position)
english_column_names = [
    'Adress',
    'Available_Bikes',
    'Bike_Stands',
    'Empty_Stands',
    'Coordinates',
]
Bike_Stations.columns = english_column_names

In [252]:
# Bike_Stands and Empty_Stands are not needed from here on, so drop them
Bike_Stations = Bike_Stations.drop(columns=['Bike_Stands', 'Empty_Stands'])

In [253]:
# Split the "lat,lng" text in Coordinates into two columns: Latitude and Longitude.
# `n` is passed by keyword because pandas 2.0 no longer accepts it positionally.
lat_lng = Bike_Stations['Coordinates'].str.split(',', n=1, expand=True)
lat_lng = lat_lng.rename(columns={0: 'Latitude', 1: 'Longitude'})
Bike_Stations = Bike_Stations.join(lat_lng)
                                  

In [254]:
# The raw Coordinates text is redundant once it has been split, so remove it
Bike_Stations = Bike_Stations.drop(columns='Coordinates')

In [255]:
# Preview the first five rows to confirm the table has the desired shape
Bike_Stations.head(n=5)

Unnamed: 0,Adress,Available_Bikes,Latitude,Longitude
0,1 Sierpnia - DK Włochy,10,52.1946888,20.9667823
1,11 Listopada - Ratuszowa,23,52.261449,21.037614
2,11 listopada - Środkowa,13,52.263109,21.0381895
3,Abrahama - Kapelanów AK,14,52.22837,21.08581
4,Afrykańska - Egipska,12,52.2251969,21.0712892


In [256]:
# Check the column dtypes; Latitude and Longitude come out of the string split
# as text (object) and still need converting to numbers
Bike_Stations.dtypes

Adress             object
Available_Bikes     int64
Latitude           object
Longitude          object
dtype: object

In [257]:
# Latitude and Longitude are still text; cast both columns to float in one step
Bike_Stations = Bike_Stations.astype({'Latitude': float, 'Longitude': float})

In [258]:
# Confirm that Latitude and Longitude are now floats
Bike_Stations.dtypes

Adress              object
Available_Bikes      int64
Latitude           float64
Longitude          float64
dtype: object

## Put the stations on the map

#### Installing and importing libraries

In [17]:
pip install geopy

Note: you may need to restart the kernel to use updated packages.


In [18]:
from geopy.geocoders import Nominatim  # converts an address into latitude and longitude values
import folium  # map rendering library

# Show every column and every row when previewing a DataFrame
for option_name in ('display.max_columns', 'display.max_rows'):
    pd.set_option(option_name, None)

print('Libraries imported.')

Libraries imported.


In [259]:
#Get Warsaw Coordinates
address = 'Warsaw, Poland'

geolocator = Nominatim(user_agent="warsaw_coordinates")
location = geolocator.geocode(address)
# geocode() returns None when nothing matches; fail with a clear message
# instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Warsaw are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Warsaw are 52.2337172, 21.07141112883227.


In [260]:
# Create a map centred on Warsaw and add one circle marker per bike station
map_warsaw = folium.Map(location=[latitude, longitude], zoom_start=11)

# Each marker's popup shows the station address and its available-bike count
for station in Bike_Stations.itertuples(index=False):
    station_popup = folium.Popup(
        '{},available bikes: {}'.format(station.Adress, station.Available_Bikes),
        parse_html=True,
    )
    station_marker = folium.CircleMarker(
        [station.Latitude, station.Longitude],
        radius=5,
        popup=station_popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    station_marker.add_to(map_warsaw)

map_warsaw

## Get the start and end addresses as input and look up their coordinates

In [261]:
def _geocode_or_fail(address, user_agent):
    """Return the geopy location for `address`.

    Raises:
        ValueError: if the geocoder finds no match (geocode() returned None).
    """
    found = Nominatim(user_agent=user_agent).geocode(address)
    if found is None:
        raise ValueError('Could not geocode address: {}'.format(address))
    return found


start_address = 'Lisa-Kuli Leopolda 10, Warsaw, Poland'

location = _geocode_or_fail(start_address, "start_point_coordinates")
latitude_start = location.latitude
longitude_start = location.longitude
print('The geograpical coordinate of start adress are {}, {}.'.format(latitude_start, longitude_start))

end_address = 'Egejska 19, Warsaw, Poland'

location = _geocode_or_fail(end_address, "end_point_coordinates")
latitude_end = location.latitude
longitude_end = location.longitude
print('The geograpical coordinate of end adress are {}, {}.'.format(latitude_end, longitude_end))


The geograpical coordinate of start adress are 52.262623, 20.989109770897436.
The geograpical coordinate of end adress are 52.1806331, 21.056111.


## Add Start and End point to the map

In [262]:
# Mark the start address on the station map with a red circle and a "Start Point" popup
label_start = folium.Popup('Start Point', parse_html=True)
start_marker = folium.CircleMarker(
    location=[latitude_start, longitude_start],
    radius=7,
    popup=label_start,
    color='red',
    fill=True,
    fill_color='#FF0000',
    fill_opacity=0.7,
    parse_html=False,
)
start_marker.add_to(map_warsaw)


<folium.features.CircleMarker at 0x15c59118e08>

In [263]:
# Mark the end address on the station map with a red circle and an "End Point" popup
label_end = folium.Popup('End Point', parse_html=True)
end_marker = folium.CircleMarker(
    location=[latitude_end, longitude_end],
    radius=7,
    popup=label_end,
    color='red',
    fill=True,
    fill_color='#FF0000',
    fill_opacity=0.7,
    parse_html=False,
)
end_marker.add_to(map_warsaw)


<folium.features.CircleMarker at 0x15c588fec88>

In [264]:
# Redisplay the station map, which now also carries the start and end markers
map_warsaw

In [265]:
from geopy import distance

# Distance in kilometres between the start and end addresses
coords_1 = (latitude_start, longitude_start)
coords_2 = (latitude_end, longitude_end)
start_to_end = distance.distance(coords_1, coords_2)

print(start_to_end.km)

10.207679084931922


## Calculating distance between Start Point and bike stations with available bikes. 

In [266]:
# Keep only the stations whose available-bike count is not zero.
# .copy() makes this an independent frame, so columns added to it later do not
# trigger SettingWithCopyWarning.
Bike_Stations_Available = Bike_Stations[Bike_Stations.Available_Bikes != 0].copy()

In [None]:
#Calculating distances
# Distance in km from the start address to every station that has bikes available.
coords_1 = (latitude_start, longitude_start)
start_distances_km = [
    distance.distance(coords_1, (station_lat, station_lng)).km
    for station_lat, station_lng in zip(Bike_Stations_Available['Latitude'],
                                        Bike_Stations_Available['Longitude'])
]
# assign() returns a new frame, so nothing is written into a filtered slice
# (the row-by-row .loc writes could raise SettingWithCopyWarning).
Bike_Stations_Available = Bike_Stations_Available.assign(Distance=start_distances_km)
        
    

In [268]:
# Keep the 5 stations nearest to the start address
Closes_Stations_Start = Bike_Stations_Available.sort_values(by='Distance').iloc[:5]

In [269]:
# Display the five stations closest to the start address
Closes_Stations_Start

Unnamed: 0,Adress,Available_Bikes,Latitude,Longitude,Distance
240,Plac Inwalidów,14,52.264502,20.9896,0.211719
385,Zajączka - Bitwy Pod Rokitną,11,52.261257,20.986319,0.243702
237,Plac Grunwaldzki,17,52.262405,20.980255,0.605042
161,Metro Dworzec Gdański,3,52.257602,20.994498,0.668952
42,Arkadia,18,52.25574,20.984343,0.832194


## Calculating the distance between the End Point and the bike stations (we are going to return the bike to one of these stations, so we don't care whether bikes are available there)

Later on we can get the distances from the distance matrix, but this method is faster, so I decided to compute the distances this way first and then run the distance matrix only for the top 5 results.

In [270]:
#Calculating distances
# Distance in km from the end address to every station (bike availability is ignored here).
coords_1 = (latitude_end, longitude_end)
# Build the whole column once instead of writing one cell at a time with .loc.
Bike_Stations['Distance'] = [
    distance.distance(coords_1, (station_lat, station_lng)).km
    for station_lat, station_lng in zip(Bike_Stations['Latitude'],
                                        Bike_Stations['Longitude'])
]
        

In [271]:
# Keep the 5 stations nearest to the end address
Closes_Stations_End = Bike_Stations.sort_values(by='Distance').iloc[:5]

In [272]:
# Display the five stations closest to the end address
Closes_Stations_End

Unnamed: 0,Adress,Available_Bikes,Latitude,Longitude,Distance
332,Sobieskiego - św. Bonifacego,20,52.181847,21.056282,0.1356
287,Pętla Stegny,17,52.177916,21.049562,0.540431
331,Sobieskiego - Nałęczowska,13,52.17538,21.062904,0.746716
316,Sadyba Best Mall,17,52.1873,21.062551,0.862682
228,PKN Orlen - Śródziemnomorska,39,52.17668,21.04525,0.863388


## Calculate how long it is going to take to walk to the stations

In [72]:
pip install openrouteservice


Collecting openrouteservice
  Downloading openrouteservice-2.2.3-py2.py3-none-any.whl (37 kB)
Installing collected packages: openrouteservice
Successfully installed openrouteservice-2.2.3
Note: you may need to restart the kernel to use updated packages.


In [273]:
import os

import openrouteservice as ors

# Read the API key from the environment so no credential is stored in the notebook.
ors_api_key = os.environ.get('ORS_API_KEY')
if not ors_api_key:
    raise RuntimeError('Set the ORS_API_KEY environment variable to your openrouteservice API key.')
client = ors.Client(key=ors_api_key)

In [274]:
#Calculating walking duration from start location to top 5 stations
start_walk_durations = []
for station_lng, station_lat in zip(Closes_Stations_Start['Longitude'],
                                    Closes_Stations_Start['Latitude']):
    # Locations are passed as [longitude, latitude]: start address first, station second.
    coordinates = [[longitude_start, latitude_start], [station_lng, station_lat]]
    matrix = client.distance_matrix(
        locations=coordinates,
        profile='foot-walking',
        metrics=['duration'],
        validate=False)
    # Row 0 belongs to the start address; its last entry is the duration to the station.
    start_walk_durations.append(matrix['durations'][0][-1])
# Add the column in one step on a new frame rather than cell by cell with .loc.
Closes_Stations_Start = Closes_Stations_Start.assign(Duration_sec=start_walk_durations)
Closes_Stations_Start

Unnamed: 0,Adress,Available_Bikes,Latitude,Longitude,Distance,Duration_sec
240,Plac Inwalidów,14,52.264502,20.9896,0.211719,234.37
385,Zajączka - Bitwy Pod Rokitną,11,52.261257,20.986319,0.243702,245.81
237,Plac Grunwaldzki,17,52.262405,20.980255,0.605042,521.79
161,Metro Dworzec Gdański,3,52.257602,20.994498,0.668952,669.08
42,Arkadia,18,52.25574,20.984343,0.832194,1051.21


In [275]:
#Calculating walking duration from top 5 bike end bike stations and end location
end_walk_durations = []
for station_lng, station_lat in zip(Closes_Stations_End['Longitude'],
                                    Closes_Stations_End['Latitude']):
    # Locations are passed as [longitude, latitude]: station first, end address second.
    coordinates = [[station_lng, station_lat], [longitude_end, latitude_end]]
    matrix = client.distance_matrix(
        locations=coordinates,
        profile='foot-walking',
        metrics=['duration'],
        validate=False)
    # Row 0 belongs to the station; its last entry is the duration to the end address.
    end_walk_durations.append(matrix['durations'][0][-1])
# Add the column in one step on a new frame rather than cell by cell with .loc.
Closes_Stations_End = Closes_Stations_End.assign(Duration_sec=end_walk_durations)
Closes_Stations_End

Unnamed: 0,Adress,Available_Bikes,Latitude,Longitude,Distance,Duration_sec
332,Sobieskiego - św. Bonifacego,20,52.181847,21.056282,0.1356,112.18
287,Pętla Stegny,17,52.177916,21.049562,0.540431,461.89
331,Sobieskiego - Nałęczowska,13,52.17538,21.062904,0.746716,565.83
316,Sadyba Best Mall,17,52.1873,21.062551,0.862682,755.99
228,PKN Orlen - Śródziemnomorska,39,52.17668,21.04525,0.863388,722.76


## Calculate the cycling time from the top 5 stations close to the start to the top 5 stations close to the finish

In [327]:
# Pair every candidate start station with every candidate end station.
# Both frames get a constant helper column `key`; merging on it yields all combinations.
station_columns = ["Adress", "Latitude", "Longitude"]
Cycling_Times = Closes_Stations_Start[station_columns].assign(key=1)
Finish_Stations = Closes_Stations_End[station_columns].assign(key=1)
Cycling_Times = Cycling_Times.merge(Finish_Stations, on='key').drop(columns='key')
# Name the columns by position: the start station's three first, then the end station's
Cycling_Times.columns = [
    'Start_Adress', 'Start_Latitude', 'Start_Longitude',
    'End_Address', 'End_Latitude', 'End_Longitude',
]

In [328]:
#calculate how long it takes to cycle from every start point to every end point
cycling_durations = []
for start_lng, start_lat, end_lng, end_lat in zip(Cycling_Times['Start_Longitude'],
                                                  Cycling_Times['Start_Latitude'],
                                                  Cycling_Times['End_Longitude'],
                                                  Cycling_Times['End_Latitude']):
    # Locations are passed as [longitude, latitude]: start station first, end station second.
    coordinates = [[start_lng, start_lat], [end_lng, end_lat]]
    matrix = client.distance_matrix(
        locations=coordinates,
        profile='cycling-regular',
        metrics=['duration'],
        validate=False)
    # Row 0 belongs to the start station; its last entry is the duration to the end station.
    cycling_durations.append(matrix['durations'][0][-1])
# Add the column in one step on a new frame rather than cell by cell with .loc.
Cycling_Times = Cycling_Times.assign(Duration_sec=cycling_durations)
Cycling_Times

Unnamed: 0,Start_Adress,Start_Latitude,Start_Longitude,End_Address,End_Latitude,End_Longitude,Duration_sec
0,Plac Inwalidów,52.264502,20.9896,Sobieskiego - św. Bonifacego,52.181847,21.056282,2380.18
1,Plac Inwalidów,52.264502,20.9896,Pętla Stegny,52.177916,21.049562,2527.88
2,Plac Inwalidów,52.264502,20.9896,Sobieskiego - Nałęczowska,52.17538,21.062904,2546.56
3,Plac Inwalidów,52.264502,20.9896,Sadyba Best Mall,52.1873,21.062551,2305.06
4,Plac Inwalidów,52.264502,20.9896,PKN Orlen - Śródziemnomorska,52.17668,21.04525,2495.68
5,Zajączka - Bitwy Pod Rokitną,52.261257,20.986319,Sobieskiego - św. Bonifacego,52.181847,21.056282,2375.01
6,Zajączka - Bitwy Pod Rokitną,52.261257,20.986319,Pętla Stegny,52.177916,21.049562,2522.71
7,Zajączka - Bitwy Pod Rokitną,52.261257,20.986319,Sobieskiego - Nałęczowska,52.17538,21.062904,2541.39
8,Zajączka - Bitwy Pod Rokitną,52.261257,20.986319,Sadyba Best Mall,52.1873,21.062551,2299.89
9,Zajączka - Bitwy Pod Rokitną,52.261257,20.986319,PKN Orlen - Śródziemnomorska,52.17668,21.04525,2490.51


## Find the shortest total of 3 durations: the walk from the start address to a nearby station + the cycling duration + the walk from a nearby station to the end address.

In [407]:
# Attach the walking time from the start address to each start station (matched by address)
start_walks = Closes_Stations_Start[['Adress', 'Duration_sec']]
Cycling_Times_1 = Cycling_Times.merge(start_walks, left_on='Start_Adress', right_on='Adress')

In [408]:
# Attach the walking time from each end station to the end address (matched by address)
end_walks = Closes_Stations_End[['Adress', 'Duration_sec']]
Cycling_Times_2 = Cycling_Times_1.merge(end_walks, left_on='End_Address', right_on='Adress')

       

In [409]:
# The two address columns brought in by the merges only duplicate the station names
Final_Data = Cycling_Times_2.drop(columns=['Adress_x', 'Adress_y'])

In [410]:
# Rename every column by position. The last three names label the three duration
# columns left by the two merges, in the order they were added: cycling time,
# walk to the start station, walk from the end station.
# NOTE(review): this relies on the merge column order staying unchanged.
Final_Data.columns = ['Station_Start_Adress', 'Station_Start_Latitude', 'Station_Start_Longitude',
                         'Station_End_Address', 'Station_End_Latitude', 'Station_End_Longitude','Cycling_Duration_sec',
                         'Start_Walk_Duration_sec',
                         'End_Walk_Duration_sec']

In [411]:
# Total door-to-door time: cycling + walk to the start station + walk from the end station
Final_Data['Total_Duration_sec'] = (
    Final_Data['Cycling_Duration_sec']
    .add(Final_Data['Start_Walk_Duration_sec'])
    .add(Final_Data['End_Walk_Duration_sec'])
)

In [412]:
# Keep the row(s) whose total duration equals the minimum; this frame drives the route map
shortest_total = Final_Data['Total_Duration_sec'].min()
Fastest_Way = Final_Data[Final_Data['Total_Duration_sec'] == shortest_total]

In [413]:
# Display the station pair with the shortest total duration
Fastest_Way

Unnamed: 0,Station_Start_Adress,Station_Start_Latitude,Station_Start_Longitude,Station_End_Address,Station_End_Latitude,Station_End_Longitude,Cycling_Duration_sec,Start_Walk_Duration_sec,End_Walk_Duration_sec,Total_Duration_sec
0,Plac Inwalidów,52.264502,20.9896,Sobieskiego - św. Bonifacego,52.181847,21.056282,2380.18,234.37,112.18,2726.73


## Create the map with fastest route

In [441]:
# Start a fresh map centred on Warsaw (cartodbpositron tiles) for drawing the fastest route
final_route = folium.Map(
    location=[latitude, longitude],
    zoom_start=11,
    tiles='cartodbpositron',
)

In [442]:
# Mark the start address on the route map with a small red circle
label_start = folium.Popup('Start Point', parse_html=True)
route_start_marker = folium.CircleMarker(
    location=[latitude_start, longitude_start],
    radius=4,
    popup=label_start,
    color='red',
    fill=True,
    fill_color='#FF0000',
    fill_opacity=0.3,
    parse_html=False,
)
route_start_marker.add_to(final_route)

<folium.features.CircleMarker at 0x15c594de148>

In [443]:
# Mark the end address on the route map with a small red circle
label_end = folium.Popup('End Point', parse_html=True)
route_end_marker = folium.CircleMarker(
    location=[latitude_end, longitude_end],
    radius=4,
    popup=label_end,
    color='red',
    fill=True,
    fill_color='#FF0000',
    fill_opacity=0.3,
    parse_html=False,
)
route_end_marker.add_to(final_route)


<folium.features.CircleMarker at 0x15c592e52c8>

In [444]:
# Mark the chosen start station in blue; its popup shows the station address
label_start_s = folium.Popup(Fastest_Way['Station_Start_Adress'].values[0], parse_html=True)
start_station_location = [
    Fastest_Way['Station_Start_Latitude'].values[0],
    Fastest_Way['Station_Start_Longitude'].values[0],
]
start_station_marker = folium.CircleMarker(
    location=start_station_location,
    radius=4,
    popup=label_start_s,
    color='blue',
    fill=True,
    fill_color='#0000FF',
    fill_opacity=0.3,
    parse_html=False,
)
start_station_marker.add_to(final_route)

<folium.features.CircleMarker at 0x15c58538408>

In [445]:
# Mark the chosen end station in blue; its popup shows the station address
label_end_s = folium.Popup(Fastest_Way['Station_End_Address'].values[0], parse_html=True)
end_station_location = [
    Fastest_Way['Station_End_Latitude'].values[0],
    Fastest_Way['Station_End_Longitude'].values[0],
]
end_station_marker = folium.CircleMarker(
    location=end_station_location,
    radius=4,
    popup=label_end_s,
    color='blue',
    fill=True,
    fill_color='#0000FF',
    fill_opacity=0.3,
    parse_html=False,
)
end_station_marker.add_to(final_route)

<folium.features.CircleMarker at 0x15c5928c208>

In [449]:
# Draw the walking leg from the start address to the chosen start station
start_station_lng = Fastest_Way['Station_Start_Longitude'].values[0]
start_station_lat = Fastest_Way['Station_Start_Latitude'].values[0]
coordinates = [[longitude_start, latitude_start], [start_station_lng, start_station_lat]]

route = client.directions(
    coordinates=coordinates,
    profile='foot-walking',
    format='geojson',
    validate=False,
)
# Each route point is reversed before plotting (the request used [lng, lat];
# the markers on this map use [lat, lng])
route_points = [point[::-1] for point in route['features'][0]['geometry']['coordinates']]
folium.PolyLine(locations=route_points).add_to(final_route)

<folium.features.PolyLine at 0x15c57375808>

In [446]:
# Draw the cycling leg from the chosen start station to the chosen end station
ride_from = [Fastest_Way['Station_Start_Longitude'].values[0],
             Fastest_Way['Station_Start_Latitude'].values[0]]
ride_to = [Fastest_Way['Station_End_Longitude'].values[0],
           Fastest_Way['Station_End_Latitude'].values[0]]
coordinates = [ride_from, ride_to]

route = client.directions(
    coordinates=coordinates,
    profile='cycling-regular',
    format='geojson',
    validate=False,
)
# Each route point is reversed before plotting (the request used [lng, lat];
# the markers on this map use [lat, lng])
route_points = [point[::-1] for point in route['features'][0]['geometry']['coordinates']]
folium.PolyLine(locations=route_points).add_to(final_route)
 

<folium.features.PolyLine at 0x15c594918c8>

In [447]:
# Draw the walking leg from the chosen end station to the end address
end_station_lng = Fastest_Way['Station_End_Longitude'].values[0]
end_station_lat = Fastest_Way['Station_End_Latitude'].values[0]
coordinates = [[end_station_lng, end_station_lat], [longitude_end, latitude_end]]

route = client.directions(
    coordinates=coordinates,
    profile='foot-walking',
    format='geojson',
    validate=False,
)
# Each route point is reversed before plotting (the request used [lng, lat];
# the markers on this map use [lat, lng])
route_points = [point[::-1] for point in route['features'][0]['geometry']['coordinates']]
folium.PolyLine(locations=route_points).add_to(final_route)

<folium.features.PolyLine at 0x15c5947c4c8>

In [450]:
# Show the finished map: start/end addresses, the two chosen stations,
# and the walking and cycling legs between them
final_route

In [453]:
# Report the total duration of the fastest option, converted from seconds to minutes
print('Fastest way is going to take {:.1f} minutes'.format(Fastest_Way['Total_Duration_sec'].values[0] / 60))

Fastes way is going to tale 45.445499999999996 minutes
