Getting the station list from the page https://www.veturilo.waw.pl/mapa-stacji/. It provides us with the station address, 
geo-coordinates and the number of available bikes. We don't want to send someone to a station with 0 bikes. Bike availability is updated almost in real time. 

## Getting the stations co-ordinates

In [77]:
#importing libraries required for web scraping 

import requests
import lxml.html as lh
import pandas as pd

In [78]:
#Get the whole page
url = 'https://www.veturilo.waw.pl/mapa-stacji/'
# A timeout keeps the notebook from hanging indefinitely if the site does not respond.
page = requests.get(url, timeout=30)
# Fail loudly on an HTTP error (4xx/5xx) instead of silently parsing an error page.
page.raise_for_status()
# Parse the HTML and collect every table row (<tr>) found anywhere in the document.
doc = lh.fromstring(page.content)
tr_elements = doc.xpath('//tr')

In [79]:
# Count the cells in each of the first 12 rows to confirm the rows are uniform.
[len(row) for row in tr_elements[:12]]

[5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5]

In [80]:
#Get headers
tr_elements = doc.xpath('//tr')
# Without any rows there is no header to read; raise a clear error instead of
# the bare IndexError that tr_elements[0] would produce.
if not tr_elements:
    raise ValueError('No <tr> elements found on the page; cannot read the table header.')
# Pair each header cell's text with an empty list that will collect that column's values.
col = [(header_cell.text_content(), []) for header_cell in tr_elements[0]]

In [81]:
# Preview the (header, values) pairs; the value lists are filled in a later step.
col

[('Lokalizacja', []),
 ('Dostępne rowery', []),
 ('Ilość stojaków', []),
 ('Wolne stojaki', []),
 ('Współrzędne', [])]

In [82]:
#Get the table data
# Walk every row after the header and append each cell's value to its column list.
for data_row in tr_elements[1:]:
    # Rows are expected to have exactly 5 cells; stop at the first row of any other width.
    if len(data_row) != 5:
        break
    for i, cell in enumerate(data_row.iterchildren()):
        data = cell.text_content()
        if i > 0:
            # Every column after the first is converted to int where possible;
            # text that is not an integer is kept as a string.
            try:
                data = int(data)
            except ValueError:
                pass
        col[i][1].append(data)

In [83]:
# Sanity check: every column list should hold the same number of values.
[len(values) for _, values in col]

[394, 394, 394, 394, 394]

In [84]:
# Build the DataFrame: each header becomes a column name and its list the column values.
Dict = dict(col)
Bike_Stations = pd.DataFrame(data=Dict)

## Cleaning data

In [85]:
# Replace the Polish headers with English names, keeping the column order.
Bike_Stations.columns = [
    'Adress',
    'Available_Bikes',
    'Bike_Stands',
    'Empty_Stands',
    'Coordinates',
]

In [45]:
#Drop the columns we don't need: Bike_Stands, Empty_Stands.
#Even if there are no empty stands you can leave the bike at the station

In [86]:
# Remove the two stand-count columns from the DataFrame in place.
for unused_column in ('Bike_Stands', 'Empty_Stands'):
    del Bike_Stations[unused_column]

In [87]:
#Splitting co-ordinates into 2 columns: Latitude and Longitude
# `n` is passed by keyword: pandas 2.0 made every argument of str.split after `pat`
# keyword-only, so the positional form split(',', 1, ...) raises TypeError there.
coordinate_parts = (
    Bike_Stations['Coordinates']
    .str.split(',', n=1, expand=True)
    .rename(columns={0: 'Latitude', 1: 'Longitude'})
)
# join() aligns on the index, so each station keeps its own pair of values.
Bike_Stations = Bike_Stations.join(coordinate_parts)
                                  

In [88]:
# The combined Coordinates column is no longer needed; remove it in place.
Bike_Stations.drop(columns=['Coordinates'], inplace=True)

In [89]:
# Preview the first five rows to verify the cleaned table looks as expected.
Bike_Stations.head(n=5)

Unnamed: 0,Adress,Available_Bikes,Latitude,Longitude
0,1 Sierpnia - DK Włochy,14,52.1946888,20.9667823
1,11 Listopada - Ratuszowa,20,52.261449,21.037614
2,11 listopada - Środkowa,14,52.263109,21.0381895
3,Abrahama - Kapelanów AK,12,52.22837,21.08581
4,Afrykańska - Egipska,16,52.2251969,21.0712892


In [90]:
#check types
# Shows the dtype of every column, to see which columns still need converting.
Bike_Stations.dtypes

Adress             object
Available_Bikes     int64
Latitude           object
Longitude          object
dtype: object

In [91]:
# Latitude and Longitude have to be numeric, so cast both columns to float.
for coordinate_column in ("Latitude", "Longitude"):
    Bike_Stations[coordinate_column] = Bike_Stations[coordinate_column].astype(float)

In [92]:
# Re-check the column dtypes after the float conversion.
Bike_Stations.dtypes

Adress              object
Available_Bikes      int64
Latitude           float64
Longitude          float64
dtype: object

## Put the stations on the map

#### Installing and importing libraries

In [93]:
pip install geopy

Note: you may need to restart the kernel to use updated packages.


In [94]:
# Libraries used by the mapping section.
from geopy.geocoders import Nominatim  # convert an address into latitude and longitude values
import folium  # map rendering library

# Remove pandas' display limits so DataFrames are shown in full when previewed.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

print('Libraries imported.')

Libraries imported.


In [95]:
#Get Warsaw Coordinates
address = 'Warsaw, Poland'

geolocator = Nominatim(user_agent="warsaw_coordinates")
location = geolocator.geocode(address)
# geocode() returns None when no match is found; fail with a clear message instead of
# an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Warsaw are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Warsaw are 52.2337172, 21.07141112883227.


In [104]:
# Build a map centred on the Warsaw coordinates and draw one circle marker per station.
map_warsaw = folium.Map(location=[latitude, longitude], zoom_start=11)

# Every row of Bike_Stations gets a marker; its popup shows the address and bike count.
station_rows = zip(
    Bike_Stations['Latitude'],
    Bike_Stations['Longitude'],
    Bike_Stations['Adress'],
    Bike_Stations['Available_Bikes'],
)
for station_lat, station_lng, station_adress, bikes in station_rows:
    popup_text = '{},available bikes: {}'.format(station_adress, bikes)
    station_marker = folium.CircleMarker(
        [station_lat, station_lng],
        radius=5,
        popup=folium.Popup(popup_text, parse_html=True),
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False,
    )
    station_marker.add_to(map_warsaw)

# Display the map as the cell output.
map_warsaw

## Get start and end address as input and get the addresses' coordinates 

In [105]:
def _geocode_address(address, user_agent):
    """Geocode ``address`` with Nominatim and return ``(geolocator, location)``.

    Args:
        address: Free-text address to look up.
        user_agent: User-agent string passed to the Nominatim geocoder.

    Raises:
        ValueError: If the geocoder returns no match for ``address``.
    """
    geocoder = Nominatim(user_agent=user_agent)
    match = geocoder.geocode(address)
    # geocode() returns None when nothing is found; surface that as a clear error
    # instead of an AttributeError when the coordinates are read.
    if match is None:
        raise ValueError('Could not geocode address: {!r}'.format(address))
    return geocoder, match


# Start point of the trip.
start_address = 'Lisa-Kuli Leopolda 10, Warsaw, Poland'

geolocator, location = _geocode_address(start_address, "start_point_coordinates")
latitude_start = location.latitude
longitude_start = location.longitude
print('The geograpical coordinate of start adress are {}, {}.'.format(latitude_start, longitude_start))

# End point of the trip.
end_address = 'Egejska 19, Warsaw, Poland'

geolocator, location = _geocode_address(end_address, "end_point_coordinates")
latitude_end = location.latitude
longitude_end = location.longitude
print('The geograpical coordinate of end adress are {}, {}.'.format(latitude_end, longitude_end))


The geograpical coordinate of start adress are 52.262623, 20.989109770897436.
The geograpical coordinate of end adress are 52.1806331, 21.056111.


## Add Start and End point to the map

In [106]:
# Mark the start point on the map with a red circle and a 'Start Point' popup.
label_start = folium.Popup('Start Point', parse_html=True)
start_marker = folium.CircleMarker(
    [latitude_start, longitude_start],
    radius=7,
    popup=label_start,
    color='red',
    fill=True,
    fill_color='#FF0000',
    fill_opacity=0.7,
    parse_html=False,
)
start_marker.add_to(map_warsaw)


<folium.features.CircleMarker at 0x1ea37053dc8>

In [108]:
# Mark the end point on the map with a red circle and an 'End Point' popup.
label_end = folium.Popup('End Point', parse_html=True)
end_marker = folium.CircleMarker(
    [latitude_end, longitude_end],
    radius=7,
    popup=label_end,
    color='red',
    fill=True,
    fill_color='#FF0000',
    fill_opacity=0.7,
    parse_html=False,
)
end_marker.add_to(map_warsaw)


<folium.features.CircleMarker at 0x1ea371362c8>

In [109]:
# Re-display the map, which now also carries the start and end point markers.
map_warsaw

In [117]:
from geopy import distance

# Distance in kilometres between the start point and the end point.
coords_1 = latitude_start, longitude_start
coords_2 = latitude_end, longitude_end
start_to_end = distance.distance(coords_1, coords_2)

print(start_to_end.km)

10.207679084931922


## Calculating distance between Start Point and bike stations with available bikes. 

In [126]:
#create table with only the stations that have available bikes
# .copy() makes this an independent DataFrame, so adding a column to it later does not
# raise SettingWithCopyWarning or depend on whether the filter returned a view or a copy.
Bike_Stations_Available = Bike_Stations[Bike_Stations.Available_Bikes != 0].copy()

In [130]:
#Calculating distances
# Distance in km from the start point to each station in Bike_Stations_Available.
# The start coordinates do not change between stations, so build the tuple once and
# compute the whole column in one pass instead of writing cell-by-cell via .loc
# inside iterrows(). This also creates the column when the frame is empty.
coords_1 = (latitude_start, longitude_start)
Bike_Stations_Available['Distance'] = [
    distance.distance(coords_1, station_coords).km
    for station_coords in zip(Bike_Stations_Available['Latitude'],
                              Bike_Stations_Available['Longitude'])
]
        
    

In [132]:
# Keep the five stations with the smallest distance to the start point.
Closes_Stations_Start = Bike_Stations_Available.sort_values(by='Distance').iloc[:5]

In [133]:
# Show the five stations with available bikes that are closest to the start point.
Closes_Stations_Start

Unnamed: 0,Adress,Available_Bikes,Latitude,Longitude,Distance
240,Plac Inwalidów,12,52.264502,20.9896,0.211719
385,Zajączka - Bitwy Pod Rokitną,11,52.261257,20.986319,0.243702
237,Plac Grunwaldzki,18,52.262405,20.980255,0.605042
161,Metro Dworzec Gdański,5,52.257602,20.994498,0.668952
42,Arkadia,28,52.25574,20.984343,0.832194


## Calculating distance between End Point and bike stations (as we are going to return the bike to one of those stations, we don't care if there are available bikes)

In [134]:
#Calculating distances
# Distance in km from the end point to every station in Bike_Stations.
# The end coordinates do not change between stations, so build the tuple once and
# compute the whole column in one pass instead of writing cell-by-cell via .loc
# inside iterrows(). This also creates the column when the frame is empty.
coords_1 = (latitude_end, longitude_end)
Bike_Stations['Distance'] = [
    distance.distance(coords_1, station_coords).km
    for station_coords in zip(Bike_Stations['Latitude'],
                              Bike_Stations['Longitude'])
]
        

In [136]:
# Keep the five stations with the smallest distance to the end point.
Closes_Stations_End = Bike_Stations.sort_values(by='Distance').iloc[:5]

In [137]:
# Show the five stations closest to the end point.
Closes_Stations_End

Unnamed: 0,Adress,Available_Bikes,Latitude,Longitude,Distance
332,Sobieskiego - św. Bonifacego,28,52.181847,21.056282,0.1356
287,Pętla Stegny,27,52.177916,21.049562,0.540431
331,Sobieskiego - Nałęczowska,16,52.17538,21.062904,0.746716
316,Sadyba Best Mall,19,52.1873,21.062551,0.862682
228,PKN Orlen - Śródziemnomorska,44,52.17668,21.04525,0.863388
