In [310]:
import requests # library to handle requests
import pandas as pd # library for data analsysis
import numpy as np # library to handle data in a vectorized manner
import random # library for random number generation
import csv 

from bs4 import BeautifulSoup # library for scraping data 

# libraries for displaying images
from IPython.display import Image 
from IPython.core.display import HTML 

from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

# Matplotlib and associated plotting modules
import matplotlib.cm as cm
import matplotlib.colors as colors
    
# tranforming json file into a pandas dataframe library
from pandas.io.json import json_normalize

import folium # plotting library

print('Folium installed')
print('Libraries imported.')

Folium installed
Libraries imported.


In [311]:
#getting data of bicycle usage

def scrape_data(url):
    """Scrape the first HTML table found at ``url`` into ``bicycle.csv``.

    The CSV gets a fixed header (City, Usage, Year) followed by one line per
    table row, built from the right-stripped text of that row's ``<td>`` cells.

    Parameters
    ----------
    url : str
        Address of the page that contains the table.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    IndexError
        If the page contains no ``<table>`` element.
    """
    response = requests.get(url, timeout=10)
    # Fail loudly on an error page instead of parsing it as if it were data.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    table = soup.find_all('table')[0]

    rows = table.select('tbody > tr')     # every tr that is a direct child of a tbody

    header = ['City', 'Usage', 'Year']   # column names written instead of the page's own

    # newline='' is what the csv module expects (avoids blank lines on Windows);
    # explicit UTF-8 matches the default encoding pd.read_csv uses to read it back.
    with open('bicycle.csv', 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(header)
        for row in rows[1:]:    # the first row is skipped (presumably the page's header row)
            writer.writerow([cell.text.rstrip() for cell in row.find_all('td')])


# In a notebook kernel __name__ is "__main__", so the scrape runs whenever the cell is executed.
if __name__=="__main__":
    url = "https://ecf.com/resources/cycling-facts-and-figures"
    scrape_data(url)    # writes bicycle.csv to the working directory

# load the freshly written CSV and preview the first rows
bicycle_usage = pd.read_csv('bicycle.csv')
bicycle_usage.head()

Unnamed: 0,City,Usage,Year
0,Copenhagen,35%,2010
1,Amsterdam,32%,2012
2,Berlin,13%,2008
3,Ljubjana,12%,2013
4,Helsinki,11%,2013


In [312]:
# 'Year' is not needed for the analysis, so keep only City and Usage
bicycle_use = bicycle_usage.drop(columns=['Year'])
bicycle_use.head()

Unnamed: 0,City,Usage
0,Copenhagen,35%
1,Amsterdam,32%
2,Berlin,13%
3,Ljubjana,12%
4,Helsinki,11%


In [313]:
# strip the '%' sign so the values can later be parsed as numbers
usage_without_percent = bicycle_use['Usage'].str.replace('%', '')
bicycle_use['Usage'] = usage_without_percent

# Row 14 is overwritten with the bare usage number before the float conversion.
# NOTE(review): the original comment says this is the Paris row - the index is
# hard-coded, so confirm it still points at Paris if the source table changes.
bicycle_use.loc[14, 'Usage'] = 2
bicycle_use['Usage'] = bicycle_use['Usage'].astype(float)
bicycle_use.head()

Unnamed: 0,City,Usage
0,Copenhagen,35.0
1,Amsterdam,32.0
2,Berlin,13.0
3,Ljubjana,12.0
4,Helsinki,11.0


In [314]:
#getting data of average temperature

def scrape_data(url):
    """Scrape the third HTML table found at ``url`` into ``temperature.csv``.

    The CSV header is taken from the ``<th>`` cells of the table's first row;
    every following row contributes the stripped text of its ``<td>`` cells.

    Parameters
    ----------
    url : str
        Address of the page that contains the table.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    IndexError
        If the page has fewer than three tables, or the table has no rows.
    """
    response = requests.get(url, timeout=10)
    # Fail loudly on an error page instead of parsing it as if it were data.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    table = soup.find_all('table')[2]

    rows = table.select('tr')

    header = [th.text.rstrip() for th in rows[0].find_all('th')]  # header comes from the page itself

    # newline='' is what the csv module expects (avoids blank lines on Windows).
    # Explicit UTF-8 is needed because the cells contain non-ASCII characters
    # (e.g. the U+2212 minus sign) and pd.read_csv reads UTF-8 by default.
    with open('temperature.csv', 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(header)
        for row in rows[1:]:
            writer.writerow([cell.text.strip() for cell in row.find_all('td')])


# In a notebook kernel __name__ is "__main__", so the scrape runs whenever the cell is executed.
if __name__=="__main__":
    url = "https://en.wikipedia.org/wiki/List_of_cities_by_average_temperature#Europe"
    scrape_data(url)    # writes temperature.csv to the working directory


In [315]:
# load the scraped temperature table; cells look like "6.7(44.1)" (first number is the one kept later as degrees C)
temperature = pd.read_csv('temperature.csv')
temperature.head()

Unnamed: 0,Country,City,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec,Year,Ref.
0,Albania,Tirana,6.7(44.1),7.8(46.0),10.0(50.0),13.4(56.1),18.0(64.4),21.6(70.9),24.0(75.2),23.8(74.8),20.7(69.3),16.0(60.8),11.7(53.1),8.1(46.6),15.2(59.4),[180]
1,Austria,Vienna,0.3(32.5),1.5(34.7),5.7(42.3),10.7(51.3),15.7(60.3),18.7(65.7),20.8(69.4),20.2(68.4),15.4(59.7),10.2(50.4),5.1(41.2),1.1(34.0),10.4(50.7),[181]
2,Belarus,Minsk,−4.5(23.9),−4.4(24.1),0.0(32.0),7.2(45.0),13.3(55.9),16.4(61.5),18.5(65.3),17.5(63.5),12.1(53.8),6.6(43.9),0.6(33.1),−3.4(25.9),6.7(44.1),[182]
3,Belgium,Brussels,3.3(37.9),3.7(38.7),6.8(44.2),9.8(49.6),13.6(56.5),16.2(61.2),18.4(65.1),18.0(64.4),14.9(58.8),11.1(52.0),6.8(44.2),3.9(39.0),10.5(50.9),[183]
4,Bosnia and Herzegovina,Sarajevo,−0.5(31.1),1.4(34.5),5.7(42.3),10.0(50.0),14.8(58.6),17.7(63.9),19.7(67.5),19.7(67.5),15.3(59.5),11.0(51.8),5.4(41.7),0.9(33.6),10.1(50.2),[184]


In [316]:
# Keep only City and the yearly average: the twelve monthly columns are
# dropped by position (2..13), then Country and Ref. are dropped by name.
av_temp = temperature.drop(columns=temperature.columns[2:14])
average_temp = av_temp.drop(columns=['Country', 'Ref.'])
average_temp.head()

Unnamed: 0,City,Year
0,Tirana,15.2(59.4)
1,Vienna,10.4(50.7)
2,Minsk,6.7(44.1)
3,Brussels,10.5(50.9)
4,Sarajevo,10.1(50.2)


In [317]:
# Keep only the first number of each cell (e.g. "15.2(59.4)" -> "15.2").
# Splitting on '(' instead of slicing off a fixed six characters keeps values
# whose bracketed part has a different width (or is missing altogether), and
# makes the cell safe to re-run. The result is still text; it is converted to
# float later in the notebook.
average_temp['Year'] = (
    average_temp['Year']
    .astype(str)
    .str.split('(').str[0]
    .str.strip()
    .str.replace('\u2212', '-', regex=False)   # Unicode minus -> ASCII so float() can parse it
)
average_temp.head()

Unnamed: 0,City,Year
0,Tirana,15.2
1,Vienna,10.4
2,Minsk,6.7
3,Brussels,10.5
4,Sarajevo,10.1


In [318]:
# Manually restore the yearly average for row 17, which was lost when the
# Fahrenheit part was removed from the 'Year' column.
# NOTE(review): the original comment says row 17 is Frankfurt - the index is
# hard-coded, so confirm it if the source table changes.

average_temp.loc[17, 'Year'] = 10

In [319]:
# scraping the sunshine-hours table from the website
def scrape_data(url):
    """Scrape the sixth HTML table found at ``url`` into ``sunshine.csv``.

    The CSV header is taken from the ``<th>`` cells of the table's first row;
    every following row contributes the right-stripped text of its ``<td>`` cells.

    Parameters
    ----------
    url : str
        Address of the page that contains the table.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    IndexError
        If the page has fewer than six tables, or the table has no rows.
    """
    response = requests.get(url, timeout=10)
    # Fail loudly on an error page instead of parsing it as if it were data.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    table = soup.find_all('table')[5]

    rows = table.select('tr')

    header = [th.text.rstrip() for th in rows[0].find_all('th')]  # header comes from the page itself

    # newline='' is what the csv module expects (avoids blank lines on Windows);
    # explicit UTF-8 matches the default encoding pd.read_csv uses to read it back.
    with open('sunshine.csv', 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(header)
        for row in rows[1:]:
            writer.writerow([cell.text.rstrip() for cell in row.find_all('td')])


# In a notebook kernel __name__ is "__main__", so the scrape runs whenever the cell is executed.
if __name__=="__main__":
    url = "https://en.wikipedia.org/wiki/List_of_cities_by_sunshine_duration#Europe"
    scrape_data(url)    # writes sunshine.csv to the working directory


In [320]:
# load the scraped sunshine table (monthly values plus a 'Year' total per city)
sunshine = pd.read_csv('sunshine.csv')
sunshine.head()

Unnamed: 0,Country,City,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec,Year,Ref.
0,Albania,Tirana,124.0,125.0,165.0,191.0,263.0,298.0,354.0,327.0,264.0,218.0,127.0,88.0,2544,
1,Armenia,Yerevan,93.0,108.0,162.0,177.0,242.0,297.0,343.0,332.0,278.0,212.0,138.0,92.0,2474,
2,Austria,Vienna,66.0,106.0,128.0,183.0,239.0,228.0,260.0,251.0,168.0,139.0,66.0,51.0,1884,[104]
3,Belarus,Minsk,34.0,72.0,133.0,185.0,270.0,267.0,271.0,251.0,154.0,103.0,39.0,28.0,1807,[105]
4,Belgium,Brussels,59.0,77.0,114.0,159.0,191.0,188.0,201.0,190.0,143.0,113.0,66.0,45.0,1546,[106]


In [321]:
# Keep only the City and Year columns.
# They are selected by name into a new frame instead of overwriting `sunshine`
# with a positional drop: the raw frame stays intact, so re-running this cell
# gives the same result instead of raising a KeyError on the second run.
Sunshine = sunshine[['City', 'Year']].copy()
Sunshine.head()

Unnamed: 0,City,Year
0,Tirana,2544
1,Yerevan,2474
2,Vienna,1884
3,Minsk,1807
4,Brussels,1546


In [322]:
# strip the thousands separators, then parse the yearly totals as floats
year_without_commas = Sunshine['Year'].str.replace(',', '')
Sunshine['Year'] = year_without_commas.astype(float)
Sunshine.head()

Unnamed: 0,City,Year
0,Tirana,2544.0
1,Yerevan,2474.0
2,Vienna,1884.0
3,Minsk,1807.0
4,Brussels,1546.0


In [323]:
# load the population figures from a local file (one 'Value' per 'City')
population = pd.read_csv('city_pop.csv')
population.head()

Unnamed: 0,City,Value
0,Brussels,1199095
1,Sofia,1236047
2,Prague,1294513
3,Copenhagen,559440
4,Berlin,3574830


In [324]:
# strip the thousands separators; the values stay text here and are
# converted to float later, when the population share is computed
population['Value'] = population['Value'].str.replace(',', '').astype(str)
population.head()

Unnamed: 0,City,Value
0,Brussels,1199095
1,Sofia,1236047
2,Prague,1294513
3,Copenhagen,559440
4,Berlin,3574830


## Merging tables

In [325]:
# load the list of EU capitals; it is the left-hand side of the per-indicator merges below
EU = pd.read_csv('Capitals.csv')
EU

Unnamed: 0,City
0,Copenhagen
1,Amsterdam
2,Berlin
3,Ljubljana
4,Helsinki
5,Zagreb
6,Stockholm
7,Dublin
8,Vienna
9,Riga


### Creating Bicycle table

In [326]:
# left join: every capital is kept, capitals without usage data get NaN
Bicycle = EU.merge(bicycle_use, how='left', on='City')
Bicycle

Unnamed: 0,City,Usage
0,Copenhagen,35.0
1,Amsterdam,32.0
2,Berlin,13.0
3,Ljubljana,
4,Helsinki,11.0
5,Zagreb,10.1
6,Stockholm,9.0
7,Dublin,7.9
8,Vienna,6.0
9,Riga,4.0


In [327]:
# Ljubljana (row 3) got no match in the merge because the scraped table spells
# it 'Ljubjana'; its usage (12%) is restored by hand.
# NOTE(review): row 27 is presumably Valletta, filled with Nicosia's value (2%)
# as a stand-in - both indices are hard-coded, confirm against Capitals.csv.
Bicycle.loc[3, 'Usage'] = 12.0
Bicycle.loc[27, 'Usage'] = 2.0

In [328]:
# add a third column: each city's percentage of the summed Usage values
usage_total = Bicycle['Usage'].sum()
Bicycle['Bicycle Share'] = Bicycle['Usage'] / usage_total * 100
Bicycle.head()

Unnamed: 0,City,Usage,Bicycle Share
0,Copenhagen,35.0,20.515826
1,Amsterdam,32.0,18.757327
2,Berlin,13.0,7.620164
3,Ljubljana,12.0,7.033998
4,Helsinki,11.0,6.447831


In [329]:
# keep only City and Bicycle Share
Bicycle_final = Bicycle.drop(columns=['Usage'])
Bicycle_final.head()

Unnamed: 0,City,Bicycle Share
0,Copenhagen,20.515826
1,Amsterdam,18.757327
2,Berlin,7.620164
3,Ljubljana,7.033998
4,Helsinki,6.447831


### Creating Temperature table

In [330]:
# left join: every capital is kept, capitals without temperature data get NaN
Temperature = EU.merge(average_temp, how='left', on='City')
# Row 11 is filled in by hand.
# NOTE(review): presumably Luxembourg, which has no row in the scraped table - confirm.
Temperature.loc[11, 'Year'] = 9.3
Temperature.head()

Unnamed: 0,City,Year
0,Copenhagen,9.1
1,Amsterdam,10.2
2,Berlin,10.3
3,Ljubljana,10.9
4,Helsinki,5.9


In [331]:
# parse the temperatures as floats, then add a third column with each
# city's percentage of the summed yearly averages
Temperature['Year'] = Temperature['Year'].astype(float)
temperature_total = Temperature['Year'].sum()
Temperature['Temp Share'] = Temperature['Year'] / temperature_total * 100
Temperature.head()

Unnamed: 0,City,Year,Temp Share
0,Copenhagen,9.1,2.944031
1,Amsterdam,10.2,3.299903
2,Berlin,10.3,3.332255
3,Ljubljana,10.9,3.526367
4,Helsinki,5.9,1.908767


In [332]:
# keep only City and Temp Share
Temperature_final = Temperature.drop(columns=['Year'])
Temperature_final.head()

Unnamed: 0,City,Temp Share
0,Copenhagen,2.944031
1,Amsterdam,3.299903
2,Berlin,3.332255
3,Ljubljana,3.526367
4,Helsinki,1.908767


### Creating Sunshine table

In [333]:
# left join: every capital is kept, capitals without sunshine data get NaN
Sunshine_hours = EU.merge(Sunshine, how='left', on='City')
# Row 11 is filled in by hand.
# NOTE(review): presumably Luxembourg, as in the temperature table - confirm.
Sunshine_hours.loc[11, 'Year'] = 1487
Sunshine_hours.head()

Unnamed: 0,City,Year
0,Copenhagen,1739.0
1,Amsterdam,1662.0
2,Berlin,1626.0
3,Ljubljana,1974.0
4,Helsinki,1858.0


In [334]:
# add a third column: each city's percentage of the summed yearly sunshine hours
sunshine_total = Sunshine_hours['Year'].sum()
Sunshine_hours['Sunshine Share'] = Sunshine_hours['Year'] / sunshine_total * 100
Sunshine_hours.head()

Unnamed: 0,City,Year,Sunshine Share
0,Copenhagen,1739.0,3.091715
1,Amsterdam,1662.0,2.954819
2,Berlin,1626.0,2.890816
3,Ljubljana,1974.0,3.509514
4,Helsinki,1858.0,3.303281


In [335]:
# keep only City and Sunshine Share
Sunshine_final = Sunshine_hours.drop(columns=['Year'])
Sunshine_final.head()

Unnamed: 0,City,Sunshine Share
0,Copenhagen,3.091715
1,Amsterdam,2.954819
2,Berlin,2.890816
3,Ljubljana,3.509514
4,Helsinki,3.303281


### Creating Population table

In [336]:
# left join: every capital is kept, capitals without population data get NaN
Population = EU.merge(population, how='left', on='City')
Population.head()

Unnamed: 0,City,Value
0,Copenhagen,559440
1,Amsterdam,960402
2,Berlin,3574830
3,Ljubljana,288919
4,Helsinki,635181


In [337]:
# parse the population as floats, then add a third column with each
# city's percentage of the summed population
Population['Value'] = Population['Value'].astype(float)
population_total = Population['Value'].sum()
Population['Pop Share'] = Population['Value'] / population_total * 100
Population.head()

Unnamed: 0,City,Value,Pop Share
0,Copenhagen,559440.0,1.391205
1,Amsterdam,960402.0,2.38831
2,Berlin,3574830.0,8.88982
3,Ljubljana,288919.0,0.718478
4,Helsinki,635181.0,1.579556


In [338]:
# keep only City and Pop Share
Population_final = Population.drop(columns=['Value'])
Population_final.head()

Unnamed: 0,City,Pop Share
0,Copenhagen,1.391205
1,Amsterdam,2.38831
2,Berlin,8.88982
3,Ljubljana,0.718478
4,Helsinki,1.579556


### Creating one table

Now I have 4 different tables, which I would like to combine into one.

In [339]:
# First, join the bicycle and temperature shares on City
new_data = Bicycle_final.merge(Temperature_final, how='left', on='City')
new_data.head()

Unnamed: 0,City,Bicycle Share,Temp Share
0,Copenhagen,20.515826,2.944031
1,Amsterdam,18.757327,3.299903
2,Berlin,7.620164,3.332255
3,Ljubljana,7.033998,3.526367
4,Helsinki,6.447831,1.908767


In [340]:
# Next, add the sunshine share
new_data1 = new_data.merge(Sunshine_final, how='left', on='City')
new_data1.head()

Unnamed: 0,City,Bicycle Share,Temp Share,Sunshine Share
0,Copenhagen,20.515826,2.944031,3.091715
1,Amsterdam,18.757327,3.299903,2.954819
2,Berlin,7.620164,3.332255,2.890816
3,Ljubljana,7.033998,3.526367,3.509514
4,Helsinki,6.447831,1.908767,3.303281


In [341]:
# Finally, add the population share. This one is a right join, so the rows
# (and their order) are taken from Population_final.
stat_table = new_data1.merge(Population_final, how='right', on='City')
stat_table.head()

Unnamed: 0,City,Bicycle Share,Temp Share,Sunshine Share,Pop Share
0,Copenhagen,20.515826,2.944031,3.091715,1.391205
1,Amsterdam,18.757327,3.299903,2.954819,2.38831
2,Berlin,7.620164,3.332255,2.890816,8.88982
3,Ljubljana,7.033998,3.526367,3.509514,0.718478
4,Helsinki,6.447831,1.908767,3.303281,1.579556


## K-means Clustering

In [342]:
from sklearn.cluster import KMeans

# Cluster on the share columns only. Columns are dropped by keyword because
# the positional axis argument of drop() is rejected by pandas 2.x.
# 'Cluster Labels' and 'Total' are added to stat_table further down;
# ignoring them here keeps the features the same when this cell is re-run.
bicycle_clustering = stat_table.drop(
    columns=['City', 'Cluster Labels', 'Total'], errors='ignore')

# set number of clusters
clust_num = 5

# run k-means clustering (fixed random_state so the labels are reproducible)
kmeans = KMeans(n_clusters=clust_num, random_state=0).fit(bicycle_clustering)

# check cluster labels generated for each row in the dataframe
kmeans.labels_

array([1, 1, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 3, 0, 3, 0, 2, 0, 0, 0,
       3, 0, 3, 3, 3, 0], dtype=int32)

In [343]:
# Add the clustering labels as the first column of stat_table.
# DataFrame.insert refuses to add a column that already exists, so on a
# re-run of this cell the existing column is overwritten instead.
if 'Cluster Labels' in stat_table.columns:
    stat_table['Cluster Labels'] = kmeans.labels_
else:
    stat_table.insert(0, 'Cluster Labels', kmeans.labels_)

# final_cluster is another name for the same frame (not a copy), so the
# 'Total' column added below also appears in stat_table.
final_cluster = stat_table
final_cluster['Total'] = (
    final_cluster['Bicycle Share']
    + final_cluster['Temp Share']
    + final_cluster['Sunshine Share']
    + final_cluster['Pop Share']
)
final_cluster.head()

Unnamed: 0,Cluster Labels,City,Bicycle Share,Temp Share,Sunshine Share,Pop Share,Total
0,1,Copenhagen,20.515826,2.944031,3.091715,1.391205,27.942777
1,1,Amsterdam,18.757327,3.299903,2.954819,2.38831,27.400359
2,4,Berlin,7.620164,3.332255,2.890816,8.88982,22.733054
3,4,Ljubljana,7.033998,3.526367,3.509514,0.718478,14.788357
4,4,Helsinki,6.447831,1.908767,3.303281,1.579556,13.239436


In [344]:
# rows that k-means assigned to cluster 0
zero_cluster = stat_table[stat_table['Cluster Labels'] == 0]
zero_cluster

Unnamed: 0,Cluster Labels,City,Bicycle Share,Temp Share,Sunshine Share,Pop Share,Total
9,0,Riga,2.344666,2.005823,3.118383,1.594527,9.063399
10,0,Brussels,2.051583,3.396959,2.748586,2.981887,11.179014
11,0,Luxembourg,2.051583,3.008735,2.643692,0.225919,7.929929
12,0,Sofia,1.758499,3.429311,3.870422,3.073778,12.13201
13,0,Nicosia,1.172333,6.373342,5.892037,0.584145,14.021857
15,0,Athens,1.172333,5.952766,5.063372,1.651337,13.839808
17,0,Bratislava,1.172333,3.396959,3.623298,1.059177,9.251767
19,0,Prague,0.586166,2.717567,2.965486,3.21917,9.48839
20,0,Tallinn,0.586166,1.908767,3.24639,1.060707,6.80203
21,0,Vilnius,0.586166,1.941119,2.823257,1.355992,6.706535


In [345]:
# rows that k-means assigned to cluster 1
first_cluster = stat_table[stat_table['Cluster Labels'] == 1]
first_cluster

Unnamed: 0,Cluster Labels,City,Bicycle Share,Temp Share,Sunshine Share,Pop Share,Total
0,1,Copenhagen,20.515826,2.944031,3.091715,1.391205,27.942777
1,1,Amsterdam,18.757327,3.299903,2.954819,2.38831,27.400359


In [346]:
# rows that k-means assigned to cluster 2
second_cluster = stat_table[stat_table['Cluster Labels'] == 2]
second_cluster

Unnamed: 0,Cluster Labels,City,Bicycle Share,Temp Share,Sunshine Share,Pop Share,Total
18,2,London,1.172333,3.332255,2.903261,21.877034,29.284882


In [347]:
# rows that k-means assigned to cluster 3
third_cluster = stat_table[stat_table['Cluster Labels'] == 3]
third_cluster

Unnamed: 0,Cluster Labels,City,Bicycle Share,Temp Share,Sunshine Share,Pop Share,Total
14,3,Paris,1.172333,3.979295,2.954819,5.573737,13.680183
16,3,Budapest,1.172333,3.655775,3.534404,4.358591,12.721103
22,3,Warsaw,0.586166,2.749919,2.793033,4.288215,10.417334
24,3,Bucharest,0.586166,3.494015,3.760194,5.253283,13.093658
25,3,Rome,0.3517,4.917502,4.396671,7.145751,16.811624
26,3,Madrid,0.0,4.852798,4.92292,7.915377,17.691096


In [348]:
# rows that k-means assigned to cluster 4
fourth_cluster = stat_table[stat_table['Cluster Labels'] == 4]
fourth_cluster

Unnamed: 0,Cluster Labels,City,Bicycle Share,Temp Share,Sunshine Share,Pop Share,Total
2,4,Berlin,7.620164,3.332255,2.890816,8.88982,22.733054
3,4,Ljubljana,7.033998,3.526367,3.509514,0.718478,14.788357
4,4,Helsinki,6.447831,1.908767,3.303281,1.579556,13.239436
5,4,Zagreb,5.920281,3.461663,3.401064,1.996142,14.77915
6,4,Stockholm,5.275498,2.135231,3.2375,2.32668,12.974909
7,4,Dublin,4.630715,3.170495,2.583244,1.283813,11.668268
8,4,Vienna,3.516999,3.364607,3.349506,4.393511,14.624622


## Selecting Top City

Looking at the final tables with cluster labels, the cities in clusters 1 and 2 (Copenhagen, Amsterdam and London) appear to be the most attractive. London has the highest overall total: even though bicycle usage there is not as high as in Copenhagen and Amsterdam, its large population and favourable climate make it a good option for opening a bike shop.

#### Use geopy library to get the latitude and longitude values of London

In [349]:
address = 'London, UK'

# look the address up with the Nominatim geocoder
geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
# geocode() returns None when nothing matches; stop with a clear message
# instead of an AttributeError on the next line.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of London are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of London are 51.4893335, -0.144055084527687.


I will use the Foursquare API to explore the competitive environment for Bicycle Inc.

In [350]:
# Foursquare credentials are left blank here; fill them in locally before
# running the cells below and do not commit real values with the notebook.
CLIENT_ID = '' # your Foursquare ID
CLIENT_SECRET = '' # your Foursquare Secret
VERSION = '20190701' # Foursquare API version


In [351]:
ID = '4bf58dd8d48988d115951735' #Foursquare category ID for Bike shops
limit = 100
# Search around the London coordinates obtained from the geocoder above.
# The names are `latitude` / `longitude`; `latitude1` / `longitude1` are never
# defined in this notebook and raise a NameError on a fresh kernel.
url_london = 'https://api.foursquare.com/v2/venues/search/?&client_id={}&client_secret={}&v={}&ll={},{}&categoryId={}&limit={}'.format(
    CLIENT_ID,
    CLIENT_SECRET,
    VERSION,
    latitude,
    longitude,
    ID,
    limit)

In [352]:
# Same 10-second timeout as the scraping requests, so a stalled connection
# cannot hang the notebook; an HTTP error status stops here instead of
# surfacing later as a confusing KeyError on the JSON payload.
response_london = requests.get(url_london, timeout=10)
response_london.raise_for_status()
results_london = response_london.json()

In [353]:
# assign relevant part of JSON to venues
venues = results_london['response']['venues']

# transform venues into a dataframe; pd.json_normalize is the supported
# location of this function (pandas.io.json.json_normalize is deprecated)
dataframe = pd.json_normalize(venues)
# the empty slice shows just the column names, without any rows
dataframe[2:2]

Unnamed: 0,id,name,categories,referralId,hasPerk,location.address,location.lat,location.lng,location.labeledLatLngs,location.distance,location.postalCode,location.cc,location.city,location.state,location.country,location.formattedAddress,location.crossStreet,location.neighborhood,venuePage.id


In [354]:
# keep the venue name and category, every 'location.*' column, and the venue id
location_columns = [name for name in dataframe.columns if name.startswith('location.')]
filtered_columns = ['name', 'categories', *location_columns, 'id']
dataframe_filtered = dataframe.loc[:, filtered_columns]

# function that extracts the category of the venue
def get_category_type(row):
    """Return the name of the first category of a venue row.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide the category list under 'categories' or, failing that,
        under 'venue.categories'. Each category is a dict with a 'name' key.

    Returns
    -------
    The 'name' of the first category, or None when the list is empty.

    Raises
    ------
    KeyError
        If neither 'categories' nor 'venue.categories' is present.
    """
    try:
        categories_list = row['categories']
    except KeyError:
        # Only a missing key triggers the fallback; any other error
        # propagates instead of being silently swallowed.
        categories_list = row['venue.categories']

    if len(categories_list) == 0:
        return None
    return categories_list[0]['name']

# replace each row's category list with the name of its first category
category_names = dataframe_filtered.apply(get_category_type, axis=1)
dataframe_filtered['categories'] = category_names

# shorten the column names to the part after the last dot ('location.lat' -> 'lat')
dataframe_filtered.columns = [label.rsplit('.', 1)[-1] for label in dataframe_filtered.columns]

dataframe_filtered.head()

Unnamed: 0,name,categories,address,lat,lng,labeledLatLngs,distance,postalCode,cc,city,state,country,formattedAddress,crossStreet,neighborhood,id
0,Decathlon,Sporting Goods Shop,55-57 Garratt Lane,51.454381,-0.192866,"[{'label': 'display', 'lat': 51.4543813, 'lng'...",5156,SW18 4TF,GB,London,Greater London,United Kingdom,"[55-57 Garratt Lane, London, Greater London, S...",,,55897485498ed8423f6b80c8
1,Decathlon Staines,Sporting Goods Shop,"Unit MSU 2, The Elmsleigh Centre",51.433066,-0.508622,"[{'label': 'display', 'lat': 51.4330661, 'lng'...",26049,TW18 4QB,GB,Staines-upon-Thames,Surrey,United Kingdom,"[Unit MSU 2, The Elmsleigh Centre (South St), ...",South St,,5ae0109dfd16bb599c8151d6
2,Rapha Cycle Club,Bike Shop,85 Brewer St,51.510842,-0.136704,"[{'label': 'display', 'lat': 51.51084194680312...",2447,W1F 9ZN,GB,Soho,Greater London,United Kingdom,"[85 Brewer St (Air St), Soho, Greater London, ...",Air St,,4f182f1aa17c202e1058990c
3,Paradise Cycles,Bike Shop,63 Roman Rd,51.527972,-0.050719,"[{'label': 'display', 'lat': 51.52797232526196...",7766,E2 0QN,GB,London,Greater London,United Kingdom,"[63 Roman Rd, London, Greater London, E2 0QN, ...",,Mile End and Globe Town,5631e2af498ea8b14fbbfb0a
4,Decathlon Brent Cross,Sporting Goods Shop,"Unit D1, Geron Way, Staples Corner Retail Park",51.568135,-0.22731,"[{'label': 'display', 'lat': 51.5681346, 'lng'...",10497,NW2 6LW,GB,London,Greater London,United Kingdom,"[Unit D1, Geron Way, Staples Corner Retail Par...",,,5a7f18fda35dce7538ba513a


In [355]:
# Keep only the name, categories, lat and lng columns.
# They are selected by name: the set of optional location columns differs
# between API responses, so dropping by position (or dropping columns that
# may be absent) can silently keep the wrong columns or raise a KeyError.
london_bike = dataframe_filtered[['name', 'categories', 'lat', 'lng']].copy()
london_bike.head()

Unnamed: 0,name,categories,lat,lng
0,Decathlon,Sporting Goods Shop,51.454381,-0.192866
1,Decathlon Staines,Sporting Goods Shop,51.433066,-0.508622
2,Rapha Cycle Club,Bike Shop,51.510842,-0.136704
3,Paradise Cycles,Bike Shop,51.527972,-0.050719
4,Decathlon Brent Cross,Sporting Goods Shop,51.568135,-0.22731


In [356]:
# create map of London using latitude and longitude values
map_london = folium.Map(location=[latitude, longitude], zoom_start=11)

# add one circle marker per venue; the popup shows the venue's coordinates
for lat, lng in zip(london_bike['lat'], london_bike['lng']):
    label = '{}, {}'.format(lat, lng)
    label = folium.Popup(label, parse_html=True)
    # NOTE(review): parse_html is also passed to CircleMarker below - confirm
    # that CircleMarker actually uses this argument.
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_london)  
    
# last expression of the cell: renders the map
map_london

It looks like the neighborhoods north of the River Thames have a higher concentration of bike shops. Therefore, even though competition is higher in these areas, Bicycle Inc. is also more likely to attract more clients if it operates in one of these neighborhoods.