In [8]:
from bs4 import BeautifulSoup

In [9]:
import requests
import pandas as pd

In [10]:
# Download the Wikipedia page "List of postal codes of Canada: M".
# The timeout stops the notebook from hanging on a stalled connection, and
# raise_for_status() surfaces an HTTP error here instead of letting an error
# page be parsed as if it were the real article.
r = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M', timeout=30)
r.raise_for_status()

In [11]:
# Parse the response body into a searchable tree using the lxml parser.
soup = BeautifulSoup(r.text, features='lxml')

In [12]:
# Take the first <table> in the parsed page; the cells below are read from it.
tab = soup.find('table')
if tab is None:
    # soup.find returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on the next line.
    raise ValueError('No <table> element found in the downloaded page.')

# Flatten every <td> of that table into one list. The cells are consumed in
# consecutive groups of three: postcode, borough, neighborhood.
cols = tab.find_all('td')
lenn = len(cols)
if lenn % 3 != 0:
    # A trailing partial group would otherwise leave the three lists with
    # different lengths before an IndexError is raised.
    raise ValueError(
        'Expected the table cells to come in groups of 3, found {} cells.'.format(lenn)
    )

# Strip surrounding whitespace once, then slice with a stride of three so that
# each list receives one cell from every group.
cell_text = [cell.text.strip() for cell in cols]
postcode = cell_text[0::3]
borough = cell_text[1::3]
neighborhood = cell_text[2::3]

In [14]:
# Stack the three parallel lists as labelled rows, then transpose so that each
# list becomes a column carrying its label.
df = pd.DataFrame(
    [postcode, borough, neighborhood],
    index=['Postcode', 'Borough', 'Neighborhood'],
).T

In [15]:
# Remove, in place, every row whose borough is 'Not assigned'. The surviving
# rows keep their original index labels.
unassigned_borough = df.index[df['Borough'] == 'Not assigned']
df.drop(index=unassigned_borough, inplace=True)

# Where only the neighborhood is 'Not assigned', fall back to the borough name
# (the assignment aligns on the row index).
needs_fill = df['Neighborhood'] == 'Not assigned'
df.loc[needs_fill, 'Neighborhood'] = df['Borough']

In [17]:
# Collapse rows that share a (Postcode, Borough) pair into one row whose
# Neighborhood is the comma-separated list of the group's neighborhoods.
grouped_neighborhoods = df.groupby(['Postcode', 'Borough'])['Neighborhood']
joined_neighborhoods = grouped_neighborhoods.apply(', '.join)

# Turn the group keys back into ordinary columns and pin the column labels.
df2 = joined_neighborhoods.reset_index()
df2.columns = ['Postcode', 'Borough', 'Neighborhood']

In [18]:
# Display the cleaned per-row frame `df` (this is not the grouped `df2`).
# Gaps in the index correspond to rows removed by the in-place drop.
df

Unnamed: 0,Postcode,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights
7,M6A,North York,Lawrence Manor
8,M7A,Queen's Park,Queen's Park
10,M9A,Etobicoke,Islington Avenue
11,M1B,Scarborough,Rouge
12,M1B,Scarborough,Malvern


In [19]:
# Read the coordinate CSV, then relabel its columns so that the key column is
# named 'Postcode', matching the key used for the later merge.
geo_column_names = ['Postcode', 'Latitude', 'Longitude']
df3 = pd.read_csv(filepath_or_buffer='https://cocl.us/Geospatial_data')
df3.columns = geo_column_names

In [20]:
# Attach coordinates to each grouped postcode row. The inner join keeps only
# postcodes that appear in both frames.
df_joined = df2.merge(df3, how='inner', on='Postcode')

In [21]:
# Display the merged frame: one row per postcode with its borough, joined
# neighborhood names, latitude and longitude.
df_joined

Unnamed: 0,Postcode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848


In [22]:
# Select the four columns to carry forward (Postcode is left out); .copy()
# gives `last` its own data, independent of df_joined.
last = df_joined.loc[:, ['Borough', 'Neighborhood', 'Latitude', 'Longitude']].copy()

In [24]:
# Summarise the frame: the number of distinct boroughs and the number of rows.
# Note: the second figure is the row count of `last`, which the message
# reports as "neighborhoods".
borough_count = last['Borough'].unique().size
row_count = len(last)
print('The dataframe has {} boroughs and {} neighborhoods.'.format(borough_count, row_count))

The dataframe has 11 boroughs and 103 neighborhoods.


In [26]:
from geopy.geocoders import Nominatim
import matplotlib.cm as cm
from sklearn.cluster import KMeans
import folium 

In [27]:


# Look up the latitude/longitude of the address with the Nominatim geocoder.
address = 'Toronto, Canada'

# Nominatim requires an application-specific user agent. geopy rejects the
# library default (newer releases raise ConfigurationError), so one is set
# explicitly here.
geolocator = Nominatim(user_agent='toronto_neighborhood_explorer')
location = geolocator.geocode(address)
if location is None:
    # geocode() returns None when nothing matches; fail clearly instead of
    # raising AttributeError on the attribute access below.
    raise ValueError('Could not geocode address: {!r}'.format(address))

latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))



The geograpical coordinate of Toronto are 43.653963, -79.387207.


In [28]:
# create map of Toronto using latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add one circle marker per row of `last`.
# The loop variables are deliberately not called `borough` / `neighborhood`:
# those names are the notebook-level lists built by the table-parsing cell,
# and rebinding them here would break that cell's dependants on a re-run.
for lat, lng, borough_name, neighborhood_name in zip(
        last['Latitude'], last['Longitude'], last['Borough'], last['Neighborhood']):
    label = '{}, {}'.format(neighborhood_name, borough_name)
    # parse_html is a Popup option, so it is passed to the Popup only
    # and not to the marker.
    popup = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
    ).add_to(map_toronto)

# Leave the map as the last expression so the notebook renders it.
map_toronto