**Importing essential libraries**

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

**Downloading and parsing the Wikipedia page**

In [2]:
# Download the Wikipedia page named in the URL below and parse its HTML.
# The timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() surfaces HTTP errors here instead of letting an error
# page be parsed as if it were the real table.
wiki = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M", timeout=30)
wiki.raise_for_status()
soup = BeautifulSoup(wiki.content, 'html.parser')

In [13]:
# Grab the text of every <tr> found under the first <tbody> of the page.
table = soup.find('tbody')
rows = table.find_all('tr')
row = list(map(lambda tr: tr.get_text(), rows))

# One string per table row -> one column per newline-separated field.
df = pd.DataFrame(row)
df1 = df[0].str.split('\n', expand=True)

# Promote the first row to column labels, then remove it from the data.
df2 = df1.rename(columns=df1.iloc[0]).drop(index=df1.index[0])

# Discard rows whose Borough is the placeholder 'Not assigned'.
df2 = df2.loc[df2['Borough'] != 'Not assigned']
df2.head()

Unnamed: 0,Unnamed: 1,Postcode,Borough,Neighbourhood,Unnamed: 5
3,,M3A,North York,Parkwoods,
4,,M4A,North York,Victoria Village,
5,,M5A,Downtown Toronto,Harbourfront,
6,,M6A,North York,Lawrence Heights,
7,,M6A,North York,Lawrence Manor,


In [14]:
# Collapse the table to one row per (Postcode, Borough) and join that group's
# neighbourhood names with commas. sort=False keeps the groups in order of
# first appearance.
#
# Only the Neighbourhood column is aggregated. Applying ','.join to every
# remaining column would also sweep in the blank, unnamed columns left over
# from splitting each row on '\n', and the three-column result would then
# depend on how the installed pandas treats those columns.
df2 = (
    df2.groupby(['Postcode', 'Borough'], sort=False)['Neighbourhood']
    .agg(','.join)
    .reset_index()
)
df2.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,"Lawrence Heights,Lawrence Manor"
4,M7A,Queen's Park,Not assigned


In [15]:
# Where a postcode has a borough but its Neighbourhood is 'Not assigned', use
# the row's own Borough as the neighbourhood. For the M7A row shown in the
# preview this yields "Queen's Park", the same value the hard-coded replacement
# produced, but it stays correct if another borough ever has an unassigned
# neighbourhood and it never touches the other columns.
df3 = df2.assign(
    Neighbourhood=df2['Neighbourhood'].mask(
        df2['Neighbourhood'] == 'Not assigned', df2['Borough']
    )
)
df3.shape

(103, 3)

**Now importing the dataset with postcode information and merging it with the current data frame**

In [16]:
# CSV with coordinates per postal code (see the preview in the next cell).
# NOTE: this rebinds df1, which earlier held the newline-split wiki table.
url = 'http://cocl.us/Geospatial_data'
df1 = pd.read_csv(filepath_or_buffer=url)

In [17]:
# Preview the first five rows of the coordinates table.
df1.head(n=5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [19]:
# Give the key column the same name as in the wiki-derived frame so the two
# tables can be merged on it.
df1 = df1.rename(columns={'Postal Code': 'Postcode'})
df1.head()

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [23]:
# Attach coordinates to the *cleaned* neighbourhood table. Merging df2 here
# would discard the 'Not assigned' fix made when df3 was built, because df2
# still contains the placeholder.
# Selecting the three wiki-derived columns explicitly keeps this cell safe to
# re-run: on a second run df3 already carries Latitude/Longitude, and merging
# those again would create suffixed duplicate columns.
df3 = pd.merge(df1, df3[['Postcode', 'Borough', 'Neighbourhood']], on='Postcode')
df3.head(10)

Unnamed: 0,Postcode,Latitude,Longitude,Borough,Neighbourhood
0,M1B,43.806686,-79.194353,Scarborough,"Rouge,Malvern"
1,M1C,43.784535,-79.160497,Scarborough,"Highland Creek,Rouge Hill,Port Union"
2,M1E,43.763573,-79.188711,Scarborough,"Guildwood,Morningside,West Hill"
3,M1G,43.770992,-79.216917,Scarborough,Woburn
4,M1H,43.773136,-79.239476,Scarborough,Cedarbrae
5,M1J,43.744734,-79.239476,Scarborough,Scarborough Village
6,M1K,43.727929,-79.262029,Scarborough,"East Birchmount Park,Ionview,Kennedy Park"
7,M1L,43.711112,-79.284577,Scarborough,"Clairlea,Golden Mile,Oakridge"
8,M1M,43.716316,-79.239476,Scarborough,"Cliffcrest,Cliffside,Scarborough Village West"
9,M1N,43.692657,-79.264848,Scarborough,"Birch Cliff,Cliffside West"


**Now creating a new dataset to visualise the clusters**

In [25]:
# Keep only the postcodes whose Borough name contains 'Toronto'.
dfcluster = df3.loc[lambda frame: frame['Borough'].str.contains('Toronto')]
dfcluster

Unnamed: 0,Postcode,Latitude,Longitude,Borough,Neighbourhood
37,M4E,43.676357,-79.293031,East Toronto,The Beaches
41,M4K,43.679557,-79.352188,East Toronto,"The Danforth West,Riverdale"
42,M4L,43.668999,-79.315572,East Toronto,"The Beaches West,India Bazaar"
43,M4M,43.659526,-79.340923,East Toronto,Studio District
44,M4N,43.72802,-79.38879,Central Toronto,Lawrence Park
45,M4P,43.712751,-79.390197,Central Toronto,Davisville North
46,M4R,43.715383,-79.405678,Central Toronto,North Toronto West
47,M4S,43.704324,-79.38879,Central Toronto,Davisville
48,M4T,43.689574,-79.38316,Central Toronto,"Moore Park,Summerhill East"
49,M4V,43.686412,-79.400049,Central Toronto,"Deer Park,Forest Hill SE,Rathnelly,South Hill,..."


**Now we import essential libraries to visualise the data**

In [26]:
# Install geopy (used below for geocoding) and folium (used below for the map)
# from the conda-forge channel. folium is pinned to 0.5.0; geopy is unpinned.
# --yes answers conda's confirmation prompt automatically.
!conda install -c conda-forge geopy --yes
!conda install -c conda-forge folium=0.5.0 --yes

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geopy


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    openssl-1.1.1d             |       h516909a_0         2.1 MB  conda-forge
    geopy-1.20.0               |             py_0          57 KB  conda-forge
    ca-certificates-2019.11.28 |       hecc5488_0         145 KB  conda-forge
    geographiclib-1.50         |             py_0          34 KB  conda-forge
    certifi-2019.11.28         |           py36_0         149 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.5 MB

The following NEW packages will be INSTALLED:

    geographiclib:   1.50-py_0         conda-forge
    geopy:           1.20.0-py_0       conda-forge

The following packages will be UPDATED:

    ca-

In [30]:
from geopy.geocoders import Nominatim
import folium

# Geocode the city name to obtain a centre point for the map.
address = 'Toronto'
geolocator = Nominatim(user_agent="Toronto_explorer")
# Allow more time than geopy's short default timeout; the public Nominatim
# service often needs longer to answer.
location = geolocator.geocode(address, timeout=10)
# geocode() returns None when nothing matches. Fail with a clear message here
# rather than with an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude = location.latitude
longitude = location.longitude

Toronto_map = folium.Map(location=[latitude, longitude], zoom_start=10)

# Add one circle marker per row of dfcluster, labelled "<neighbourhoods>, <borough>".
for lat, lng, borough, neighborhood in zip(dfcluster['Latitude'], dfcluster['Longitude'],
                                           dfcluster['Borough'], dfcluster['Neighbourhood']):
    label = '{}, {}'.format(neighborhood, borough)
    # parse_html=True on the Popup is what handles the label text safely.
    label = folium.Popup(label, parse_html=True)
    # NOTE(review): parse_html=False below is passed to CircleMarker, not Popup;
    # it appears to have no effect there - confirm before removing.
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(Toronto_map)

# Last expression in the cell: renders the map inline.
Toronto_map