In [None]:
#install Beautiful Soup and requests for Web Scaping
# !pip install BeautifulSoup4
# !pip install requests

In [50]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

source = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
soup = BeautifulSoup(source.text, 'lxml')


data = []
columns = []
table = soup.find(class_='wikitable')
for index, tr in enumerate(table.find_all('tr')):
    section = []
    for td in tr.find_all(['th','td']):
        section.append(td.text.rstrip())
    
    if (index == 0):
        columns = section
    else:
        data.append(section)

can_df = pd.DataFrame(data = data,columns = columns)
can_df.head(10)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor
7,M7A,Downtown Toronto,Queen's Park
8,M8A,Not assigned,Not assigned
9,M9A,Etobicoke,Islington Avenue


In [51]:
can_df = can_df[can_df['Borough'] != 'Not assigned']
can_df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M6A,North York,Lawrence Heights
6,M6A,North York,Lawrence Manor


In [53]:
can_df["Neighbourhood"] = can_df.groupby("Postcode")["Neighbourhood"].transform(lambda x: ', '.join(x))

can_df = can_df.drop_duplicates()

can_df.reset_index(inplace=True)


can_df
# can_df.shape

Unnamed: 0,index,Postcode,Borough,Neighbourhood
0,2,M3A,North York,Parkwoods
1,3,M4A,North York,Victoria Village
2,4,M5A,Downtown Toronto,Harbourfront
3,5,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,7,M7A,Downtown Toronto,Queen's Park
5,9,M9A,Etobicoke,Islington Avenue
6,10,M1B,Scarborough,"Rouge, Malvern"
7,13,M3B,North York,Don Mills North
8,14,M4B,East York,"Woodbine Gardens, Parkview Hill"
9,16,M5B,Downtown Toronto,"Ryerson, Garden District"


In [54]:
can_df['Neighbourhood'].replace("Not assigned", can_df["Borough"],inplace=True)
can_df.head()

Unnamed: 0,index,Postcode,Borough,Neighbourhood
0,2,M3A,North York,Parkwoods
1,3,M4A,North York,Victoria Village
2,4,M5A,Downtown Toronto,Harbourfront
3,5,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,7,M7A,Downtown Toronto,Queen's Park


This work, we can scraping data from websites using many ways such as BeautifulSoup package.

First scraping data, convert it to a table, and convert it to dataframe with pandas. After that u can clean up the data.

In [16]:
can_df.shape

(103, 2)

In [17]:
!conda install -c conda-forge geocoder --yes
print("Installation Done!")
import geocoder # import geocoder
print("Geo Coder imported!")

Solving environment: done

# All requested packages already installed.

Installation Done!
Geo Coder imported!


In [18]:
def get_geocoder(postal_code):
    lat_lng_coords = None
    while(lat_lng_coords is None):
        g = geocoder.arcgis('{}, Toronto, Ontario'.format(postal_code.strip()))
        lat_lng_coords = g.latlng
        latitude = lat_lng_coords[0]
        longitude = lat_lng_coords[1]
    return latitude,longitude

In [57]:
can_df['Latitude'], can_df['Longitude'] = zip(*can_df['Postcode'].apply(get_geocoder))
can_df.head()

Unnamed: 0,index,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,2,M3A,North York,Parkwoods,43.75242,-79.329242
1,3,M4A,North York,Victoria Village,43.7306,-79.313265
2,4,M5A,Downtown Toronto,Harbourfront,43.650295,-79.359166
3,5,M6A,North York,"Lawrence Heights, Lawrence Manor",43.72327,-79.451286
4,7,M7A,Downtown Toronto,Queen's Park,43.66115,-79.391715


In [60]:
from geopy.geocoders import Nominatim

address = 'Toronto, Ontario'

geolocator = Nominatim(user_agent="toronto_ontario")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinates of Toronto, Ontario are {}, {}.'.format(latitude, longitude))


The geograpical coordinates of Toronto, Ontario are 43.653963, -79.387207.


In [66]:
import folium # map rendering library

map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

for lat, long, post, borough, neigh in zip(can_df['Latitude'], can_df['Longitude'], can_df['Postcode'], can_df['Borough'], can_df['Neighbourhood']):
    label = "{} ({}): {}".format(borough, post, neigh)
    popup = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, long],
        radius=5,
        popup=popup,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)
    
map_toronto