NB. Scroll down for Problem #2 code cells

# Problem #1

We only use `requests` and `pandas` libraries, not `bs4`, for clean and explainable notebook.

In [1]:
# imports
import requests
import pandas as pd

In [2]:
# read data from URL 
wiki_url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
wiki_page = requests.get(wiki_url)

# convert html into pandas dataframe, excluding 'Not assigned' boroughs
wiki_raw = pd.read_html(wiki_page.content, header = 0)[0]
df_temp = wiki_raw[wiki_raw.Borough != 'Not assigned']
df_temp.reset_index(inplace = True)

# rename column name 'Postal Code' into 'PostalCode' as mentioned in assignment
# and drop 'index' column
df = df_temp.rename(columns = {'Postal Code': 'PostalCode', 'Neighbourhood': 'Neighborhood'}, inplace=False).drop(columns=['index'])
df

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"
...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North"
99,M4Y,Downtown Toronto,Church and Wellesley
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C..."
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu..."


Seems like Wikipedia table already had been grouped by boroughs/codes:


In [3]:
df.groupby(['PostalCode']).first()


Unnamed: 0_level_0,Borough,Neighborhood
PostalCode,Unnamed: 1_level_1,Unnamed: 2_level_1
M1B,Scarborough,"Malvern, Rouge"
M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
M1E,Scarborough,"Guildwood, Morningside, West Hill"
M1G,Scarborough,Woburn
M1H,Scarborough,Cedarbrae
...,...,...
M9N,York,Weston
M9P,Etobicoke,Westmount
M9R,Etobicoke,"Kingsview Village, St. Phillips, Martin Grove ..."
M9V,Etobicoke,"South Steeles, Silverstone, Humbergate, Jamest..."


In [4]:
len(df['PostalCode'].unique())

103

In [5]:
# In the last cell of your notebook, use the .shape method to print the number of rows of your dataframe.

df.shape

(103, 3)

# Problem #2

First, we try to use Geocoder package to obtain the coordinates for every postal code

In [6]:
# installs and imports

!pip install geocoder 
import geocoder


Collecting geocoder
[?25l  Downloading https://files.pythonhosted.org/packages/4f/6b/13166c909ad2f2d76b929a4227c952630ebaf0d729f6317eb09cbceccbab/geocoder-1.38.1-py2.py3-none-any.whl (98kB)
[K     |███▎                            | 10kB 9.5MB/s eta 0:00:01[K     |██████▋                         | 20kB 14.2MB/s eta 0:00:01[K     |██████████                      | 30kB 11.5MB/s eta 0:00:01[K     |█████████████▎                  | 40kB 10.9MB/s eta 0:00:01[K     |████████████████▋               | 51kB 7.8MB/s eta 0:00:01[K     |████████████████████            | 61kB 7.2MB/s eta 0:00:01[K     |███████████████████████▎        | 71kB 7.2MB/s eta 0:00:01[K     |██████████████████████████▋     | 81kB 7.9MB/s eta 0:00:01[K     |██████████████████████████████  | 92kB 8.6MB/s eta 0:00:01[K     |████████████████████████████████| 102kB 4.7MB/s 
Collecting ratelim
  Downloading https://files.pythonhosted.org/packages/f2/98/7e6d147fd16a10a5f821db6e25f192265d6ecca3d82957a4fdd592ca

In [7]:
# collect coordinates into a dict of tuples

coordinates = {}

for postal_code in list(df.PostalCode):
    # initialize your variable to None
    lat_lng_coords = None

    # loop until you get the coordinates
    while(lat_lng_coords is None):
        g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
        lat_lng_coords = g.latlng
    latitude = lat_lng_coords[0]
    longitude = lat_lng_coords[1]
    
    coordinates[postal_code] = (latitude, longitude)

coordinates

KeyboardInterrupt: ignored

**But it didn't work... =(**

So we use a `.csv` file provided in the assignment

In [8]:
geo_coor = pd.read_csv('/content/Geospatial_Coordinates.csv', header = 0)
geo_coor = geo_coor.rename(columns = {'Postal Code': 'PostalCode'}, inplace=False)
geo_coor
                           

Unnamed: 0,PostalCode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
...,...,...,...
98,M9N,43.706876,-79.518188
99,M9P,43.696319,-79.532242
100,M9R,43.688905,-79.554724
101,M9V,43.739416,-79.588437


In [9]:
df_1 = pd.merge(df, geo_coor, on=['PostalCode'])

df_1

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509


In [10]:
df_1.to_csv('/content/Toronto_neighborhoods.csv', index=False)

In [11]:
df_2 = pd.read_csv('/content/Toronto_neighborhoods.csv', header=0)
df_2

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.753259,-79.329656
1,M4A,North York,Victoria Village,43.725882,-79.315572
2,M5A,Downtown Toronto,"Regent Park, Harbourfront",43.654260,-79.360636
3,M6A,North York,"Lawrence Manor, Lawrence Heights",43.718518,-79.464763
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government",43.662301,-79.389494
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.653654,-79.506944
99,M4Y,Downtown Toronto,Church and Wellesley,43.665860,-79.383160
100,M7Y,East Toronto,"Business reply mail Processing Centre, South C...",43.662744,-79.321558
101,M8Y,Etobicoke,"Old Mill South, King's Mill Park, Sunnylea, Hu...",43.636258,-79.498509
