In [1]:
!conda install -c conda-forge geocoder -y

Solving environment: done

## Package Plan ##

  environment location: /home/jupyterlab/conda

  added / updated specs: 
    - geocoder


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    orderedset-2.0             |           py36_0         231 KB  conda-forge
    ratelim-0.1.6              |           py36_0           5 KB  conda-forge
    openssl-1.0.2p             |       h470a237_2         3.1 MB  conda-forge
    geocoder-1.38.1            |             py_0          52 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         3.4 MB

The following NEW packages will be INSTALLED:

    geocoder:   1.38.1-py_0       conda-forge
    orderedset: 2.0-py36_0        conda-forge
    ratelim:    0.1.6-py36_0      conda-forge

The following packages will be UPDATED:

    openssl:    1.0.2p-h470a237_1 conda-forge --

# Segmenting and Clustering Neighborhoods in Toronto
Applied Data Science Capstone - Coursera
This notebook contains Part 2 of my submission for the Week 3 Assignment: Segmenting and Clustering Neighborhoods in Toronto from the Applied Data Science Capstone course.

In [3]:
import pandas as pd
import numpy as np
import geocoder

## Part 2 - Add Lat/Lon coordinates based on the postal code to the dataframe created in Part 1
The dataframe from Part 1 of the assignment was stored in a .csv file called Toronto_Postcodes.csv. Here, I will import it again and add the Lat/Lon coordinates using the Geocoder Python package.


In [6]:
#Read postcodes into a pandas dataframe
# (the first column of the CSV is used as the dataframe index)
df_neighborhoods = pd.read_csv('Toronto_Postcodes.csv',index_col=[0])
# Show the first rows as a quick sanity check of the load
df_neighborhoods.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [7]:
#Create function to get coordinates of a specific postcode. Added "Max Iterations" parameter to limit the number of
#iterations for the API call, and a "Debug" parameter to show the progress of the for loop

def get_coordinates_toronto(postal_code, max_iterations, debug):
    """Look up the latitude/longitude of a Toronto postal code via geocoder.

    The geocoding request is repeated until it yields coordinates or
    ``max_iterations`` attempts have been made.

    Parameters
    ----------
    postal_code : str
        Postal code to geocode; it is queried as
        ``'<postal_code>, Toronto, Ontario'``.
    max_iterations : int
        Maximum number of API calls to attempt. With a value of 0 or less no
        call is made and the failure message is returned.
    debug : bool
        When true, print the attempt number and the coordinates received on
        every attempt.

    Returns
    -------
    tuple or str
        ``(latitude, longitude)`` on success, otherwise the string
        ``'Failed to get coordinates'``.
    """
    # The query does not change between attempts, so build it once.
    query = '{}, Toronto, Ontario'.format(postal_code)
    for attempt in range(1, max_iterations + 1):
        lat_lng_coords = geocoder.google(query).latlng
        if debug:
            print('Attempt #: {}, Coordinates: {}'.format(attempt, lat_lng_coords))
        # Accept only a non-empty result: None (what failed lookups return
        # here) and an empty sequence both count as a failed attempt, so the
        # indexing below can never raise IndexError.
        if lat_lng_coords:
            latitude = lat_lng_coords[0]
            longitude = lat_lng_coords[1]
            return latitude, longitude
    return 'Failed to get coordinates'

In [8]:
# Try the helper on one postcode: allow up to 10 attempts and print each one
get_coordinates_toronto('M1C', max_iterations=10, debug=True)

Attempt #: 1, Coordinates: None
Attempt #: 2, Coordinates: None
Attempt #: 3, Coordinates: None
Attempt #: 4, Coordinates: None
Attempt #: 5, Coordinates: None
Attempt #: 6, Coordinates: None
Attempt #: 7, Coordinates: None
Attempt #: 8, Coordinates: None
Attempt #: 9, Coordinates: None
Attempt #: 10, Coordinates: None


'Failed to get coordinates'

In [9]:
# Try again with a larger budget (200 attempts), without printing the progress
get_coordinates_toronto('M1C', max_iterations=200, debug=False)

'Failed to get coordinates'

In [10]:
#Manually test the API call
# Query a well-known address directly, bypassing the retry helper, to see
# whether the geocoding call itself returns coordinates
g = geocoder.google('Mountain View, CA')
print(g.latlng)

None


### Note
Since the geocoder API was not returning results at the time this notebook was prepared, I will use the data from the following link instead: http://cocl.us/Geospatial_data

In [11]:
#Read CSV file from link and load into dataframe
# (fallback data source, used because the geocoder calls above returned no coordinates)
url_csv = 'http://cocl.us/Geospatial_data'
df_coordinates = pd.read_csv(url_csv)
# Show the first rows to check the column names before merging
df_coordinates.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [12]:
# Make sure both dataframes use the same name ('PostalCode') for the postal code column
df_coordinates = df_coordinates.rename(columns={'Postal Code': 'PostalCode'})
df_neighborhoods = df_neighborhoods.rename(columns={'Postcode': 'PostalCode'})

In [13]:
# Join the coordinates onto the neighborhoods table, matching rows on the postal code
df_neighborhoods_coordinates = df_neighborhoods.merge(df_coordinates, on='PostalCode')
df_neighborhoods_coordinates.head()

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [14]:
# Spot-check the merged coordinates for two postal codes
df_neighborhoods_coordinates[df_neighborhoods_coordinates['PostalCode'].isin(['M5G', 'M2H'])]

Unnamed: 0,PostalCode,Borough,Neighbourhood,Latitude,Longitude
17,M2H,North York,Hillcrest Village,43.803762,-79.363452
57,M5G,Downtown Toronto,Central Bay Street,43.657952,-79.387383


In [15]:
#Export to .CSV
# (to_csv writes the dataframe index as the first column by default)
df_neighborhoods_coordinates.to_csv('Toronto_Postcodes_2.csv')