## Applied_Data_Science_Capstone-week3-Toronto_neighbourhood

### import libraries:

In [1]:
import numpy as np 
import pandas as pd 
# Notebook-wide display settings: passing None removes pandas' truncation
# limit, so every column and every row of a displayed DataFrame is rendered.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

### Scrape table from Wikipedia:

In [2]:
# Wikipedia page listing the Canadian postal codes that start with "M".
url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'

# read_html returns one DataFrame per HTML table on the page; the first one
# is used here, with its first row taken as the column header.
tables = pd.read_html(url, header=0)
df = tables[0]
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


### Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.

In [3]:
# Keep only the rows whose borough has been assigned, then renumber the index
# from zero so it has no gaps.
df = df.loc[df['Borough'].ne('Not assigned')].reset_index(drop=True)
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


### Combine rows that have the same "Postal Code"; their "Neighbourhood" values are joined together, separated by ",":

In [4]:
# Collapse rows that share a postal code into a single row.
# A plain .sum() concatenates strings with no separator ("MalvernRouge") and
# repeats the borough name once per merged row, so each column is aggregated
# explicitly instead:
#   - Borough: every row of a postal code carries the same borough, keep the first
#   - Neighbourhood: join the names with ", "
# as_index=False keeps "Postal Code" as a regular column, which replaces the
# separate reset_index() step.
df = (
    df.groupby('Postal Code', as_index=False)
      .agg({'Borough': 'first', 'Neighbourhood': ', '.join})
)
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


### Check rows with "Not assigned"  "Neighbourhood":

In [5]:
# Show any rows whose neighbourhood is still the "Not assigned" placeholder.
df.loc[df['Neighbourhood'].eq('Not assigned')]

Unnamed: 0,Postal Code,Borough,Neighbourhood


### If a cell has a borough but a "Not assigned" neighbourhood, the neighbourhood is set to the borough name (in this dataset there are no rows with a "Not assigned" "Neighbourhood"):

In [6]:
# Rows whose neighbourhood is the "Not assigned" placeholder take their
# borough name as the neighbourhood instead.
not_assigned = df['Neighbourhood'] == 'Not assigned'
df.loc[not_assigned, 'Neighbourhood'] = df.loc[not_assigned, 'Borough']

In [7]:
# (rows, columns) of the cleaned table: one row per postal code.
df.shape

(103, 3)

In [8]:
# Preview the first five rows of the cleaned table.
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood
0,M1B,Scarborough,"Malvern, Rouge"
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


###  Get the latitude and longitude coordinates of each neighborhood using the Google Maps Geocoding API. However, it does not work for me.

In [10]:
# DISABLED - kept for reference only; nothing in this cell executes.
# Intended behaviour: for every row, look up "<postal code>, Toronto, Ontario"
# with geocoder.google and store the result in Latitude / Longitude columns.
# NOTE(review): the while-loop retries until g.latlng is not None and has no
# retry limit, so it never terminates if the service keeps returning nothing.
#! pip install geocoder
#import geocoder
#for ind in range(df.shape[0]):
#    lat_lng_coords = None
    
#    while(lat_lng_coords is None):
#        g = geocoder.google('{}, Toronto, Ontario'.format(df.loc[ind,'Postal Code']))
#        lat_lng_coords = g.latlng
    
#    df.loc[ind,'Latitude'] = lat_lng_coords[0]
#    df.loc[ind,'Longitude']  = lat_lng_coords[1]

### I am not able to get the geographical coordinates of the neighborhoods using the Geocoder package, so I have to read them from the csv file.

In [9]:
# Load the CSV that maps each postal code to a latitude/longitude pair.
postcodeLatLng = pd.read_csv(filepath_or_buffer='http://cocl.us/Geospatial_data')
postcodeLatLng.head(5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


### Use pandas.merge to add latitude and longitude to the neighbourhood dataframe.

In [10]:
# Attach Latitude/Longitude to each postal code. This is an inner join (the
# pandas default), so a postal code missing from either table is dropped.
df = df.merge(postcodeLatLng, how='inner', on='Postal Code')
df.head()

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern, Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill, Port Union, Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


In [11]:
# (rows, columns) after the merge; the row count should match the pre-merge
# table if every postal code found its coordinates.
df.shape

(103, 5)

### Simplify the data and segment and cluster only  boroughs that contain the word Toronto.

In [16]:
# Restrict the analysis to boroughs whose name contains "Toronto" and renumber
# the rows from zero.
Toronto_data = (
    df.loc[lambda frame: frame['Borough'].str.contains('Toronto')]
      .reset_index(drop=True)
)
Toronto_data

Unnamed: 0,Postal Code,Borough,Neighbourhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West, Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"India Bazaar, The Beaches West",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879
5,M4P,Central Toronto,Davisville North,43.712751,-79.390197
6,M4R,Central Toronto,"North Toronto West, Lawrence Park",43.715383,-79.405678
7,M4S,Central Toronto,Davisville,43.704324,-79.38879
8,M4T,Central Toronto,"Moore Park, Summerhill East",43.689574,-79.38316
9,M4V,Central Toronto,"Summerhill West, Rathnelly, South Hill, Forest...",43.686412,-79.400049


### Get the geographical coordinates of Toronto (used to centre the map).

In [22]:
!conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

address = 'Toronto, Ontario'
geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))


usage: conda-script.py [-h] [-V] command ...
conda-script.py: error: unrecognized arguments: # uncomment this line if you haven't completed the Foursquare API lab


The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


### Create a map of Toronto with neighborhoods superimposed on top.

In [25]:
!conda install -c conda-forge folium=0.12.0 --yes 
import folium # map rendering library

map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)
# add markers to map
for lat, lng, label in zip(Toronto_data['Latitude'], Toronto_data['Longitude'], Toronto_data['Neighbourhood']):
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto

Collecting package metadata (current_repodata.json): ...working... done
Solving environment: ...working... done

## Package Plan ##

  environment location: D:\software\anaconda3

  added / updated specs:
    - folium=0.12.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    folium-0.12.0              |     pyhd8ed1ab_0          64 KB  conda-forge
    ------------------------------------------------------------
                                           Total:          64 KB

The following packages will be UPDATED:

  folium                                         0.5.0-py_0 --> 0.12.0-pyhd8ed1ab_0



Downloading and Extracting Packages

folium-0.12.0        | 64 KB     |            |   0% 
folium-0.12.0        | 64 KB     | ##5        |  25% 
folium-0.12.0        | 64 KB     | ########## | 100% 
Preparing transaction: ...working... done
Verifying transaction: ...working... done
Executing

### We can see that the neighbourhoods are not equally distributed in the map. 