# <center>Peer-graded Assignment - Week 3<center>
## <center>Segmenting and Clustering Neighborhoods in Toronto<center>
### <center>Yunqian Guo<center>

In [1]:
# import needed libraries 
import pandas as pd
import requests
from bs4 import BeautifulSoup

# use BeautifulSoup4 to request web Table
res = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
soup = BeautifulSoup(res.content,'lxml')
table = soup.find_all('table')[0] 
df = pd.read_html(str(table))

# convert html table to DataFrame
df = pd.DataFrame(df[0])

# Unselect "Not assigned" Borough
df = df[df.Borough != 'Not assigned']
df.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


In [2]:
# group the dataframe by Postcode and Borough, and concatenate same postcode Neighbourhoods with seperated comma
df = df.groupby(['Postcode','Borough'])['Neighbourhood'].apply(list).reset_index()
df.head(12)

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M1B,Scarborough,"[Rouge, Malvern]"
1,M1C,Scarborough,"[Highland Creek, Rouge Hill, Port Union]"
2,M1E,Scarborough,"[Guildwood, Morningside, West Hill]"
3,M1G,Scarborough,[Woburn]
4,M1H,Scarborough,[Cedarbrae]
5,M1J,Scarborough,[Scarborough Village]
6,M1K,Scarborough,"[East Birchmount Park, Ionview, Kennedy Park]"
7,M1L,Scarborough,"[Clairlea, Golden Mile, Oakridge]"
8,M1M,Scarborough,"[Cliffcrest, Cliffside, Scarborough Village West]"
9,M1N,Scarborough,"[Birch Cliff, Cliffside West]"


In [3]:
!conda install -c conda-forge geocoder

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - geocoder


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    certifi-2019.9.11          |           py36_0         147 KB  conda-forge
    ratelim-0.1.6              |             py_2           6 KB  conda-forge
    geocoder-1.38.1            |             py_1          53 KB  conda-forge
    openssl-1.1.1c             |       h516909a_0         2.1 MB  conda-forge
    ca-certificates-2019.9.11  |       hecc5488_0         144 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         2.5 MB

The following NEW packages will be INSTALLED:

    geocoder:        1.38.1-py_1       conda-forge
    ratelim:         0.1.6-py_2        conda-forge

The following packages will be UPDATED:

    

In [4]:
# read geographical coordinates of each postal code
geo = pd.read_csv("http://cocl.us/Geospatial_data")
geo.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [5]:
# Merge df with Geo
df_new = pd.merge(df, geo, left_on='Postcode', right_on='Postal Code', how='left').drop(columns=['Postal Code'])
df_new.head()

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"[Rouge, Malvern]",43.806686,-79.194353
1,M1C,Scarborough,"[Highland Creek, Rouge Hill, Port Union]",43.784535,-79.160497
2,M1E,Scarborough,"[Guildwood, Morningside, West Hill]",43.763573,-79.188711
3,M1G,Scarborough,[Woburn],43.770992,-79.216917
4,M1H,Scarborough,[Cedarbrae],43.773136,-79.239476


In [6]:
!conda install -c conda-forge folium=0.5.0 --yes
import folium

print('Folium installed and imported!')

Solving environment: done

## Package Plan ##

  environment location: /opt/conda/envs/Python36

  added / updated specs: 
    - folium=0.5.0


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    folium-0.5.0               |             py_0          45 KB  conda-forge
    vincent-0.4.4              |             py_1          28 KB  conda-forge
    altair-3.2.0               |           py36_0         770 KB  conda-forge
    branca-0.3.1               |             py_0          25 KB  conda-forge
    ------------------------------------------------------------
                                           Total:         868 KB

The following NEW packages will be INSTALLED:

    altair:  3.2.0-py36_0 conda-forge
    branca:  0.3.1-py_0   conda-forge
    folium:  0.5.0-py_0   conda-forge
    vincent: 0.4.4-py_1   conda-forge


Downloading and Extracting Packages
folium-0.5.0         | 45 KB    

In [7]:
df_new = df_new[df_new['Borough'].str.contains("Toronto")].reset_index()

In [8]:
df_new

Unnamed: 0,index,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,37,M4E,East Toronto,[The Beaches],43.676357,-79.293031
1,41,M4K,East Toronto,"[The Danforth West, Riverdale]",43.679557,-79.352188
2,42,M4L,East Toronto,"[The Beaches West, India Bazaar]",43.668999,-79.315572
3,43,M4M,East Toronto,[Studio District],43.659526,-79.340923
4,44,M4N,Central Toronto,[Lawrence Park],43.72802,-79.38879
5,45,M4P,Central Toronto,[Davisville North],43.712751,-79.390197
6,46,M4R,Central Toronto,[North Toronto West],43.715383,-79.405678
7,47,M4S,Central Toronto,[Davisville],43.704324,-79.38879
8,48,M4T,Central Toronto,"[Moore Park, Summerhill East]",43.689574,-79.38316
9,49,M4V,Central Toronto,"[Deer Park, Forest Hill SE, Rathnelly, South H...",43.686412,-79.400049


In [11]:
# Toronto latitude and longitude values
latitude = 43.6532
longitude = -79.3832
# create map and display it
Toronto_map = folium.Map(location=[latitude, longitude], zoom_start=12)

# display the map of Toronto
# Toronto_map

In [12]:
# instantiate a feature group for the incidents in the dataframe
Borough = folium.map.FeatureGroup()

# loop through the 100 crimes and add each to the incidents feature group
for lat, lng, in zip(df_new.Latitude, df_new.Longitude):
    Borough.add_child(
        folium.features.CircleMarker(
            [lat, lng],
            radius=5, # define how big you want the circle markers to be
           # color='yellow',
            fill=True,
            fill_color='blue',
            fill_opacity=0.6
        )
    ) 
    
# mark out boroughs on the map
Toronto_map.add_child(Borough)