# Segmenting and Clustering Neighborhoods in Toronto

### 1. Requirements

In [27]:
import numpy as np # library to handle data in a vectorized manner
import pandas as pd # library for data analysis
import requests  # library to issue HTTP requests (used to download the page that is scraped)

# Install the BeautifulSoup package (HTML parser) into the kernel's conda environment
!conda install -c conda-forge beautifulsoup4 --yes
from bs4 import BeautifulSoup

# Install the geocoder package into the kernel's conda environment
# NOTE(review): geocoder is not referenced in the cells visible here (coordinates
# are read from a CSV file instead) - confirm whether it is still needed.
!conda install -c conda-forge geocoder --yes 
import geocoder 

Solving environment: done


  current version: 4.5.11
  latest version: 4.8.2

Please update conda by running

    $ conda update -n base -c defaults conda



# All requested packages already installed.

Solving environment: done


  current version: 4.5.11
  latest version: 4.8.2

Please update conda by running

    $ conda update -n base -c defaults conda



## Package Plan ##

  environment location: /home/jupyterlab/conda/envs/python

  added / updated specs: 
    - geocoder


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    future-0.18.2              |           py36_0         713 KB  conda-forge
    click-7.0                  |             py_0          61 KB  conda-forge
    ratelim-0.1.6              |             py_2           6 KB  conda-forge
    geocoder-1.38.1            |             py_1          53 KB  conda-forge
    ------------------------------------------------------

### 2. Scraping and Cleaning Data

In [28]:
# Download the Wikipedia page and parse it; the first table on the page is the
# one that gets scraped.
res = requests.get(
    "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M",
    timeout=30,  # do not hang the kernel forever if the server is unresponsive
)
res.raise_for_status()  # fail here on an HTTP error instead of parsing an error page
soup = BeautifulSoup(res.content, "html.parser")
table = soup.find_all('table')[0]

# Scrape the table: the first three cells of every data row are read as
# (PostalCode, Borough, Neighborhood). Header rows have no <td> cells, and rows
# with fewer than three cells cannot be mapped, so both are skipped.
board_members = []
for row in table.find_all('tr'):
    cols = row.find_all('td')
    if len(cols) >= 3:
        board_members.append(tuple(cell.text.strip() for cell in cols[:3]))

# Build the dataframe directly from the scraped rows (also works when no rows
# were found, yielding an empty frame with the expected columns).
df = pd.DataFrame(board_members, columns=['PostalCode', 'Borough', 'Neighborhood'])

# Ignore rows whose borough is "Not assigned"
df = df.loc[df['Borough'] != 'Not assigned'].copy()

# A neighborhood that is "Not assigned" takes the name of its borough
unassigned = df['Neighborhood'] == 'Not assigned'
df.loc[unassigned, 'Neighborhood'] = df.loc[unassigned, 'Borough']

# Combine rows that share a postal code: neighborhoods are joined with ", " and
# the borough of the first occurrence is kept. sort=False keeps the postal codes
# in order of first appearance, and duplicates are merged even when they are not
# on adjacent rows.
df = (
    df.groupby('PostalCode', sort=False, as_index=False)
      .agg({'Borough': 'first', 'Neighborhood': ', '.join})
)

df.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Harbourfront
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Downtown Toronto,Queen's Park


### 3. Adding the latitude and the longitude coordinates of each neighborhood

In [54]:
# Add latitude and longitude columns by looking each postal code up in the
# coordinates CSV.

# Read the CSV file holding one 'Postal Code' / 'Latitude' / 'Longitude' row per code
lat_lng = pd.read_csv('Geospatial_Coordinates.csv')

# Index the lookup table by postal code. If a code appears more than once, the
# first row is kept, which matches taking the first match of a row-by-row search.
coords = lat_lng.drop_duplicates(subset='Postal Code').set_index('Postal Code')

# Fail with a readable message when a postal code has no coordinates, rather
# than leaving gaps in the coordinate columns.
missing = sorted(set(df['PostalCode']) - set(coords.index))
if missing:
    raise ValueError(
        "No coordinates found in Geospatial_Coordinates.csv for postal code(s): "
        + ", ".join(missing)
    )

# Vectorised lookup; the columns get a numeric dtype and the assignment does not
# depend on df having a 0..n-1 index.
df['Latitude'] = df['PostalCode'].map(coords['Latitude'])
df['Longitude'] = df['PostalCode'].map(coords['Longitude'])

df

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.7533,-79.3297
1,M4A,North York,Victoria Village,43.7259,-79.3156
2,M5A,Downtown Toronto,Harbourfront,43.6543,-79.3606
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.7185,-79.4648
4,M7A,Downtown Toronto,Queen's Park,43.6623,-79.3895
...,...,...,...,...,...
98,M8X,Etobicoke,"The Kingsway, Montgomery Road, Old Mill North",43.6537,-79.5069
99,M4Y,Downtown Toronto,Church and Wellesley,43.6659,-79.3832
100,M7Y,East Toronto,Business Reply Mail Processing Centre 969 Eastern,43.6627,-79.3216
101,M8Y,Etobicoke,"Humber Bay, King's Mill Park, Kingsway Park So...",43.6363,-79.4985
