In [9]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

In [10]:
# Download the Wikipedia page that lists the postal codes starting with "M".
# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops the notebook on an HTTP error instead of letting an
# error page be parsed further down.
res=requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M',timeout=30)
res.raise_for_status()

In [11]:
# Parse the downloaded HTML text with the lxml parser.
soup=BeautifulSoup(markup=res.text,features='lxml')

In [12]:
# Three independent, initially empty lists that collect one table column each.
postal,borough,neighborhoods=[],[],[]

In [14]:
# Start from empty lists on every run so that re-executing this cell does not
# append the same table rows a second time.
postal,borough,neighborhoods=[],[],[]
# Walk every <tr> of the first <table> on the page and split its cells into
# the three column lists.
for rows in soup.find('table').find_all('tr'):
    data=rows.find_all('td')
    # Rows without any <td> cell carry no data and are skipped.
    if not data:
        continue
    postal.append(data[0].text)
    borough.append(data[1].text)
    neighborhoods.append(data[2].text)
# Sanity check: the three lists must have the same length.
print(len(postal),len(borough),len(neighborhoods))

288 288 288


In [15]:
# Show the first five entries of each column list.
print(*(column[:5] for column in (postal,borough,neighborhoods)))

['M1A', 'M2A', 'M3A', 'M4A', 'M5A'] ['Not assigned', 'Not assigned', 'North York', 'North York', 'Downtown Toronto'] ['Not assigned\n', 'Not assigned\n', 'Parkwoods\n', 'Victoria Village\n', 'Harbourfront\n']


In [16]:
# Empty frame; its columns are added in the following cells.
tdf=pd.DataFrame(data=None)

In [17]:
# Add the scraped postal codes as the 'PostalCode' column.
tdf=tdf.assign(PostalCode=postal)

In [18]:
# Add the scraped borough names as the 'Borough' column.
tdf=tdf.assign(Borough=borough)

In [19]:
# Add the scraped neighborhood names as the 'Neighborhood' column.
tdf=tdf.assign(Neighborhood=neighborhoods)

In [20]:
# Preview the first five scraped rows.
tdf.head(n=5)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1A,Not assigned,Not assigned\n
1,M2A,Not assigned,Not assigned\n
2,M3A,North York,Parkwoods\n
3,M4A,North York,Victoria Village\n
4,M5A,Downtown Toronto,Harbourfront\n


### Now let's remove rows with Borough 'Not assigned'

In [21]:
# Keep only the rows whose Borough differs from the literal 'Not assigned'.
df=tdf.loc[tdf['Borough'].ne('Not assigned')]

In [22]:
# Preview the first five rows that remain after filtering.
df.iloc[:5]

Unnamed: 0,PostalCode,Borough,Neighborhood
2,M3A,North York,Parkwoods\n
3,M4A,North York,Victoria Village\n
4,M5A,Downtown Toronto,Harbourfront\n
5,M5A,Downtown Toronto,Regent Park\n
6,M6A,North York,Lawrence Heights\n


In [23]:
# Collapse to one row per (PostalCode, Borough). Summing the string-valued
# Neighborhood column concatenates the names; each name still ends in '\n',
# which is what separates them in the combined string.
dff=df.groupby(['PostalCode','Borough'],as_index=False).sum()

In [24]:
# Preview the first five grouped rows.
dff.head(5)

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,Rouge\nMalvern\n
1,M1C,Scarborough,Highland Creek\nRouge Hill\nPort Union\n
2,M1E,Scarborough,Guildwood\nMorningside\nWest Hill\n
3,M1G,Scarborough,Woburn\n
4,M1H,Scarborough,Cedarbrae\n


In [25]:
# Turn the newline-terminated names into a comma-separated list per row.
# Vectorized .str methods build a new Series instead of writing element by
# element into a column pulled from dff, which avoids chained-assignment
# warnings and does not depend on whether that column is a view or a copy.
li=(
    dff['Neighborhood']
    .str.replace('\n',', ',regex=False)  # literal newline -> ", "
    .str.rstrip(', ')                    # drop the trailing separator
)

In [26]:
# Show the first fifty cleaned neighborhood strings.
li.iloc[:50]

0                                        Rouge, Malvern
1                Highland Creek, Rouge Hill, Port Union
2                     Guildwood, Morningside, West Hill
3                                                Woburn
4                                             Cedarbrae
5                                   Scarborough Village
6           East Birchmount Park, Ionview, Kennedy Park
7                       Clairlea, Golden Mile, Oakridge
8       Cliffcrest, Cliffside, Scarborough Village West
9                           Birch Cliff, Cliffside West
10    Dorset Park, Scarborough Town Centre, Wexford ...
11                                    Maryvale, Wexford
12                                            Agincourt
13              Clarks Corners, Sullivan, Tam O'Shanter
14    Agincourt North, L'Amoreaux East, Milliken, St...
15                                      L'Amoreaux West
16                                          Upper Rouge
17                                    Hillcrest 

In [27]:
# Displays dff without the Neighborhood column. drop() is called without
# inplace and its result is not assigned, so dff itself is left unchanged.
dff.drop(columns=['Neighborhood'])

Unnamed: 0,PostalCode,Borough
0,M1B,Scarborough
1,M1C,Scarborough
2,M1E,Scarborough
3,M1G,Scarborough
4,M1H,Scarborough
5,M1J,Scarborough
6,M1K,Scarborough
7,M1L,Scarborough
8,M1M,Scarborough
9,M1N,Scarborough


In [28]:
# Store the cleaned, comma-separated names as the Neighborhood column.
dff=dff.assign(Neighborhood=li)

In [29]:
dff.head()

Unnamed: 0,PostalCode,Borough,Neighborhood
0,M1B,Scarborough,"Rouge, Malvern"
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union"
2,M1E,Scarborough,"Guildwood, Morningside, West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


In [30]:
# Spot check: show the row(s) for postal code M5A.
dff.loc[dff['PostalCode'].eq('M5A')]

Unnamed: 0,PostalCode,Borough,Neighborhood
53,M5A,Downtown Toronto,"Harbourfront, Regent Park"


In [31]:
# (number of rows, number of columns) of the grouped frame.
(len(dff.index),len(dff.columns))

(103, 3)

### Getting latitude and longitude values for each postal code

In [32]:
from geopy.geocoders import Nominatim

In [33]:
import folium


In [34]:
# The PostalCode column of the grouped frame.
postal_codes=dff['PostalCode']

In [35]:
# Load the coordinates table from the CSV file in the working directory.
coords=pd.read_csv(filepath_or_buffer='Geospatial_Coordinates.csv')

In [36]:
# Preview the first five coordinate rows.
coords.head(n=5)

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [37]:
# Postal codes as listed in the coordinates file.
pc2=coords.loc[:,'Postal Code']

In [38]:
# Postal codes as listed in the scraped, grouped frame.
pc1=dff.loc[:,'PostalCode']

In [39]:
# Compare the two postal-code columns row by row and count matches/mismatches.
(pc2==pc1).value_counts()

True    103
dtype: int64

### Both tables list the same postal codes, row for row, so we can now merge them

In [40]:
# Attach the coordinates by joining on the postal-code key. concat(axis=1)
# would pair rows purely by index position and silently mis-assign coordinates
# if the two frames were ever ordered differently. A keyed merge keeps dff's
# row order and still yields the same columns, including 'Postal Code'.
# validate='many_to_one' makes pandas raise if a postal code appears more than
# once in coords, which would otherwise duplicate rows.
df_merged=dff.merge(
    coords,
    how='left',
    left_on='PostalCode',
    right_on='Postal Code',
    validate='many_to_one',
)

In [41]:
# Preview the first five merged rows.
df_merged.head(5)

Unnamed: 0,PostalCode,Borough,Neighborhood,Postal Code,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",M1B,43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",M1C,43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",M1E,43.763573,-79.188711
3,M1G,Scarborough,Woburn,M1G,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,M1H,43.773136,-79.239476


In [42]:
# Remove the duplicate key column that came from coords. Rebinding the name
# (instead of inplace=True) together with errors='ignore' lets this cell be
# re-run safely; a second in-place drop would raise KeyError because the
# column is already gone.
df_merged=df_merged.drop(columns=['Postal Code'],errors='ignore')

In [43]:
# Preview the final frame: postal code, borough, neighborhoods, coordinates.
df_merged.head(n=5)

Unnamed: 0,PostalCode,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
