# Import Canada Borough Data

In [29]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np

In [30]:
website_url = requests.get('https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M').text
soup = BeautifulSoup(website_url,'lxml')

In [31]:
#print(soup.prettify())

## Pull Out Table

In [32]:
tables = soup.find_all('table', class_='sortable')

### Extract Data by Heading

In [33]:
for table in tables:
    ths = table.find_all('th')
    headings = [th.text.strip() for th in ths]
    if headings[:3] == ['Postcode', 'Borough', 'Neighbourhood']:
        break

In [34]:
#print(table)

### Put Data in Dataframe

In [35]:
P = []
B = []
N = []
with open('CanadaArea.txt', 'w') as fo:
    for tr in table.find_all('tr'):
        tds = tr.find_all('td')
        if not tds:
            continue
        Postcode, Borough, Neighbourhood = [td.text.strip() for td in tds[:3]]
        P.append(Postcode)
        B.append(Borough)
        N.append(Neighbourhood)

In [36]:
d = pd.DataFrame({'Postcode':P,'Borough':B,'Neighbourhood':N})
d.head()
d.shape

(288, 3)

### Remove Not Assigned Stuff

In [37]:
drop = []
for dems in range(len(d)):
    if d['Borough'].iloc[dems] == 'Not assigned':
        drop.append(dems)

In [38]:
df = d.drop(d.index[drop])
df.shape

(211, 3)

In [39]:
df.head()


Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Harbourfront
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Heights


### Combine similar postcodes

In [40]:
drop2 = []
for dems in range(len(df)-1):
    if df['Postcode'].iloc[dems] == df['Postcode'].iloc[dems+1]:
        df['Neighbourhood'].iloc[dems] =  df['Neighbourhood'].iloc[dems] + ', ' + df['Neighbourhood'].iloc[dems+1]
        drop2.append(dems+1)

In [41]:
df2 = df.drop(df.index[drop2])

### Replace the one and only 'not assigned' neighborhood with Borough

In [42]:
for i in range(len(df2)):
    if df2['Neighbourhood'].iloc[i] == 'Not assigned':
        df2['Neighbourhood'].iloc[i] = df2['Borough'].iloc[i]

In [43]:
df2.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Harbourfront, Regent Park"
6,M6A,North York,"Lawrence Heights, Lawrence Manor"
8,M7A,Queen's Park,Queen's Park


### Reset Index so it looks nice

In [44]:
df2 = df2.reset_index(drop=True)

In [45]:
df2.shape

(103, 3)

### GEO from google doesn't work, so using excel doc

In [46]:
import geocoder # import geocoder

# initialize your variable to None
lat_lng_coords = None
latitude = []
longitude = []

for data in range(2):
    postal_code=df2.iloc[data]

    # loop until you get the coordinates
    #while(lat_lng_coords is None):
    g = geocoder.google('{}, Toronto, Ontario'.format(postal_code))
    lat_lng_coords = g.latlng

    latitude[i] = lat_lng_coords[0]
    longitude[i] = lat_lng_coords[1]
    
#add lat/lon to array
#add array to dataframe

TypeError: 'NoneType' object is not subscriptable

In [144]:
import csv
geodata = []
output=[]
with open('Geospatial_Coordinates.csv', newline='') as csvfile:
    georeader = csv.reader(csvfile, delimiter=',')
    for row in georeader:
        geodata.append(row)

In [145]:
dfg = pd.DataFrame(geodata,columns={'Postal Code':0,'Latitude':0,'Longitude':0})

In [146]:
dfg.head(1)

Unnamed: 0,Postal Code,Latitude,Longitude
0,Postal Code,Latitude,Longitude


In [147]:
dfg = dfg[1:]

In [148]:
df2.head()

Unnamed: 0,Postcode,Borough,Neighbourhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Harbourfront, Regent Park"
3,M6A,North York,"Lawrence Heights, Lawrence Manor"
4,M7A,Queen's Park,Queen's Park


In [149]:
dfg.head(2)

Unnamed: 0,Postal Code,Latitude,Longitude
1,M1B,43.8066863,-79.1943534
2,M1C,43.7845351,-79.1604971


In [150]:
df2.iloc[0]['Postcode']

'M3A'

In [151]:
dfg.iloc[0]['Postal Code']

'M1B'

In [152]:
lat=[]
lon=[]
for row in range(len(df2)):
    for codes in range(len(dfg)):
        if df2.iloc[row]['Postcode'] == dfg.iloc[codes]['Postal Code']:
            lat.append(dfg.iloc[codes]['Latitude'])
            lon.append(dfg.iloc[codes]['Longitude'])

In [153]:
ll = pd.DataFrame({'Latitude':lat,'Longitude':lon})

In [154]:
result = pd.concat([df2, ll], axis=1, join='inner')

In [157]:
result.head(12)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.7532586,-79.3296565
1,M4A,North York,Victoria Village,43.7258823,-79.3155716
2,M5A,Downtown Toronto,"Harbourfront, Regent Park",43.6542599,-79.3606359
3,M6A,North York,"Lawrence Heights, Lawrence Manor",43.718518,-79.4647633
4,M7A,Queen's Park,Queen's Park,43.6623015,-79.3894938
5,M9A,Etobicoke,Islington Avenue,43.6678556,-79.5322424
6,M1B,Scarborough,"Rouge, Malvern",43.8066863,-79.1943534
7,M3B,North York,Don Mills North,43.7459058,-79.352188
8,M4B,East York,"Woodbine Gardens, Parkview Hill",43.7063972,-79.309937
9,M5B,Downtown Toronto,"Ryerson, Garden District",43.6571618,-79.3789371
