# Wikipedia page to dataframe
We can just read the page using pandas read_html 

In [37]:
import pandas as pd
link = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"
df = pd.read_html(link,header=0)[0]
df.head()


Unnamed: 0,Postal code,Borough,Neighborhood
0,M1A,Not assigned,
1,M2A,Not assigned,
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront


## Convert the dataframe to appropriate datatypes for subsequent operations

In [None]:
df.dtypes
df=df.astype('str')


## Remove the rows that are not assigned in Boroughs

In [25]:
df2=df[~df.Borough.str.contains("Not assigned")]
df2.head()

Unnamed: 0,Postal code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,Regent Park / Harbourfront
5,M6A,North York,Lawrence Manor / Lawrence Heights
6,M7A,Downtown Toronto,Queen's Park / Ontario Provincial Government


## Find the neighborhood that are not assigned but has borough
### There is no unassigned neighborhood that has a borough

In [29]:
df2.Neighborhood.str.count("Not assigned").sum()

0

## Get the final data, aka shape

In [30]:
df2.shape

(103, 3)

In [33]:
df2.Neighborhood=df2.Neighborhood.str.replace('/',',')
df2.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value


Unnamed: 0,Postal code,Borough,Neighborhood
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park , Harbourfront"
5,M6A,North York,"Lawrence Manor , Lawrence Heights"
6,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government"


## Get the geospatial data

In [36]:
!wget -q -O 'Geospatial Coordinates.csv' https://cocl.us/Geospatial_data
g1=pd.read_csv('Geospatial Coordinates.csv')
g1.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


## Add Latitude and Longitude to the previous dataframe using dictionary

In [42]:
d1=dict(zip(g1['Postal Code'],g1.Latitude))
d2=dict(zip(g1['Postal Code'],g1.Longitude))
df2['Latitude']=df2['Postal code'].map(d1)
df2['Longitude']=df2['Postal code'].map(d2)
df2.head()

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
2,M3A,North York,Parkwoods,43.753259,-79.329656
3,M4A,North York,Victoria Village,43.725882,-79.315572
4,M5A,Downtown Toronto,"Regent Park , Harbourfront",43.65426,-79.360636
5,M6A,North York,"Lawrence Manor , Lawrence Heights",43.718518,-79.464763
6,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government",43.662301,-79.389494
