### Importing Libraries including BeautifulSoup which can read and transform HTML tables to Pandas Dataframes

In [1]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup

__ We will read the URL and put in a pandas dataframe. If there is more than one table, different dataframes are created and hence we are using dfs.. please note extra s there __

In [2]:
res = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
soup = BeautifulSoup(res.content,'lxml')
table = soup.find_all('table')[0] 
dfs = pd.read_html(str(table))


In [3]:
# We are just reading the first table and then setting first row as column and rest as rows
df = dfs[0]
df.columns = df.iloc[0]
df = df[1:]
df

Unnamed: 0,Postcode,Borough,Neighbourhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Harbourfront
6,M5A,Downtown Toronto,Regent Park
7,M6A,North York,Lawrence Heights
8,M6A,North York,Lawrence Manor
9,M7A,Queen's Park,Not assigned
10,M8A,Not assigned,Not assigned


__ We will capture only records where Borough is anything other than 'Not assigned' and then resetting the index using a method other than reset_index  __

In [4]:
df = df[df['Borough'] != 'Not assigned']
df.index = np.arange(0, len(df))

#### We will not set the Neighbourhood to the Borough's value of that row if it is Not assigned. Else, we will keep it as is.

In [5]:
df.loc[df['Neighbourhood'] == 'Not assigned', 'Neighbourhood'] = df['Borough']

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


#### We will not groupBy with Post Code and join all other columns with a comma
##### Then, we will define a function and read only upto first Comma for the Borough Column so that it does not contain duplicate values

df = df.groupby('Postcode', as_index=False).agg(lambda x: ', '.join(x))

def returnUpComma(boroughValue):
    if(boroughValue.find(',')==-1):
        return boroughValue
    else:
        return boroughValue.split(",")[0]

df['Borough'] = df['Borough'].apply(lambda x: returnUpComma(x))
df

### Printing the shape of the dataframe

In [7]:
df.shape

(103, 3)

## Validating by checking one the records from the Question i.e Problem Statement from Coursera itself

In [8]:
df[df['Postcode'] == 'M4B']

Unnamed: 0,Postcode,Borough,Neighbourhood
35,M4B,East York,"Woodbine Gardens, Parkview Hill"


### Reading the Latitude & Longitude from the External CSV file now

In [9]:
import pandas as pd
import io
import requests
url="http://cocl.us/Geospatial_data"
geodatacontent=requests.get(url).content
geodatadf=pd.read_csv(io.StringIO(geodatacontent.decode('utf-8')))
geodatadf.rename(columns={'Postal Code':'Postcode'}, inplace=True)
geodatadf

Unnamed: 0,Postcode,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476
5,M1J,43.744734,-79.239476
6,M1K,43.727929,-79.262029
7,M1L,43.711112,-79.284577
8,M1M,43.716316,-79.239476
9,M1N,43.692657,-79.264848


### Now, merging the Latitude & Longitude columns into main DF

In [10]:
pd.merge(left=df,right=geodatadf)

Unnamed: 0,Postcode,Borough,Neighbourhood,Latitude,Longitude
0,M1B,Scarborough,"Rouge, Malvern",43.806686,-79.194353
1,M1C,Scarborough,"Highland Creek, Rouge Hill, Port Union",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood, Morningside, West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476
5,M1J,Scarborough,Scarborough Village,43.744734,-79.239476
6,M1K,Scarborough,"East Birchmount Park, Ionview, Kennedy Park",43.727929,-79.262029
7,M1L,Scarborough,"Clairlea, Golden Mile, Oakridge",43.711112,-79.284577
8,M1M,Scarborough,"Cliffcrest, Cliffside, Scarborough Village West",43.716316,-79.239476
9,M1N,Scarborough,"Birch Cliff, Cliffside West",43.692657,-79.264848
