In [1]:
# setting up the imports required for the notebook
import pandas as pd
import requests
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
import io
import folium
from geopy.geocoders import Nominatim
%matplotlib inline


# Part 1 of Week 3 Submission for Coursera Course Data Science Capstone Project for IBM

# Wikipedia page holding the table of Canadian postal codes starting with "M"
my_url = "https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M"

# Download the page exactly once. The context manager closes the connection
# even when read() raises, which a trailing close() call cannot guarantee.
with uReq(my_url) as uClient:
    page_html = uClient.read()
page_soup = soup(page_html, "html.parser")

# Extract every table from the HTML that was just downloaded instead of
# fetching the URL a second time, so page_soup and dfs describe the same
# snapshot of the page.
dfs = pd.read_html(io.StringIO(page_html.decode('utf-8')))

# Print the first rows of each table to see how many there are and what they hold.
# A separate loop variable keeps the name `df` for the working dataframe only.
for table in dfs:
    print(table.head(5))

# From the printed results, the first dataframe is the table we need
df = dfs[0]
df.head(10)

# Drop the rows whose Borough is 'Not assigned'
df = df[df.Borough != 'Not assigned']
df.head(10)

# Collapse to one row per (Postcode, Borough) pair, joining the
# neighbourhood names of that pair with ', '
df = df.groupby(['Postcode','Borough'])['Neighborhood'].apply(', '.join).reset_index()
# Rename the aggregated column to the spelling used in the rest of the notebook
df.columns = ['Postcode','Borough','Neighbourhood']
df.head(5)

# Total number of rows and columns
df.shape


# Part 2 of Week 3 Submission for Coursera Course Data Science Capstone Project for IBM

# Download the geospatial CSV (postal code, latitude, longitude columns)
geo_url = "http://cocl.us/Geospatial_data"
# A timeout keeps the cell from hanging forever on an unresponsive server
response = requests.get(geo_url, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page as CSV
response.raise_for_status()
s = response.content
geo_data = pd.read_csv(io.StringIO(s.decode('utf-8')))

# Check the column names and data of the freshly loaded dataframe
geo_data.head(5)

# Rename the columns positionally so the postal code column is called
# 'Postcode', matching the key used in df for the merge below
geo_data.columns = ['Postcode','Latitude','Longitude']

geo_data.head(5)
# Default (inner) merge: only postcodes present in both frames are kept
df = pd.merge(geo_data, df, on='Postcode')
df.head(5)


# Reordering the columns in the dataframe
df = df[['Postcode','Borough','Neighbourhood','Latitude','Longitude']]
df.head(5)

# Report the number of unique boroughs and the number of rows.
# NOTE: each row is one Postcode/Borough pair whose neighbourhoods were joined
# into a single string, so the second figure counts rows, not individual
# neighbourhood names.
print('The dataframe has {} boroughs and {} neighbourhoods.'.format(
        len(df['Borough'].unique()),
        df.shape[0]))








  Postcode           Borough      Neighborhood
0      M1A      Not assigned      Not assigned
1      M2A      Not assigned      Not assigned
2      M3A        North York         Parkwoods
3      M4A        North York  Victoria Village
4      M5A  Downtown Toronto      Harbourfront
                                                  0   \
0                                                NaN   
1  NL NS PE NB QC ON MB SK AB BC NU/NT YT A B C E...   
2                                                 NL   
3                                                  A   

                                                  1   \
0                              Canadian postal codes   
1  NL NS PE NB QC ON MB SK AB BC NU/NT YT A B C E...   
2                                                 NS   
3                                                  B   

                                                  2    3    4    5    6    7   \
0                                                NaN  NaN  NaN  NaN  NaN  N

In [2]:
# Finding Toronto coordinates via the Nominatim geocoding service
address = 'Toronto'
geolocator = Nominatim(user_agent = "Toronto_explorer")
location = geolocator.geocode(address)

# geocode() returns None when the service finds no match; stop with a clear
# message rather than an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Nominatim returned no result for address {!r}'.format(address))

latitude_toronto = location.latitude
longitude_toronto = location.longitude

# Print the coordinates of the Toronto city
print('The geographical coordinates of Toronto city are Latitude:{} and Longitude:{}'.
     format(latitude_toronto,longitude_toronto))

The geographical coordinates of Toronto city are Latitude:43.653963 and Longitude:-79.387207


In [3]:
# Create a map centred on Toronto using the geocoded latitude and longitude.
# The keyword must be lowercase `location`; with `Location` the centre is never
# set and the map does not open on Toronto.
map_toronto = folium.Map(location=[latitude_toronto, longitude_toronto], zoom_start=10)

# Add one circle marker per dataframe row, with a popup naming the
# neighbourhood(s) and borough of that row
for lat, lon, borough, neighbourhood in zip(df['Latitude'], df['Longitude'], df['Borough'], df['Neighbourhood']):
    label = '{},{}'.format(neighbourhood, borough)
    # parse_html belongs to Popup (it escapes the label text); it is not a
    # CircleMarker option, so it is passed here only
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color='red',
        fill=True,
        # 'lightred' is not a CSS colour name; use a valid light red instead
        fill_color='lightcoral',
        fill_opacity=0.6).add_to(map_toronto)


# Last expression of the cell: renders the map in the notebook
map_toronto