## Parsing Web Page

In [115]:
import pandas as pd
import requests
from bs4 import BeautifulSoup as BS
page = requests.get("https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M")
soup = BS(page.content, "html.parser")

In [116]:
s1 = soup.find("tbody")
s2 = s1.find_all("tr")

In [117]:
s3 =[]
for i in s2:
    s3.append(i.get_text().replace("\n\n", ",").replace("\n",""))
s4 = []
for x in s3:
    s4.append(x.split(","))

## Data Frame

In [118]:
data = pd.DataFrame()

In [119]:
Postal_Code = []
Borough = []
Neighborhood = []
for x in s4:
    Postal_Code.append(x[0])
    Borough.append(x[1])
    Neighborhood.append(x[2])
data["Postal_Code"]=Postal_Code
data["Borough"]=Borough
data["Neighborhood"]=Neighborhood

In [120]:
data

Unnamed: 0,Postal_Code,Borough,Neighborhood
0,Postal Code,Borough,Neighborhood
1,M1A,Not assigned,Not assigned
2,M2A,Not assigned,Not assigned
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
...,...,...,...
176,M5Z,Not assigned,Not assigned
177,M6Z,Not assigned,Not assigned
178,M7Z,Not assigned,Not assigned
179,M8Z,Etobicoke,Mimico NW


### Cleaning Data

Only process the cells that have an assigned borough. Ignore cells with a borough that is Not assigned.

In [121]:
data.drop([0], axis=0, inplace=True) 
data2 = data[data["Borough"]!="Not assigned"]
data2

Unnamed: 0,Postal_Code,Borough,Neighborhood
3,M3A,North York,Parkwoods
4,M4A,North York,Victoria Village
5,M5A,Downtown Toronto,Regent Park
6,M6A,North York,Lawrence Manor
7,M7A,Downtown Toronto,Queen's Park
...,...,...,...
161,M8X,Etobicoke,The Kingsway
166,M4Y,Downtown Toronto,Church and Wellesley
169,M7Y,East Toronto,Business reply mail Processing Centre
170,M8Y,Etobicoke,Old Mill South


If a cell has a borough but a Not assigned neighborhood, then the neighborhood will be the same as the borough.

In [122]:
for x in data2["Neighborhood"]:
    if x=="Not assigned":
        print("Clean data")
print("All the values in Neighborhood are assigned")

All the values in Neighborhood are assigned


In [123]:
data2.shape

(103, 3)

## Geocoder package

In [124]:
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

In [150]:
lat = []
lon = []
for x in range(0,len(data2)):
    geolocator = Nominatim(user_agent="ny_explorer")
    try:
        location = geolocator.geocode("Canada Toronto " + data2.iloc[x,2])
        latitude = location.latitude
        longitude = location.longitude
        lat.append(location.latitude)
        lon.append(location.longitude)
        print('The geograpical coordinates: {}, {}, {}, {}.'.format(x, data2.iloc[x,2],latitude, longitude))
    
    except:
        location = geolocator.geocode("Canada Toronto " + data2.iloc[x,1])
        latitude = location.latitude
        longitude = location.longitude
        lat.append(location.latitude)
        lon.append(location.longitude)
        print('The geograpical coordinates: {}, {}, {}, {}.'.format(x, data2.iloc[x,2],latitude, longitude))

The geograpical coordinates: 0, Parkwoods, 43.7587999, -79.3201966.
The geograpical coordinates: 1, Victoria Village, 43.732658, -79.3111892.
The geograpical coordinates: 2, Regent Park, 43.6607056, -79.3604569.
The geograpical coordinates: 3, Lawrence Manor, 43.7220788, -79.4375067.
The geograpical coordinates: 4, Queen's Park, 43.659659, -79.3903399.
The geograpical coordinates: 5, Islington Avenue, 43.6794838, -79.5389092.
The geograpical coordinates: 6, Malvern, 43.8091955, -79.2217008.
The geograpical coordinates: 7, Don Mills, 43.775347, -79.3459439.
The geograpical coordinates: 8, Parkview Hill, 43.6534817, -79.3839347.
The geograpical coordinates: 9, Garden District, 43.6564995, -79.3771141.
The geograpical coordinates: 10, Glencairn, 43.7087117, -79.4406853.
The geograpical coordinates: 11, West Deane Park, 43.6631995, -79.5685684.
The geograpical coordinates: 12, Rouge Hill, 43.7802711, -79.1304992.
The geograpical coordinates: 13, Don Mills, 43.775347, -79.3459439.
The geogr

In [151]:
data2["latitude"]=lat
data2["longitude"]=lon

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [152]:
data2.head(22)

Unnamed: 0,Postal_Code,Borough,Neighborhood,latitude,longitude
3,M3A,North York,Parkwoods,43.7588,-79.320197
4,M4A,North York,Victoria Village,43.732658,-79.311189
5,M5A,Downtown Toronto,Regent Park,43.660706,-79.360457
6,M6A,North York,Lawrence Manor,43.722079,-79.437507
7,M7A,Downtown Toronto,Queen's Park,43.659659,-79.39034
9,M9A,Etobicoke,Islington Avenue,43.679484,-79.538909
10,M1B,Scarborough,Malvern,43.809196,-79.221701
12,M3B,North York,Don Mills,43.775347,-79.345944
13,M4B,East York,Parkview Hill,43.653482,-79.383935
14,M5B,Downtown Toronto,Garden District,43.6565,-79.377114


## Map Visulation

In [153]:
address = 'Toronto'

geolocator = Nominatim(user_agent="ny_explorer")
location = geolocator.geocode(address)
latitude = location.latitude
longitude = location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


In [154]:
import folium

In [173]:
# create map of Toronto latitude and longitude values
map_toronto = folium.Map(location=[43.6534817, -79.3839347], zoom_start=12)


In [156]:
# instantiate a feature group for the incidents in the dataframe
incidents = folium.map.FeatureGroup()

for lat, lng, in zip(data2.latitude, data2.longitude):
    incidents.add_child(
        folium.features.CircleMarker(
            [lat, lng],
            radius=3, # define how big you want the circle markers to be
            color='yellow',
            fill=True,
            fill_color='red',
            fill_opacity=0.6
        )
    )
# add incidents to map
map_toronto.add_child(incidents)

In [172]:
# instantiate a feature group for the incidents in the dataframe
incidents = folium.map.FeatureGroup()

data3 = data2[data2["Borough"]=="Central Toronto"]

for lat, lng, in zip(data3.latitude, data3.longitude):
    incidents.add_child(
        folium.features.CircleMarker(
            [lat, lng],
            radius=5, # define how big you want the circle markers to be
            color='blue',
            fill=True,
            fill_color='red',
            fill_opacity=0.6
        )
    )

data4 = data2[data2["Borough"]!="Central Toronto"]   
for lat, lng, in zip(data4.latitude, data4.longitude):
    incidents.add_child(
        folium.features.CircleMarker(
            [lat, lng],
            radius=5, # define how big you want the circle markers to be
            color='yellow',
            fill=True,
            fill_color='blue',
            fill_opacity=0.6
        )
    )        

# add incidents to map
map_toronto.add_child(incidents)

In [178]:
# create map of Toronto latitude and longitude values
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=10)

# add markers to map
for lat, lng, borough, neighborhood in zip(data3['latitude'], data3['longitude'], data3['Borough'], data3['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        #radius=5,
        popup=label,
        color='red',
        fill=True,
        fill_color='orange',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
for lat, lng, borough, neighborhood in zip(data4['latitude'], data4['longitude'], data4['Borough'], data4['Neighborhood']):
    label = '{}, {}'.format(neighborhood, borough)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        #radius=5,
        popup=label,
        color='green',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7,
        parse_html=False).add_to(map_toronto)  
    
map_toronto