### This notebook is used for the capstone project

In [81]:
import pandas as pd
import numpy as np
from geopy.geocoders import Nominatim
import matplotlib.cm as cm
import matplotlib.colors as colors
import folium

## Read data from the Wikipedia page

In [82]:
# Load the first HTML table from the Wikipedia list of Canadian postal codes
# starting with "M", using the table's first row as the column header.
wiki_url = 'https://en.wikipedia.org/wiki/List_of_postal_codes_of_Canada:_M'
wiki_tables = pd.read_html(wiki_url, header=0)
data = wiki_tables[0]

## Let's print the data

In [83]:
# Preview the first five rows of the freshly loaded table.
data.head()

Unnamed: 0,Postal Code,Community,Neighbourhood
0,M1A,Not assigned,Not assigned
1,M2A,Not assigned,Not assigned
2,M3A,North York,Parkwoods
3,M4A,North York,Victoria Village
4,M5A,Downtown Toronto,"Regent Park, Harbourfront"


### Delete the rows whose Borough value is 'Not assigned'

In [84]:
# The table as loaded exposes the borough column as 'Community' (see the
# data.head() preview above), while the rest of this notebook refers to it as
# 'Borough'. Normalise the name first; rename() leaves the frame untouched when
# the column is already called 'Borough'.
data = data.rename(columns={'Community': 'Borough'})
# Keep only the rows that have a borough assigned.
data = data[data['Borough'] != 'Not assigned']

### Group by Borough and Postal Code

In [None]:
# Collapse rows sharing the same Borough / Postal Code pair into one row,
# comma-joining the values of the remaining columns.
group_keys = ['Borough', 'Postal Code']
grouped = data.groupby(group_keys, as_index=False)
data_selected = grouped.agg(','.join)

In [85]:
# Preview the first five rows of the grouped table.
data_selected.head()

Unnamed: 0,Borough,Postal Code,Neighbourhood
0,Central Toronto,M4N,Lawrence Park
1,Central Toronto,M4P,Davisville North
2,Central Toronto,M4R,"North Toronto West, Lawrence Park"
3,Central Toronto,M4S,Davisville
4,Central Toronto,M4T,"Moore Park, Summerhill East"


### Assign the Borough name to Neighbourhood where the Neighbourhood value is 'Not assigned'

In [86]:
# Rows whose neighbourhood is the placeholder text take the borough name
# instead; every other row keeps its existing neighbourhood value.
is_unassigned = data_selected['Neighbourhood'] == 'Not assigned'
data_selected['Neighbourhood'] = np.where(
    is_unassigned,
    data_selected['Borough'],
    data_selected['Neighbourhood'],
)

In [87]:
# Preview the first five rows again after the Neighbourhood fill-in.
data_selected.head()

Unnamed: 0,Borough,Postal Code,Neighbourhood
0,Central Toronto,M4N,Lawrence Park
1,Central Toronto,M4P,Davisville North
2,Central Toronto,M4R,"North Toronto West, Lawrence Park"
3,Central Toronto,M4S,Davisville
4,Central Toronto,M4T,"Moore Park, Summerhill East"


In [88]:
# (rows, columns) of the cleaned table.
data_selected.shape

(103, 3)

In [89]:
# Per-column summary (count / unique / top / freq) of the cleaned table.
data_selected.describe()

Unnamed: 0,Borough,Postal Code,Neighbourhood
count,103,103,103
unique,10,103,99
top,North York,M5C,Downsview
freq,24,1,4


# Q2. Assign the latitude and longitude columns using geospatial data

In [90]:
# Geospatial lookup CSV; the preview below shows Postal Code, Latitude and
# Longitude columns.
url = 'https://cocl.us/Geospatial_data'
geospatial_data = pd.read_csv(url)

In [91]:
# Preview the first five rows of the coordinate lookup table.
geospatial_data.head()

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


In [92]:
# Attach Latitude / Longitude to each postal-code row. The join is an inner
# join (pandas' default), so only postal codes present in both frames remain.
merge_data = pd.merge(
    left=data_selected,
    right=geospatial_data,
    how='inner',
    on='Postal Code',
)

In [93]:
# Display the merged frame.
# NOTE(review): this shows the whole frame; merge_data.head() may be enough here.
merge_data

Unnamed: 0,Borough,Postal Code,Neighbourhood,Latitude,Longitude
0,Central Toronto,M4N,Lawrence Park,43.728020,-79.388790
1,Central Toronto,M4P,Davisville North,43.712751,-79.390197
2,Central Toronto,M4R,"North Toronto West, Lawrence Park",43.715383,-79.405678
3,Central Toronto,M4S,Davisville,43.704324,-79.388790
4,Central Toronto,M4T,"Moore Park, Summerhill East",43.689574,-79.383160
...,...,...,...,...,...
98,York,M6C,Humewood-Cedarvale,43.693781,-79.428191
99,York,M6E,Caledonia-Fairbanks,43.689026,-79.453512
100,York,M6M,"Del Ray, Mount Dennis, Keelsdale and Silverthorn",43.691116,-79.476013
101,York,M6N,"Runnymede, The Junction North",43.673185,-79.487262


# Q3


In [94]:
# Pair each row's latitude with its longitude as a (lat, lng) tuple.
lat_lng_pairs = zip(merge_data['Latitude'], merge_data['Longitude'])
merge_data['Coordinates'] = list(lat_lng_pairs)

In [95]:
# Preview the first five rows, now including the Coordinates column.
merge_data.head()

Unnamed: 0,Borough,Postal Code,Neighbourhood,Latitude,Longitude,Coordinates
0,Central Toronto,M4N,Lawrence Park,43.72802,-79.38879,"(43.7280205, -79.3887901)"
1,Central Toronto,M4P,Davisville North,43.712751,-79.390197,"(43.7127511, -79.3901975)"
2,Central Toronto,M4R,"North Toronto West, Lawrence Park",43.715383,-79.405678,"(43.7153834, -79.40567840000001)"
3,Central Toronto,M4S,Davisville,43.704324,-79.38879,"(43.7043244, -79.3887901)"
4,Central Toronto,M4T,"Moore Park, Summerhill East",43.689574,-79.38316,"(43.6895743, -79.38315990000001)"


### Get the location of Toronto to initialize the map

In [101]:
# Geocode the city name so the map below can be centred on it.
address = 'Toronto,Canada'
geolocator = Nominatim(user_agent='Toronto_explorer')
location = geolocator.geocode(address)
# geocode() returns None when the service finds no match; fail with a clear
# message instead of an AttributeError on the attribute access below.
if location is None:
    raise ValueError('Could not geocode address: {!r}'.format(address))
latitude, longitude = location.latitude, location.longitude
print(latitude, longitude)


43.6534817 -79.3839347


### Plotting the Toronto map with Borough and Neighbourhood details

In [104]:
# Base map centred on the geocoded coordinates.
toronto_map = folium.Map(location=[latitude, longitude], zoom_start=10)

# Add one circle marker per row of merge_data, with a "Borough,Neighbourhood"
# popup label.
for lat, lng, borough, neighborhood in zip(
    merge_data['Latitude'],
    merge_data['Longitude'],
    merge_data['Borough'],
    merge_data['Neighbourhood'],
):
    label = '{},{}'.format(borough, neighborhood)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=label,       # keyword must be lowercase for the popup to attach
        color='blue',
        fill=True,
        fill_color='red',
        fill_opacity=0.7,  # the fill transparency option is `fill_opacity`
    ).add_to(toronto_map)

# Last expression of the cell so the notebook renders the map.
toronto_map

### Now we can see how the Toronto data is arranged