## Merging datasets & creating a new dataset

#### Importing the required libraries

In [27]:
import pandas as pd
import numpy as np
# !conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values
import folium

#### Importing the Geospatial Coordinates Dataset

In [28]:
# Load the postal code / latitude / longitude table from the working directory.
gc = pd.read_csv('Geospatial_Coordinates.csv')
gc.head(n=5)  # preview the first five rows

Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


#### Sorting by 'Postal Code'

Sorting this dataset by 'Postal Code' so that its row order matches the other dataset, which will also be sorted by its postal-code column ('Postal code').

In [29]:
# Order the coordinate table by postal code. Rebinding the name (instead of
# sorting in place) gives the same table without mutating the loaded frame.
gc = gc.sort_values(by='Postal Code')
print(gc.shape)  # (rows, columns)
gc.head()

(103, 3)


Unnamed: 0,Postal Code,Latitude,Longitude
0,M1B,43.806686,-79.194353
1,M1C,43.784535,-79.160497
2,M1E,43.763573,-79.188711
3,M1G,43.770992,-79.216917
4,M1H,43.773136,-79.239476


#### Importing the prepared dataset

In [30]:
# Load the refined dataset produced by the Data Wrangling step and discard
# the unneeded 'Unnamed: 0' column in the same expression.
df = pd.read_csv('Data Wrangling.csv').drop(columns=['Unnamed: 0'])
df.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park , Harbourfront"
3,M6A,North York,"Lawrence Manor , Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park , Ontario Provincial Government"


#### Sorting by 'Postal code' to match the sequence

In [31]:
# Sort by postal code and renumber the rows 0..n-1 so the index reflects the
# sorted order. reset_index(drop=True) does this directly; 'Postal code' is
# already the leading column, so no set_index/reset_index round-trip is needed.
df = df.sort_values(by='Postal code').reset_index(drop=True)
df.head()

Unnamed: 0,Postal code,Borough,Neighborhood
0,M1B,Scarborough,"Malvern , Rouge"
1,M1C,Scarborough,"Rouge Hill , Port Union , Highland Creek"
2,M1E,Scarborough,"Guildwood , Morningside , West Hill"
3,M1G,Scarborough,Woburn
4,M1H,Scarborough,Cedarbrae


#### Concatenating both the datasets

In [32]:
# Attach Latitude/Longitude to each row by matching on the postal code itself.
# A key-based merge stays correct even if the two tables differ in row order
# or in the set of postal codes, whereas a position-based concat would
# silently pair rows with the wrong coordinates in that case.
df = (
    df.drop(columns=['Latitude', 'Longitude'], errors='ignore')  # keeps the cell re-runnable
      .merge(
          gc,
          how='left',                # keep every row of df, in its current order
          left_on='Postal code',
          right_on='Postal Code',
          validate='many_to_one',    # fail loudly if gc lists a postal code twice
      )
      .drop(columns=['Postal Code'])  # drop the duplicate key column brought in from gc
)
df.head()

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M1B,Scarborough,"Malvern , Rouge",43.806686,-79.194353
1,M1C,Scarborough,"Rouge Hill , Port Union , Highland Creek",43.784535,-79.160497
2,M1E,Scarborough,"Guildwood , Morningside , West Hill",43.763573,-79.188711
3,M1G,Scarborough,Woburn,43.770992,-79.216917
4,M1H,Scarborough,Cedarbrae,43.773136,-79.239476


#### Creating the dataset of Toronto.

In [33]:
# Keep only the rows whose Borough contains the substring 'Toronto'.
#   regex=False -> plain substring match (no pattern interpretation)
#   na=False    -> rows with a missing Borough are excluded instead of
#                  producing a NaN entry in the boolean mask
is_toronto = df['Borough'].str.contains('Toronto', regex=False, na=False)

# Build the subset as a new frame with a fresh 0..n-1 index, rather than
# mutating a filtered selection in place.
df_Toronto = df[is_toronto].reset_index(drop=True)

df_Toronto.to_csv('Toronto Dataset.csv')  # save the Toronto data set (row index is written as the first column)
df_Toronto.head()

Unnamed: 0,Postal code,Borough,Neighborhood,Latitude,Longitude
0,M4E,East Toronto,The Beaches,43.676357,-79.293031
1,M4K,East Toronto,"The Danforth West , Riverdale",43.679557,-79.352188
2,M4L,East Toronto,"India Bazaar , The Beaches West",43.668999,-79.315572
3,M4M,East Toronto,Studio District,43.659526,-79.340923
4,M4N,Central Toronto,Lawrence Park,43.72802,-79.38879


#### Locate the co-ordinates of Toronto using Geolocator

In [34]:
address = 'Toronto, ONTARIO'  # address whose coordinates are needed

# Use Nominatim to geocode the address into latitude/longitude.
geolocator = Nominatim(user_agent="to_explorer")
location = geolocator.geocode(address)

# geocode() yields None when the service finds no match; stop here with a
# clear message instead of failing on the attribute access below.
if location is None:
    raise ValueError('Nominatim returned no result for address {!r}'.format(address))

latitude, longitude = location.latitude, location.longitude
print('The geograpical coordinate of Toronto are {}, {}.'.format(latitude, longitude))

The geograpical coordinate of Toronto are 43.6534817, -79.3839347.


#### Plotting the Neighborhoods on the map of Toronto

In [35]:
# Base map centred on the Toronto coordinates obtained above.
map_toronto = folium.Map(location=[latitude, longitude], zoom_start=11)

# Appearance options shared by every neighbourhood marker.
marker_style = dict(
    color='blue',
    fill=True,
    fill_color='#3186cc',
    fill_opacity=0.7,
    parse_html=False,
)

# One circle marker per row of df_Toronto, with the neighbourhood name as popup.
marker_rows = df_Toronto[['Latitude', 'Longitude', 'Neighborhood']]
for lat, lng, name in marker_rows.itertuples(index=False, name=None):
    marker = folium.CircleMarker(
        [lat, lng],
        radius=5,
        popup=folium.Popup(name, parse_html=True),
        **marker_style
    )
    marker.add_to(map_toronto)

map_toronto