# segmenting and clustering neighborhoods in toronto

#### In this second notebook we get geographical coordinates of neighborhoods in toronto
#### we will use data.csv file created in previous notebook

In [2]:
# import neccessary libraries
import pandas as pd
#!conda install -c conda-forge geopy --yes
from geopy.geocoders import Nominatim # convert an address into longitude and latitude

In [3]:
data=pd.read_csv('data.csv')

In [4]:
data.head()

Unnamed: 0.1,Unnamed: 0,postal_code,Borough,Neighborhood
0,0,M3A,North York,Parkwoods
1,1,M4A,North York,Victoria Village
2,2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


In [5]:
data.drop('Unnamed: 0', axis=1, inplace=True)

In [6]:
data.head()

Unnamed: 0,postal_code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,"Regent Park, Harbourfront"
3,M6A,North York,"Lawrence Manor, Lawrence Heights"
4,M7A,Downtown Toronto,"Queen's Park, Ontario Provincial Government"


#### split multiple neighborhood in neighborhood columns

In [7]:
data_new=(data.set_index(data.columns.drop('Neighborhood', 1).to_list()).Neighborhood.str.split(',', expand=True).stack().reset_index().rename(columns={0:'Neighborhood'}).loc[:, data.columns])
data_new.head()

Unnamed: 0,postal_code,Borough,Neighborhood
0,M3A,North York,Parkwoods
1,M4A,North York,Victoria Village
2,M5A,Downtown Toronto,Regent Park
3,M5A,Downtown Toronto,Harbourfront
4,M6A,North York,Lawrence Manor


In [8]:
data_new['Latitude']=''
data_new['Longitude']=''
data_new.head()

Unnamed: 0,postal_code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,,
1,M4A,North York,Victoria Village,,
2,M5A,Downtown Toronto,Regent Park,,
3,M5A,Downtown Toronto,Harbourfront,,
4,M6A,North York,Lawrence Manor,,


In [9]:
data_new.shape

(217, 5)

In [12]:
# now we get geographical coordinates of neighborhoods
drop_unknown = []
geolocator = Nominatim(user_agent="toronto_explorer")
for index, row in data_new.iterrows():
    address = row['Neighborhood'] + ', Toronto'
    try:
        location = geolocator.geocode(address)
        latitude = location.latitude
        longitude = location.longitude
        print('The geograpical coordinate of {} are {}, {}.'.format(address, latitude, longitude))
        data_new.loc[index, 'Latitude'] = latitude
        data_new.loc[index, 'Longitude'] = longitude
    except AttributeError:
        print('Cannot do: {}, will drop index: {}'.format(address, index))
        drop_unknown.append(index)

The geograpical coordinate of Parkwoods, Toronto are 43.7611243, -79.3240594.
The geograpical coordinate of Victoria Village, Toronto are 43.732658, -79.3111892.
The geograpical coordinate of Regent Park, Toronto are 43.6607056, -79.3604569.
The geograpical coordinate of  Harbourfront, Toronto are 43.6400801, -79.3801495.
The geograpical coordinate of Lawrence Manor, Toronto are 43.7220788, -79.4375067.
The geograpical coordinate of  Lawrence Heights, Toronto are 43.7227784, -79.4509332.
The geograpical coordinate of Queen's Park, Toronto are 43.659659, -79.3903399.
Cannot do:  Ontario Provincial Government, Toronto, will drop index: 7
The geograpical coordinate of Islington Avenue, Toronto are 43.6214816, -79.5136854.
The geograpical coordinate of  Humber Valley Village, Toronto are 43.6664717, -79.5243136.
The geograpical coordinate of Malvern, Toronto are 43.8091955, -79.2217008.
The geograpical coordinate of  Rouge, Toronto are 43.8049304, -79.1658374.
The geograpical coordinate of

In [13]:
data_new.head()

Unnamed: 0,postal_code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.7611,-79.3241
1,M4A,North York,Victoria Village,43.7327,-79.3112
2,M5A,Downtown Toronto,Regent Park,43.6607,-79.3605
3,M5A,Downtown Toronto,Harbourfront,43.6401,-79.3801
4,M6A,North York,Lawrence Manor,43.7221,-79.4375


In [14]:
drop_unknown

[7, 39, 103, 105, 139, 176, 182, 188, 202, 203, 214]

In [15]:
data_new.iloc[7, :]

postal_code                                M7A
Borough                       Downtown Toronto
Neighborhood     Ontario Provincial Government
Latitude                                      
Longitude                                     
Name: 7, dtype: object

In [16]:
import numpy as np
data_new['Latitude'].replace('', np.nan, inplace=True)
data_new.dropna(subset=['Latitude'], inplace=True)
data_new.shape

(206, 5)

In [17]:
data_new.reset_index(drop=True)

Unnamed: 0,postal_code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.761124,-79.3241
1,M4A,North York,Victoria Village,43.732658,-79.3112
2,M5A,Downtown Toronto,Regent Park,43.660706,-79.3605
3,M5A,Downtown Toronto,Harbourfront,43.640080,-79.3801
4,M6A,North York,Lawrence Manor,43.722079,-79.4375
...,...,...,...,...,...
201,M8Y,Etobicoke,Kingsway Park South East,43.647381,-79.5113
202,M8Z,Etobicoke,Mimico NW,43.616677,-79.4968
203,M8Z,Etobicoke,The Queensway West,43.623618,-79.5148
204,M8Z,Etobicoke,Kingsway Park South West,43.647381,-79.5113


In [18]:
data_new.head()

Unnamed: 0,postal_code,Borough,Neighborhood,Latitude,Longitude
0,M3A,North York,Parkwoods,43.761124,-79.3241
1,M4A,North York,Victoria Village,43.732658,-79.3112
2,M5A,Downtown Toronto,Regent Park,43.660706,-79.3605
3,M5A,Downtown Toronto,Harbourfront,43.64008,-79.3801
4,M6A,North York,Lawrence Manor,43.722079,-79.4375


In [19]:
# now our dataframe is ready with geographical coordinates
data_new.to_csv('geo_data.csv')# save dataframe in geo_data file in csv format