# Import Libraries

In [1]:
import numpy as np
import pandas as pd
import requests

## Data Collection & Extraction

### Demographics Data Loading and Extraction 

In [2]:
 # Get the dataset metadata by passing package_id to the package_search endpoint
url = "https://ckan0.cf.opendata.inter.prod-toronto.ca/api/3/action/package_show" # package_search endpoint
params = { "id": "6e19a90f-971c-46b3-852c-0c48c436d1fc"}  # package_id 
package = requests.get(url, params = params).json()
demographics_meta_data = package["result"]
 
# Get the data by passing the resource_id to the datastore_search endpoint
# See https://docs.ckan.org/en/latest/maintaining/datastore.html for detailed parameters options
 
for idx, resource in enumerate(package["result"]["resources"]):
    if resource["datastore_active"]:  # check if the data store is still active
        url = "https://ckan0.cf.opendata.inter.prod-toronto.ca/api/3/action/datastore_search" # datastore_search endpoint
        p = { "id": resource["id"]}  # resource_id 
        data = requests.get(url, params = p).json() # get the data for the first 100 data samples 
        df = pd.DataFrame(data["result"]["records"])    # save the first 100 data samples as a dataframe 
        demographics_features_description_dict = data['result']['fields'] # get the features description
        for i in range(100, data["result"]["total"], 100): # looping over all the data
            p = { "id": resource["id"], "offset": i} # get the next 100 data samples 
            data = requests.get(url, params = p).json()
            df2 = pd.DataFrame(data["result"]["records"]) # save them in a new dataframe 
            if i == 100:
                demographics_df = df.append(df2)    # if it is the first loop, save them to the old dataframe 
            else:
                demographics_df = demographics_df.append(df2)  # else add them to the main dataframe 
        break
        
demographics_df.reset_index(inplace=True, drop=True)  # reset the index 
print("The shape of the demograhiics 2016 dataset", demographics_df.shape) # print the shape of the final dataframe 
demographics_df.head(5)  # print the first five rows of the dataframe 

The shape of the demograhiics 2016 dataset (2383, 146)


Unnamed: 0,_id,Category,Topic,Data Source,Characteristic,City of Toronto,Agincourt North,Agincourt South-Malvern West,Alderwood,Annex,...,Willowdale West,Willowridge-Martingrove-Richview,Woburn,Woodbine Corridor,Woodbine-Lumsden,Wychwood,Yonge-Eglinton,Yonge-St.Clair,York University Heights,Yorkdale-Glen Park
0,1,Neighbourhood Information,Neighbourhood Information,City of Toronto,Neighbourhood Number,,129,128,20,95,...,37,7,137,64,60,94,100,97,27,31
1,2,Neighbourhood Information,Neighbourhood Information,City of Toronto,TSNS2020 Designation,,No Designation,No Designation,No Designation,No Designation,...,No Designation,No Designation,NIA,No Designation,No Designation,No Designation,No Designation,No Designation,NIA,Emerging Neighbourhood
2,3,Population,Population and dwellings,Census Profile 98-316-X2016001,"Population, 2016",2731571,29113,23757,12054,30526,...,16936,22156,53485,12541,7865,14349,11817,12528,27593,14804
3,4,Population,Population and dwellings,Census Profile 98-316-X2016001,"Population, 2011",2615060,30279,21988,11904,29177,...,15004,21343,53350,11703,7826,13986,10578,11652,27713,14687
4,5,Population,Population and dwellings,Census Profile 98-316-X2016001,Population Change 2011-2016,4.50%,-3.90%,8.00%,1.30%,4.60%,...,12.90%,3.80%,0.30%,7.20%,0.50%,2.60%,11.70%,7.50%,-0.40%,0.80%


In [3]:
print('The number of Categoreis in the data', len(demographics_df.Category.unique()))
demographics_df.Category.unique()

The number of Categoreis in the data 15


array(['Neighbourhood Information', 'Population',
       'Families, households and marital status', 'Language', 'Income',
       'Immigration and citizenship', 'Visible minority', 'Ethnic origin',
       'Aboriginal peoples', 'Education', 'Housing', 'Language of work',
       'Labour', 'Journey to work', 'Mobility'], dtype=object)

In [4]:
Language_features = demographics_df[demographics_df['Category']=='Language']['Characteristic'].unique()

Ethnocultural_diversity_features = demographics_df[demographics_df['Category']=='Ethnocultural diversity']['Characteristic'].unique()

Income_features = demographics_df[demographics_df['Category']=='Income']['Characteristic'].unique()

Immigration_citizenship_features = demographics_df[demographics_df['Category']=='Immigration and citizenship']['Characteristic'].unique()

Families_households_marital_status_features = demographics_df[demographics_df['Category']=='Families, households and marital status']['Characteristic'].unique()

Housing_features = demographics_df[demographics_df['Category']=='Housing']['Characteristic'].unique()

Labour_features = demographics_df[demographics_df['Category']=='Labour']['Characteristic'].unique()

Education_features = demographics_df[demographics_df['Category']=='Education']['Characteristic'].unique()

Language_work_features = demographics_df[demographics_df['Category']=='Language of work']['Characteristic'].unique()

Journey_work_features = demographics_df[demographics_df['Category']=='Journey to work']['Characteristic'].unique()

Mobility_features = demographics_df[demographics_df['Category']=='Mobility']['Characteristic'].unique()

Aboriginal_Peoples_features = demographics_df[demographics_df['Category']=='Aboriginal Peoples']['Characteristic'].unique()

Visible_minority_features = demographics_df[demographics_df['Category']=='Visible minority']['Characteristic'].unique()

In [5]:
demographics_df.drop(['_id', 'Category', 'Topic', 'Data Source'], axis=1 ,inplace=True) # drop the category and topic column
demographics_df = demographics_df.T # transpose the dataframe 

# make the first row as the header
demographics_df = demographics_df.rename(columns=demographics_df.iloc[0]).drop(demographics_df.index[0]) 
demographics_df = demographics_df.reset_index().rename(columns={'index':'Neighbourhood'})
demographics_df.drop(index=demographics_df.index[0], 
                    axis=0, 
                    inplace=True)

# sorting the dataframe alphabetically with the neighbourhood and stored in a new dataframe neighbourhood
demographics_df = demographics_df.sort_values(by=['Neighbourhood']).reset_index(drop=True)
demographics_df.drop(columns=['Neighbourhood Number', 'TSNS2020 Designation'], inplace=True)
neighbourhood_df = demographics_df
neighbourhood_df.head()

Unnamed: 0,Neighbourhood,"Population, 2016","Population, 2011",Population Change 2011-2016,Total private dwellings,Private dwellings occupied by usual residents,Population density per square kilometre,Land area in square kilometres,Children (0-14 years),Youth (15-24 years),...,Intraprovincial migrants,Interprovincial migrants,External migrants,Total - Mobility status 5 years ago - 25% sample data,Non-movers,Movers,Non-migrants,Migrants,Interprovincial migrants.1,External migrants.1
0,Agincourt North,29113,30279,-3.90%,9371,9120,3929,7.41,3840,3705,...,275,75,605,27490,18865,8610,5445,3170,135,2280
1,Agincourt South-Malvern West,23757,21988,8.00%,8535,8136,3034,7.83,3075,3360,...,320,90,490,22325,13565,8775,5610,3145,220,2170
2,Alderwood,12054,11904,1.30%,4732,4616,2435,4.95,1760,1235,...,220,40,70,11370,8235,3130,2200,925,70,245
3,Annex,30526,29177,4.60%,18109,15934,10863,2.81,2360,3750,...,900,385,835,27715,12980,14735,8340,6390,1310,2460
4,Banbury-Don Mills,27695,26918,2.90%,12473,12124,2775,9.98,3605,2730,...,345,75,380,25925,16300,9625,6480,3140,220,1735


### Loading and Extracting the Crime rate data

In [6]:
# Get the dataset metadata by passing package_id to the package_search endpoint
 
url = "https://ckan0.cf.opendata.inter.prod-toronto.ca/api/3/action/package_show" # package_search endpoint
params = { "id": "fc4d95a6-591f-411f-af17-327e6c5d03c7"}  # define the package_id
package = requests.get(url, params = params).json()
crime_rates_meta_data = package["result"] # quering the metadata of the crime rates dataset
 
# Get the data by passing the resource_id to the datastore_search endpoint
# See https://docs.ckan.org/en/latest/maintaining/datastore.html for detailed parameters options

for idx, resource in enumerate(package["result"]["resources"]):
    if resource["datastore_active"]: # check if the data still avaliable or not.
        url = "https://ckan0.cf.opendata.inter.prod-toronto.ca/api/3/action/datastore_search"  # datastore_search endpoint
        p = { "id": resource["id"] }  # the resource_id for the first 100 data point
        p_rest = {"id": resource["id"], 'offset':100} # the resource_id for the last 40 data point
        data = requests.get(url, params = p).json() # the first 100 data point
        data_rest = requests.get(url, params = p_rest).json() # the rest of the data
        crime_rates_features_description_dict = data['result']['fields']  # the features description
        crime_rates_df = pd.DataFrame(data["result"]["records"]) # save the first 100 data samples as a dataframe 
        crime_rates_df =crime_rates_df.append(pd.DataFrame(data_rest["result"]["records"])) # append the rest of the data points to the dataframe 
        
crime_rates_df.sort_values(by='Neighbourhood', inplace=True) # sort the dataframe by the Neighbourhoods
crime_rates_df.reset_index(inplace=True, drop=True) # Reseting the index
crime_rates_df.head()

Unnamed: 0,_id,OBJECTID,Neighbourhood,Hood_ID,F2020_Population_Projection,Assault_2014,Assault_2015,Assault_2016,Assault_2017,Assault_2018,...,Shootings_2019,Shootings_2020,Shooting_Rate2014,Shootings_Rate2015,Shootings_Rate2016,Shootings_Rate2017,Shootings_Rate2018,Shootings_Rate2019,Shootings_Rate2020,geometry
0,80,80,Agincourt North,129,31618,67,77,78,73,80,...,2,2,0.0,0.0,0.0,3.294567,0.0,6.406971,6.325511,"{""type"": ""Polygon"", ""coordinates"": [[[-79.2420..."
1,81,81,Agincourt South-Malvern West,128,27406,104,103,133,111,118,...,2,1,0.0,4.143188,12.23441,15.86043,19.26263,7.495409,3.648836,"{""type"": ""Polygon"", ""coordinates"": [[[-79.2549..."
2,87,87,Alderwood,20,13242,45,47,39,21,30,...,1,0,16.17076,0.0,0.0,0.0,0.0,7.662248,0.0,"{""type"": ""Polygon"", ""coordinates"": [[[-79.5486..."
3,57,57,Annex,95,34680,242,255,279,280,269,...,1,0,0.0,0.0,15.77884,3.085753,6.030818,2.947766,0.0,"{""type"": ""Polygon"", ""coordinates"": [[[-79.3941..."
4,85,85,Banbury-Don Mills,42,31186,60,77,86,111,77,...,2,2,3.554292,0.0,7.034822,3.437844,0.0,6.562757,6.413134,"{""type"": ""Polygon"", ""coordinates"": [[[-79.3305..."


In [8]:
print('The first 52 columns \n', crime_rates_df.columns[0:52])


The first 52 columns 
 Index(['_id', 'OBJECTID', 'Neighbourhood', 'Hood_ID',
       'F2020_Population_Projection', 'Assault_2014', 'Assault_2015',
       'Assault_2016', 'Assault_2017', 'Assault_2018', 'Assault_2019',
       'Assault_2020', 'Assault_Rate2014', 'Assault_Rate2015',
       'Assault_Rate2016', 'Assault_Rate2017', 'Assault_Rate2018',
       'Assault_Rate2019', 'Assault_Rate2020', 'AutoTheft_2014',
       'AutoTheft_2015', 'AutoTheft_2016', 'AutoTheft_2017', 'AutoTheft_2018',
       'AutoTheft_2019', 'AutoTheft_2020', 'AutoTheft_Rate2014',
       'AutoTheft_Rate2015', 'AutoTheft_Rate2016', 'AutoTheft_Rate2017',
       'AutoTheft_Rate2018', 'AutoTheft_Rate2019', 'AutoTheft_Rate2020',
       'BreakAndEnter_2014', 'BreakAndEnter_2015', 'BreakAndEnter_2016',
       'BreakAndEnter_2017', 'BreakAndEnter_2018', 'BreakAndEnter_2019',
       'BreakAndEnter_2020', 'BreakAndEnter_Rate2014',
       'BreakAndEnter_Rate2015', 'BreakAndEnter_Rate2016',
       'BreakAndEnter_Rate2017', 'Bre

In [9]:
print('The last 52 columns \n', crime_rates_df.columns[52:104])


The last 52 columns 
 Index(['Robbery_2019', 'Robbery_2020', 'Robbery_Rate2014', 'RobberyRate_2015',
       'Robbery_Rate2016', 'Robbery_Rate2017', 'Robbery_Rate2018',
       'Robbery_Rate2019', 'Robbery_Rate2020', 'TheftOver_2014',
       'TheftOver_2015', 'TheftOver_2016', 'TheftOver_2017', 'TheftOver_2018',
       'TheftOver_2019', 'TheftOver_2020', 'TheftOver_Rate2014',
       'TheftOver_Rate2015', 'TheftOver_Rate2016', 'TheftOver_Rate2017',
       'TheftOver_Rate2018', 'TheftOver_Rate2019', 'TheftOver_Rate2020',
       'Homicide_2014', 'Homicide_2015', 'Homicide_2016', 'Homicide_2017',
       'Homicide_2018', 'Homicide_2019', 'Homicide_2020', 'Homicide_Rate2014',
       'Homicide_Rate2015', 'Homicide_Rate2016', 'Homicide_Rate2017',
       'Homicide_Rate2018', 'Homicide_Rate2019', 'Homicide_Rate2020',
       'Shootings_2014', 'Shootings_2015', 'Shootings_2016', 'Shootings_2017',
       'Shootings_2018', 'Shootings_2019', 'Shootings_2020',
       'Shooting_Rate2014', 'Shootings_Rate

In [10]:
# the unsed features, the geometry feature is not removed as it will be used in extraction the location data
unsed_columns = ['_id', 'OBJECTID', 'Hood_ID']
crime_rates_df.sort_values(by='Neighbourhood', inplace=True)
crime_rates_df.reset_index(inplace=True, drop=True)
crime_rates_df.drop(columns=unsed_columns, inplace=True)
neighbourhood_df = pd.concat([neighbourhood_df, crime_rates_df], axis=1)
neighbourhood_df.head()

Unnamed: 0,Neighbourhood,"Population, 2016","Population, 2011",Population Change 2011-2016,Total private dwellings,Private dwellings occupied by usual residents,Population density per square kilometre,Land area in square kilometres,Children (0-14 years),Youth (15-24 years),...,Shootings_2019,Shootings_2020,Shooting_Rate2014,Shootings_Rate2015,Shootings_Rate2016,Shootings_Rate2017,Shootings_Rate2018,Shootings_Rate2019,Shootings_Rate2020,geometry
0,Agincourt North,29113,30279,-3.90%,9371,9120,3929,7.41,3840,3705,...,2,2,0.0,0.0,0.0,3.294567,0.0,6.406971,6.325511,"{""type"": ""Polygon"", ""coordinates"": [[[-79.2420..."
1,Agincourt South-Malvern West,23757,21988,8.00%,8535,8136,3034,7.83,3075,3360,...,2,1,0.0,4.143188,12.23441,15.86043,19.26263,7.495409,3.648836,"{""type"": ""Polygon"", ""coordinates"": [[[-79.2549..."
2,Alderwood,12054,11904,1.30%,4732,4616,2435,4.95,1760,1235,...,1,0,16.17076,0.0,0.0,0.0,0.0,7.662248,0.0,"{""type"": ""Polygon"", ""coordinates"": [[[-79.5486..."
3,Annex,30526,29177,4.60%,18109,15934,10863,2.81,2360,3750,...,1,0,0.0,0.0,15.77884,3.085753,6.030818,2.947766,0.0,"{""type"": ""Polygon"", ""coordinates"": [[[-79.3941..."
4,Banbury-Don Mills,27695,26918,2.90%,12473,12124,2775,9.98,3605,2730,...,2,2,3.554292,0.0,7.034822,3.437844,0.0,6.562757,6.413134,"{""type"": ""Polygon"", ""coordinates"": [[[-79.3305..."


### Geospatial data extraction

In [11]:
def extracting_longitudes_lattuides(neighbourhood_long_latt):
    """ Extracting the longitudes and latitudes inforamtion.
    This function extract, clean, and transform the longitude and latitude information
    from the crime rates dataset.
    
    Inputs
    --------
    neighbourhood_long_latt[DataFrame]: The dataframe that contains the raw infomration of 
    longitudes and latitudes for each neighbourhood. it contains two columns the ne
    
    Outputs
    --------
    neighbourhood_long_latt[DataFrame]: dataframe contains the extracted longitudes and 
    latitudes inforamtion.
    """
    neighbourhood_long_latt = long_lat_df['geometry'][0][37:-1]  # selecting the longitudes and latitudes inforamtion
    neighbourhood_long_latt = neighbourhood_long_latt.split(',')  # splitting the list of strings to lists at ,
    neighbourhood_long_latt = [item.replace('[',"") for item in neighbourhood_long_latt]  # remove the square brackets
    neighbourhood_long_latt = [item.replace(']',"") for item in neighbourhood_long_latt]  # remove the square brackets
    
    extracted_long_latt = np.empty((int(len(neighbourhood_long_latt)/2), 2)) # define an empty array to save the latitudes and longitudes
    long = neighbourhood_long_latt[0::2] # extract the longitudes as the even elements of the list  
    lat = neighbourhood_long_latt[1::2]  # extract the latitudes as the odd elements of the list

    extracted_long_latt[:,0] = long
    extracted_long_latt[:,1] = lat  
    extracted_long_latt_sorted = extracted_long_latt[extracted_long_latt[:, 1].argsort()] # sorting the array on lattiudes 
    return extracted_long_latt_sorted[int(len(extracted_long_latt_sorted)/2)]    

In [12]:
long_lat_columns = ['Neighbourhood', 'geometry'] # define the working columns 
long_lat_df = crime_rates_df.loc[:, long_lat_columns] # select the working columns fromt the crime rates dataset 

# apply the cleaning function 
long_lat_df['Long_latt'] = long_lat_df.apply(lambda row : extracting_longitudes_lattuides(row['geometry']), axis = 1) # apply the cleaning function for each row
long_lat_df.head()

Unnamed: 0,Neighbourhood,geometry,Long_latt
0,Agincourt North,"{""type"": ""Polygon"", ""coordinates"": [[[-79.2420...","[-79.24376995555, 43.8082815798025]"
1,Agincourt South-Malvern West,"{""type"": ""Polygon"", ""coordinates"": [[[-79.2549...","[-79.24376995555, 43.8082815798025]"
2,Alderwood,"{""type"": ""Polygon"", ""coordinates"": [[[-79.5486...","[-79.24376995555, 43.8082815798025]"
3,Annex,"{""type"": ""Polygon"", ""coordinates"": [[[-79.3941...","[-79.24376995555, 43.8082815798025]"
4,Banbury-Don Mills,"{""type"": ""Polygon"", ""coordinates"": [[[-79.3305...","[-79.24376995555, 43.8082815798025]"


In [13]:
neighbourhood_df['long_latt'] = long_lat_df['Long_latt']
neighbourhood_df.head()

Unnamed: 0,Neighbourhood,"Population, 2016","Population, 2011",Population Change 2011-2016,Total private dwellings,Private dwellings occupied by usual residents,Population density per square kilometre,Land area in square kilometres,Children (0-14 years),Youth (15-24 years),...,Shootings_2020,Shooting_Rate2014,Shootings_Rate2015,Shootings_Rate2016,Shootings_Rate2017,Shootings_Rate2018,Shootings_Rate2019,Shootings_Rate2020,geometry,long_latt
0,Agincourt North,29113,30279,-3.90%,9371,9120,3929,7.41,3840,3705,...,2,0.0,0.0,0.0,3.294567,0.0,6.406971,6.325511,"{""type"": ""Polygon"", ""coordinates"": [[[-79.2420...","[-79.24376995555, 43.8082815798025]"
1,Agincourt South-Malvern West,23757,21988,8.00%,8535,8136,3034,7.83,3075,3360,...,1,0.0,4.143188,12.23441,15.86043,19.26263,7.495409,3.648836,"{""type"": ""Polygon"", ""coordinates"": [[[-79.2549...","[-79.24376995555, 43.8082815798025]"
2,Alderwood,12054,11904,1.30%,4732,4616,2435,4.95,1760,1235,...,0,16.17076,0.0,0.0,0.0,0.0,7.662248,0.0,"{""type"": ""Polygon"", ""coordinates"": [[[-79.5486...","[-79.24376995555, 43.8082815798025]"
3,Annex,30526,29177,4.60%,18109,15934,10863,2.81,2360,3750,...,0,0.0,0.0,15.77884,3.085753,6.030818,2.947766,0.0,"{""type"": ""Polygon"", ""coordinates"": [[[-79.3941...","[-79.24376995555, 43.8082815798025]"
4,Banbury-Don Mills,27695,26918,2.90%,12473,12124,2775,9.98,3605,2730,...,2,3.554292,0.0,7.034822,3.437844,0.0,6.562757,6.413134,"{""type"": ""Polygon"", ""coordinates"": [[[-79.3305...","[-79.24376995555, 43.8082815798025]"


### Venues data load and extraction 

In [14]:
CLIENT_ID =  # your Foursquare ID

CLIENT_SECRET =  # your Foursquare Secret

VERSION = '20200605' # Foursquare API version
LIMIT = 100 # A default Foursquare API limit value

ACCESS_TOKEN =

print('Your credentails:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)

Your credentails:
CLIENT_ID: JYY4M43D3NMOD5NPPWAMDR2KI2YMMWFSE0OGN42PCL1T04MZ
CLIENT_SECRET:L42CIAPTOLFE2ELBEXK3V0ICG1J3JEDQEBN1KAUJAZ5L1LHW


In [15]:
def getting_number_tips(url):
    """getting the number of tips for the gyms in each neighbourhood
    
    Inputs 
    ---------
    url [sting]: The API request URL.
    
    Outputs 
    ---------
    number_tips [int]: The number of tips for the gyms in each neighbourhood.
    
    """
    number_tips = 0 # intiate the number of tips 
    results_list = requests.get(url).json()  # get the results lsit form the API request
    # looping over the items in each the result list
    for result in results_list["response"]['groups'][0]['items']:
       if result['venue']['categories'][0]['name'] == 'Gym':
           gym_id = result['venue']['id']
           url_venue = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&oauth_token={}&v={}'.format(gym_id, 
                                                                                                                          CLIENT_ID, 
                                                                                                                          CLIENT_SECRET,
                                                                                                                          ACCESS_TOKEN, 
                                                                                                                          VERSION)
           result_number_tips = requests.get(url_venue).json() 
           number_tips = number_tips + result_number_tips['response']['venue']['tips']['count'] 
    return number_tips    
    


In [16]:
def getNearbyVenues(Neighbourhood, longitude_latitude, radius):
   
    """ 
    calculating the total number of shops and bussines, total number of gyms and the number of
    tips given to the gyms in each neighbourhood.
    
    Inputs
    -------
    Neighbourhood [Series]: The neighbourhoods in Toronto.
    longitude_latitude [Series]:The longitude latitude of each neighbourhood center.
    radius [int]: The raduis of the search area.
    
    Outputs
    -------
    venues_infomration [DataFrame]: Dataframe that contains the nummber of venues, the number of gyms, and 
                                    the number of tips for the gyms for each neighbourhood in Toronto.
    """
    number_gyms = [] 
    number_venues = []
    number_tips = [] 
    
    # looping over each neighbourhood
    for name, lng_lat in zip(Neighbourhood, longitude_latitude): 
        venues_list = []
        lng = lng_lat[0]
        lat = lng_lat[1]
        
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(CLIENT_ID, 
                                                                                                                                    CLIENT_SECRET, 
                                                                                                                                    VERSION, 
                                                                                                                                    lat, 
                                                                                                                                    lng, 
                                                                                                                                    radius, 
                                                                                                                                    LIMIT)
        # make the GET request
        results = requests.get(url).json()["response"]['groups'][0]['items']
        
        # return only relevant information for each nearby venue
        venues_list.append([(name ,
                            v['venue']['name'], 
                            v['venue']['categories'][0]['name']) for v in results])
        # create a dataframe that contains the number of total number of shops     
        nearby_venues = pd.DataFrame([item for venue_list in venues_list for item in venue_list])
        nearby_venues.columns = ['Neighbourhood', 'Venue', 'Venue Category']
        number_gyms_per_Neighbourhoods = sum((nearby_venues['Venue Category']=='Gym / Fitness Center') | 
                                            (nearby_venues['Venue Category']=='Gym') | 
                                            (nearby_venues['Venue Category']=='Spa'))
        
        number_gyms = np.append(number_gyms, number_gyms_per_Neighbourhoods)
        number_venues = np.append(number_venues, len(nearby_venues['Venue'].unique())-number_gyms_per_Neighbourhoods)
        number_tips = np.append(number_tips, getting_number_tips(url))
    # create a dataframe for the final values         
    venues_infomration= pd.DataFrame({'Neighbourhood':Neighbourhood,
                                      'number_venues': number_venues,
                                      'number_gyms':number_gyms,
                                      'number_tips':number_tips})
    return venues_infomration

In [17]:
venues_infomration = getNearbyVenues(neighbourhood_df['Neighbourhood'], neighbourhood_df['long_latt'], 1000)
neighbourhood_df['number_gyms'] = venues_infomration['number_gyms']
neighbourhood_df['number_venues'] = venues_infomration['number_venues']
neighbourhood_df['number_tips'] = venues_infomration['number_tips']
neighbourhood_df.head(5)

Unnamed: 0,Neighbourhood,"Population, 2016","Population, 2011",Population Change 2011-2016,Total private dwellings,Private dwellings occupied by usual residents,Population density per square kilometre,Land area in square kilometres,Children (0-14 years),Youth (15-24 years),...,Shootings_Rate2016,Shootings_Rate2017,Shootings_Rate2018,Shootings_Rate2019,Shootings_Rate2020,geometry,long_latt,number_gyms,number_venues,number_tips
0,Agincourt North,29113,30279,-3.90%,9371,9120,3929,7.41,3840,3705,...,0.0,3.294567,0.0,6.406971,6.325511,"{""type"": ""Polygon"", ""coordinates"": [[[-79.2420...","[-79.24376995555, 43.8082815798025]",0.0,26.0,26.0
1,Agincourt South-Malvern West,23757,21988,8.00%,8535,8136,3034,7.83,3075,3360,...,12.23441,15.86043,19.26263,7.495409,3.648836,"{""type"": ""Polygon"", ""coordinates"": [[[-79.2549...","[-79.24376995555, 43.8082815798025]",0.0,34.0,34.0
2,Alderwood,12054,11904,1.30%,4732,4616,2435,4.95,1760,1235,...,0.0,0.0,0.0,7.662248,0.0,"{""type"": ""Polygon"", ""coordinates"": [[[-79.5486...","[-79.24376995555, 43.8082815798025]",1.0,17.0,17.0
3,Annex,30526,29177,4.60%,18109,15934,10863,2.81,2360,3750,...,15.77884,3.085753,6.030818,2.947766,0.0,"{""type"": ""Polygon"", ""coordinates"": [[[-79.3941...","[-79.24376995555, 43.8082815798025]",3.0,63.0,63.0
4,Banbury-Don Mills,27695,26918,2.90%,12473,12124,2775,9.98,3605,2730,...,7.034822,3.437844,0.0,6.562757,6.413134,"{""type"": ""Polygon"", ""coordinates"": [[[-79.3305...","[-79.24376995555, 43.8082815798025]",2.0,14.0,14.0
