In [1]:
import json
import sys
from datetime import datetime

import geojson
import pandas as pd

# Make the project-level `common` package importable from this notebook
sys.path.append('../..')

from common.toronto_api import TorontoOpenDataAPI
from common.population_metrics import extract_population_metrics, calculate_service_need_index

In [2]:
# Initialize the API client; the cells below use it to fetch package
# metadata (get_package) and resource data (get_resource_data)
client = TorontoOpenDataAPI()

---

## Neighbourhood Profiles


In [3]:
# Get package metadata using the dataset id "neighbourhood-profiles";
# its 'resources' entries are searched for the download url in the next cell
package = client.get_package("neighbourhood-profiles")
# Optional: uncomment to list the resources available in the package
# client.show_resources_info(package)

In [None]:
# Name of resource identified in prev cell
resource_name = 'neighbourhood-profiles-2021-158-model'

# Collect the download url of every resource carrying that name
matching_urls = [
    r['url'] for r in package['resources']
    if r['name'] == resource_name
]
# Fail loudly when the resource is missing, instead of continuing with an
# undefined `url` (or one left over from an earlier kernel run)
if not matching_urls:
    raise ValueError(
        f"Resource '{resource_name}' not found in package resources"
    )
# If several resources share the name, the last one wins
url = matching_urls[-1]
# use url to download excel file into df
nb_df = pd.read_excel(url)
nb_df.head()

Unnamed: 0,Neighbourhood Name,West Humber-Clairville,Mount Olive-Silverstone-Jamestown,Thistletown-Beaumond Heights,Rexdale-Kipling,Elms-Old Rexdale,Kingsview Village-The Westway,Willowridge-Martingrove-Richview,Humber Heights-Westmount,Edenbridge-Humber Valley,...,Harbourfront-CityPlace,St Lawrence-East Bayfront-The Islands,Church-Wellesley,Downtown Yonge East,Bay-Cloverhill,Yonge-Bay Corridor,Junction-Wallace Emerson,Dovercourt Village,North Toronto,South Eglinton-Davisville
0,Neighbourhood Number,1,2,3,4,5,6,7,8,9,...,165,166,167,168,169,170,171,172,173,174
1,TSNS 2020 Designation,Not an NIA or Emerging Neighbourhood,Neighbourhood Improvement Area,Neighbourhood Improvement Area,Not an NIA or Emerging Neighbourhood,Neighbourhood Improvement Area,Neighbourhood Improvement Area,Not an NIA or Emerging Neighbourhood,Emerging Neighbourhood,Not an NIA or Emerging Neighbourhood,...,Not an NIA or Emerging Neighbourhood,Not an NIA or Emerging Neighbourhood,Not an NIA or Emerging Neighbourhood,Not an NIA or Emerging Neighbourhood,Not an NIA or Emerging Neighbourhood,Not an NIA or Emerging Neighbourhood,Not an NIA or Emerging Neighbourhood,Not an NIA or Emerging Neighbourhood,Not an NIA or Emerging Neighbourhood,Not an NIA or Emerging Neighbourhood
2,Total - Age groups of the population - 25% sam...,33300,31345,9850,10375,9355,22005,22445,10005,15190,...,28135,31285,22320,17700,16670,12645,23180,12380,15885,22735
3,0 to 14 years,4295,5690,1495,1575,1610,3915,3500,1370,2070,...,2065,2285,895,1055,745,970,3075,1365,1315,2190
4,0 to 4 years,1460,1650,505,505,440,1245,1065,395,520,...,1030,1045,495,480,370,500,1135,445,535,910


In [6]:
# Build the per-neighbourhood metrics table from the raw profile sheet using
# the project helper from common.population_metrics. The preview below shows
# one row per neighbourhood with population, youth, seniors and income columns.
pop_metrics = extract_population_metrics(nb_df)
pop_metrics.head()

Unnamed: 0,neighbourhood_name,neighbourhood_number,total_population,youth_15_19,youth_20_24,seniors_65_plus,low_income,median_income_2019,median_income_2020,youth_15_24,youth_15_24_pct,seniors_65_plus_pct,low_income_pct
0,West Humber-Clairville,1,33300,1860,3175,5360,2420,28600,31600,5035,15.12012,16.096096,7.267267
1,Mount Olive-Silverstone-Jamestown,2,31345,2280,2675,4170,2960,25000,28400,4955,15.807944,13.303557,9.443292
2,Thistletown-Beaumond Heights,3,9850,570,745,1740,710,28000,30600,1315,13.350254,17.664975,7.208122
3,Rexdale-Kipling,4,10375,515,715,1850,800,29000,31800,1230,11.855422,17.831325,7.710843
4,Elms-Old Rexdale,5,9355,635,685,1390,720,29200,32400,1320,14.110102,14.858365,7.696419


In [7]:
# Add the service need index computed by the project helper as a new column.
# NOTE(review): the formula lives in common.population_metrics — check there
# for how the index is weighted and scaled.
pop_metrics['service_need_index'] = calculate_service_need_index(pop_metrics)
pop_metrics.head()

Unnamed: 0,neighbourhood_name,neighbourhood_number,total_population,youth_15_19,youth_20_24,seniors_65_plus,low_income,median_income_2019,median_income_2020,youth_15_24,youth_15_24_pct,seniors_65_plus_pct,low_income_pct,service_need_index
0,West Humber-Clairville,1,33300,1860,3175,5360,2420,28600,31600,5035,15.12012,16.096096,7.267267,0.407958
1,Mount Olive-Silverstone-Jamestown,2,31345,2280,2675,4170,2960,25000,28400,4955,15.807944,13.303557,9.443292,0.397935
2,Thistletown-Beaumond Heights,3,9850,570,745,1740,710,28000,30600,1315,13.350254,17.664975,7.208122,0.167127
3,Rexdale-Kipling,4,10375,515,715,1850,800,29000,31800,1230,11.855422,17.831325,7.710843,0.169075
4,Elms-Old Rexdale,5,9355,635,685,1390,720,29200,32400,1320,14.110102,14.858365,7.696419,0.166332


In [8]:
# Candidate priority neighbourhoods: order by service need index (highest
# first) and show the ten at the top
ranked_by_need = pop_metrics.sort_values(by='service_need_index', ascending=False)
ranked_by_need.head(10)

Unnamed: 0,neighbourhood_name,neighbourhood_number,total_population,youth_15_19,youth_20_24,seniors_65_plus,low_income,median_income_2019,median_income_2020,youth_15_24,youth_15_24_pct,seniors_65_plus_pct,low_income_pct,service_need_index
0,West Humber-Clairville,1,33300,1860,3175,5360,2420,28600,31600,5035,15.12012,16.096096,7.267267,0.407958
1,Mount Olive-Silverstone-Jamestown,2,31345,2280,2675,4170,2960,25000,28400,4955,15.807944,13.303557,9.443292,0.397935
22,Glenfield-Jane Heights,25,30020,2080,2275,5150,2600,26000,29000,4355,14.506995,17.15523,8.660893,0.377727
149,St Lawrence-East Bayfront-The Islands,166,31285,600,1990,3995,3320,44800,50400,2590,8.278728,12.769698,10.612114,0.376133
23,York University Heights,27,28255,1545,3155,3720,3165,27600,30600,4700,16.634224,13.165811,11.201557,0.375619
84,Annex,95,29300,865,2145,5785,3570,43600,45600,3010,10.273038,19.744027,12.1843,0.368151
120,West Hill,136,28140,1730,2135,4545,2680,27400,31000,3865,13.734897,16.151386,9.52381,0.359211
148,Harbourfront-CityPlace,165,28135,365,2250,1525,3285,45600,50800,2615,9.294473,5.420295,11.675849,0.351533
106,Wexford/Maryvale,119,28345,1525,1965,4760,2220,30400,32800,3490,12.312577,16.793085,7.832069,0.350883
124,Golfdale-Cedarbrae-Woburn,141,27085,1735,2200,4200,2525,25600,29400,3935,14.528337,15.506738,9.322503,0.350624


In [9]:
# For tableau join: neighbourhood number rendered as a zero-padded code of width 3
pop_metrics['Area Long Code'] = pop_metrics['neighbourhood_number'].map('{:03}'.format)

In [10]:
# Write the neighbourhood metrics to csv, with a header row and without the index
pop_metrics.to_csv('../data/processed/neighbourhood_metrics.csv', index=False, header=True)

---

## Wellbeing Youth

### Mental Health


In [10]:
# Get package metadata using the dataset id "wellbeing-youth-mental-health"
# (note: this rebinds `package`, which earlier held the neighbourhood profiles)
package = client.get_package("wellbeing-youth-mental-health")
# Get data from active datastore and preview the first rows
mh_df = client.get_resource_data(package)
mh_df.head()

Unnamed: 0,_id,OBJECTID,AGENCY_NAME,ORGANIZATION_ADDRESS,NEIGHBOURHOOD,OFFICE_PHONE,EMAIL,WEBSITE,ELIGIBILITY,DESCRIPTION_SERVICE,...,DATE_UPDATED,ADDRESS_POINT_ID,X,Y,LONGITUDE,LATITUDE,ADDRESS_FULL,MUNICIPALITY,POSTAL_CODE,geometry
0,1,1236,St Michael's Hospital,"21 McGill St, Toronto, ON M5B 1H3","Church-Yonge Corridor, 75",Central Intake 416-864-5120,steps@smh.ca,"<a href=""http://www.stmichaelshospital.com/pro...",Youth 16-23 years experiencing a first episode...,"Assessment * treatment * support with school, ...",...,2013-10-21T04:00:00,840654,,,,,21 McGill St,former Toronto,M5B 1H3,"{""type"": ""Point"", ""coordinates"": [-79.38137399..."
1,2,1237,St Michael's Hospital,"30 Bond St, Toronto, ON M5B 1W8","Church-Yonge Corridor, 75",Office 416-864-5346 ; Emergency Department 416...,,"<a href=""http://www.stmichaelshospital.com"" ta...",People who need medical care for serious illne...,Psychiatric emergencies -- Monday-Sunday 24 ho...,...,2013-10-16T04:00:00,773891,,,,,30 Bond St,former Toronto,M5B 1W8,"{""type"": ""Point"", ""coordinates"": [-79.37767928..."
2,3,1238,WoodGreen Community Services,"815 Danforth Ave, Ste 300, Toronto, ON M4J...","Danforth Village-Toronto, 66",416-645-6000 ext 2100/2200,newcomerinfo@woodgreen.org,"<a href=""http://www.woodgreen.org"" target=""_bl...",Newcomers (immigrants and refugees) * Mentorsh...,"<b>Settlement services</b> -- orientation, int...",...,2016-01-25T05:00:00,7569237,,,,,815 Danforth Ave,former Toronto,M4J 1L2,"{""type"": ""Point"", ""coordinates"": [-79.34097126..."
3,4,1239,Bangladeshi Canadian Community Services,"2899 Danforth Ave, Toronto, ON M4C 1M3","East End-Danforth, 62",416-699-4484,nakter@bangladeshi.ca,"<a href=""http://www.bangladeshi.ca"" target=""_b...",Open to all including immigrants and refugees ...,Settlement services * information and referral...,...,2015-09-15T04:00:00,790475,,,,,2899 Danforth Ave,former Toronto,M4C 1M3,"{""type"": ""Point"", ""coordinates"": [-79.29397796..."
4,5,1240,Harriet Tubman Community Organization,"1761 Sheppard Ave E, Main Fl, Street Level, To...","Henry Farm, 53",416-496-2042,info@tubmancommunity.org,"<a href=""http://www.tubmancommunity.org"" targe...","Focus on African-Canadian children, youth and ...",Social and recreational programs * leadership ...,...,2015-07-15T04:00:00,30013252,,,,,1761 Sheppard Ave E,North York,M2J 0A5,"{""type"": ""Point"", ""coordinates"": [-79.34303694..."


In [11]:
# Columns kept from every Wellbeing Youth service dataset
srv_columns = [
    # record identity
    'OBJECTID', 'AGENCY_NAME',
    # location as published by the agency
    'ORGANIZATION_ADDRESS', 'NEIGHBOURHOOD',
    # record metadata
    'LEGAL_STATUS', 'DATE_UPDATED',
    # geocoded address and point geometry
    'ADDRESS_FULL', 'MUNICIPALITY', 'POSTAL_CODE', 'geometry',
]

In [12]:
# Keep only the shared service columns. Take an explicit copy so that adding
# columns to mh_df later does not raise SettingWithCopyWarning.
mh_df = mh_df[srv_columns].copy()
mh_df.head()

Unnamed: 0,OBJECTID,AGENCY_NAME,ORGANIZATION_ADDRESS,NEIGHBOURHOOD,LEGAL_STATUS,DATE_UPDATED,ADDRESS_FULL,MUNICIPALITY,POSTAL_CODE,geometry
0,1236,St Michael's Hospital,"21 McGill St, Toronto, ON M5B 1H3","Church-Yonge Corridor, 75",(50) Non Profit,2013-10-21T04:00:00,21 McGill St,former Toronto,M5B 1H3,"{""type"": ""Point"", ""coordinates"": [-79.38137399..."
1,1237,St Michael's Hospital,"30 Bond St, Toronto, ON M5B 1W8","Church-Yonge Corridor, 75",(50) Non Profit,2013-10-16T04:00:00,30 Bond St,former Toronto,M5B 1W8,"{""type"": ""Point"", ""coordinates"": [-79.37767928..."
2,1238,WoodGreen Community Services,"815 Danforth Ave, Ste 300, Toronto, ON M4J...","Danforth Village-Toronto, 66",(50) Non Profit ; (51) Registered Charity,2016-01-25T05:00:00,815 Danforth Ave,former Toronto,M4J 1L2,"{""type"": ""Point"", ""coordinates"": [-79.34097126..."
3,1239,Bangladeshi Canadian Community Services,"2899 Danforth Ave, Toronto, ON M4C 1M3","East End-Danforth, 62",(50) Non Profit ; (51) Registered Charity,2015-09-15T04:00:00,2899 Danforth Ave,former Toronto,M4C 1M3,"{""type"": ""Point"", ""coordinates"": [-79.29397796..."
4,1240,Harriet Tubman Community Organization,"1761 Sheppard Ave E, Main Fl, Street Level, To...","Henry Farm, 53",(50) Non Profit,2015-07-15T04:00:00,1761 Sheppard Ave E,North York,M2J 0A5,"{""type"": ""Point"", ""coordinates"": [-79.34303694..."


In [13]:
# Tag every row with its source dataset. `assign` returns a new frame, so no
# column is set on a slice of the original download (avoids
# SettingWithCopyWarning).
mh_df = mh_df.assign(wellbeing_youth_type='mental-health')


### Concurrent Disorder Programs


In [14]:
# Get package metadata using dataset id
package = client.get_package("wellbeing-youth-concurrent-disorder-programs")
# Get data from active datastore
cdp_df = client.get_resource_data(package)
cdp_df.head()


Unnamed: 0,_id,OBJECTID,AGENCY_NAME,ORGANIZATION_ADDRESS,NEIGHBOURHOOD,OFFICE_PHONE,EMAIL,WEBSITE,ELIGIBILITY,DESCRIPTION_SERVICE,...,DATE_UPDATED,ADDRESS_POINT_ID,X,Y,LONGITUDE,LATITUDE,ADDRESS_FULL,MUNICIPALITY,POSTAL_CODE,geometry
0,1,1243,"Accommodation, Information and Support","720 Spadina Ave, Ste 316, Toronto, ON M5S 2T9","University, 79",416-504-3610,mbastidas@aistoronto.ca,"<a href=""http://www.aistoronto.ca"" target=""_bl...","Individuals 16 years and older, single or with...","Permanent supportive housing * bachelor, one a...",...,2015-05-07T04:00:00,870176,,,,,720 Spadina Ave,former Toronto,M5S 2T9,"{""type"": ""Point"", ""coordinates"": [-79.40361735..."
1,2,1244,Bellwood Health Services,"1020 McNicoll Ave, Toronto, ON M1W 2J6","Steeles, 116",416-495-0926,info@bellwood.ca,"<a href=""http://www.bellwood.ca"" target=""_blan...","Women and men 19 years and older, with alcohol...",Residential and outpatient addiction treatment...,...,2014-10-15T04:00:00,9311782,,,,,1020 McNicoll Ave,Scarborough,M1W 2J6,"{""type"": ""Point"", ""coordinates"": [-79.33600977..."
2,3,1245,Humber River Hospital,"1235 Wilson Ave, Toronto, ON M3M 0B2","Downsview-Roding-CFB, 26",416-242-1000,,"<a href=""http://www.hrh.ca"" target=""_blank"">ww...",,Acute care hospital * medical and surgical inp...,...,2015-10-16T04:00:00,30063647,,,,,1235 Wilson Ave,North York,M3M 0B2,"{""type"": ""Point"", ""coordinates"": [-79.48921562..."
3,4,1246,Jean Tweed Centre,"215 Evans Ave, Toronto, ON M8Z 1J5","Mimico, 17",416-255-7359,info@jeantweed.com,"<a href=""http://www.jeantweed.com"" target=""_bl...",Women 16 years and older who have problems wit...,Intensive 3 week residential and day treatment...,...,2016-01-07T05:00:00,10998869,,,,,215 Evans Ave,Etobicoke,M8Z 1J5,"{""type"": ""Point"", ""coordinates"": [-79.51552302..."
4,5,1247,"University Health Network, Toronto Western Hos...","399 Bathurst St, East Wing, 9th Fl, Toronto, O...","Trinity-Bellwoods, 81",416-603-5974,,"<a href=""http://www.uhn.ca"" target=""_blank"">ww...",Portuguese speaking people with a mental healt...,Linguistically and culturally appropriate ment...,...,2014-01-31T05:00:00,9086232,,,,,399 Bathurst St,former Toronto,M5T 2S8,"{""type"": ""Point"", ""coordinates"": [-79.40568847..."


In [15]:
# Keep only the shared service columns. Take an explicit copy so that adding
# columns to cdp_df later does not raise SettingWithCopyWarning.
cdp_df = cdp_df[srv_columns].copy()
cdp_df.head()

Unnamed: 0,OBJECTID,AGENCY_NAME,ORGANIZATION_ADDRESS,NEIGHBOURHOOD,LEGAL_STATUS,DATE_UPDATED,ADDRESS_FULL,MUNICIPALITY,POSTAL_CODE,geometry
0,1243,"Accommodation, Information and Support","720 Spadina Ave, Ste 316, Toronto, ON M5S 2T9","University, 79",(50) Non Profit ; (51) Registered Charity,2015-05-07T04:00:00,720 Spadina Ave,former Toronto,M5S 2T9,"{""type"": ""Point"", ""coordinates"": [-79.40361735..."
1,1244,Bellwood Health Services,"1020 McNicoll Ave, Toronto, ON M1W 2J6","Steeles, 116",(50) Non Profit,2014-10-15T04:00:00,1020 McNicoll Ave,Scarborough,M1W 2J6,"{""type"": ""Point"", ""coordinates"": [-79.33600977..."
2,1245,Humber River Hospital,"1235 Wilson Ave, Toronto, ON M3M 0B2","Downsview-Roding-CFB, 26",(50) Non Profit ; (51) Registered Charity,2015-10-16T04:00:00,1235 Wilson Ave,North York,M3M 0B2,"{""type"": ""Point"", ""coordinates"": [-79.48921562..."
3,1246,Jean Tweed Centre,"215 Evans Ave, Toronto, ON M8Z 1J5","Mimico, 17",(50) Non Profit ; (51) Registered Charity,2016-01-07T05:00:00,215 Evans Ave,Etobicoke,M8Z 1J5,"{""type"": ""Point"", ""coordinates"": [-79.51552302..."
4,1247,"University Health Network, Toronto Western Hos...","399 Bathurst St, East Wing, 9th Fl, Toronto, O...","Trinity-Bellwoods, 81",(50) Non Profit ; (51) Registered Charity,2014-01-31T05:00:00,399 Bathurst St,former Toronto,M5T 2S8,"{""type"": ""Point"", ""coordinates"": [-79.40568847..."


In [16]:
# Tag every row with its source dataset. `assign` returns a new frame, so no
# column is set on a slice of the original download (avoids
# SettingWithCopyWarning).
cdp_df = cdp_df.assign(wellbeing_youth_type='concurrent-disorder-programs')

### All wellbeing youth datasets in one df


In [17]:
# Stack the mental-health and concurrent-disorder tables row-wise under a
# fresh 0..n-1 index, then summarise dtypes and non-null counts
srv_df = pd.concat([mh_df, cdp_df], ignore_index=True)
srv_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 125 entries, 0 to 124
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   OBJECTID              125 non-null    int64 
 1   AGENCY_NAME           125 non-null    object
 2   ORGANIZATION_ADDRESS  125 non-null    object
 3   NEIGHBOURHOOD         123 non-null    object
 4   LEGAL_STATUS          124 non-null    object
 5   DATE_UPDATED          125 non-null    object
 6   ADDRESS_FULL          125 non-null    object
 7   MUNICIPALITY          125 non-null    object
 8   POSTAL_CODE           125 non-null    object
 9   geometry              125 non-null    object
 10  wellbeing_youth_type  125 non-null    object
dtypes: int64(1), object(10)
memory usage: 10.9+ KB


In [18]:
# Parse each GeoJSON geometry string with the json module. This does not
# depend on the exact spacing or quoting of the serialized text, and yields
# numeric coordinates rather than strings.
positions = [json.loads(geometry)['coordinates'] for geometry in srv_df['geometry']]
# A GeoJSON position is ordered [longitude, latitude, ...]; keep the first two
srv_df[['longitude', 'latitude']] = pd.DataFrame(
    [position[:2] for position in positions],
    index=srv_df.index
)
# drop geometry column (rebinding instead of mutating in place)
srv_df = srv_df.drop(columns=['geometry'])
# match format of neighbourhood column: "Name, 75" -> "Name (75)".
# regex=False makes the literal replacement explicit across pandas versions;
# rows with a missing neighbourhood stay missing.
srv_df['NEIGHBOURHOOD'] = srv_df['NEIGHBOURHOOD'].str.replace(', ', ' (', regex=False) + ')'
srv_df.head()


Unnamed: 0,OBJECTID,AGENCY_NAME,ORGANIZATION_ADDRESS,NEIGHBOURHOOD,LEGAL_STATUS,DATE_UPDATED,ADDRESS_FULL,MUNICIPALITY,POSTAL_CODE,wellbeing_youth_type,longitude,latitude
0,1236,St Michael's Hospital,"21 McGill St, Toronto, ON M5B 1H3",Church-Yonge Corridor (75),(50) Non Profit,2013-10-21T04:00:00,21 McGill St,former Toronto,M5B 1H3,mental-health,-79.3813739919446,43.6598728550589
1,1237,St Michael's Hospital,"30 Bond St, Toronto, ON M5B 1W8",Church-Yonge Corridor (75),(50) Non Profit,2013-10-16T04:00:00,30 Bond St,former Toronto,M5B 1W8,mental-health,-79.3776792804321,43.6536468859287
2,1238,WoodGreen Community Services,"815 Danforth Ave, Ste 300, Toronto, ON M4J...",Danforth Village-Toronto (66),(50) Non Profit ; (51) Registered Charity,2016-01-25T05:00:00,815 Danforth Ave,former Toronto,M4J 1L2,mental-health,-79.3409712694603,43.6794389537811
3,1239,Bangladeshi Canadian Community Services,"2899 Danforth Ave, Toronto, ON M4C 1M3",East End-Danforth (62),(50) Non Profit ; (51) Registered Charity,2015-09-15T04:00:00,2899 Danforth Ave,former Toronto,M4C 1M3,mental-health,-79.2939779674297,43.6896991337791
4,1240,Harriet Tubman Community Organization,"1761 Sheppard Ave E, Main Fl, Street Level, To...",Henry Farm (53),(50) Non Profit,2015-07-15T04:00:00,1761 Sheppard Ave E,North York,M2J 0A5,mental-health,-79.3430369482099,43.7756095392188


In [19]:
# Number of services (non-null OBJECTID values) per municipality
srv_df.groupby('MUNICIPALITY')['OBJECTID'].count()

MUNICIPALITY
East York          1
Etobicoke          8
North York        17
Scarborough       17
York               6
former Toronto    76
Name: OBJECTID, dtype: int64

In [20]:
# dataset with repeated data by year, for tableau display:
# one copy of every service row for each year from 2014 to the current year
years = pd.DataFrame({
    'year': range(2014, datetime.today().year + 1)
})
# A cross join pairs every service with every year directly, without adding a
# temporary join-key column to srv_df
srv_df = srv_df.merge(years, how='cross')
srv_df.head()

Unnamed: 0,OBJECTID,AGENCY_NAME,ORGANIZATION_ADDRESS,NEIGHBOURHOOD,LEGAL_STATUS,DATE_UPDATED,ADDRESS_FULL,MUNICIPALITY,POSTAL_CODE,wellbeing_youth_type,longitude,latitude,year
0,1236,St Michael's Hospital,"21 McGill St, Toronto, ON M5B 1H3",Church-Yonge Corridor (75),(50) Non Profit,2013-10-21T04:00:00,21 McGill St,former Toronto,M5B 1H3,mental-health,-79.3813739919446,43.6598728550589,2014
1,1236,St Michael's Hospital,"21 McGill St, Toronto, ON M5B 1H3",Church-Yonge Corridor (75),(50) Non Profit,2013-10-21T04:00:00,21 McGill St,former Toronto,M5B 1H3,mental-health,-79.3813739919446,43.6598728550589,2015
2,1236,St Michael's Hospital,"21 McGill St, Toronto, ON M5B 1H3",Church-Yonge Corridor (75),(50) Non Profit,2013-10-21T04:00:00,21 McGill St,former Toronto,M5B 1H3,mental-health,-79.3813739919446,43.6598728550589,2016
3,1236,St Michael's Hospital,"21 McGill St, Toronto, ON M5B 1H3",Church-Yonge Corridor (75),(50) Non Profit,2013-10-21T04:00:00,21 McGill St,former Toronto,M5B 1H3,mental-health,-79.3813739919446,43.6598728550589,2017
4,1236,St Michael's Hospital,"21 McGill St, Toronto, ON M5B 1H3",Church-Yonge Corridor (75),(50) Non Profit,2013-10-21T04:00:00,21 McGill St,former Toronto,M5B 1H3,mental-health,-79.3813739919446,43.6598728550589,2018


In [21]:
# Write the per-year service table to csv, with a header row and without the index
srv_df.to_csv('../data/processed/mh_services.csv', index=False, header=True)

---

## Mental Health Apprehensions


In [22]:
# Get package metadata using the dataset id "mental-health-apprehensions"
# (rebinds `package`)
package = client.get_package("mental-health-apprehensions")
# Optional: uncomment to list the resources available in the package
# client.show_resources_info(package)

In [23]:
# Get data from the package's active datastore resource as a DataFrame
# and preview the first rows
apr_df = client.get_resource_data(package)
apr_df.head()

Unnamed: 0,_id,EVENT_UNIQUE_ID,REPORT_DATE,REPORT_YEAR,REPORT_MONTH,REPORT_DOW,REPORT_DOY,REPORT_DAY,REPORT_HOUR,OCC_DATE,...,OCC_HOUR,DIVISION,PREMISES_TYPE,APPREHENSION_TYPE,SEX,AGE_COHORT,HOOD_158,NEIGHBOURHOOD_158,HOOD_140,NEIGHBOURHOOD_140
0,1,GO-20141262056,2014-01-01,2014,January,Wednesday,1,1,11.0,2014-01-01,...,11.0,D13,House,Mha Sec 17 (Power Of App),Male,25 to 34,092,Corso Italia-Davenport (92),092,Corso Italia-Davenport (92)
1,2,GO-20141263993,2014-01-01,2014,January,Wednesday,1,1,19.0,2014-01-01,...,19.0,D12,Apartment,Mha Sec 17 (Power Of App),Male,18 to 24,115,Mount Dennis (115),115,Mount Dennis (115)
2,3,GO-20141261310,2014-01-01,2014,January,Wednesday,1,1,8.0,2014-01-01,...,6.0,NSA,Outside,Mha Sec 17 (Power Of App),Female,55 to 64,NSA,NSA,NSA,NSA
3,4,GO-20141259983,2014-01-01,2014,January,Wednesday,1,1,,2014-01-01,...,,D23,House,Mha Sec 17 (Power Of App),Female,45 to 54,007,Willowridge-Martingrove-Richview (7),007,Willowridge-Martingrove-Richview (7)
4,5,GO-20141263946,2014-01-01,2014,January,Wednesday,1,1,19.0,2014-01-01,...,19.0,D42,House,Mha Sec 17 (Power Of App),Male,55 to 64,144,Morningside Heights (144),131,Rouge (131)


In [24]:
# Columns kept from the apprehensions dataset: event id, report/occurrence
# dates, police division, premises, apprehension type, demographics and the
# 158-model neighbourhood fields
apr_columns = [
    'EVENT_UNIQUE_ID',
    'REPORT_DATE',
    'OCC_DATE',
    'OCC_YEAR',
    'DIVISION',
    'PREMISES_TYPE',
    'APPREHENSION_TYPE',
    'SEX',
    'AGE_COHORT',
    'HOOD_158',
    'NEIGHBOURHOOD_158'
]
apr_df = apr_df[apr_columns]

In [25]:
# Summarise the retained columns: dtypes and non-null counts
apr_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 112314 entries, 0 to 112313
Data columns (total 11 columns):
 #   Column             Non-Null Count   Dtype 
---  ------             --------------   ----- 
 0   EVENT_UNIQUE_ID    112314 non-null  object
 1   REPORT_DATE        112314 non-null  object
 2   OCC_DATE           112314 non-null  object
 3   OCC_YEAR           112314 non-null  int64 
 4   DIVISION           112314 non-null  object
 5   PREMISES_TYPE      112314 non-null  object
 6   APPREHENSION_TYPE  112314 non-null  object
 7   SEX                112314 non-null  object
 8   AGE_COHORT         112314 non-null  object
 9   HOOD_158           112314 non-null  object
 10  NEIGHBOURHOOD_158  112314 non-null  object
dtypes: int64(1), object(10)
memory usage: 9.4+ MB


In [26]:
# Restrict to apprehensions whose occurrence year is 2014 or later,
# then list the years that remain
occurred_since_2014 = apr_df['OCC_YEAR'] >= 2014
apr_df = apr_df[occurred_since_2014]
apr_df['OCC_YEAR'].unique()

array([2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2024])

In [27]:
# Write the filtered apprehensions to csv, with a header row and without the index
apr_df.to_csv('../data/processed/mh_appr.csv', index=False, header=True)


---

## Neighbourhood geometry data

In [28]:
# Load the raw neighbourhood boundaries. GeoJSON is UTF-8 encoded, so read it
# as such instead of relying on the platform's default text encoding.
with open('../data/raw/Neighbourhoods - 4326.geojson', encoding='utf-8') as f:
    gj = geojson.load(f)

In [29]:
# Properties to retain on every neighbourhood feature
kept_properties = (
    'OBJECTID',
    'AREA_LONG_CODE',
    'AREA_DESC',
    'CLASSIFICATION',
    'CLASSIFICATION_CODE',
)
for feature in gj['features']:
    # rebuild the properties mapping with only the retained keys,
    # preserving their original order
    feature['properties'] = {
        name: value
        for name, value in feature['properties'].items()
        if name in kept_properties
    }


In [30]:
# Write the trimmed feature collection to the processed data folder
with open('../data/processed/neighbourhoods.geojson', 'w') as out_file:
    geojson.dump(gj, out_file)