# Filtering of Dissemination Blocks and Cultural Amenities

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Locations of the raw inputs and of the cleaned outputs (both end with '/').
path = '../../data/1_raw/'
cleanpath = '../../data/2_clean/'

# import dissemination blocks
all_cad_DBs = pd.read_csv(path + 'dissemination_blocks/census2016_dissemination_blocks.csv')
# import cultural amenities
# (no leading '/' on the relative part: `path` already ends with one)
all_cad_amenities = pd.read_csv(path + 'amenities/ODCAF_v1.0.csv', encoding='latin-1', index_col=0)

# DataFrame.info() prints its report itself and returns None, so it is called
# directly instead of being wrapped in print() (which would emit a stray "None").
print('Canadian Dissemination Block Frame: \n')
all_cad_DBs.info()
print('\n'*2)

print('Canadian Cultural Amenities Frame: \n')
all_cad_amenities.info()
print('\n'*2)


Canadian Dissemination Block Frame: 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 489676 entries, 0 to 489675
Data columns (total 41 columns):
 #   Column              Non-Null Count   Dtype  
---  ------              --------------   -----  
 0   DBUID               489676 non-null  int64  
 1   DBPOP               489361 non-null  object 
 2   DAUID               489676 non-null  int64  
 3   DAPOP               489361 non-null  object 
 4   CSDUID              489676 non-null  int64  
 5   CSDNAME             489676 non-null  object 
 6   CSDTYPE             489676 non-null  object 
 7   CSDPOP              489361 non-null  object 
 8   CMAUID              276762 non-null  float64
 9   CMAPUID             276762 non-null  float64
 10  CMANAME             276762 non-null  object 
 11  CMATYPE             276762 non-null  object 
 12  CMAPOP              276762 non-null  object 
 13  PRUID               489676 non-null  int64  
 14  PRNAME              489676 non-null  object 
 

## We can filter by metropolitan area (CMANAME) to yield only Vancouver areas

In [3]:
# keep essential columns
all_cad_DBs = all_cad_DBs[['DBUID','CMANAME','CSDNAME','DBPOP','lat','lon']]
all_cad_DBs_renamed = all_cad_DBs.rename(columns = {'DBUID':'id', 'CMANAME':'metropolitan_area', 'CSDNAME':'subdiv', 'DBPOP':'pop'})

# filter by metropolitan area
# .copy() makes DBsVan an independent frame rather than a slice of
# all_cad_DBs_renamed, so later modifications cannot trigger SettingWithCopyWarning
DBsVan = all_cad_DBs_renamed[all_cad_DBs_renamed['metropolitan_area'] == 'Vancouver'].copy()

# save subdivisions (kept as a set; used later for membership tests)
vancouver_subdivisions = set(DBsVan['subdiv'].unique())
# a set has no defined order, so print it sorted to keep the output stable between runs
print('Vancouver subdivisions: \n', sorted(vancouver_subdivisions))

# only Vancouver dissemination blocks
DBsVan.head()

Vancouver subdivisions: 
 {'Port Coquitlam', 'Delta', 'Musqueam 2', 'Coquitlam 2', 'West Vancouver', 'Lions Bay', 'Langley 5', 'North Vancouver', 'Tsawwassen', 'Belcarra', 'Coquitlam', 'Capilano 5', 'Seymour Creek 2', 'Richmond', 'Greater Vancouver A', 'Surrey', 'McMillan Island 6', 'Port Moody', 'Coquitlam 1', 'Katzie 2', 'Maple Ridge', 'Katzie 1', 'Barnston Island 3', 'Burrard Inlet 3', 'Pitt Meadows', 'Burnaby', 'Semiahmoo', 'White Rock', 'Anmore', 'Bowen Island', 'Musqueam 4', 'New Westminster', 'Whonnock 1', 'Matsqui 4', 'Langley', 'Mission 1', 'Vancouver'}


Unnamed: 0,id,metropolitan_area,subdiv,pop,lat,lon
442430,59150004004,Vancouver,West Vancouver,35,49.3739,-123.2738
442431,59150004005,Vancouver,West Vancouver,50,49.3746,-123.2757
442432,59150004006,Vancouver,West Vancouver,94,49.3738,-123.2763
442433,59150004011,Vancouver,West Vancouver,0,49.3735,-123.2725
442434,59150004012,Vancouver,West Vancouver,0,49.3725,-123.2729


In [4]:
# DBsVan was filtered to metropolitan_area == 'Vancouver', so the column is constant and can go.
# errors='ignore' keeps the cell idempotent: because it rebinds DBsVan, a second
# run would otherwise raise KeyError on the already-removed column.
DBsVan = DBsVan.drop(columns=['metropolitan_area'], errors='ignore')
DBsVan.head()

Unnamed: 0,id,subdiv,pop,lat,lon
442430,59150004004,West Vancouver,35,49.3739,-123.2738
442431,59150004005,West Vancouver,50,49.3746,-123.2757
442432,59150004006,West Vancouver,94,49.3738,-123.2763
442433,59150004011,West Vancouver,0,49.3735,-123.2725
442434,59150004012,West Vancouver,0,49.3725,-123.2729


## Filter Greater Vancouver Amenities

In [5]:
# Keep only the amenities whose CSD_Name is one of the Vancouver
# metropolitan-area subdivision names collected in vancouver_subdivisions.
# NOTE(review): the match is on name only; a same-named subdivision elsewhere
# in the amenities file would also pass -- confirm, or match on CSDUID instead.
in_vancouver_area = all_cad_amenities['CSD_Name'].isin(vancouver_subdivisions)
gva_amenities_cluttered = all_cad_amenities[in_vancouver_area]

# Columns carried forward; the frame's index is turned into a regular column
# by reset_index() and then renamed together with the rest.
columns_to_keep = [
    'Latitude',
    'Longitude',
    'ODCAF_Facility_Type',
    'Facility_Name',
    'CSD_Name',
    'CSDUID',
]
short_names = {
    'Index': 'id',
    'Latitude': 'lat',
    'Longitude': 'lon',
    'ODCAF_Facility_Type': 'type',
    'Facility_Name': 'name',
    'CSD_Name': 'city',
    'CSDUID': 'city_id',
}
gva_amenities = (
    gva_amenities_cluttered[columns_to_keep]
    .reset_index()
    .rename(columns=short_names)
)

gva_amenities

Unnamed: 0,id,lat,lon,type,name,city,city_id
0,10,49.1763542,-123.112783,museum,12 Service Battalion Museum,Richmond,5915015
1,15,49.261938,-123.151123,museum,15th Field Artillery Regiment Museum And Archives,Vancouver,5915022
2,24,49.278786,-123.098796,museum,221A Artist Run Centre,Vancouver,5915022
3,41,49.2210003,-123.0091848,artist,7302754 Canada Inc,Burnaby,5915025
4,97,49.14709735,-122.6467963,heritage or historic site,Abc Heritage Preschool And Child Care,Langley,5915001
...,...,...,...,...,...,...,...
441,9694,49.2772414,-123.0660915,theatre/performance and concert hall,Wise Club Hall,Vancouver,5915022
442,9728,49.282541,-123.107579,theatre/performance and concert hall,Woodward's Atrium,Vancouver,5915022
443,9766,49.2786795,-123.0707458,theatre/performance and concert hall,York Theatre,Vancouver,5915022
444,9780,49.2600654,-123.1151069,theatre/performance and concert hall,Yuk Yuk's Comedy Club,Vancouver,5915022


## Summary Information about Cultural Amenities in Vancouver

In [6]:
# Headline counts for the Vancouver metropolitan-area amenities:
# overall total, per facility type, and per city.
n_ams = len(gva_amenities)
ams_by_type = gva_amenities['type'].value_counts()
ams_by_city = gva_amenities['city'].value_counts()

# Sections are separated by one blank line in the printed report.
summary_sections = [
    f'Number of Van-MA Cultural Amenities: {n_ams}',
    f'Number of Cultural Amenities by Type: \n{ams_by_type}',
    f'Number of Cultural Amenities by City: \n{ams_by_city}',
]
print('\n\n'.join(summary_sections))


Number of Van-MA Cultural Amenities: 446

Number of Cultural Amenities by Type: 
gallery                                 99
museum                                  92
library or archives                     89
theatre/performance and concert hall    76
artist                                  48
heritage or historic site               28
miscellaneous                            6
art or cultural centre                   6
festival site                            2
Name: type, dtype: int64

Number of Cultural Amenities by City: 
Vancouver              195
New Westminster         80
Langley                 39
Burnaby                 21
North Vancouver         19
Greater Vancouver A     17
Surrey                  17
Richmond                16
Port Moody               7
Maple Ridge              7
Coquitlam                7
West Vancouver           5
Delta                    5
White Rock               4
Bowen Island             3
Pitt Meadows             2
Port Coquitlam           1
Capilano

## Save dataframes to clean directory

In [7]:
# Write each cleaned frame to the clean-data directory, without the index column.
clean_outputs = {
    'vancouver_db.csv': DBsVan,
    'vancouver_facilities.csv': gva_amenities,
}
for csv_name, frame in clean_outputs.items():
    frame.to_csv(cleanpath + csv_name, index=False)