In [1]:
import json
from copy import deepcopy

import pandas as pd

In [34]:
# Read the tab-separated employment table, then key its rows by neighborhood name
employment_df = pd.read_csv('parsed_employment_data.txt', delimiter='\t')
employment_df = employment_df.set_index("NEIGHBORHOOD")

In [27]:
# Preview only the first rows instead of dumping the entire table into the output
employment_df.head(10)

Unnamed: 0,NEIGHBORHOOD,NUM_EST,NUM_EMP,PAY_ANN,PAY_QTR1,ANN_PAY_DIV_EMP
0,Allston,539,8304,596490,139035,71.831647
1,Area 2/MIT,1173,52093,9139616,2335424,175.448064
2,Back Bay,1979,67148,9874471,2775575,147.055326
3,Baldwin,1108,44379,3420910,807398,77.083981
4,Beacon Hill,1087,19066,2139545,594518,112.217822
5,Brighton,656,12837,915721,217918,71.334502
6,Cambridge Highlands,1108,44379,3420910,807398,77.083981
7,Cambridgeport,1173,52093,9139616,2335424,175.448064
8,Charlestown,503,12664,999317,230729,78.91006
9,Chinatown,963,33148,4271232,1225276,128.853385


In [35]:
# Parse the Cambridge and Boston neighborhood boundary files into dictionaries
with open("../geographic/Cambridge_Neighborhoods.geojson", "r") as cambridge_file:
    cambridge_json = json.load(cambridge_file)
with open("../geographic/Boston_Neighborhoods.geojson", "r") as boston_file:
    boston_json = json.load(boston_file)

# Remove the top-level keys that are not shared between the two files.
# The last pop is left as the final expression, so its value is the cell output.
del boston_json['crs']
boston_json.pop('name')

'Boston_Neighborhood_Boundaries_approximated_by_2020_Census_Block_Groups'

In [36]:
# Drop Harbor Islands from the Boston features *before* collecting names.
# Deriving the name list from the already-filtered features keeps the two in
# sync and makes this cell safe to re-run: the previous list.remove() call
# raised ValueError on a second run, because the feature had already been
# filtered out of boston_json and the name was no longer in the list.
boston_json['features'] = [d for d in boston_json['features'] if
                           d['properties']['blockgr2020_ctr_neighb_name'] != 'Harbor Islands']

# Neighborhood names in file order: Boston first, then Cambridge.
# NOTE: despite the name this is a list (order preserved, duplicates possible).
neighborhood_set = [d['properties']['blockgr2020_ctr_neighb_name']
                    for d in boston_json['features']]
neighborhood_set += [d['properties']['NAME'] for d in cambridge_json['features']]
neighborhood_set

['Allston',
 'Back Bay',
 'Beacon Hill',
 'Brighton',
 'Charlestown',
 'Chinatown',
 'Dorchester',
 'Downtown',
 'East Boston',
 'Fenway',
 'Hyde Park',
 'Jamaica Plain',
 'Longwood',
 'Mattapan',
 'Mission Hill',
 'North End',
 'Roslindale',
 'Roxbury',
 'South Boston',
 'South Boston Waterfront',
 'South End',
 'West End',
 'West Roxbury',
 'The Port',
 'Neighborhood Nine',
 'Wellington-Harrington',
 'Mid-Cambridge',
 'North Cambridge',
 'Cambridge Highlands',
 'Strawberry Hill',
 'West Cambridge',
 'Riverside',
 'Cambridgeport',
 'Area 2/MIT',
 'East Cambridge',
 'Baldwin']

In [37]:
def _standardize_features(features, name_key, include_hoods, employment):
    """Rebuild GeoJSON features so they all share one ``properties`` schema.

    Each kept feature is copied without its original ``properties`` and given
    a new ``properties`` dict with exactly two keys: ``neighborhood`` and
    ``avg_salary``.

    Parameters
    ----------
    features : list of dict
        Feature dictionaries; each must contain ``properties[name_key]``.
    name_key : str
        Key inside ``properties`` that holds the neighborhood name.
    include_hoods : collection of str
        Neighborhood names to keep; features with other names are skipped.
    employment : pandas.DataFrame
        Table indexed by neighborhood name with an ``ANN_PAY_DIV_EMP`` column.

    Returns
    -------
    list of dict
        Copies of the kept features, in input order. The originals are not
        modified.
    """
    std_features = []
    for feature_d in features:
        neighborhood = feature_d['properties'][name_key]
        # Filter before copying so skipped features cost nothing
        if neighborhood not in include_hoods:
            continue
        if neighborhood in employment.index:
            avg_salary = employment.loc[neighborhood, 'ANN_PAY_DIV_EMP']
        else:
            # Keep the feature, but flag the missing salary value
            avg_salary = None
            print(f"No data available for the neighborhood: {neighborhood}")
        # Copy everything except the source-specific properties, which are
        # replaced by the standardized ones below
        std_feature_d = {key: deepcopy(value)
                         for key, value in feature_d.items()
                         if key != 'properties'}
        std_feature_d['properties'] = {'neighborhood': neighborhood,
                                       'avg_salary': avg_salary}
        std_features.append(std_feature_d)
    return std_features


# Neighborhoods to include
include_hood_set = neighborhood_set
# Standardize keys for neighborhood in both json dictionaries; the two sources
# differ only in the property key that stores the neighborhood name
std_boston_features_l = _standardize_features(
    boston_json['features'], 'blockgr2020_ctr_neighb_name',
    include_hood_set, employment_df)
std_cambridge_features_l = _standardize_features(
    cambridge_json['features'], 'NAME',
    include_hood_set, employment_df)

In [38]:
# Merge both cities' standardized features (Boston first, then Cambridge)
# into a single GeoJSON FeatureCollection
combined_features_l = [*std_boston_features_l, *std_cambridge_features_l]
combined_json = {
    'type': 'FeatureCollection',
    'features': combined_features_l,
}

In [39]:
# Give every feature a sequential 1-based id, in list order.
# enumerate() replaces the manual counter, which was named `id` and therefore
# shadowed the builtin id() for every later cell in the notebook.
for feature_id, feature in enumerate(combined_json['features'], start=1):
    feature['id'] = feature_id

In [40]:
# Pull out each feature's properties to inspect the attached salary values
got = list(map(lambda feature: feature["properties"], combined_json["features"]))
got

[{'neighborhood': 'Allston', 'avg_salary': 71.83164739884393},
 {'neighborhood': 'Back Bay', 'avg_salary': 147.05532554953237},
 {'neighborhood': 'Beacon Hill', 'avg_salary': 112.21782230147907},
 {'neighborhood': 'Brighton', 'avg_salary': 71.33450183064579},
 {'neighborhood': 'Charlestown', 'avg_salary': 78.91006001263423},
 {'neighborhood': 'Chinatown', 'avg_salary': 128.85338481959695},
 {'neighborhood': 'Dorchester', 'avg_salary': 59.683840847913864},
 {'neighborhood': 'Downtown', 'avg_salary': 188.8833296001792},
 {'neighborhood': 'East Boston', 'avg_salary': 59.68693009118541},
 {'neighborhood': 'Fenway', 'avg_salary': 81.23144565599719},
 {'neighborhood': 'Hyde Park', 'avg_salary': 55.42134944612286},
 {'neighborhood': 'Jamaica Plain', 'avg_salary': 69.7178024833959},
 {'neighborhood': 'Longwood', 'avg_salary': 78.51204242827254},
 {'neighborhood': 'Mattapan', 'avg_salary': 43.01632438125329},
 {'neighborhood': 'Mission Hill', 'avg_salary': 57.42377328785096},
 {'neighborhood': 

In [41]:
# Serialize the merged FeatureCollection and save it as one GeoJSON file
with open("Boston_Cambridge_salary.geojson", mode='w') as geojson_out:
    geojson_out.write(json.dumps(combined_json))