In [1]:
import pandas as pd

In [2]:
import json
import os
import csv

In [3]:
os.getcwd()

'/repos/EHDP-data/indicators'

## Reading in indicators file

In [104]:
test = json.load(open("data/2120.json"))

In [105]:
df = pd.DataFrame.from_dict(test)

In [106]:
df.to_csv("2120.csv", index = False)

In [107]:
df.shape

(768, 9)

In [108]:
len(df['GeoID'].unique())

47

In [109]:
df['GeoID'].unique()

array([  1,   2,   3,   4,   5, 101, 102, 103, 104, 105, 106, 107, 201,
       202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 301, 302, 303,
       304, 305, 306, 307, 308, 309, 310, 401, 402, 403, 404, 405, 406,
       407, 408, 409, 410, 501, 502, 503, 504])

## Reading in csv file as index

In [7]:
csv_file = '../geography/GeoLookup.csv'

In [8]:
def read_csv_as_index(csv_file):
    index = {}
    with open(csv_file, 'r') as file:
        reader = csv.DictReader(file)
        for row in reader:
            index[row['GeoID']] = row
    return index

In [9]:
index = read_csv_as_index(csv_file)

## Merging files

In [110]:
topojson_file = '../geography/UHF42.topo.json'
json_file = 'data/2120.json'
output_file = 'joined_data_2120.topojson'

In [117]:
def join_json_to_topojson(json_file, topojson_file, csv_file, output_file):
    # Step 1: Read the CSV file and store index values
    index = read_csv_as_index(csv_file)

    # Step 2: Read the JSON file
    with open(json_file, 'r') as file:
        json_data = json.load(file)

    # Step 3: Read the TopoJSON file
    with open(topojson_file, 'r') as file:
        topojson_data = json.load(file)

    # Step 4: Iterate through objects in TopoJSON
    for object_key in topojson_data['objects']:
        object_data = topojson_data['objects'][object_key]
        if object_data['type'] == 'GeometryCollection':
            # Iterate through geometries in GeometryCollection
            for geometry in object_data['geometries']:
                # Step 5: Retrieve corresponding data from JSON using index values
                id_value = geometry['properties']['GEOCODE']
                if id_value in index:
                    json_entry = index[id_value]
                    json_entry['value'] = json_data[int(id_value)]['Value']
                    json_entry['measure_id'] = json_data[int(id_value)]['MeasureID']
                    json_entry['time'] = json_data[int(id_value)]['Time']
                    # Step 6: Add retrieved data to TopoJSON properties
                    geometry['properties'] = json_entry

    # Step 7: Write updated TopoJSON data to a new file
    with open(output_file, 'w') as file:
        json.dump(topojson_data, file)

In [118]:
join_json_to_topojson(json_file, topojson_file, csv_file, output_file)


## Checking data file for indicators

In [161]:
df['GeoID'].unique()

array([  1,   2,   3,   4,   5, 101, 102, 103, 104, 105, 106, 107, 201,
       202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 301, 302, 303,
       304, 305, 306, 307, 308, 309, 310, 401, 402, 403, 404, 405, 406,
       407, 408, 409, 410, 501, 502, 503, 504])

In [162]:
df.head()

Unnamed: 0,MeasureID,GeoType,GeoID,Time,ban_summary_flag,Value,CI,Note,DisplayValue
0,651,Borough,1,2015-2017,0,21.0,,,21.0
1,651,Borough,1,2012-2014,0,15.4881,,,15.5
2,651,Borough,1,2009-2011,0,18.8,,,18.8
3,651,Borough,1,2005-2007,0,31.3,,,31.3
4,651,Borough,2,2015-2017,0,18.2,,,18.2


## Read joined data to check

In [28]:
joined = json.load(open("joined_data_2120.topojson"))

In [29]:
properties = [geometry['properties'] for geometry in joined['objects']['collection']['geometries']]

In [30]:
joined_df = pd.DataFrame(properties)

In [31]:
joined_df.head()

Unnamed: 0,GeoType,GeoTypeDesc,GeoTypeShortDesc,GeoID,Name,BoroID,Borough,Lat,Long,value,measure_id,time,id,NTA2020,GEOCODE,GEONAME
0,Subboro,Sub-borough/PUMA,Neighborhood (Sub-borough/PUMA),1,Mott Haven/Hunts Point,1,Bronx,40.8092,-73.8991,15.4881,651.0,2012-2014,,,,
1,Subboro,Sub-borough/PUMA,Neighborhood (Sub-borough/PUMA),2,Morrisania/East Tremont,1,Bronx,40.8454,-73.8876,18.8,651.0,2009-2011,,,,
2,Subboro,Sub-borough/PUMA,Neighborhood (Sub-borough/PUMA),3,Highbridge/South Concourse,1,Bronx,40.8325,-73.9216,31.3,651.0,2005-2007,,,,
3,Subboro,Sub-borough/PUMA,Neighborhood (Sub-borough/PUMA),4,University Heights/Fordham,1,Bronx,40.8522,-73.9097,18.2,651.0,2015-2017,,,,
4,Subboro,Sub-borough/PUMA,Neighborhood (Sub-borough/PUMA),5,Kingsbridge Heights/Mosholu,1,Bronx,40.8703,-73.8909,14.8043,651.0,2012-2014,,,,


## Checking Topojson file

In [88]:
topojson_file = '../geography/CD.topo.json'

In [89]:
with open(topojson_file, 'r') as file:
        cd_data = json.load(file)

In [90]:
properties2 = [geometry['properties'] for geometry in cd_data['objects']['collection']['geometries']]

In [49]:
topojson_data['objects']['collection']['geometries'][0]['properties']['GEOCODE']

50101

In [50]:
topo_data = pd.DataFrame(properties2)

In [51]:
topo_data.head()

Unnamed: 0,id,NTA2020,GEOCODE,GEONAME
0,1,BX0101,50101,Mott Haven-Port Morris
1,2,BX0102,50102,Melrose
2,3,QN0151,50151,Rikers Island
3,4,BX0201,50201,Hunts Point
4,5,BX0202,50202,Longwood


In [91]:
cd_df = pd.DataFrame(properties2)

In [93]:
cd_df['GEOCODE']

Unnamed: 0,GEOCODE,GEONAME,BOROUGH
0,101,Financial District (CD1),Manhattan
1,102,Greenwich Village and Soho (CD2),Manhattan
2,103,Lower East Side and Chinatown (CD3),Manhattan
3,104,Clinton and Chelsea (CD4),Manhattan
4,105,Midtown (CD5),Manhattan


In [97]:
len(cd_df['GEOCODE'].unique())

65

In [73]:
with open(json_file, 'r') as file:
        json_data = json.load(file)

## Check geography data

In [164]:
geo = pd.read_csv(csv_file)

In [118]:
sub = geo[geo['GeoType'] == 'Subboro']

In [115]:
ones

Unnamed: 0,GeoType,GeoTypeDesc,GeoTypeShortDesc,GeoID,Name,BoroID,Borough,Lat,Long
2,Borough,Borough,Borough,2,Brooklyn,2.0,Brooklyn,40.6447,-73.9479
83,Subboro,Sub-borough/PUMA,Neighborhood (Sub-borough/PUMA),2,Morrisania/East Tremont,1.0,Bronx,40.8454,-73.8876
