In [1]:
import pandas as pd
import numpy as np
import requests
import json

In [13]:
# Global variables :) 
# We'll save the mapping in a python dictionary, kind of like a cache so that we don't continuously hit the google API
lat_lng_mapping = {}

def create_latlng_mapping(column):
    for county in column:
        if county in lat_lng_mapping:
            continue
        cache(county)
        
# Can manually call the cache function to try to get latlng of a location
def cache(location):
    
    countyLookUp = location.lower().split(",")[0] + " New York"
    lat_lng_mapping[location] = {"lat": -1, "lng": -1}
    resp = requests.get('https://maps.googleapis.com/maps/api/geocode/json', params={'address': countyLookUp, 'key': ' AIzaSyB5YqK2Po4xXDlDERlenKSug9SOGNzmfPU'})
    latlng_json = json.loads(resp.text)
    if latlng_json['status'] == 'ZERO_RESULTS' or latlng_json['status'] != 'OK':
        print("Found no results for: {}".format(location))
    try:
        chosen_result = latlng_json["results"][0]
        latlng = chosen_result['geometry']['location']
        lat_lng_mapping[location] = latlng
        return latlng
    except KeyError:
        print("Geolocation data is for: {}".format(location))
    except IndexError:
        print("Geolocation data is missing for: {}".format(location))

# Fix Lat Lng for food inspections

In [23]:
df = pd.read_csv("waterloo_datathon_datasets/count_food_inspections_with_counties_lat_long.csv")

create_latlng_mapping(df["city"])

In [26]:
df["lat_new"] = df["city"].apply(lambda item: lat_lng_mapping[item]["lat"])
df["lng_new"] = df["city"].apply(lambda item: lat_lng_mapping[item]["lng"])

In [28]:
df.to_csv("waterloo_datathon_datasets/count_food_inspections_with_counties_lat_long_fixed.csv")


# Calculate Lat Lng for the demographics dataset 

In [19]:
df = pd.read_csv("waterloo_datathon_datasets/demographics_cleaned.csv")

print(df["geography"])
create_latlng_mapping(df["geography"])

0            Albany
1          Allegany
2             Bronx
3            Broome
4       Cattaraugus
5            Cayuga
6        Chautauqua
7           Chemung
8          Chenango
9           Clinton
10         Columbia
11         Cortland
12         Delaware
13         Dutchess
14             Erie
15            Essex
16         Franklin
17           Fulton
18          Genesee
19           Greene
20         Hamilton
21         Herkimer
22        Jefferson
23            Kings
24            Lewis
25       Livingston
26          Madison
27           Monroe
28       Montgomery
29           Nassau
           ...     
342          Oneida
343        Onondaga
344         Ontario
345          Orange
346         Orleans
347          Oswego
348          Otsego
349          Putnam
350          Queens
351      Rensselaer
352        Richmond
353        Rockland
354    St. Lawrence
355        Saratoga
356     Schenectady
357       Schoharie
358        Schuyler
359          Seneca
360         Steuben


In [15]:
df

Unnamed: 0,year,geography,total_households,median_household_income,mean_household_income,mean_earnings,mean_soc_sec,mean_ret_income,mean_supp_sec_income,mean_cash_pub_income,food_stamp_benefits,population,%_9999_or_less,%_150000_or_more,%_health_insurance,%_private_coverage,%_public_coverage
0,2016,Albany,124108,60904,81603,83232,19131,29496,9341,3743,14305,304048.0,0.061285,0.120637,0.946314,0.781850,0.308313
1,2016,Allegany,18032,44085,54958,54495,17561,19662,9160,3220,2872,47207.0,0.071096,0.030002,0.911983,0.680365,0.390260
2,2016,Bronx,490740,35302,51445,57317,14211,20862,8885,3598,179811,1414859.0,0.156333,0.045391,0.876989,0.434970,0.532936
3,2016,Broome,78738,47744,63302,64554,18249,22972,9310,3492,12867,194413.0,0.077904,0.062803,0.942236,0.689866,0.404083
4,2016,Cattaraugus,31502,43884,55632,56502,17614,18700,9342,2879,5588,77748.0,0.065964,0.034157,0.909091,0.680069,0.408731
5,2016,Cayuga,31039,53114,65579,66822,18117,20376,10148,2977,3990,75490.0,0.059184,0.058668,0.927103,0.721672,0.372592
6,2016,Chautauqua,52718,43211,55820,56322,18130,22205,9517,4112,9948,129115.0,0.084696,0.037615,0.929559,0.657197,0.429052
7,2016,Chemung,34887,49578,64475,67328,17696,21358,9367,3057,5546,84179.0,0.076074,0.064838,0.936540,0.677521,0.408867
8,2016,Chenango,19837,46979,57930,59495,17340,18547,10469,3175,3582,48706.0,0.072390,0.039220,0.931117,0.672710,0.426292
9,2016,Clinton,31648,50502,63804,63140,18305,24391,10340,3360,5760,76963.0,0.071600,0.062942,0.944402,0.718449,0.390032


In [16]:
df["lat"] = df["geography"].apply(lambda item: lat_lng_mapping[item]["lat"])
df["lng"] = df["geography"].apply(lambda item: lat_lng_mapping[item]["lng"])

In [17]:
df

Unnamed: 0,year,geography,total_households,median_household_income,mean_household_income,mean_earnings,mean_soc_sec,mean_ret_income,mean_supp_sec_income,mean_cash_pub_income,food_stamp_benefits,population,%_9999_or_less,%_150000_or_more,%_health_insurance,%_private_coverage,%_public_coverage,lat,lng
0,2016,Albany,124108,60904,81603,83232,19131,29496,9341,3743,14305,304048.0,0.061285,0.120637,0.946314,0.781850,0.308313,42.652579,-73.756232
1,2016,Allegany,18032,44085,54958,54495,17561,19662,9160,3220,2872,47207.0,0.071096,0.030002,0.911983,0.680365,0.390260,42.090065,-78.494189
2,2016,Bronx,490740,35302,51445,57317,14211,20862,8885,3598,179811,1414859.0,0.156333,0.045391,0.876989,0.434970,0.532936,40.844782,-73.864827
3,2016,Broome,78738,47744,63302,64554,18249,22972,9310,3492,12867,194413.0,0.077904,0.062803,0.942236,0.689866,0.404083,42.491656,-74.288354
4,2016,Cattaraugus,31502,43884,55632,56502,17614,18700,9342,2879,5588,77748.0,0.065964,0.034157,0.909091,0.680069,0.408731,42.329229,-78.868090
5,2016,Cayuga,31039,53114,65579,66822,18117,20376,10148,2977,3990,75490.0,0.059184,0.058668,0.927103,0.721672,0.372592,42.918956,-76.726340
6,2016,Chautauqua,52718,43211,55820,56322,18130,22205,9517,4112,9948,129115.0,0.084696,0.037615,0.929559,0.657197,0.429052,42.209774,-79.466844
7,2016,Chemung,34887,49578,64475,67328,17696,21358,9367,3057,5546,84179.0,0.076074,0.064838,0.936540,0.677521,0.408867,42.008407,-76.623837
8,2016,Chenango,19837,46979,57930,59495,17340,18547,10469,3175,3582,48706.0,0.072390,0.039220,0.931117,0.672710,0.426292,42.201464,-75.876633
9,2016,Clinton,31648,50502,63804,63140,18305,24391,10340,3360,5760,76963.0,0.071600,0.062942,0.944402,0.718449,0.390032,43.048403,-75.378503


In [18]:
df.to_csv("waterloo_datathon_datasets/demographics_cleaned_with_latlng.csv")

# Calculate  Lat Lng for the Environmental Radiation Dataset 

In [10]:
df = pd.read_csv("waterloo_datathon_datasets/environmental_radiation.csv")

create_latlng_mapping(df["sample_location"])


Found no results for: SWAMP DRAINAGE TO ERDMAN BROOK
Geolocation data is missing for: SWAMP DRAINAGE TO ERDMAN BROOK
Found no results for: ERDMAN BROOK NORTH OF BURIAL TRENCHES
Geolocation data is missing for: ERDMAN BROOK NORTH OF BURIAL TRENCHES
Found no results for: FRANK'S CREEK - NE OF BURIAL BURIAL TRENCHES
Geolocation data is missing for: FRANK'S CREEK - NE OF BURIAL BURIAL TRENCHES
Found no results for: ERDMAN BROOK DOWNSTREAM OF EQ3
Geolocation data is missing for: ERDMAN BROOK DOWNSTREAM OF EQ3


In [11]:
df["lat"] = df["sample_location"].apply(lambda item: lat_lng_mapping[item]["lat"])
df["lng"] = df["sample_location"].apply(lambda item: lat_lng_mapping[item]["lng"])

In [12]:
df.to_csv("waterloo_datathon_datasets/environmental_radiation_with_latlng.csv")

      location_code sample_type  \
0          5151-001         AIR   
1          5151-001         AIR   
2          5151-001         AIR   
3          5151-001         AIR   
4          5151-001         AIR   
5          5151-001         AIR   
6          5151-001         AIR   
7          5151-001         AIR   
8          5151-001         AIR   
9          5151-001         AIR   
10         5151-001         AIR   
11         5151-001         AIR   
12         5151-001         AIR   
13         5151-001         AIR   
14         5151-001         AIR   
15         5151-001         AIR   
16         5151-001         AIR   
17         5151-001         AIR   
18         5151-001         AIR   
19         5151-001         AIR   
20         5151-001         AIR   
21         5151-001         AIR   
22         5151-001         AIR   
23         5151-001         AIR   
24         5151-001         AIR   
25         5151-001         AIR   
26         5151-001         AIR   
27         5151-001 