In [1]:
# Dependencies# Depen 
import requests
import json
import pandas as pd
# Google developer API key. Create a config.py and put your own google api key to the file.
from config import gkey
import numpy as np

### Fill out the empty county cells using Google API

In [2]:
# Import the raw data
drug_death_raw_df = pd.read_csv("Resources/Accidental_Drug_Related_Deaths_2012-2017.csv")
drug_death_raw_df = drug_death_raw_df[1:]
drug_death_raw_df.columns

Index(['CaseNumber', 'Date', 'Year', 'Sex', 'Race', 'Age', 'Residence City',
       'Residence State', 'Residence County', 'Death City', 'Death State',
       'Death County', 'Location', 'DescriptionofInjury', 'InjuryPlace',
       'ImmediateCauseA', 'Heroin', 'Cocaine', 'Fentanyl', 'Oxycodone',
       'Oxymorphone', 'EtOH', 'Hydrocodone', 'Benzodiazepine', 'Methadone',
       'Amphet', 'Tramad', 'Morphine (not heroin)', 'Other', 'Any Opioid',
       'MannerofDeath', 'AmendedMannerofDeath', 'DeathLoc',
       'DeathLocationCity', 'DeathLocLat', 'DeathLocLong'],
      dtype='object')

In [3]:
# index = 0
for rindex, row in drug_death_raw_df.iterrows():
    death_county = row['Death County']

    if pd.isnull(death_county):
        
        # Use Google geo-api to populate the empty county
        death_city = row['DeathLocationCity']

        # Target city
        target_city = f"{death_city}, CT"

        # Build the endpoint URL
        target_url = "https://maps.googleapis.com/maps/api/geocode/json?" \
            "address=%s&key=%s" % (target_city, gkey)


        try:
            geo_data = requests.get(target_url).json()

            # Print the json
            county_name = ' '.join(geo_data["results"][0]["address_components"][1]['long_name'].split(' ')[:2])

            # Populate the death county cell
            drug_death_raw_df.loc[rindex,'Death County'] = county_name
            
        except:
            print("Failed.")

### Clean the county names

In [4]:
drug_death_raw_df['Death County'].value_counts()

HARTFORD              1022
NEW HAVEN              975
FAIRFIELD              555
NEW LONDON             319
LITCHFIELD             187
Hartford County        181
MIDDLESEX              154
New Haven              140
WINDHAM                116
TOLLAND                100
Fairfield County        99
New London              37
Manchester              29
Windham County          23
Litchfield              16
Wallingford             15
Tolland County          14
Litchfield County       11
Middlesex County        10
Windham                 10
Huntington               8
Colchester               7
Portland                 6
Groton                   6
Montville                4
Deep River               4
Guilford                 4
Waterford                3
Stonington               3
Bethel                   2
East Windsor             2
Killingly                2
Simsbury                 2
Newtown                  2
Watertown                1
Sprague                  1
Plymouth                 1
S

In [5]:
# County names in uppercase
drug_death_raw_df['Death County'] = drug_death_raw_df['Death County'].str.upper()
# Remove 'County' from the county names
remove_county = lambda x: ' '.join(x.split(' ')[:-1]) if 'COUNTY' in x else x
drug_death_raw_df['Death County'] = drug_death_raw_df['Death County'].apply(remove_county)
# Remove whitespace
drug_death_raw_df['Death County'] = drug_death_raw_df['Death County'].str.strip()
# Clean the county cells filled with city names
cities = {'MANCHESTER':'HARTFORD', 'WALLINGFORD':'NEW HAVEN',
         'HUNTINGTON':'FAIRFIELD','COLCHESTER':'NEW LONDON',
         'PORTLAND':'MIDDLESEX', 'GROTON':'NEW LONDON',
         'GUILFORD':'NEW HAVEN', 'MONTVILLE':'NEW LONDON',
         'DEEP RIVER':'MIDDLESEX','STONINGTON':'NEW LONDON',
         'WATERFORD':'NEW LONDON','KILLINGLY':'WINDHAM',
         'SIMSBURY':'HARTFORD', 'EAST WINDSOR':'HARTFORD',
         'NEWTOWN':'FAIRFIELD', 'BETHEL':'FAIRFIELD',
         'WESTBROOK':'MIDDLESEX','HADDAM':'MIDDLESEX',
         'WINCHESTER':'LITCHFIELD','SPRAGUE':'NEW LONDON',
         'GREENWICH':'FAIRFIELD','MADISON':'NEW HAVEN',
         'WATERTOWN':'LITCHFIELD','GRISWOLD':'NEW LONDON',
         'PLYMOUTH':'LITCHFIELD','MANSFIELD':'TOLLAND',
         'CONNECTICUT AVENUE':'HARTFORD', 'SUFFIELD':'HARTFORD'}
replace_cities = lambda x: cities.get(x) if x in cities.keys() else x
drug_death_raw_df['Death County']= drug_death_raw_df['Death County'].apply(replace_cities)

In [6]:
drug_death_df = drug_death_raw_df[drug_death_raw_df['Death County'] != 'USA']

In [7]:
drug_death_df['Death County'].value_counts()

HARTFORD      1238
NEW HAVEN     1135
FAIRFIELD      668
NEW LONDON     381
LITCHFIELD     217
MIDDLESEX      176
WINDHAM        151
TOLLAND        115
Name: Death County, dtype: int64

In [8]:
# save the clean dataframe to csv
drug_death_df.to_csv('Cleaned_county.csv')