In [1]:
import pandas as pd
import requests

In [3]:
# read the inspection records, keep only the four address component columns,
# and give the building-number column a shorter name
addr = (
    pd.read_csv('../housing_inspections_an.csv', encoding='iso-8859-1')
      .loc[:, ['BLDGNO_confidential', 'STNAME', 'STTYPE', 'QUAD']]
      .rename(columns={'BLDGNO_confidential': 'BLDG'})
)
print(addr.shape)
addr.head()

(9962, 4)


Unnamed: 0,BLDG,STNAME,STTYPE,QUAD
0,4008.0,2ND,ST,SW
1,39.0,MISSISSIPPI,AVE,SE
2,3345.0,23RD,ST,SE
3,3600.0,B,ST,SE
4,531.0,KENNEDY,ST,NW


In [4]:
# report the null count per column, then keep only complete, distinct rows
print(addr.isna().sum())
addr = addr.dropna().drop_duplicates()
addr.shape

BLDG      14
STNAME    14
STTYPE    23
QUAD      16
dtype: int64


(3282, 4)

In [5]:
# building numbers are loaded as floats (e.g. 4008.0): cast to int to drop the
# decimal part, then to str so the value can be used as text
addr = addr.assign(BLDG=addr['BLDG'].astype(int).astype(str))
addr.head()

Unnamed: 0,BLDG,STNAME,STTYPE,QUAD
0,4008,2ND,ST,SW
1,39,MISSISSIPPI,AVE,SE
2,3345,23RD,ST,SE
3,3600,B,ST,SE
4,531,KENNEDY,ST,NW


In [6]:
# MAR API endpoint used for address lookups; operation reference:
# http://citizenatlas.dc.gov/newwebservices/locationverifier.asmx?op=findLocation2
host = 'http://citizenatlas.dc.gov'
url = '/newwebservices/locationverifier.asmx/findLocation2'
# Content-Length is deliberately not set here: the literal 'length' is a
# template placeholder rather than a byte count, and requests derives the real
# value from the encoded form body when it prepares the request.
headers = {'Content-Type': 'application/x-www-form-urlencoded'}

# access, parse, and transform
def get_address_info(addr_str, timeout=30):
    '''Look up a street address with the MAR API.

    Sends a form-encoded POST request and returns the first record of the
    'Table1' list inside the response's 'returnDataset'.

    Args:
        addr_str: address to look up, e.g. '2722 olive st nw'.
        timeout: seconds to wait for the server before giving up.

    Returns:
        dict of address attributes for the first match, or an empty dict
        when the response carries no match.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not answer within `timeout`.
    '''
    data = {'str': addr_str, 'f': 'json'}
    # a timeout keeps one stalled request from hanging a long batch run
    r = requests.post(url=host+url, data=data, headers=headers, timeout=timeout)
    # fail loudly on 4xx/5xx instead of trying to parse an error page as JSON
    r.raise_for_status()
    parsed = r.json()
    # tolerate a missing/null dataset or an empty match list (no match found)
    dataset = parsed.get('returnDataset') or {}
    matches = dataset.get('Table1') or []
    return matches[0] if matches else {}

In [7]:
# smoke-test the lookup with a single hand-written address
test_addr = '2722 olive st  nw'
print(get_address_info(addr_str=test_addr))

{'ADDRESS_ID': 272108, 'MARID': 272108, 'STATUS': 'ACTIVE', 'FULLADDRESS': '2722 OLIVE STREET NW', 'ADDRNUM': 2722, 'ADDRNUMSUFFIX': None, 'STNAME': 'OLIVE', 'STREET_TYPE': 'STREET', 'QUADRANT': 'NW', 'CITY': 'WASHINGTON', 'STATE': 'DC', 'XCOORD': 395095.84, 'YCOORD': 137567.67, 'SSL': '1214    0025', 'ANC': 'ANC 2E', 'PSA': 'Police Service Area 206', 'WARD': 'Ward 2', 'NBHD_ACTION': ' ', 'CLUSTER_': 'Cluster 4', 'POLDIST': 'Police District - Second District', 'ROC': 'Police Sector 2D2', 'CENSUS_TRACT': '000100', 'VOTE_PRCNCT': 'Precinct 5', 'SMD': 'SMD 2E06', 'ZIPCODE': 20007, 'NATIONALGRID': '18S UJ 21677 08351', 'ROADWAYSEGID': 3143, 'FOCUS_IMPROVEMENT_AREA': 'NA', 'HAS_ALIAS': 'N', 'HAS_CONDO_UNIT': 'N', 'HAS_RES_UNIT': 'Y', 'HAS_SSL': 'Y', 'LATITUDE': 38.9059535, 'LONGITUDE': -77.05654082, 'STREETVIEWURL': 'http://maps.google.com/maps?z=16&layer=c&cbll=38.9059535,-77.05654082&cbp=11,165.254806948118,,0,2.09', 'RES_TYPE': 'RESIDENTIAL', 'WARD_2002': 'Ward 2', 'WARD_2012': 'Ward 2',

In [8]:
# join number, street name, street type and quadrant into one
# space-separated address string per row
addr['ADDRESS'] = addr['BLDG'].str.cat(
    [addr['STNAME'], addr['STTYPE'], addr['QUAD']], sep=' ')
addr.head()

Unnamed: 0,BLDG,STNAME,STTYPE,QUAD,ADDRESS
0,4008,2ND,ST,SW,4008 2ND ST SW
1,39,MISSISSIPPI,AVE,SE,39 MISSISSIPPI AVE SE
2,3345,23RD,ST,SE,3345 23RD ST SE
3,3600,B,ST,SE,3600 B ST SE
4,531,KENNEDY,ST,NW,531 KENNEDY ST NW


In [9]:
# turn the frame into a list with one column-name -> value dict per row
addr_dict = addr.to_dict('records')
addr_dict[:2]

[{'BLDG': '4008',
  'STNAME': '2ND',
  'STTYPE': 'ST',
  'QUAD': 'SW',
  'ADDRESS': '4008 2ND ST SW'},
 {'BLDG': '39',
  'STNAME': 'MISSISSIPPI',
  'STTYPE': 'AVE',
  'QUAD': 'SE',
  'ADDRESS': '39 MISSISSIPPI AVE SE'}]

In [10]:
# look up every address and merge the returned fields into its row dict
# (this takes awhile, so only the first 10 rows are processed)
addr_info = [
    {**row, **get_address_info(row['ADDRESS'])}
    for row in addr_dict[:10]
]
addr_info[:1]

[{'BLDG': '4008',
  'STNAME': '2ND',
  'STTYPE': 'ST',
  'QUAD': 'SW',
  'ADDRESS': '4008 2ND ST SW',
  'ADDRESS_ID': 28309,
  'MARID': 28309,
  'STATUS': 'ACTIVE',
  'FULLADDRESS': '4008 2ND STREET SW',
  'ADDRNUM': 4008,
  'ADDRNUMSUFFIX': None,
  'STREET_TYPE': 'STREET',
  'QUADRANT': 'SW',
  'CITY': 'WASHINGTON',
  'STATE': 'DC',
  'XCOORD': 398951.03,
  'YCOORD': 129310.89,
  'SSL': '6173    0040',
  'ANC': 'ANC 8D',
  'PSA': 'Police Service Area 708',
  'WARD': 'Ward 8',
  'NBHD_ACTION': ' ',
  'CLUSTER_': 'Cluster 39',
  'POLDIST': 'Police District - Seventh District',
  'ROC': 'Police Sector 7D3',
  'CENSUS_TRACT': '009807',
  'VOTE_PRCNCT': 'Precinct 124',
  'SMD': 'SMD 8D07',
  'ZIPCODE': 20032,
  'NATIONALGRID': '18S UJ 25350 00011',
  'ROADWAYSEGID': 7627,
  'FOCUS_IMPROVEMENT_AREA': 'NA',
  'HAS_ALIAS': 'N',
  'HAS_CONDO_UNIT': 'N',
  'HAS_RES_UNIT': 'N',
  'HAS_SSL': 'Y',
  'LATITUDE': 38.83158613,
  'LONGITUDE': -77.01208114,
  'STREETVIEWURL': 'http://maps.google.com/ma

In [12]:
# collect the enriched records into a frame and write it to CSV
# without the index column
addr_df = pd.DataFrame(data=addr_info)
addr_df.to_csv(path_or_buf='../address_info.csv', index=False)
addr_df.head()

Unnamed: 0,ADDRESS,ADDRESS_ID,ADDRNUM,ADDRNUMSUFFIX,ANC,ANC_2002,ANC_2012,BLDG,CENSUS_TRACT,CITY,...,STREETVIEWURL,STREET_TYPE,STTYPE,VOTE_PRCNCT,WARD,WARD_2002,WARD_2012,XCOORD,YCOORD,ZIPCODE
0,4008 2ND ST SW,28309,4008,,ANC 8D,ANC 8D,ANC 8D,4008,9807,WASHINGTON,...,http://maps.google.com/maps?z=16&layer=c&cbll=...,STREET,ST,Precinct 124,Ward 8,Ward 8,Ward 8,398951.03,129310.89,20032
1,39 MISSISSIPPI AVE SE,29809,39,,ANC 8C,ANC 8C,ANC 8C,39,9803,WASHINGTON,...,http://maps.google.com/maps?z=16&layer=c&cbll=...,AVENUE,AVE,Precinct 124,Ward 8,Ward 8,Ward 8,399456.46,129421.69,20032
2,3345 23RD ST SE,146744,3345,,ANC 8E,ANC 8B,ANC 8E,3345,7403,WASHINGTON,...,http://maps.google.com/maps?z=16&layer=c&cbll=...,STREET,ST,Precinct 116,Ward 8,Ward 8,Ward 8,402480.44,131220.15,20020
3,3600 B ST SE,301154,3600,,ANC 7F,ANC 7A,ANC 7F,3600,7703,WASHINGTON,...,http://maps.google.com/maps?z=16&layer=c&cbll=...,STREET,ST,Precinct 107,Ward 7,Ward 7,Ward 7,404151.19,135586.07,20019
4,531 KENNEDY ST NW,246689,531,,ANC 4D,ANC 4D,ANC 4D,531,2101,WASHINGTON,...,http://maps.google.com/maps?z=16&layer=c&cbll=...,STREET,ST,Precinct 56,Ward 4,Ward 4,Ward 4,398173.02,143189.06,20011
