In [1]:
import requests
import pandas as pd
from tqdm import tqdm

In [2]:
prospect_addresses = pd.read_csv('data/prospect_addresses.csv')

dim_asset = pd.read_csv('data/DimAsset.csv')

prospect_addresses = pd.merge(prospect_addresses, dim_asset[['oslPropertyID', 'Latitude', 'Longitude']], left_on='osl_property_id', right_on='oslPropertyID', how='left')
prospect_addresses = prospect_addresses.rename(columns={'Latitude': 'asset_lat', 'Longitude': 'asset_lon'})
prospect_addresses.drop_duplicates(inplace=True)

In [3]:
georgia_addresses = prospect_addresses[prospect_addresses['employer_state'] == 'GA'].reset_index(drop=True)

In [4]:
def get_coords(address):
    url = "http://localhost:7070/search"
    params = {
        "q": address,
        "format": "json",
        "limit": 1
    }
    headers = {"User-Agent": "nominatim-simple-client"}
    
    r = requests.get(url, params=params, headers=headers)
    r.raise_for_status()
    data = r.json()
    
    if not data:
        return None
    
    return float(data[0]["lat"]), float(data[0]["lon"])

In [6]:
print(get_coords('626 Dekalb Ave SE, Atlanta GA 30312'))

None


In [10]:
for i, row in tqdm(georgia_addresses.iterrows(), total=len(georgia_addresses), desc="Geocoding"):

    try:
        lat, lon = get_coords(f"{row['applicant_address']} {row['applicant_zip_code']}")
        georgia_addresses.loc[i, 'applicant_lat'] = lat
        georgia_addresses.loc[i, 'applicant_lon'] = lon
    except Exception as e:
        # tqdm.write(f"Row {i} applicant failed: {f"{row['applicant_address']} {row['applicant_zip_code']}"}")
        continue

    try:
        lat, lon = get_coords(f"{row['employer_address']} {row['employer_zip_code']}")
        georgia_addresses.loc[i, 'employer_lat'] = lat
        georgia_addresses.loc[i, 'employer_lon'] = lon
    except Exception as e:
        # tqdm.write(f"Row {i} employer failed: {e}")
        continue

    if (i + 1) % 100 == 0:
        georgia_addresses.to_csv("data/georgia_addresses.csv", index=False)
        tqdm.write(f"Progress saved at row {i+1}")

georgia_addresses.to_csv("data/georgia_addresses.csv", index=False)


Geocoding:   1%|          | 101/8887 [00:18<35:58,  4.07it/s]

Progress saved at row 100


Geocoding:   2%|▏         | 201/8887 [00:42<23:09,  6.25it/s]  

Progress saved at row 200


Geocoding:  10%|█         | 900/8887 [02:54<23:27,  5.68it/s]  

Progress saved at row 900


Geocoding:  11%|█▏        | 1001/8887 [03:10<17:02,  7.71it/s]

Progress saved at row 1000


Geocoding:  14%|█▎        | 1201/8887 [03:43<24:26,  5.24it/s]

Progress saved at row 1200


Geocoding:  15%|█▍        | 1300/8887 [04:00<23:28,  5.39it/s]

Progress saved at row 1300


Geocoding:  16%|█▌        | 1400/8887 [04:19<24:13,  5.15it/s]

Progress saved at row 1400


Geocoding:  17%|█▋        | 1500/8887 [04:37<25:50,  4.77it/s]

Progress saved at row 1500


Geocoding:  19%|█▉        | 1700/8887 [05:11<21:13,  5.64it/s]

Progress saved at row 1700


Geocoding:  23%|██▎       | 2001/8887 [06:03<23:08,  4.96it/s]

Progress saved at row 2000


Geocoding:  26%|██▌       | 2301/8887 [06:53<15:32,  7.06it/s]

Progress saved at row 2300


Geocoding:  27%|██▋       | 2401/8887 [07:11<20:12,  5.35it/s]

Progress saved at row 2400


Geocoding:  28%|██▊       | 2501/8887 [07:29<20:27,  5.20it/s]

Progress saved at row 2500


Geocoding:  30%|███       | 2700/8887 [08:01<12:36,  8.17it/s]

Progress saved at row 2700


Geocoding:  32%|███▏      | 2800/8887 [08:19<20:10,  5.03it/s]

Progress saved at row 2800


Geocoding:  33%|███▎      | 2901/8887 [08:36<18:11,  5.48it/s]

Progress saved at row 2900


Geocoding:  34%|███▍      | 3001/8887 [08:53<16:32,  5.93it/s]

Progress saved at row 3000


Geocoding:  35%|███▍      | 3100/8887 [09:10<24:11,  3.99it/s]

Progress saved at row 3100


Geocoding:  36%|███▌      | 3200/8887 [09:31<19:32,  4.85it/s]

Progress saved at row 3200


Geocoding:  37%|███▋      | 3301/8887 [09:47<14:09,  6.57it/s]

Progress saved at row 3300


Geocoding:  42%|████▏     | 3701/8887 [10:59<16:59,  5.09it/s]

Progress saved at row 3700


Geocoding:  44%|████▍     | 3901/8887 [11:34<17:31,  4.74it/s]

Progress saved at row 3900


Geocoding:  45%|████▌     | 4001/8887 [11:50<15:12,  5.35it/s]

Progress saved at row 4000


Geocoding:  47%|████▋     | 4200/8887 [12:28<19:08,  4.08it/s]

Progress saved at row 4200


Geocoding:  48%|████▊     | 4300/8887 [12:48<15:01,  5.09it/s]

Progress saved at row 4300


Geocoding:  51%|█████     | 4501/8887 [13:24<15:26,  4.73it/s]

Progress saved at row 4500


Geocoding:  54%|█████▍    | 4801/8887 [14:17<13:36,  5.01it/s]

Progress saved at row 4800


Geocoding:  55%|█████▌    | 4901/8887 [14:34<08:56,  7.43it/s]

Progress saved at row 4900


Geocoding:  56%|█████▋    | 5000/8887 [14:53<12:49,  5.05it/s]

Progress saved at row 5000


Geocoding:  61%|██████    | 5401/8887 [15:55<09:33,  6.07it/s]

Progress saved at row 5400


Geocoding:  62%|██████▏   | 5501/8887 [16:12<09:03,  6.23it/s]

Progress saved at row 5500


Geocoding:  63%|██████▎   | 5601/8887 [16:27<10:39,  5.14it/s]

Progress saved at row 5600


Geocoding:  64%|██████▍   | 5701/8887 [16:44<09:23,  5.65it/s]

Progress saved at row 5700


Geocoding:  65%|██████▌   | 5801/8887 [17:00<09:44,  5.28it/s]

Progress saved at row 5800


Geocoding:  66%|██████▋   | 5900/8887 [17:15<08:19,  5.97it/s]

Progress saved at row 5900


Geocoding:  68%|██████▊   | 6001/8887 [17:29<07:46,  6.18it/s]

Progress saved at row 6000


Geocoding:  70%|██████▉   | 6201/8887 [18:01<08:42,  5.14it/s]

Progress saved at row 6200


Geocoding:  74%|███████▍  | 6600/8887 [19:06<08:15,  4.62it/s]

Progress saved at row 6600


Geocoding:  77%|███████▋  | 6800/8887 [19:39<05:32,  6.27it/s]

Progress saved at row 6800


Geocoding:  78%|███████▊  | 6901/8887 [19:57<06:03,  5.47it/s]

Progress saved at row 6900


Geocoding:  82%|████████▏ | 7301/8887 [21:05<07:04,  3.73it/s]

Progress saved at row 7300


Geocoding:  83%|████████▎ | 7401/8887 [21:22<04:56,  5.01it/s]

Progress saved at row 7400


Geocoding:  89%|████████▉ | 7900/8887 [22:46<02:56,  5.61it/s]

Progress saved at row 7900


Geocoding:  91%|█████████ | 8101/8887 [23:08<01:00, 12.89it/s]

Progress saved at row 8100


Geocoding:  92%|█████████▏| 8202/8887 [23:15<00:54, 12.63it/s]

Progress saved at row 8200


Geocoding:  93%|█████████▎| 8304/8887 [23:23<00:35, 16.41it/s]

Progress saved at row 8300


Geocoding:  95%|█████████▍| 8402/8887 [23:30<00:36, 13.15it/s]

Progress saved at row 8400


Geocoding:  97%|█████████▋| 8601/8887 [23:46<00:23, 11.99it/s]

Progress saved at row 8600


Geocoding:  99%|█████████▉| 8803/8887 [24:01<00:08, 10.40it/s]

Progress saved at row 8800


Geocoding: 100%|██████████| 8887/8887 [24:08<00:00,  6.14it/s]


In [12]:
print(len(georgia_addresses[georgia_addresses['applicant_lat'].isna()]))

1706
