In [1]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
import matplotlib

# Apply the ggplot stylesheet first, then override individual settings on top of it.
plt.style.use('ggplot')
matplotlib.rcParams.update({
    'figure.figsize': (12, 6),
    'font.size': 18,  # Probably OS Dependent
})

## Load Merged Data

In [3]:
# Load the municipality records stored in results/records.noemail.json
# (presumably the PDF-derived data that the merge section falls back to -- confirm).
df_noemail = pd.read_json('results/records.noemail.json')
# Fixed random_state so the preview shows the same five rows on every run.
df_noemail.sample(5, random_state=42)

Unnamed: 0,key,city_type,city,county,clerk,deputy_clerk,municipal_address,mailing_address,phones,fax,url
233,TOWN OF LYNN - CLARK COUNTY,TOWN,LYNN,CLARK COUNTY,JOYE EICHTEN,,"W1877 US HIGHWAY 10, GRANTON, WI 54436-8847","PO BOX 104, GRANTON, WI 54436-0104",[(715)937-1626],,
450,TOWN OF SUPERIOR - DOUGLAS COUNTY,TOWN,SUPERIOR,DOUGLAS COUNTY,CAROLYN JONES,,"4917 S STATE ROAD 35, SUPERIOR, WI 54880-8301",,[(715)399-8385],(715)399-0181,http://www.townofsuperior.net
1240,TOWN OF SHARON - PORTAGE COUNTY,TOWN,SHARON,PORTAGE COUNTY,LORI PRINTZ,ALICE WIERZBA,"6704 STATE HIGHWAY 66, CUSTER, WI 54423-9641",,[(715)592-6600],(715)592-6666,
1693,TOWN OF WEST BEND - WASHINGTON COUNTY,TOWN,WEST BEND,WASHINGTON COUNTY,JULIE IHLENFELD,,"6355 COUNTY ROAD Z, WEST BEND, WI 53095-9201",,[(262)338-3417],(262)338-0427,http://www.townofwestbend.com
411,CITY OF BEAVER DAM - DODGE COUNTY,CITY,BEAVER DAM,DODGE COUNTY,ZAK BLOOM,TRACEY FERRON,"205 S LINCOLN AVE, BEAVER DAM, WI 53916-2323",,"[(920)887-4600, (920)356-2546]",(920)887-4662,https://www.cityofbeaverdam.com/


## Load Fetched Data

In [4]:
from aggregate import read_municipal_address, read_mailing_address


# One frame per fetch: rows that are entirely empty are dropped, and `key` holds the
# upper-cased jurisdiction name used to join the frames later in the notebook.
df_mail = (
    pd.DataFrame(read_mailing_address())
    .dropna(how='all', axis=0)
    .assign(key=lambda frame: frame['jurisdictionName'].str.upper())
)
df_muni = (
    pd.DataFrame(read_municipal_address())
    .dropna(how='all', axis=0)
    .assign(key=lambda frame: frame['jurisdictionName'].str.upper())
)

df_muni.sample(5, random_state=42)

Unnamed: 0,$id,jurisdictionID,jurisdictionName,clerkName,muncipalAddress,mailingAddress,phone1,phone2,fax,email,notificationEmail,key
1177,12,7864f172-ddaf-e511-80ea-0050568c2fc0,Town Of Springwater - Waushara County,Victoria Sage,"W7033 County Road Gh, Wild Rose, Wi 54984-6615","W7048 High Rd, Wild Rose, Wi 54984-9016",(920)622-3808,,(920)622-4718,clerk.springwater18@gmail.com,clerk.springwater18@gmail.com,TOWN OF SPRINGWATER - WAUSHARA COUNTY
1115,12,f4cd0067-ddaf-e511-80ea-0050568c2fc0,Town Of Bevent - Marathon County,Anne Mieska,"205825 County Road Y, Hatley, Wi 54440","207211 Moraine Rd, Hatley, Wi 54440",(715)454-7171,,(715)454-6303,amieska59@gmail.com,amieska59@gmail.com,TOWN OF BEVENT - MARATHON COUNTY
1662,12,b335105b-ddaf-e511-80ea-0050568c2fc0,Village Of Howard - Multiple Counties,Christopher A Haltom,"2456 Glendale Ave, Green Bay, Wi 54313-6502","Po Box 12207, Green Bay, Wi 54313-2207",(920)434-4640,,(920)434-4643,CHALTOM@VILLAGEOFHOWARD.COM,CHALTOM@VILLAGEOFHOWARD.COM; aduprey@villageof...,VILLAGE OF HOWARD - MULTIPLE COUNTIES
1223,12,aace0067-ddaf-e511-80ea-0050568c2fc0,Town Of Hazelhurst - Oneida County,Betty Cushing,"7020 Hwy 51, Hazelhurst, Wi 54531-9525","Po Box 67, Hazelhurst, Wi 54531-0067",(715)356-5800,,(715)358-9809,HAZELWI@FRONTIER.COM,HAZELWI@FRONTIER.COM,TOWN OF HAZELHURST - ONEIDA COUNTY
1191,12,84ce0067-ddaf-e511-80ea-0050568c2fc0,Village Of Norwalk - Monroe County,Caroline Vian,"208 S Church St, Norwalk, Wi 54648-8255","Po Box 230, Norwalk, Wi 54648-0230",(608)823-7760,,(608)823-7293,norwalkclerk@centurytel.net,norwalkclerk@centurytel.net,VILLAGE OF NORWALK - MONROE COUNTY


## Stats on which municipalities we have data for

In [5]:
# (rows, columns) of the mailing-address fetch after dropping all-empty rows.
df_mail.shape

(635, 12)

In [6]:
# (rows, columns) of the municipal-address fetch after dropping all-empty rows.
df_muni.shape

(1493, 12)

In [7]:
# Distinct jurisdiction names that appear in either fetch.
all_jurisdictions = set(df_mail['jurisdictionName']).union(df_muni['jurisdictionName'])
fetched_lens = len(all_jurisdictions)
print(fetched_lens)

1600


## Data agrees when there is overlap

In [8]:
# Jurisdictions present in BOTH fetches; `_x` columns come from df_muni, `_y` from df_mail.
df_merged = df_muni.merge(df_mail, on='key', how='inner')

# These fields must be identical in the two fetches wherever they overlap.
for col in ['clerkName', 'email', 'muncipalAddress']:
    left, right = df_merged[f'{col}_x'], df_merged[f'{col}_y']
    # Plain `==` evaluates NaN == NaN as False, so a value missing from both
    # fetches would be reported as a disagreement; treat "both missing" as agreeing.
    agrees = left.eq(right) | (left.isna() & right.isna())
    assert agrees.all(), (
        f"{col} differs between fetches for: {df_merged.loc[~agrees, 'key'].tolist()}"
    )

## Merge: fetched data takes priority, falling back to the PDF data when nothing was fetched

In [9]:
# Stack both fetches on the shared `key` index (municipal-address fetch first).
df_fetched = pd.concat([
    df_muni.set_index('key'),
    df_mail.set_index('key'),
])
# Keep exactly one row per key, preferring the first source listed above.
# `drop_duplicates()` compares whole rows and ignores the index, so a jurisdiction whose
# two rows differ in any column would survive twice and later multiply rows in the
# key-based merge; de-duplicating on the index avoids that.
df_fetched = df_fetched[~df_fetched.index.duplicated(keep='first')]
assert df_fetched.shape[0] == fetched_lens, (
    f"expected {fetched_lens} fetched jurisdictions, got {df_fetched.shape[0]}"
)
df_fetched.sample(n=5, random_state=42)

Unnamed: 0_level_0,$id,jurisdictionID,jurisdictionName,clerkName,muncipalAddress,mailingAddress,phone1,phone2,fax,email,notificationEmail
key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
TOWN OF HAMMEL - TAYLOR COUNTY,12,c919f96c-ddaf-e511-80ea-0050568c2fc0,Town Of Hammel - Taylor County,Renee Zenner,"W8792 Center Ave, Medford, Wi 54451-8528","N2497 County Rd E, Medford, Wi 54451-9445",(715)748-9608,,,zennerfarms@tds.net,zennerfarms@tds.net
VILLAGE OF SOMERSET - ST. CROIX COUNTY,12,3e19f96c-ddaf-e511-80ea-0050568c2fc0,Village Of Somerset - St. Croix County,Felicia Germain,"110 Spring St, Somerset, Wi 54025-9002","Po Box 356, Somerset, Wi 54025-0356",(715)247-3395,,(715)247-2790,fgermain@vil.somerset.wi.us,fgermain@vil.somerset.wi.us
TOWN OF FRANKFORT - PEPIN COUNTY,12,4d18f96c-ddaf-e511-80ea-0050568c2fc0,Town Of Frankfort - Pepin County,Maureen Manore,"N3290 Byington Rd, Pepin, Wi 54759-4718","N3290 Byington Rd, Pepin, Wi 54759-4718",(715)442-2685,,,TOWNOFFRANKFORT@YAHOO.COM,TOWNOFFRANKFORT@YAHOO.COM
TOWN OF HUBBARD - DODGE COUNTY,12,b2810861-ddaf-e511-80ea-0050568c2fc0,Town Of Hubbard - Dodge County,Carrie Neu,"W2864 Neda Rd, Iron Ridge, Wi 53035","W2864 Neda Rd, Iron Ridge, Wi 53035",(920)349-3223,,(920)349-3223,townofhubbard@gmail.com,kgibson@co.dodge.wi.us; townofhubbard@gmail.com
VILLAGE OF ROSHOLT - PORTAGE COUNTY,12,aa18f96c-ddaf-e511-80ea-0050568c2fc0,Village Of Rosholt - Portage County,Theresa Hartvig,"101 S Main St, Rosholt, Wi 54473-9772","Po Box 245, Rosholt, Wi 54473-0245",(715)677-4510,(715)677-4246,,rosholtvillage@wi-net.com,rosholtvillage@wi-net.com


In [10]:
# Title-case the text columns of the records.noemail data (its sample rows are upper-case,
# while the fetched data is title-cased).
fix_cols = ['city_type', 'city', 'county', 'clerk', 'deputy_clerk', 'municipal_address', 'mailing_address']
df_noemail2 = df_noemail.set_index('key')
df_noemail2 = df_noemail2.assign(**{
    name: df_noemail2[name].str.title() for name in fix_cols
})

# Left merge keeps every records.noemail row; fetched columns are filled in where the key matches.
df_master = (
    df_noemail2
    .merge(df_fetched, how='left', on='key')
    .reset_index()
    .assign(title_key=lambda frame: frame['key'].str.title())
)

def merge_all_cols(df, pairs):
    """Coalesce pairs of columns, preferring the first column of each pair.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing every column named in ``pairs``.
    pairs : iterable of (first, second)
        Column-name pairs. For each pair the output takes the value from
        ``first`` and, where that value is null, the value from ``second``.

    Returns
    -------
    pandas.DataFrame
        One column per pair, named after the pair's ``first`` column.
    """
    merged = {}
    for preferred, fallback in pairs:
        primary = df[preferred]
        # Only the null cells of the preferred column are replaced by the fallback.
        merged[preferred] = primary.mask(primary.isnull(), df[fallback])
    return pd.DataFrame(merged)

# (preferred fetched column, records.noemail fallback column); each merged column keeps
# the preferred column's name.
fallback_pairs = [
    ['muncipalAddress', 'municipal_address'],
    ['mailingAddress', 'mailing_address'],
    ['clerkName', 'clerk'],
    ['fax_y', 'fax_x'],
    ['jurisdictionName', 'title_key'],
]
passthrough = df_master[['email', 'notificationEmail', 'county']]

df_final = pd.concat(
    [merge_all_cols(df_master, fallback_pairs), passthrough],
    axis=1,
).rename(columns={
    'fax_y': 'fax',
    # No 'city_type' column is part of the concatenation above, so this entry has no effect.
    'city_type': 'cityType',
})

df_final['city'] = df_master['city_type'] + ' of ' + df_master['city']

df_final.sample(5, random_state=42)

Unnamed: 0,muncipalAddress,mailingAddress,clerkName,fax,jurisdictionName,email,notificationEmail,county,city
233,"W1877 Us Highway 10, Granton, Wi 54436-8847","Po Box 104, Granton, Wi 54436-0104",Joye Eichten,,Town Of Lynn - Clark County,lynn.townclerk@gmail.com,lynn.townclerk@gmail.com,Clark County,Town of Lynn
450,"4917 S State Road 35, Superior, Wi 54880-8301","4917 S State Road 35, Superior, Wi 54880-8301",Carolyn Jones,(715)399-0181,Town Of Superior - Douglas County,townofsuperior@centurytel.net,townofsuperior@centurytel.net,Douglas County,Town of Superior
1240,"6704 State Highway 66, Custer, Wi 54423-9641","6704 State Highway 66, Custer, Wi 54423-9641",Lori Printz,(715)592-6666,Town Of Sharon - Portage County,townofsharon@outlook.com,townofsharon@outlook.com,Portage County,Town of Sharon
1693,"6355 County Road Z, West Bend, Wi 53095-9201","6355 County Road Z, West Bend, Wi 53095-9201",Julie Ihlenfeld,(262)338-0427,Town Of West Bend - Washington County,clerk@townofwestbend.com,clerk@townofwestbend.com,Washington County,Town of West Bend
411,"205 S Lincoln Ave, Beaver Dam, Wi 53916-2323",,Zak Bloom,(920)887-4662,City Of Beaver Dam - Dodge County,,,Dodge County,City of Beaver Dam


In [11]:
# Row-wise flags: does the municipality have any non-null email / a non-null fax?
has_email = df_final[['email', 'notificationEmail']].notnull().any(axis=1)
has_fax = df_final['fax'].notnull()

# The mean of a boolean Series is the fraction of True rows.
frac1 = has_email.mean()
print(f"Have email for {frac1:%} of municipalities")

frac2 = (has_fax | has_email).mean()
print(f"Have fax or email for {frac2:%} of municipalities")

Have email for 86.184566% of municipalities
Have fax or email for 95.089045% of municipalities


In [12]:
# Columns carried into the export, mapped to their output names
# (names mapped to themselves are left unchanged).
output_names = {
    'muncipalAddress': 'physicalAddress',
    'mailingAddress': 'address',
    'clerkName': 'official',
    'jurisdictionName': 'locale',
    'county': 'county',
    'city': 'city',
}
df_output = df_final[list(output_names)].rename(columns=output_names)

def to_list(df, sep=';'):
    """Collapse each row of ``df`` into a list of its distinct, non-empty values.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns hold alternative values of the same kind
        (e.g. ``email`` and ``notificationEmail``).
    sep : str or None, default ';'
        Separator used to split string cells that hold several values
        (e.g. ``"a@x.org; b@x.org"``); each piece is whitespace-stripped.
        Pass ``None`` to keep every cell as a single value.

    Returns
    -------
    pandas.Series
        One list per row. Null and empty-string values are dropped, duplicates
        are removed, and values keep first-seen order (columns left to right),
        so the result is identical from run to run.
    """
    def row_values(row):
        values = []
        for cell in row:
            if pd.isnull(cell):
                continue
            if sep and isinstance(cell, str):
                values.extend(piece.strip() for piece in cell.split(sep))
            else:
                values.append(cell)
        # dict.fromkeys de-duplicates while preserving insertion order; a plain set
        # would make the order of the exported lists vary between runs.
        return [value for value in dict.fromkeys(values) if value != '']

    return df.apply(row_values, axis=1)

# 'Multiple Counties' is a placeholder rather than a county name, so it becomes missing.
df_output['county'] = df_output['county'].replace('Multiple Counties', np.nan).str.strip()
# Locale: "<Type> Of <Name> - <County>" becomes "<Type> of <Name>:<County>".
# NOTE(review): for 'Multiple Counties' rows this leaves a trailing ':' -- confirm that is intended.
df_output['locale'] = (
    df_output['locale']
    .str.replace(' - ', ':')
    .str.replace(' Of ', ' of ')
    .str.replace('Multiple Counties', '')
    .str.strip()
)
df_output['city'] = df_output['city'].str.strip()

# Contact columns are exported as lists of distinct, non-empty values.
df_output['faxes'] = to_list(df_final[['fax']])
df_output['emails'] = to_list(df_final[['email', 'notificationEmail']])

df_output.to_json('../public/wisconsin.json', orient='records')
df_output.sample(5, random_state=42)


Unnamed: 0,physicalAddress,address,official,locale,county,city,faxes,emails
233,"W1877 Us Highway 10, Granton, Wi 54436-8847","Po Box 104, Granton, Wi 54436-0104",Joye Eichten,Town of Lynn:Clark County,Clark County,Town of Lynn,[],[lynn.townclerk@gmail.com]
450,"4917 S State Road 35, Superior, Wi 54880-8301","4917 S State Road 35, Superior, Wi 54880-8301",Carolyn Jones,Town of Superior:Douglas County,Douglas County,Town of Superior,[(715)399-0181],[townofsuperior@centurytel.net]
1240,"6704 State Highway 66, Custer, Wi 54423-9641","6704 State Highway 66, Custer, Wi 54423-9641",Lori Printz,Town of Sharon:Portage County,Portage County,Town of Sharon,[(715)592-6666],[townofsharon@outlook.com]
1693,"6355 County Road Z, West Bend, Wi 53095-9201","6355 County Road Z, West Bend, Wi 53095-9201",Julie Ihlenfeld,Town of West Bend:Washington County,Washington County,Town of West Bend,[(262)338-0427],[clerk@townofwestbend.com]
411,"205 S Lincoln Ave, Beaver Dam, Wi 53916-2323",,Zak Bloom,City of Beaver Dam:Dodge County,Dodge County,City of Beaver Dam,[(920)887-4662],[]
