In [2]:
import pandas as pd, numpy as np, re

# ── 1.  build the vote panel you already typed  ──────────────────────────
vote_rows = [
    # city, year, dem, rep, tp
    ('Dearborn',         2020, 30719, 13239,  605),
    ('Dearborn Heights', 2020, 16623,  9749,  346),
    ('Hamtramck',        2020,  6628,  1042,   75),
    ('Dane County',      2020, 260185, 78800, 5801),
    ('Dearborn',         2024, 15208, 17802, 8359),
    ('Dearborn Heights', 2024,  9652, 11083, 4159),
    ('Hamtramck',        2024,  3271,  3017,  741),
    ('Dane County',      2024, 273995, 85454, 6477),
]
votes = pd.DataFrame(
    vote_rows,
    columns=['City','Year','Dem','Rep','Third_Prty']
)
votes['TotalTurnout'] = votes[['Dem','Rep','Third_Prty']].sum(1)
votes.head(10)

Unnamed: 0,City,Year,Dem,Rep,Third_Prty,TotalTurnout
0,Dearborn,2020,30719,13239,605,44563
1,Dearborn Heights,2020,16623,9749,346,26718
2,Hamtramck,2020,6628,1042,75,7745
3,Dane County,2020,260185,78800,5801,344786
4,Dearborn,2024,15208,17802,8359,41369
5,Dearborn Heights,2024,9652,11083,4159,24894
6,Hamtramck,2024,3271,3017,741,7029
7,Dane County,2024,273995,85454,6477,365926


In [3]:
#Pull estimates from census CSVs

def grab(csv, row_label, col=1):
    df = pd.read_csv(csv, header=None)
    # first column often has hidden NBSP; strip & lower‐case
    df[0] = df[0].astype(str).str.replace(r'\s+', ' ', regex=True).str.strip().str.lower()
    mask = df[0].str.contains(row_label.lower())
    val  = df.loc[mask, col].iloc[0]
    # remove commas/percent signs
    val  = float(re.sub(r'[^\d\.]', '', str(val))) if val!='' else np.nan
    return val


In [4]:

# ── 3.  build dictionaries keyed by city  ───────────────────────────────
pop = {
    'Dearborn'        : grab('Population/dearborn pop.csv', 'Dearborn City', col=3),
    'Dearborn Heights': grab('Population/dearborn pop.csv', 'total population', col=3),
    'Hamtramck'       : grab('Population/dearborn pop.csv', 'hamtramck', col=5),
    'Dane County'     : grab('Dane County community stats.csv', 'total population')
}

arab = {
    'Dearborn'        : grab('BOTH Dane county and Dearborn area Arab Pop.csv','dearborn city'),
    'Dearborn Heights': grab('BOTH Dane county and Dearborn area Arab Pop.csv','dearborn heights'),
    'Hamtramck'       : grab('BOTH Dane county and Dearborn area Arab Pop.csv','hamtramck'),
    'Dane County'     : grab('BOTH Dane county and Dearborn area Arab Pop.csv','dane county')
}
# convert → share
arab_share = {k: arab[k]/pop[k] for k in arab}


IndexError: single positional indexer is out-of-bounds

In [None]:

# ── 4.  age jagged csv → VAP + pct 18-29  ───────────────────────────────
def age_stats(csv, city_name):
    df = pd.read_csv(csv, header=None)
    df[0] = df[0].astype(str).str.replace(r'\s+', ' ', regex=True).str.strip().str.lower()
    total = grab(csv, 'total population')       # reuse helper
    # voting-age pop = 18+  (sum of all rows that start with '18 to' OR have first number ≥18)
    age_rows = df[df[0].str.match(r'(\d+ to|\d{2} years|65 to)')]
    age_rows['num'] = age_rows[1].replace({',':''}, regex=True).astype(float)
    # extract start age
    age_rows['start'] = age_rows[0].str.extract(r'(\d+)').astype(float)
    vap = age_rows.loc[age_rows['start']>=18,'num'].sum()
    # 18-29 share
    pct1829 = age_rows.loc[age_rows['start'].between(18,25),'num'].sum() / total
    return total, vap, pct1829

age_dict = {}
for city,csv in [('Dearborn','Voting Aged/Age stats Dearborn areas.csv'),
                 ('Dearborn Heights','Age stats Dearborn areas.csv'),
                 ('Hamtramck','Age stats Dearborn areas.csv'),
                 ('Dane County','Age Stats Dane County.csv')]:
    total, vap, pct18_29 = age_stats(csv, city)
    age_dict[city] = {'Population': total,
                      'VAP'       : vap,
                      'Pct_18_29' : pct18_29}


FileNotFoundError: [Errno 2] No such file or directory: 'Voting Aged/Age stats Dearborn areas.csv'

In [None]:

# ── 5.  education (some-college-plus, 18+)  &  labour-force ─────────────
def edu_lf(csv, city, edu_row, lf_row):
    pct_somecollege = grab(csv, edu_row)/100          # already pct
    pct_notLF       = grab(csv, lf_row) / 100
    return pct_somecollege, pct_notLF

edu_lf_dict = {
    'Dearborn'        : edu_lf('Deaerborn community stats.csv',
                               'dearborn','some college','not in labor force'),
    'Dearborn Heights': edu_lf('Deaerborn community stats.csv',
                               'dearborn heights','some college','not in labor force'),
    'Hamtramck'       : edu_lf('Deaerborn community stats.csv',
                               'hamtramck','some college','not in labor force'),
    'Dane County'     : edu_lf('Dane County community stats.csv',
                               'some college','not in labor force')
}


In [None]:

# ── 6.  citizenship → foreign-born share  ───────────────────────────────
foreign = {
    'Dearborn'        : grab('Dearborn citizen status.csv', 'foreign born') /
                        grab('Dearborn citizen status.csv','total'),
    'Dearborn Heights': grab('Dearborn citizen status.csv', 'heights foreign born') /
                        grab('Dearborn citizen status.csv','heights total'),
    'Hamtramck'       : grab('Dearborn citizen status.csv', 'hamtramck foreign born') /
                        grab('Dearborn citizen status.csv','hamtramck total'),
    'Dane County'     : grab('Dane County citizen.csv', 'foreign born') /
                        grab('Dane County citizen.csv','total')
}

#

In [None]:
#── 7.  merge everything into votes  ─────────────────────────────────────
def add_col(name, source_dict, scale=1.0):
    votes[name] = votes['City'].map(source_dict) * scale

votes = votes.join(
    votes['City'].map(age_dict).apply(pd.Series)
)
add_col('PctArab',        arab_share)
add_col('PctSomeCollege', {k:v[0] for k,v in edu_lf_dict.items()})
add_col('PctNotLF',       {k:v[1] for k,v in edu_lf_dict.items()})
add_col('PctForeignBorn', foreign)

votes['logVAP'] = np.log(votes['VAP'])

# neat col order
cols = ['Year','City','Population','VAP','logVAP','TotalTurnout',
        'Dem','Rep','TP',
        'PctArab','PctSomeCollege','PctForeignBorn',
        'PctNotLF','Pct_18_29']
votes = votes[cols]
