In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly as py
import seaborn as sns
import plotly.express as px
import json

# switch matplotlib/seaborn plots to seaborn's default theme
sns.set_theme()

In [3]:
# County-level dataset; it carries a 'county' column that later cells use
# as the merge key.
df = pd.read_csv(filepath_or_buffer="./Data Collection/combined_df.csv")

# Source of the county -> FIPS lookup; only its 'County' and 'FIPS' columns
# are used further down.
fips_df = pd.read_csv(filepath_or_buffer="./Data Collection/data/2012.csv")

In [4]:
def convert_3_digit_code(fips):
    """
    converts a fips number to a 3 digit string
    with zeroes as left padding if needed

    inputs:
    fips - int - fips number to convert; whole-number floats (e.g. 1.0,
        as produced when pandas upcasts an integer column) and digit
        strings are accepted as well
    outputs:
    fips code as str, left-padded with zeroes to at least length 3
    (codes that are already 3 or more characters long are returned as-is)
    """
    digits_wanted = 3
    # A whole-number float would otherwise stringify as e.g. "1.0", which is
    # already three characters long and would slip through unpadded.
    if isinstance(fips, float) and fips.is_integer():
        fips = int(fips)
    # str.zfill leaves strings of digits_wanted or more characters untouched,
    # so no separate ">= 100" shortcut is needed.
    return str(fips).zfill(digits_wanted)

In [5]:
# only interested in fips data for plotly; the statewide summary row is
# filtered out because it is not a county
fips_df = fips_df.loc[fips_df['County'] != 'STATE OF TEXAS', ['County', 'FIPS']]

# change column name formatting to convention (lowercase and underscores)
fips_df = fips_df.rename(columns = lambda col: col.lower().replace(" ", "_"))

fips_df = fips_df.assign(
    # lower case county names for easier index merging, and spell
    # "de witt county" as "dewitt county"
    # (presumably the spelling used in df -- verify against combined_df.csv)
    county = fips_df['county'].str.lower().replace(to_replace = "de witt county",
                                                   value = "dewitt county"),
    # convert fips to 3-digit string
    fips = fips_df['fips'].apply(convert_3_digit_code),
)

# Drop repeated (county, fips) rows BEFORE moving county into the index:
# drop_duplicates only compares columns, so running it after set_index
# would compare the fips values alone and ignore the county.
fips_df = fips_df.drop_duplicates().set_index('county')

In [6]:
# Index the county-level data by county name so it lines up with fips_df.
# The guard keeps this cell re-runnable: once 'county' is already the index,
# calling set_index('county') a second time would raise a KeyError.
if 'county' in df.columns:
    df = df.set_index('county')

# Left merge on the county index: every row of df is kept, and a county with
# no match in fips_df gets NaN in the fips column.
full = df.merge(right = fips_df, how = 'left', left_index = True, right_index = True)
# move county back into an ordinary column
full = full.reset_index()

## Economic Factors

Source: <https://www.fairvote.org/what_affects_voter_turnout_rates>

Wealthier Americans are more likely to vote, so we examine two economic indicators for each county: unemployment rate and median income.

### Unemployment Rate
#### 2012

#### 2016

#### 2020

2020 was a more economically challenging year in terms of unemployment than the other election years, likely due to the COVID-19 pandemic. Unemployment was higher in 2020 across all of Texas's counties than it was in previous years. One trend that endured from 2012 to 2020, however, was that Southern and Southwest Texan counties, most notably Presidio County, consistently observed higher unemployment rates than the rest of the state across the three general election years.

### Median Income
#### 2012-2016

#### 2020

### Percentage of Population of Hispanics
We proceed to examine the Hispanic demographic, as it is widely believed that Texas's growing Hispanic population could potentially shift Texas's voting patterns.

#### 2012

#### 2016

#### 2020

Hispanics tend to make up a larger proportion of the county population in the southern Texan counties. The increasingly yellow-colored counties over time also show that the Hispanic share of each county's population has grown from 2012 to 2016 to 2020. If the Hispanic population has an effect on voting patterns in our modeling, we might expect continued voting changes in the future alongside Texas's changing demographics.


### Percentage of Population of Black Women
We'd also like to examine the proportion of Black women in Texas, as this demographic is known to lean Democratic.
#### 2012

#### 2016

#### 2020

Although the population segment of Black women has risen nationally during this time period, this trend is not reflected in Texas, as seen by the relatively static colors year to year. However, it is worth noting that counties with high population percentages of Black women are located primarily in East Texas, where the state borders Louisiana. The highest among these is Jefferson County, where Black women made up 16.54% of the population in 2020.

### Total Population
#### 2012

#### 2020

While the population in Texas's counties increases over time, overall populations remain concentrated in several major urban areas:
- In the Northeast, around Fort Worth and Dallas,
- In the center of the state, around Austin and San Antonio,
- To the East, near Houston, and
- To the South, around Brownsville.

### Voter Turnout
#### 2012

In [19]:
# summary statistics of the three voted_perc columns, one row per election year
full.loc[:, ['2012_voted_perc', '2016_voted_perc', '2020_voted_perc']].describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
2012_voted_perc,254.0,0.576011,0.076378,0.3039,0.52975,0.58395,0.62335,0.8422
2016_voted_perc,254.0,0.587839,0.078787,0.2939,0.5403,0.5934,0.637775,0.8949
2020_voted_perc,254.0,0.651735,0.078754,0.3459,0.601725,0.65875,0.702525,0.8689


Voter turnout in Texas generally increased from one general election to the next. In 2012, the average county turnout was 57.6%; it rose slightly to 58.8% in 2016. In 2020 there was a large increase of about 6 percentage points, to 65.2%. Variability in turnout from county to county largely stayed the same across the three general election years, with a standard deviation of about 8 percentage points. Generally, the western-most and southern-most counties saw lower turnout than other counties.

Interestingly, some counties would see abnormally low turnout without an observable pattern over time. Turnout in 2020 was generally higher across the board, though there was one notable exception: Crane County, with Texas's minimum of 34.6% turnout that year.

In [8]:
# list the column names that contain "voted_perc"
list(filter(lambda name: "voted_perc" in name, full.columns))

['2012_voted_perc', '2016_voted_perc', '2020_voted_perc']

In [25]:
# peek at the first 50 column names of the merged frame
full.columns[:50]

Index(['county', '2012_unemp', '2016_unemp', '2020_unemp', '2012_var_state',
       '2016_var_state', '2020_var_state',
       'median_income_2012_2016_16_dollars', 'median_income_2020_16_dollars',
       '2012_reg_voters', '2012_voted_num', '2012_voted_perc',
       '2012_early_vote_num', '2012_early_vote_perc', '2016_reg_voters',
       '2016_voted_num', '2016_voted_perc', '2016_early_vote_num',
       '2016_early_vote_perc', '2020_reg_voters', '2020_voted_num',
       '2020_voted_perc', '2020_early_vote_num', '2020_early_vote_perc',
       '2020_rep_vote_count', '2020_rep_vote_perc', '2020_dem_vote_count',
       '2020_dem_vote_perc', '2020_lib_vote_count', '2020_lib_vote_perc',
       '2020_grn_vote_count', '2020_grn_vote_perc', '2020_oth_vote_count',
       '2020_oth_vote_perc', '2020_votes_total', '2016_rep_vote_counte',
       '2016_rep_vote_perc', '2016_dem_vote_count', '2016_dem_vote_perc',
       '2016_lib_vote_count', '2016_lib_vote_perc', '2016_grn_vote_count',
       '2016

In [26]:
# column names 50 through 99 of the merged frame
full.columns[50:100]

Index(['2012_oth_vote_perc', '2012_oth_vote_count', '2012_vote_total',
       '2012_CWPP', '2016_CWPP', '2020_CWPP', 'total_2012_18 to 27',
       'total_2012_28 to 37', 'total_2012_38 to 47', 'total_2012_48 to 57',
       'total_2012_58 to 67', 'total_2012_68 to 77', 'total_2012_78 to 87',
       'total_2012_88 to 97', 'total_2016_18 to 27', 'total_2016_28 to 37',
       'total_2016_38 to 47', 'total_2016_48 to 57', 'total_2016_58 to 67',
       'total_2016_68 to 77', 'total_2016_78 to 87', 'total_2016_88 to 97',
       'total_2020_18 to 27', 'total_2020_28 to 37', 'total_2020_38 to 47',
       'total_2020_48 to 57', 'total_2020_58 to 67', 'total_2020_68 to 77',
       'total_2020_78 to 87', 'total_2020_88 to 97',
       'total_male_2012_18 to 27', 'total_male_2012_28 to 37',
       'total_male_2012_38 to 47', 'total_male_2012_48 to 57',
       'total_male_2012_58 to 67', 'total_male_2012_68 to 77',
       'total_male_2012_78 to 87', 'total_male_2012_88 to 97',
       'total_male_2016

In [None]:
# column names 100 through 149 of the merged frame
full.columns[100:150]

In [38]:
# column names 150 through 199 of the merged frame
full.columns[150:200]

Index(['anglo_total_2020_78 to 87', 'anglo_total_2020_88 to 97',
       'anglo_male_2012_18 to 27', 'anglo_male_2012_28 to 37',
       'anglo_male_2012_38 to 47', 'anglo_male_2012_48 to 57',
       'anglo_male_2012_58 to 67', 'anglo_male_2012_68 to 77',
       'anglo_male_2012_78 to 87', 'anglo_male_2012_88 to 97',
       'anglo_male_2016_18 to 27', 'anglo_male_2016_28 to 37',
       'anglo_male_2016_38 to 47', 'anglo_male_2016_48 to 57',
       'anglo_male_2016_58 to 67', 'anglo_male_2016_68 to 77',
       'anglo_male_2016_78 to 87', 'anglo_male_2016_88 to 97',
       'anglo_male_2020_18 to 27', 'anglo_male_2020_28 to 37',
       'anglo_male_2020_38 to 47', 'anglo_male_2020_48 to 57',
       'anglo_male_2020_58 to 67', 'anglo_male_2020_68 to 77',
       'anglo_male_2020_78 to 87', 'anglo_male_2020_88 to 97',
       'anglo_female_2012_18 to 27', 'anglo_female_2012_28 to 37',
       'anglo_female_2012_38 to 47', 'anglo_female_2012_48 to 57',
       'anglo_female_2012_58 to 67', 'anglo_f

In [13]:
# preview the first five rows of the merged frame (fips is the last column)
full.head()

Unnamed: 0,county,2012_unemp,2016_unemp,2020_unemp,2012_var_state,2016_var_state,2020_var_state,median_income_2012_2016_16_dollars,median_income_2020_16_dollars,2012_reg_voters,...,other_total_2012,other_total_2016,other_total_2020,hispanic_total_2012,hispanic_total_2016,hispanic_total_2020,total_2012,total_2016,total_2020,fips
0,anderson county,0.036,0.043,0.056,-0.029,-0.005,-0.012,42146,44871,26494,...,0.0205,0.0281,0.0245,0.1642,0.1764,0.1893,58964,58305,58199,1
1,andrews county,0.031,0.047,0.082,-0.034,-0.001,0.014,70121,69369,8271,...,0.0201,0.0198,0.0169,0.4979,0.5315,0.6273,16039,17829,22269,3
2,angelina county,0.051,0.059,0.076,-0.014,0.011,0.008,44185,47917,49317,...,0.0227,0.0281,0.027,0.2059,0.2265,0.2317,88850,90652,90437,5
3,aransas county,0.068,0.056,0.085,0.003,0.008,0.017,44851,49153,15883,...,0.038,0.0393,0.0352,0.2546,0.2748,0.3008,23825,25234,27699,7
4,archer county,0.034,0.043,0.053,-0.031,-0.005,-0.015,62407,59010,6322,...,0.0178,0.0183,0.0235,0.0778,0.0907,0.1095,8996,9123,8344,9
