In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly as py
import seaborn as sns
import plotly.express as px
import json

# apply seaborn's default visual theme to matplotlib/seaborn figures in this notebook
sns.set_theme()

In [None]:
# combined per-county dataset used for every map below
df = pd.read_csv(filepath_or_buffer = "./Data Collection/combined_df.csv")
# 2012 file is only used as a county -> FIPS lookup table
fips_df = pd.read_csv(filepath_or_buffer = "./Data Collection/data/2012.csv")

In [None]:
def convert_3_digit_code(fips):
    """
    converts a fips number to a string of at least 3 characters,
    with zeroes as left padding if needed

    inputs:
    fips - int (or integer-valued float, or numeric str) - fips number to convert
        floats such as 1.0 are accepted because pandas reads an integer
        column as float when it contains missing values
    outputs:
    fips code as str, zero-padded to length 3; values that are already
    3 or more characters long are returned unpadded
    """
    digits_wanted = 3
    # drop the trailing ".0" of integer-valued floats so 1.0 -> "001", not "1.0"
    if isinstance(fips, float) and fips.is_integer():
        fips = int(fips)
    # str.zfill never truncates, so codes >= 100 pass through unchanged
    return str(fips).strip().zfill(digits_wanted)

In [None]:
# Build the county -> FIPS lookup used to join data onto the geojson.
# Written as a single chain so no column is ever assigned on a filtered
# slice (which triggers SettingWithCopyWarning) and nothing is mutated in place.
fips_df = (
    # only interested in fips data for plotly; drop the statewide summary row
    fips_df.loc[fips_df['County'] != 'STATE OF TEXAS', ['County', 'FIPS']]
    # change column name formatting to convention (lowercase and underscores)
    .rename(columns = lambda col: col.lower().replace(" ", "_"))
    .assign(
        # change county info to lower case for easier index merging,
        # and change de witt county to dewitt county
        county = lambda frame: frame['county'].str.lower().replace(
            to_replace = "de witt county", value = "dewitt county"),
        # convert fips to 3-digit string
        fips = lambda frame: frame['fips'].apply(convert_3_digit_code),
    )
    # de-duplicate while county is still a column so that both county and
    # fips are compared (drop_duplicates ignores the index)
    .drop_duplicates()
    .set_index('county')
)

In [None]:
# index the combined data by county so it lines up with fips_df's index
df = df.set_index('county')
# left join on the county index keeps every row of df, then county is
# restored as an ordinary column
full = pd.merge(df, fips_df, how = 'left', left_index = True,
                right_index = True).reset_index()

In [None]:
def read_merge_prob(year):
    """
    Load one year's model soft probabilities and attach FIPS codes to them
    so the result can be used for choropleth mapping.

    inputs:
    year - value interpolated into the file name ./output/probs_{year}.csv
    outputs:
    DataFrame of the csv contents left-joined with fips_df on the county
    index, with county restored as a regular column
    """
    probs = pd.read_csv(f"./output/probs_{year}.csv", index_col = 'county')
    with_fips = pd.merge(probs, fips_df, how = 'left',
                         left_index = True, right_index = True)
    return with_fips.reset_index()

In [None]:
# one probability table per year suffix (12, 16, 20)
prob_12, prob_16, prob_20 = (read_merge_prob(year) for year in (12, 16, 20))

In [None]:
# downloaded geojson of texas counties from
# https://data.texas.gov/Government-and-Taxes/County-Map/48ag-x9aa

# used code from https://stackoverflow.com/questions/64492092/im-making-a-choropleth-map-with-plotly-express-how-do-i-match-up-the-values-in
# to figure out how to use a custom geojson file

path_to_file = 'tx_county_map.geojson'
# GeoJSON is UTF-8 encoded; be explicit rather than relying on the
# platform's default text encoding
with open(path_to_file, encoding = 'utf-8') as f:
    txgeo = json.load(f)

In [None]:
def make_tx_choropleth(col_name, col_label, range_color = None, scale = "Viridis",
                       geojson = None, df = None):
    """
    Creates and displays choropleth of Texas counties colored by given col_name column
    in the given DataFrame.
    
    inputs:
    col_name - str - name of column in df to color by
    col_label - str - name to display column under
    range_color - tuple - (x, y) with x, y being numbers indicating min and max
        values for color scale; None leaves the range to plotly
    scale - str - name of the plotly continuous color scale to use
    geojson - dict - geojson whose features carry a properties.fips_code value;
        defaults to the notebook-level txgeo
    df - DataFrame - data to plot, must contain 'fips', 'county' and col_name
        columns; defaults to the notebook-level full
    outputs:
    None - the figure is shown as a side effect
    """
    # Look up the defaults at call time rather than definition time, so the
    # map always reflects the current txgeo / full even if those cells were
    # re-run after this function was defined.
    if geojson is None:
        geojson = txgeo
    if df is None:
        df = full
    fig = px.choropleth(data_frame = df, 
                        geojson = geojson,
                        # rows are matched to geojson features by comparing
                        # df['fips'] with each feature's properties.fips_code
                        locations='fips', 
                        featureidkey="properties.fips_code",
                        color= col_name,
                        color_continuous_scale= scale,
                        range_color = range_color,
                        hover_data=["county"],
                        scope = 'usa',
                        labels = {col_name : col_label})
    # zoom to the plotted counties and hide the base map
    fig.update_geos(fitbounds="locations", visible=False)
    fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
    fig.show()

In [None]:
# preview the first five rows of the county -> fips lookup
fips_df.iloc[:5]

In [None]:
# Actual votes

In [None]:
# names of all columns that mention dem_vote
list(filter(lambda name: "dem_vote" in name, full.columns))

In [None]:
# inspect the 2012_dem_vote_perc column
full.loc[:, '2012_dem_vote_perc']

In [None]:
# # 2012
# make_tx_choropleth('2012_dem_vote_perc', 'Perc of Voting D (2012)',
#                    scale = "Bluered_r", range_color = (0, 1))

In [None]:
# # 2016
# make_tx_choropleth('2016_dem_vote_perc', 'Perc of Voting D (2016)',
#                    scale = "Bluered_r", range_color = (0, 1))

In [None]:
# # 2020
# make_tx_choropleth('2020_dem_vote_perc', 'Perc of Voting D (2020)',
#                    scale = "Bluered_r", range_color = (0, 1))

In [None]:
# preview the first five rows of the 2012 probability table
prob_12.iloc[:5]

In [None]:
# # 2012
# make_tx_choropleth('Probability County=Democrat', 'Prob of Voting D (2012)',
#                    scale = "Bluered_r", range_color = (0, 1),
#                    df = prob_12)

In [None]:
# # 2016
# make_tx_choropleth('Probability County=Democrat', 'Prob of Voting D (2016)',
#                    scale = "Bluered_r", range_color = (0, 1),
#                    df = prob_16)

In [None]:
# # 2020
# make_tx_choropleth('Probability County=Democrat', 'Prob of Voting D (2020)',
#                    scale = "Bluered_r", range_color = (0, 1),
#                    df = prob_20)

### Unemployment Rate
#### 2012

In [None]:
# make_tx_choropleth('2012_unemp', '2012 Unemployment Rate', range_color = (0, 0.18))

#### 2016

In [None]:
# make_tx_choropleth('2016_unemp', '2016 Unemployment Rate', range_color = (0, 0.18))

#### 2020

In [None]:
# make_tx_choropleth('2020_unemp', '2020 Unemployment Rate', range_color = (0, 0.18))

### Median Income
#### 2012–2016

In [None]:
# names of all columns that contain "inc"
list(filter(lambda name: "inc" in name, full.columns))

In [None]:
# summary statistics for every column containing "inc", one row per column
full.loc[:, [name for name in full.columns if "inc" in name]].describe().transpose()

In [None]:
# make_tx_choropleth('median_income_2012_2016_16_dollars', 'Median Income 12-16 (Adjusted)',
#                    range_color = (20_000, 100_000))

#### 2020

In [None]:
# make_tx_choropleth('median_income_2020_16_dollars', 'Median Income 2020 (Adjusted)',
#                    range_color = (20_000, 100_000))

### Percentage of Population that is Hispanic

#### 2012

In [None]:
# make_tx_choropleth('hispanic_total_2012', '% Population Hispanic 2012',
#                    range_color = (0, 1))

#### 2016

In [None]:
# make_tx_choropleth('hispanic_total_2016', '% Population Hispanic 2016',
#                    range_color = (0, 1))

#### 2020

In [None]:
# make_tx_choropleth('hispanic_total_2020', '% Population Hispanic 2020',
#                    range_color = (0, 1))

### Percentage of Population of Black Women
#### 2012

In [None]:
# make_tx_choropleth('total_black_female_2012', '% Black Female 2012',
#                    range_color = (0, 0.2))

#### 2016

In [None]:
# make_tx_choropleth('total_black_female_2016', '% Black Female 2016', range_color = (0, 0.2))

#### 2020

In [None]:
# make_tx_choropleth('total_black_female_2020', '% Black Female 2020',
#                    range_color = (0, 0.2))

### Total Population
#### 2012

In [None]:
# make_tx_choropleth('total_2012', 'Total Population 2012', range_color = (0, 2_500_000))

#### 2020

In [None]:
# make_tx_choropleth('total_2020', 'Total Population 2020', range_color = (0, 2_500_000))

### Voter Turnout
#### 2012

In [None]:
# make_tx_choropleth('2012_voted_perc', 'Voter Turnout % 2012', range_color = (0.2, 1))

#### 2016

In [None]:
# make_tx_choropleth('2016_voted_perc', 'Voter Turnout % 2016', range_color = (0.2, 1))

#### 2020

In [None]:
# make_tx_choropleth('2020_voted_perc', 'Voter Turnout % 2020', range_color = (0.2, 1))

In [None]:
# `full[]` was an unfinished expression and a SyntaxError that stopped
# Restart & Run All; summarise the turnout columns instead, mirroring the
# summary cell in the Median Income section
full[[col for col in full.columns if "voted_perc" in col]].describe().T

In [None]:
# names of all columns that mention voted_perc
list(filter(lambda name: "voted_perc" in name, full.columns))

In [None]:
# page through the column names 50 at a time: columns 0-49
full.columns[slice(0, 50)]

In [None]:
# columns 50-99
full.columns[slice(50, 100)]

In [None]:
# columns 100-149
full.columns[slice(100, 150)]

In [None]:
# columns 150-199
full.columns[slice(150, 200)]

In [None]:
# preview the first five rows of the merged frame
full.iloc[:5]