In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import matplotlib.cm as cm
import plotly.express as px
import plotly.graph_objs as go

#This is a comment - python does not try to execute it

#This tells python to draw the graphs "inline" - in the notebook
%matplotlib inline  
import matplotlib.pyplot as plt
plt.style.use('ggplot')


The two box-plot functions in this notebook are based on the example at https://plotly.com/python/box-plots/. The code was adapted, following https://stackoverflow.com/questions/54368158/add-multiple-text-labels-from-dataframe-columns-in-plotly, so that county/precinct names can be attached to the plotted points as text labels.

In [None]:
# function for distribution

def distribution(x, t, name_of_the_html_file, label=None):
  """Draw one box plot per racial group, save the figure as HTML and show it.

  Parameters
  ----------
  x : sequence of array-likes
      One series of values per group, in the order
      Hispanic, White, Black, Asian, Mixed, Others. Series are paired with the
      group names by position; anything beyond the sixth series is ignored.
  t : str
      Figure title.
  name_of_the_html_file : str
      Path passed to ``fig.write_html``.
  label : array-like, optional
      Passed as ``text`` to every box trace (e.g. county or precinct names,
      one entry per plotted value). Defaults to ``None`` (no text), so the
      function can be called with only the first three arguments.

  Returns
  -------
  None
      The figure is written to ``name_of_the_html_file`` and displayed.
  """
  race_names = ['Hispanic','White','Black','Asian','Mixed','Others']

  fill_colors = ['rgba(93, 164, 214, 0.5)', 'rgba(255, 144, 14, 0.5)', 'rgba(44, 160, 101, 0.5)',
                 'rgba(255, 65, 54, 0.5)', 'rgba(207, 114, 255, 0.5)', 'rgba(127, 96, 0, 0.5)']

  fig = go.Figure()

  # zip() stops at the shortest input, so at most six traces are added.
  for race_name, values, fill in zip(race_names, x, fill_colors):
    fig.add_trace(go.Box(
        y=values,
        name=race_name,
        boxpoints='all',   # draw every individual observation next to its box
        jitter=0.5,
        whiskerwidth=0.2,
        fillcolor=fill,
        marker_size=2,
        line_width=2,
        text=label,        # the same label sequence is attached to every trace
    ))

  fig.update_layout(
      title=t,
      yaxis=dict(
          autorange=True,
          showgrid=True,
          zeroline=True,
          dtick=0.1,
          gridcolor='rgb(255, 255, 255)',
          gridwidth=0.1,
          zerolinecolor='rgb(255, 255, 255)',
          zerolinewidth=1,
      ),
      margin=dict(
          l=40,
          r=30,
          b=80,
          t=100,
      ),
      paper_bgcolor='rgb(243, 243, 243)',
      plot_bgcolor='rgb(243, 243, 243)',
      showlegend=False
  )

  fig.write_html(name_of_the_html_file)
  fig.show()


In [None]:
# Load the county-level race table; the first CSV column becomes the index.
race = pd.read_csv(
    'data/race_county_data/cleaned_georgia_race_county.csv',
    index_col=0,
)

# Keep the county name plus the six per-race population-density columns.
# NOTE(review): the Hispanic column is spelled without a space after the colon,
# unlike the other five - presumably this mirrors the CSV header; verify.
county_density_columns = [
    'Area Name',
    'Population Density:Hispanic',
    'Population Density: White',
    'Population Density: Black',
    'Population Density: Asian',
    'Population Density: Mixed',
    'Population Density: Others',
]
racedenscounty = race[county_density_columns]

# Preview the first rows of the full table.
race.head()

In [None]:
# County-level population density distribution by race (Georgia, 2020).
# list1 holds one density Series per race, in the order
# Hispanic, White, Black, Asian, Mixed, Others.
list1 = [
    racedenscounty[column]
    for column in (
        'Population Density:Hispanic',
        'Population Density: White',
        'Population Density: Black',
        'Population Density: Asian',
        'Population Density: Mixed',
        'Population Density: Others',
    )
]
name1 = 'Georgia State 2020 county level population density distribution by race'
# County names are passed as the text label for the plotted points.
distribution(
    list1,
    name1,
    'html_files/county_race_density.html',
    racedenscounty['Area Name'],
)


In [None]:
from enum import auto
# for non density distribution
def raw_distribution (x, t, name_of_the_html_file, label=None):
  """Draw one box plot per racial group for raw (non-density) values.

  The figure is written to an HTML file and then displayed.

  Parameters
  ----------
  x : sequence of array-likes
      One series of values per group, in the order
      Hispanic, White, Black, Asian, Mixed, Others. Series are paired with the
      group names by position; anything beyond the sixth series is ignored.
  t : str
      Figure title.
  name_of_the_html_file : str
      Path passed to ``fig.write_html``.
  label : array-like, optional
      Passed as ``text`` to every box trace (e.g. county or precinct names,
      one entry per plotted value). Defaults to ``None`` (no text), so the
      function can be called with only the first three arguments.

  Returns
  -------
  None
      The figure is written to ``name_of_the_html_file`` and displayed.
  """
  race_names = ['Hispanic','White','Black','Asian','Mixed','Others']

  fill_colors = ['rgba(93, 164, 214, 0.5)', 'rgba(255, 144, 14, 0.5)', 'rgba(44, 160, 101, 0.5)',
                 'rgba(255, 65, 54, 0.5)', 'rgba(207, 114, 255, 0.5)', 'rgba(127, 96, 0, 0.5)']

  fig = go.Figure()

  # zip() stops at the shortest input, so at most six traces are added.
  for race_name, values, fill in zip(race_names, x, fill_colors):
    fig.add_trace(go.Box(
        y=values,
        name=race_name,
        boxpoints='all',   # draw every individual observation next to its box
        jitter=0.5,
        whiskerwidth=0.2,
        fillcolor=fill,
        marker_size=2,
        line_width=1,
        text=label,        # the same label sequence is attached to every trace
    ))

  fig.update_layout(
      title=t,
      yaxis=dict(
          autorange=True,
          showgrid=True,
          zeroline=True,
          gridcolor='rgb(255, 255, 255)',
          zerolinecolor='rgb(255, 255, 255)',
          zerolinewidth=2,
      ),
      margin=dict(
          l=40,
          r=30,
          b=80,
          t=100,
      ),
      paper_bgcolor='rgb(243, 243, 243)',
      plot_bgcolor='rgb(243, 243, 243)',
      showlegend=False
  )

  fig.write_html(name_of_the_html_file)
  fig.show()

In [None]:
# Georgia state 2020 county level population distribution by race
# Output is written to html_files/county_raw_distribution.html
# list2 is a list for raw data of county level population by race
list2 = [race['Hispanic'], race['White'], race['Black'], race['Asian'], race['Mixed'], race['Others']]
# raw_distribution declares a fourth `label` parameter; pass the county names,
# as the county-level density plot does.
raw_distribution(
    list2,
    'Georgia state 2020 county level population distribution by race',
    'html_files/county_raw_distribution.html',
    race['Area Name'],
)

In [None]:
# Load the precinct-level table (no index column is specified for this file).
precincts_csv = 'data/polling_site_data/clean_precincts_with_polling_site.csv'
precincts = pd.read_csv(precincts_csv)

In [None]:
# raw precincts
# Georgia state 2020 precinct level population distribution by race
# Output is written to html_files/raw_precincts.html
# list3 is a list for raw precinct level population data by race
list3 = [precincts['Hispanic'], precincts['White'], precincts['Black'], precincts['Asian'], precincts['Mixed'], precincts['Others']]
# raw_distribution declares a `label` parameter, so it is passed explicitly.
# TODO(review): no precinct-name column is visible in this notebook; replace
# None with that column if point labels are wanted.
raw_distribution(
    list3,
    'Georgia state 2020 precinct level population distribution by race',
    'html_files/raw_precincts.html',
    label=None,
)

In [None]:
# general precinct racial density dist
# Georgia State 2020 precinct level population density distribution by race
# Output is written to html_files/precinct_general_dist.html
# list4 is a list for precinct level population density by race
list4 = [precincts['Population Density: Hispanic'],precincts['Population Density: White'],precincts['Population Density: Black'],precincts['Population Density: Asian'], precincts['Population Density: Mixed'], precincts['Population Density: Others']]
# distribution declares a `label` parameter, so it is passed explicitly.
# TODO(review): no precinct-name column is visible in this notebook; replace
# None with that column if point labels are wanted.
distribution(
    list4,
    'Georgia State 2020 precinct level population density distribution by race',
    'html_files/precinct_general_dist.html',
    label=None,
)

In [None]:
# precincts with polling sites in 2020: rows whose 'Polling Site 2020' value equals 1
has_site_2020 = precincts['Polling Site 2020'] == 1
withpolling = precincts[has_site_2020]

In [None]:
# precincts with polling sites racial density distribution
# Georgia State 2020 population density distribution by race in precincts with polling sites
# list5 is a list population density by race in precincts with polling sites
list5 = [withpolling['Population Density: Hispanic'], withpolling['Population Density: White'], withpolling['Population Density: Black'], withpolling['Population Density: Asian'], withpolling['Population Density: Mixed'], withpolling['Population Density: Others']]
# distribution declares a `label` parameter, so it is passed explicitly.
# TODO(review): no precinct-name column is visible in this notebook; replace
# None with that column if point labels are wanted.
distribution(
    list5,
    'Georgia State 2020 population density distribution by race in precincts with polling sites',
    'html_files/precincts_density_with_polling.html',
    label=None,
)

In [None]:
# precincts without polling sites: rows whose 'Polling Site 2020' value equals 0
lacks_site_2020 = precincts['Polling Site 2020'] == 0
withoutpolling = precincts[lacks_site_2020]

In [None]:
# precincts without polling sites population density distribution
# Georgia State 2020 population density distribution by race in precincts without polling sites
# list6 is a list of population densities by race in precincts without polling sites
list6 = [withoutpolling['Population Density: Hispanic'], withoutpolling['Population Density: White'], withoutpolling['Population Density: Black'], withoutpolling['Population Density: Asian'], withoutpolling['Population Density: Mixed'], withoutpolling['Population Density: Others']]
# distribution declares a `label` parameter, so it is passed explicitly.
# TODO(review): no precinct-name column is visible in this notebook; replace
# None with that column if point labels are wanted.
distribution(
    list6,
    'Georgia State 2020 population density distribution by race in precincts without polling sites',
    'html_files/precincts_density_without_polling.html',
    label=None,
)

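The following code is based on the styled-table example at https://plotly.com/python/table/#styled-table. It builds a table of the average value per race for each of the datasets plotted above.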

In [None]:
import plotly.graph_objects as go

# list of all lists for averages by race
# The order here must match the order of the table header columns below
# (after the leading 'Race' column).
The_list = [list2, list1, list3, list4, list5, list6]

# One row of six per-race means for each dataset, rounded to 3 decimals.
# Series.mean(skipna=False) yields the same value as sum(s)/len(s) (NaN still
# propagates) but is vectorised and returns NaN for an empty selection instead
# of raising ZeroDivisionError.
averages = [
    [round(series.mean(skipna=False), 3) for series in dataset]
    for dataset in The_list
]
# The first table column holds the race names.
averages.insert(0, ['Hispanic','White','Black','Asian','Mixed','Other'])

# table
header_titles = [
    'Race',
    'Average county population ',
    'Average county level density',
    'Average precinct population',
    'Average precinct population density',
    'Average population density in precincts with polling sites',
    'Average population density in precincts without polling sites',
]
fig = go.Figure(data=[go.Table(
    header=dict(values=header_titles,
                line_color='darkslategray',
                fill_color='lightskyblue',
                align='left'),
    cells=dict(values=averages,
               line_color='darkslategray',
               fill_color='lightgrey',
               align='left'))
])

fig.update_layout(width=1000, height=600)
fig.write_html('html_files/averages_table.html')
fig.show()