In [228]:
import pandas as pd
import seaborn as sns
import openpyxl
import math
import re
import numpy as np

In [266]:
zipdata = pd.read_csv("ExternalDataByZipcode.csv")

# convert data into usuable types
zipdata['Median Household Income'] = zipdata['Median Household Income'].str.replace('\W', '', regex=True).astype(float)
zipdata['Population'] = zipdata['Population'].str.replace('\W', '', regex=True).astype(float)
zipdata['White Percentage'] = (zipdata['White Percentage'].str.replace('\W', '', regex=True).astype(float)) / 100

# print(zipdata.var())
# print(zipdata.dtypes)
zipdata.head()

focusdf = zipdata[['Zip Codesort column', 'Median Household Income', 'Median Age', 'Population', "White Percentage"]]
# normed_zipdata = (focusdf-focusdf.mean())/focusdf.std()
# normed_zipdata
normed_zipdata = (focusdf-focusdf.min())/(focusdf.max()-focusdf.min())
normed_zipdata

Unnamed: 0,Zip Codesort column,Median Household Income,Median Age,Population,White Percentage
0,0.000000,0.291734,0.636015,0.164170,0.586141
1,0.001678,0.285460,0.593870,0.116437,0.593670
2,0.003356,0.118556,0.586207,0.165117,0.313298
3,0.005034,0.812803,0.659004,0.309294,0.864599
4,0.006711,0.305435,0.793103,0.272116,0.882593
...,...,...,...,...,...
128,0.939597,0.167568,0.593870,0.120597,0.783946
129,0.954698,0.289195,0.624521,0.479409,0.889357
130,0.979866,0.410545,0.793103,0.315425,0.940531
131,0.981544,0.094597,0.402299,0.178032,0.736983


In [287]:

# for each zip code find standardized euclidean norm (2-norm) of varialbes
# vars = [income, age, population, nonwhite]

# initialize list of norms
norm = []

# select which zipcode to test
zipcode = 77034
zipindex = zipdata[zipdata['Zip Codesort column'] == zipcode].index.values
targeted = [zipindex[0]]

for precinct in targeted:
    # retrieves values of variables for zipcodes of targeted precincts
    tar_income = zipdata.iloc[precinct]['Median Household Income']
    tar_age = zipdata.iloc[precinct]['Median Age']
    tar_pop = zipdata.iloc[precinct]['Population']
    tar_div = 100 - zipdata.iloc[precinct]['White Percentage']

    for i in range(zipdata.shape[0]):
        # takes out testing precinct with itself
        if precinct != i:
            # retrieves values of variables for other zipcodes
            income = zipdata.iloc[i]['Median Household Income']
            age = zipdata.iloc[i]['Median Age']
            pop = zipdata.iloc[i]['Population']
            div = 100 - zipdata.iloc[i]['White Percentage']
            # standardizes the difference between the targeted and other zipcodes
            # i.e. income and age are varied across vastly different scales
            incomediff = (tar_income - income) / (1.067543e+09)
            agediff = (tar_age - age) / (1.693905e+01)
            popdiff = (tar_pop - pop) / (1.558452e+08)
            divdiff = (tar_div - div) / (3.090548e+02)
            # sums the standardized differences
            sum = ((incomediff)**2) + ((agediff)**2) + ((popdiff)**2) + ((divdiff)**2)
        # takes the square root of the sum according to formula
        norm.append(math.sqrt(sum))

# print(norm)
print('standardized euclidean norm:', min(norm))
indexnorm = np.argmin(norm)
print('targeted zipcode:', zipdata.loc[[targeted[0]]])
print('closest other zipcode:', zipdata.loc[[indexnorm]])
closezip = zipdata.loc[0].at['Zip Codesort column']
print(closezip)

normed_zipdata
x = normed_zipdata.loc[zipindex[0]].at['Population']
print(x)

standardized euclidean norm: 0.008412762278455001
targeted zipcode:     Zip Codesort column  Median Household Income  Median Age  Population  \
32                77034                  59587.0        31.1     21818.0   

    White Percentage Black/AfricanAmerican Percentage  \
32             57.95                           10.27%   

   Pacific Islander Percentage Asian Percentage Other Race Population  \
32                       0.08%            3.91%                22.75%   

   2+ Races Percentage     sum  
32               4.15%  99.11%  
closest other zipcode:      Zip Codesort column  Median Household Income  Median Age  Population  \
124                77530                  68075.0        31.1     19697.0   

     White Percentage Black/AfricanAmerican Percentage  \
124             55.35                           11.45%   

    Pacific Islander Percentage Asian Percentage Other Race Population  \
124                       0.05%            1.12%                26.89%   

    2+ 

In [289]:
import plotly.graph_objects as go

categories = ['median household income','median age','total population',
              'underrepresented voter percentage']

print(zipindex)
og_income = normed_zipdata.loc[zipindex[0]].at['Median Household Income']
og_age = normed_zipdata.loc[zipindex[0]].at['Median Age']
og_pop = normed_zipdata.loc[zipindex[0]].at['Population']
og_div = 1 - normed_zipdata.loc[zipindex[0]].at['White Percentage']

fig = go.Figure()
fig.add_trace(go.Scatterpolar(
      r=[og_income, og_age, og_pop, og_div],
      theta=categories,
      fill='toself',
      name='77034'
))

sim_income = normed_zipdata.loc[indexnorm].at['Median Household Income']
sim_age = normed_zipdata.loc[indexnorm].at['Median Age']
sim_pop = normed_zipdata.loc[indexnorm].at['Population']
sim_div = 1 - normed_zipdata.loc[indexnorm].at['White Percentage']

fig.add_trace(go.Scatterpolar(
      r=[sim_income, sim_age, sim_pop, sim_div],
      theta=categories,
      fill='toself',
      name='77530'
))

fig.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=True,
      #range=[0, 1]
    )),
  showlegend=True
)



[32]


ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed

In [179]:
import plotly.graph_objects as go

categories = ['median household income','median age','total population',
              'underrepresented voter percentage']

# fig = go.Figure()

# zipdata.loc[zipdata.n == "closezip", ['a','b']].values.flatten().tolist()
# r = zipdata.loc[0, 2:6].values.tolist()

# r = zipdata.values.tolist()[0]
# [1, 6]

#r = zipdata.loc[indexnorm, 2:6].values.flatten().tolist()

#print(r)

# scat_income = zipdata.iloc[indexnorm]['Median Household Income'] / (1.067543e+09)
# scat_age = zipdata.iloc[indexnorm]['Median Age'] / (1.693905e+01)
# scat_pop = zipdata.iloc[indexnorm]['Population'] / (1.558452e+08)
# scat_div = 100 - zipdata.iloc[indexnorm]['White Percentage'] / (3.090548e+02)


# fig.add_trace(go.Scatterpolar(
#       r=[scat_income, scat_age, scat_pop, scat_div],
#       theta=categories,
#       fill='toself',
#       name='77026'
# ))

Unnamed: 0,Zip Codesort column,Median Household Income,Median Age,Population,White Percentage
0,0.000000,0.291734,0.636015,0.164170,0.586141
1,0.001678,0.285460,0.593870,0.116437,0.593670
2,0.003356,0.118556,0.586207,0.165117,0.313298
3,0.005034,0.812803,0.659004,0.309294,0.864599
4,0.006711,0.305435,0.793103,0.272116,0.882593
...,...,...,...,...,...
128,0.939597,0.167568,0.593870,0.120597,0.783946
129,0.954698,0.289195,0.624521,0.479409,0.889357
130,0.979866,0.410545,0.793103,0.315425,0.940531
131,0.981544,0.094597,0.402299,0.178032,0.736983
