# Short Bursts Distributions

We look at short-burst runs on the Pennsylvania (PA) and Arkansas (AR) state senate plans.

In [49]:
import matplotlib.pyplot as plt
from gerrychain import (GeographicPartition, Partition, Graph, MarkovChain,
                        proposals, updaters, constraints, accept, Election)
from gerrychain.proposals import recom, propose_random_flip
from gerrychain.tree import recursive_tree_part
from gerrychain.metrics import mean_median, efficiency_gap, polsby_popper, partisan_gini
from functools import (partial, reduce)
import pandas
import geopandas as gp
import numpy as np
import networkx as nx
import pickle
import seaborn as sns
import pprint
import operator
import scipy
from sklearn.decomposition import PCA
from sklearn.preprocessing import scale, normalize
import random
from nltk.util import bigrams
from nltk.probability import FreqDist
from gingleator import Gingleator

In [2]:
## Read one shapefile two ways: as a gerrychain Graph and as a geopandas
## dataframe. Both are handed back together as a (graph, dataframe) tuple.
def build_graph(filename):
    """Return ``(graph, dataframe)`` loaded from the shapefile *filename*.

    The file is read twice, first by ``Graph.from_file`` and then by
    ``gp.read_file``; a progress line is printed before loading starts.
    """
    print("Pulling in Graph from Shapefile: " + filename)
    dual_graph = Graph.from_file(filename)
    frame = gp.read_file(filename)
    return dual_graph, frame

In [10]:
## Set up PA enacted
## NOTE: unpickling can execute arbitrary code, so only load pickle files that
## come from a trusted source (here: files presumably produced locally).
## The `with` blocks make sure both file handles are closed after loading.
with open("PA_graph.p", "rb") as graph_file:
    graph_PA = pickle.load(graph_file)
with open("PA_df.p", "rb") as df_file:
    df_PA = pickle.load(df_file)

## Per-district tallies. Note that "bvap" sums the BLACK_POP column and that
## "bvap_prec" divides it by the "population" tally (TOT_POP), not by "vap".
PA_updaters = {
    "population": updaters.Tally("TOT_POP", alias="population"),
    "bvap": updaters.Tally("BLACK_POP", alias="bvap"),
    "vap": updaters.Tally("VAP", alias="vap"),
    "bvap_prec": lambda part: {k: part["bvap"][k] / part["population"][k]
                               for k in part["bvap"]},
}

## Partition defined by the "SSD" assignment column of the graph.
PA_enacted_senate = GeographicPartition(graph_PA, assignment="SSD",
                                        updaters=PA_updaters)

In [4]:
## Statewide TOT_POP total, and the equal-population target when that total
## is split across 50 districts.
total_population_PA = sum(df_PA["TOT_POP"].values)
ideal_population_PA = total_population_PA / 50

In [5]:
## Generate a 50-part seed assignment for PA with recursive_tree_part, aiming
## at the ideal district population with epsilon=0.01, then wrap it in a
## GeographicPartition that uses the PA updaters.
## NOTE(review): `PA_seed_seante` looks like a typo for "senate"; the name is
## kept as-is because later cells refer to it.
seed_part_senate = recursive_tree_part(
    graph_PA,
    range(50),
    pop_target=ideal_population_PA,
    pop_col="TOT_POP",
    node_repeats=1,
    epsilon=0.01,
)
PA_seed_seante = GeographicPartition(
    graph_PA,
    updaters=PA_updaters,
    assignment=seed_part_senate,
)

In [6]:
## Set up AR
graph_AR, df_AR = build_graph("AR_shape/AR.shp")

AR_updaters = {"population": updaters.Tally("TOTPOP", alias="population"),
               "bvap": updaters.Tally("BVAP", alias="bvap"),
               "vap": updaters.Tally("VAP", alias="vap"),
               "bvap_prec": lambda part: {k: part["bvap"][k] / part["vap"][k]
                                          for k in part["bvap"]}}

AR_enacted_senate = GeographicPartition(graph_AR, assignment="SSD", updaters=AR_updaters)

Pulling in Graph from Shapefile: AR_shape/AR.shp


In [7]:
## Statewide TOTPOP total, and the equal-population target when that total
## is split across 35 districts.
total_population_AR = sum(df_AR["TOTPOP"].values)
ideal_population_AR = total_population_AR / 35

In [8]:
## Generate a 35-part seed assignment for AR with recursive_tree_part, aiming
## at the ideal district population with epsilon=0.01, then wrap it in a
## GeographicPartition that uses the AR updaters.
senate_seed = recursive_tree_part(
    graph_AR,
    range(35),
    pop_target=ideal_population_AR,
    pop_col="TOTPOP",
    node_repeats=1,
    epsilon=0.01,
)
AR_seed_senate = GeographicPartition(
    graph_AR,
    updaters=AR_updaters,
    assignment=senate_seed,
)

In [14]:
## Opportunity-district count for the enacted PA senate plan, using the
## "bvap_prec" updater and a threshold of 0.4 (presumably districts whose
## share meets the threshold -- confirm the comparison in Gingleator).
Gingleator.num_opportunity_dists(PA_enacted_senate, "bvap_prec", 0.4)

4

In [15]:
## Same count for the randomly generated PA seed plan (threshold 0.4).
Gingleator.num_opportunity_dists(PA_seed_seante, "bvap_prec", 0.4)

3

In [16]:
## Same count for the randomly generated AR seed plan (threshold 0.4).
Gingleator.num_opportunity_dists(AR_seed_senate, "bvap_prec", 0.4)

3

## Reprojections onto the line

In [33]:
def transition_frequencies(observations):
    """Estimate a transition matrix from observed sequences of states.

    Each row of *observations* is one sequence of integer-valued states;
    every pair of consecutive entries within a row counts as one observed
    transition. Transitions are never counted across rows.

    Parameters
    ----------
    observations : 2-D array-like
        States, one sequence per row. Values are cast to ``int`` and must
        all be >= 1, because state ``s`` is stored at index ``s - 1``.

    Returns
    -------
    numpy.ndarray
        A ``(dim, dim)`` float array with ``dim = observations.max()``.
        Entry ``[i - 1, j - 1]`` is the fraction of transitions out of
        state ``i`` that went to state ``j``. Rows for states that were
        never left are all zeros.

    Raises
    ------
    ValueError
        If *observations* is empty, is not 2-D, or contains a state < 1.
    """
    observations = np.asarray(observations).astype(int)
    if observations.ndim != 2:
        raise ValueError("observations must be 2-D (one sequence per row)")
    if observations.size == 0:
        raise ValueError("observations must not be empty")
    # A state below 1 would map to a negative index and silently be counted
    # as a different (higher) state, so reject it instead.
    if observations.min() < 1:
        raise ValueError("observations must contain only states >= 1")

    dim = observations.max()
    counts = np.zeros((dim, dim))

    # Consecutive pairs within each row, shifted to 0-based indices.
    sources = observations[:, :-1].ravel() - 1
    targets = observations[:, 1:].ravel() - 1
    # np.add.at accumulates correctly when the same (source, target) repeats.
    np.add.at(counts, (sources, targets), 1)

    # L1-normalise each row; rows with no outgoing transitions stay zero.
    row_totals = counts.sum(axis=1, keepdims=True)
    row_totals[row_totals == 0] = 1
    return counts / row_totals

In [34]:
def rand_walk_graph(transition_frequencies):
    """Build a directed graph from a transition matrix, labelled from 1.

    The matrix is passed to ``nx.from_numpy_array`` with
    ``create_using=nx.DiGraph``, and every resulting node ``n`` is then
    relabelled to ``n + 1``. The relabelled graph is returned.
    """
    zero_based = nx.from_numpy_array(transition_frequencies,
                                     create_using=nx.DiGraph)
    shifted_labels = {node: node + 1 for node in zero_based.nodes}
    return nx.relabel_nodes(zero_based, shifted_labels)

In [35]:
def edge_weights(G, prec=4):
    """Map each edge ``(u, v)`` of *G* to its ``'weight'`` attribute.

    Weights are rounded to *prec* decimal places (4 by default). Edges are
    read from ``G.edges(data=True)``.
    """
    return {
        (source, target): round(attrs['weight'], prec)
        for source, target, attrs in G.edges(data=True)
    }

In [54]:
## Gingleator for PA, started from the random seed plan. "TOT_POP" is the
## population column and "bvap_prec" the minority-share updater.
## NOTE(review): epsilon=0.1 is presumably the population tolerance and is
## looser than the 0.01 used to build the seed plan -- confirm intended.
PA_gingles = Gingleator(PA_seed_seante, pop_col="TOT_POP", minority_prec_col="bvap_prec",
                        epsilon=0.1)

In [55]:
## Gingleator for AR, started from the random seed plan. "TOTPOP" is the
## population column and "bvap_prec" the minority-share updater.
## NOTE(review): epsilon=0.1 is presumably the population tolerance and is
## looser than the 0.01 used to build the seed plan -- confirm intended.
AR_gingles = Gingleator(AR_seed_senate, pop_col="TOTPOP", minority_prec_col="bvap_prec",
                        epsilon=0.1)

#### PA random walk graph

In [None]:
## Short-burst run on PA: 200 bursts of 25 steps each. Only the second
## return value is kept (the observed values, presumably one row per burst).
_, PA_observations = PA_gingles.short_burst_run(num_bursts=200, num_steps=25)

In [46]:
## Row-normalised transition matrix estimated from the PA observations.
PA_trans = transition_frequencies(PA_observations)

In [47]:
## Directed graph of the PA transitions, with nodes labelled from 1.
PA_rand_walk = rand_walk_graph(PA_trans)

In [48]:
## Show the PA transition weights per edge, rounded to the default 4 places.
edge_weights(PA_rand_walk)

{(3, 3): 0.9945,
 (3, 4): 0.0055,
 (4, 3): 0.0299,
 (4, 4): 0.9669,
 (4, 5): 0.0032,
 (5, 4): 0.0641,
 (5, 5): 0.9359}

#### AR random walk graph

In [41]:
## Short-burst run on AR: 500 bursts of 10 steps each. Only the second
## return value is kept (the observed values, presumably one row per burst).
_, AR_observations = AR_gingles.short_burst_run(num_bursts=500, num_steps=10)

In [43]:
## Row-normalised transition matrix estimated from the AR observations.
AR_trans = transition_frequencies(AR_observations)

In [44]:
## Directed graph of the AR transitions, with nodes labelled from 1.
AR_rand_walk = rand_walk_graph(AR_trans)

In [45]:
## Show the AR transition weights per edge, rounded to the default 4 places.
edge_weights(AR_rand_walk)

{(2, 2): 0.9375,
 (2, 3): 0.0625,
 (3, 2): 0.0095,
 (3, 3): 0.9701,
 (3, 4): 0.0204,
 (4, 3): 0.0475,
 (4, 4): 0.9525}

## Distribution of Observations

In [52]:
## A single burst of 100 steps on PA. With only one burst there is no
## restart between bursts (presumably why this is called "unbiased").
_, PA_unbiased_run = PA_gingles.short_burst_run(num_bursts=1, num_steps=100)

In [53]:
## Display the observed values from the single 100-step burst.
PA_unbiased_run

array([[3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3.,
        3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 2.,
        2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
        2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
        2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 3.,
        3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3.,
        3., 3., 3., 3.]])