In [1]:
# Imports
import os
from gerrychain import Graph, GeographicPartition, Partition, Election, accept
from gerrychain.updaters import Tally, cut_edges
import geopandas as gpd
import numpy as np
from gerrychain.random import random
import copy
import seaborn as sns

from gerrychain import MarkovChain
from gerrychain.constraints import single_flip_contiguous
from gerrychain.proposals import recom, propose_random_flip
from gerrychain.accept import always_accept
from gerrychain.metrics import polsby_popper
from gerrychain import constraints
from gerrychain.constraints import no_vanishing_districts

from collections import defaultdict, Counter
import matplotlib.pyplot as plt
import networkx as nx
import pandas
import math
from itertools import combinations_with_replacement
from functools import partial

In [None]:
shapefile = "https://github.com/mggg-states/PA-shapefiles/raw/master/PA/PA_VTD.zip"

df = gpd.read_file(shapefile)

#For Pennsylvania
county_col = "COUNTYFP10"
pop_col = "TOT_POP"
uid = "GEOID10"

In [None]:
graph = Graph.from_geodataframe(df,ignore_errors=True)
graph.add_data(df,list(df))
print(list(df))
graph = nx.relabel_nodes(graph, df[uid])

In [116]:
graph['42073730']

AtlasView({'42073010': {'shared_perim': 0.11503376726602686}, '42073700': {'shared_perim': 0.02011673086762376}, '42073710': {'shared_perim': 0.05962268211829863}, '42073150': {'shared_perim': 0.10674457811611812}})

In [47]:
def locality_splits_dict(partition, locality_col, df):
    """
    From a partition, generates a dictionary of counter dictionaries.

    Args: 
        partition: the partition for which a dictionary is being generated.

    Returns: 
        A dictionary with keys as dictrict numbers and values as Counter() dictionaries.
        These counter dictionaries have pairs County_ID: NUM which counts the number of
        VTDs in the county in the district. 
    """
    localitydict = dict(graph.nodes(data=locality_col))
    localities = (set(list(df[locality_col])))

    locality_splits = {k:[] for k in localities}
    locality_splits = {  k:[localitydict[v] for v in d] for k,d in partition.assignment.parts.items()   }
    locality_splits = {k: Counter(v) for k,v in locality_splits.items()}
    
    return locality_splits, set(localities)

In [48]:
def num_splits(partition, locality_col, df):
    """
    Calculates the number of counties in 2 or more districts.

    :param partition: the partition to be scored

    return the number of splittings in the partition
    """
    locality_splits, localities = locality_splits_dict(partition, locality_col)

    counter = 0
    for district in locality_splits.keys():
        counter += len(locality_splits[district])
    return counter

In [77]:
def shannon_entropy(partition, locality_col, district, df):
    locality_splits, localities = locality_splits_dict(partition, locality_col, df)
    vtds = vtds_per_district(locality_splits)
    print(locality_splits)
    num_districts = len(locality_splits.keys())
    print(num_districts)
    total_vtds = 0
    for k,v in locality_splits.items():
        for x in list(v.values()):
            total_vtds += x
    
    some_vtds = locality_splits[district][locality_col]

    entropy = 0
    for locality_j in localities:
        inner_sum = 0
        q = some_vtds / total_vtds
        for district_i in range(num_districts):
            print(district_i)
            counter = locality_splits[district_i+1]
            
            print(counter)
            print(counter.keys())
            
            if locality_j in list(counter.keys()):
                intersection = counter[str(locality_j)]
                print(intersection)
                p = intersection / vtds[str(locality_j)]

                if p != 0:
                    inner_sum += p * math.log(1/p)
            else: 
                continue
        entropy += q * (inner_sum)
        #print(1/q * (inner_sum-1))
    return entropy

In [50]:
def power_entropy(partition, locality_col, df, alpha):

    locality_splits, localities = locality_splits_dict(partition, locality_col)
    vtds = vtds_per_district(locality_splits)

    total_vtds = 0
    for k,v in locality_splits.items():
        for x in list(v.values()):
            total_vtds += x
    
    entropy = 0
    for locality in localities:
        inner_sum = 0
        
        q = some_vtds / total_vtds
        for district_i in range(num_districts):
            
            
            counter = locality_splits[district_i+1]
            intersection = counter[str(locality_j)]
            p = intersection / vtds[str(locality_j)]

            inner_sum += p ** (1-alpha)
        entropy += 1/q * (inner_sum-1)
        #print(1/q * (inner_sum-1))
    return entropy

In [31]:

starting_partition = GeographicPartition(
    graph,
    assignment="2011_PLA_1",
    updaters={
        "polsby_popper" : polsby_popper,
        "cut_edges": cut_edges,
        "population": Tally(pop_col, alias="population"),

    }
)

In [32]:
num_splits(starting_partition, county_col)

108

In [78]:
shannon_entropy(starting_partition, county_col, 3, df)


{3: Counter({'085': 97, '049': 97, '019': 81, '005': 66, '073': 59, '039': 53, '031': 16}), 10: Counter({'081': 79, '015': 53, '069': 43, '115': 37, '127': 36, '117': 32, '119': 27, '089': 24, '099': 22, '109': 22, '087': 21, '097': 19, '103': 18, '067': 16, '113': 12, '061': 1}), 9: Counter({'051': 94, '013': 91, '055': 73, '125': 61, '063': 55, '021': 32, '009': 31, '059': 21, '129': 15, '111': 14, '061': 14, '057': 13, '005': 1}), 5: Counter({'027': 85, '033': 65, '049': 49, '065': 49, '121': 46, '083': 34, '105': 32, '035': 31, '061': 29, '123': 28, '047': 23, '031': 20, '023': 9, '053': 8, '117': 4, '081': 1}), 15: Counter({'077': 149, '095': 76, '043': 38, '075': 29, '011': 25}), 6: Counter({'029': 149, '011': 75, '091': 60, '075': 26}), 11: Counter({'079': 127, '043': 89, '097': 73, '041': 49, '037': 36, '131': 28, '025': 17, '093': 14, '099': 7}), 8: Counter({'017': 299, '091': 38}), 4: Counter({'133': 142, '041': 57, '001': 45, '043': 27}), 18: Counter({'003': 250, '129': 190,

KeyError: '125'

In [53]:
locality_splits, localities = locality_splits_dict(starting_partition, county_col, df)
print(localities)
print(locality_splits.keys())

{'125', '059', '093', '015', '003', '091', '109', '029', '063', '017', '049', '067', '117', '055', '047', '065', '051', '111', '083', '119', '035', '007', '081', '133', '075', '077', '013', '037', '021', '031', '105', '025', '069', '089', '071', '121', '011', '097', '079', '099', '019', '045', '103', '041', '123', '095', '023', '039', '113', '061', '115', '033', '053', '057', '087', '043', '027', '131', '085', '073', '127', '101', '129', '001', '009', '005', '107'}
dict_keys([3, 10, 9, 5, 15, 6, 11, 8, 4, 18, 12, 17, 7, 16, 14, 13, 2, 1])


In [79]:
for district, locality_dict in locality_splits.items():
    inner_sum = 0
    for locality in locality_dict.keys():
        
        
def power_entropy(partition, locality_col, district, alpha):

    locality_splits, localities = locality_splits_dict(partition, locality_col)
    vtds = vtds_per_district(locality_splits)
    
    for district, locality_dict in locality_splits.items():
        inner_sum = 0
        for locality in locality_dict.keys():

    return entropy

IndentationError: expected an indented block (<ipython-input-79-8dd75244c0cc>, line 6)

In [97]:
def district_splits_dict(locality_splits, localities):
    '''
    Args:
        county_splits: a dictionary with keys as district numbers and values Counter() dictionaries
                        these counter dictionaries have pairs COUNTY_ID : NUM which counts the number of VTDS
                        in the county in the district 
        
    Returns:
       district_splits: a dictionary that has as keys the county id and returns as values the
    districts in that county. 
    '''
    district_splits = {k:[] for k in localities}
    
    for locality in localities:
        districts = {}
        for district in locality_splits.keys():
            if locality in locality_splits[district].keys():
                district_splits[locality].append(district)
    return district_splits   

def pieces_allowed(localities, graph, locality_col, pop_col, to_add=1):
    district_splits ={}
    
    totpop = 0
    for node in graph.nodes:
        print(graph.node[node][pop_col])
    
    for locality in localities:
        sg=graph.subgraph(n for n, v in graph.nodes(data=True) if v[locality_col]==locality)
        pop = 0
        
        for n in sg.nodes():
            pop += sg.node[n][pop_col]
        
        district_splits[locality] = math.ceil(pop/(totpop/num_districts)) + to_add
    return district_splits

def pennsylvania_fouls(partition, graph, locality_col, pop_col, to_add=1):
    locality_splits, localities = locality_splits_dict(partition, locality_col, df)
    district_splits = district_splits_dict(locality_splits, localities)
    pieces = pieces_allowed(localities, graph, locality_col, pop_col, to_add)
    #vtds = vtds_per_district(locality_splits)

    too_many = 0
    for locality in localities:
        if len(district_splits[locality]) > pieces[locality]:
            too_many += 1
    
    return too_many

In [98]:
pennsylvania_fouls(starting_partition, graph, pop_col, county_col)

039
039
039
039
039
039
039
109
109
109
013
013
013
013
063
063
063
063
063
063
063
063
005
063
073
047
047
009
009
009
009
009
009
009
087
061
009
009
011
011
011
011
011
011
081
081
081
041
017
017
017
117
117
117
117
117
117
117
133
133
133
133
133
133
133
133
133
133
133
079
097
097
129
129
129
129
129
083
083
121
083
121
113
113
123
123
113
123
083
067
067
067
115
115
099
099
115
115
099
031
031
031
031
031
127
105
051
051
051
051
051
033
033
033
033
033
107
107
107
107
107
107
107
035
081
059
043
043
043
043
043
021
021
021
095
021
021
021
131
065
065
077
077
111
111
111
111
111
111
111
111
111
111
045
085
085
085
085
085
027
027
027
027
053
019
019
019
019
019
019
019
019
049
049
049
049
069
049
049
055
055
037
037
037
097
037
015
015
015
015
015
015
015
007
007
007
061
061
061
125
061
061
125
125
061
061
061
005
023
005
005
005
093
005
029
071
071
071
071
071
071
071
071
003
001
001
001
001
085
039
039
039
039
039
039
039
039
039
039
039
039
039
039
039
039
039
039
039
039
039


021
021
021
021
021
021
021
021
021
095
095
095
095
095
021
021
021
021
021
021
021
095
095
095
095
095
021
021
021
021
021
021
021
095
095
095
095
095
021
021
021
021
021
021
021
095
021
021
021
021
021
095
095
021
021
021
021
021
021
095
095
095
095
095
095
021
021
021
021
021
021
021
095
095
095
095
095
095
021
021
021
021
021
021
021
021
021
021
021
095
095
095
095
095
021
021
021
021
021
021
021
021
021
021
095
095
095
095
095
095
095
095
095
095
095
095
095
095
095
095
095
095
095
095
095
095
095
095
095
095
095
095
095
095
095
095
095
095
095
095
095
095
095
095
095
095
095
095
095
095
095
095
095
131
095
095
095
095
095
131
131
131
095
095
095
131
131
131
131
131
095
095
095
131
131
131
131
131
095
095
095
131
131
131
131
131
095
095
095
095
095
131
095
095
095
131
131
095
095
095
131
131
131
095
095
095
095
131
131
095
095
095
095
095
095
065
065
065
065
065
065
065
065
065
065
065
065
065
065
065
065
065
065
065
065
065
065
065
065
065
065
065
065
065
065
065
065
065
065
065


061
125
125
125
125
125
125
125
125
125
125
125
125
061
061
061
125
125
125
125
125
125
125
125
061
061
061
125
125
125
125
125
125
125
125
125
125
061
061
061
061
061
125
125
125
125
125
125
125
125
125
125
125
125
061
061
061
125
125
125
125
125
125
125
125
125
125
125
125
125
061
125
125
125
125
125
125
125
125
125
125
061
125
125
125
125
125
125
125
125
125
125
125
125
061
061
125
125
125
125
125
125
125
125
125
125
125
125
061
061
125
125
125
125
125
125
125
125
061
061
061
061
061
061
061
125
125
125
125
125
125
125
125
125
125
125
125
061
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
125
061
061
125
125
125
125
125
125
125
125
125
125
061
005
005
005
023
005
005
005
005
005
005
005
005
005
023
023
005
005
005
005
005
005
005
005
005
023
023
023
023
023
005
005
005
005
005
005
005
005
005
005
005
005
093
093
093
093
093
093
093
093
093
005
005
005
005
093
093
093
005
005
005
005
005


TypeError: unsupported operand type(s) for +=: 'int' and 'str'

In [110]:
df.columns

Index(['STATEFP10', 'COUNTYFP10', 'VTDST10', 'GEOID10', 'VTDI10', 'NAME10',
       'NAMELSAD10', 'LSAD10', 'MTFCC10', 'FUNCSTAT10', 'ALAND10', 'AWATER10',
       'INTPTLAT10', 'INTPTLON10', 'ATG12D', 'ATG12R', 'GOV10D', 'GOV10R',
       'PRES12D', 'PRES12O', 'PRES12R', 'SEN10D', 'SEN10R', 'T16ATGD',
       'T16ATGR', 'T16PRESD', 'T16PRESOTH', 'T16PRESR', 'T16SEND', 'T16SENR',
       'USS12D', 'USS12R', 'GOV', 'TS', 'HISP_POP', 'TOT_POP', 'WHITE_POP',
       'BLACK_POP', 'NATIVE_POP', 'ASIAN_POP', 'F2014GOVD', 'F2014GOVR',
       '2011_PLA_1', 'REMEDIAL_P', '538CPCT__1', '538DEM_PL', '538GOP_PL',
       '8THGRADE_1', 'geometry'],
      dtype='object')