In [1]:
import numpy as np
from matplotlib import pyplot as plt
from dataclasses import dataclass, asdict, field
from itertools import count
import random, itertools, statistics

In [13]:
class Board:
    PERCENTAGE_ATTRIBUTES = ['white', 'latino', 'black', 'white_salary', 'latino_salary', 'black_salary', 'children', 'pets']
    POPULATION = ['population']
    
    def __init__(self, board_size) -> None:
        self.board = self.generate_board(board_size)
        self.groups = {}

    @staticmethod
    def generate_board(board_size):
        return np.matrix([[District() for _ in range(board_size)] for _ in range (board_size)])
    
    def get_int_board(self):
        int_board = np.zeros((self.board.shape[0], self.board.shape[1]))
        for i, j in itertools.product(range(self.board.shape[0]), range(self.board.shape[1])):
            int_board[i, j] = repr(self.board[i, j])
        return int_board
        
    def count_unique(self):
        unique = set()
        for i, j in itertools.product(range(self.board.shape[0]), range(self.board.shape[1])):
            unique.add(self.board[i,j].identifier)
        return len(unique)

    def merge_units(self, u_1xy, u_2xy):
        unit_1_x, unit_1_y = u_1xy
        unit_1 = self.board[unit_1_x, unit_1_y]
        unit_2_x, unit_2_y = u_2xy
        unit_2 = self.board[unit_2_x, unit_2_y]
        
        if unit_1.identifier not in self.groups.keys() and unit_2.identifier not in self.groups.keys():
            group = Group()
            group.add_unit(unit_1)
            group.add_unit(unit_2)
            self.groups[group.identifier] = group
            self.board[unit_1_x, unit_1_y] = group
            self.board[unit_2_x, unit_2_y] = group
        
        if unit_1.identifier in self.groups.keys() and unit_2.identifier not in self.groups.keys():
            self.groups[unit_1.identifier].add_unit(unit_2)
            self.board[unit_2_x, unit_2_y] = self.groups[unit_1.identifier]

        if unit_1.identifier not in self.groups.keys() and unit_2.identifier in self.groups.keys():
            self.groups[unit_2.identifier].add_unit(unit_1)
            self.board[unit_1_x, unit_1_y] = self.groups[unit_2.identifier]
            
            
        if unit_1.identifier in self.groups.keys() and unit_2.identifier in self.groups.keys():
            self.groups[unit_1.identifier].add_unit(unit_2)
            for i, j in itertools.product(range(self.board.shape[0]), range(self.board.shape[1])):
                if self.board[i, j] == unit_2 or self.board[i, j] == unit_1: self.board[i, j] = self.groups[unit_1.identifier]
            del self.groups[unit_2.identifier]
            
    def clusters_raport(self):
        used_ids = []
        
        print("Clusters: ")
        for i, j in itertools.product(range(self.board.shape[0]), range(self.board.shape[1])):
            if self.board[i, j].__class__.__name__ == 'Group' and self.board[i, j].identifier not in used_ids:
                used_ids.append(self.board[i, j].identifier)
                print(f"Amount of units: {len(self.board[i, j].units)}, average cluster information: {str(self.board[i, j])}")
                print(f"Standard deviation: {self.board[i, j].group_standard_deviation()}")
                for unit in self.board[i, j].units:
                    print(f"District data: {str(unit)}")
                print("\n")
                
        print("Single districts: ")
        for i, j in itertools.product(range(self.board.shape[0]), range(self.board.shape[1])):
            if self.board[i, j].__class__.__name__ == 'District' and self.board[i, j].identifier not in used_ids:
                used_ids.append(self.board[i, j].identifier)
                print(f"District data: {str(self.board[i, j])}")
        
    def show_board(self) -> None:
        plt.figure()
        plt.imshow(self.get_int_board(), cmap='gray')
        plt.show()

    @staticmethod
    def bergman_divergence(p, q):
        pd = asdict(p)
        qd = asdict(q)
        result = 0
        for characteristic in Board.PERCENTAGE_ATTRIBUTES:
            result += abs(pd[characteristic] - qd[characteristic])
        return result

    @staticmethod
    def kullback_leibler_divergence(p, q):
        pd = asdict(p)
        qd = asdict(q)
        result = 0
        for characteristic in Board.PERCENTAGE_ATTRIBUTES:
            try:
                result += pd[characteristic] * np.log2(pd[characteristic]/qd[characteristic])
            except ZeroDivisionError:
                ...
        return result
    
    def agglompartition(self, k, divergence_function):
        def neighbours(x, y):
            _min_divergence = np.Inf
            min_x, min_y = 0, 0
            
            for i, j in itertools.product([-1, 0, 1], [-1, 0, 1]):
                if i == 0 and j == 0: continue
                if not (0 <= x+i < self.board.shape[0] and 0 <= y+j < self.board.shape[1]): continue
                if self.board[x, y].identifier == self.board[x+i, y+j].identifier: continue
                divergence = divergence_function(self.board[x, y], self.board[x+i, y+j])
                
                if divergence < _min_divergence:
                    _min_divergence = divergence
                    min_x, min_y = x+i, y+j
                    
            return _min_divergence, min_x, min_y
        
        R_size = self.board.shape[0]
        unique = self.count_unique()
        
        while unique > k:
            min_divergence = np.Inf
            min_x, min_y = np.Inf, np.Inf
            min_x1, min_y1 = np.Inf, np.Inf
            
            for x, y in itertools.product(range(R_size), range(R_size)):
                divergence_min_temp, min_x_temp, min_y_temp = neighbours(x, y)
                if divergence_min_temp < min_divergence:
                    min_divergence = divergence_min_temp
                    min_x, min_y = min_x_temp, min_y_temp
                    min_x1, min_y1 = x, y
                    
            # print(f"""to merge: 
            #       divergence: {min_divergence}
            #       {min_x, min_y}: id {b.board[min_x, min_y].identifier, b.board[min_x, min_y].latino, b.board[min_x, min_y].black, b.board[min_x, min_y].white}
            #       {min_x1, min_y1}: id {b.board[min_x1, min_y1].identifier, b.board[min_x1, min_y1].latino, b.board[min_x1, min_y1].black, b.board[min_x1, min_y1].white}
            #       """)
            # print(b.board, "\n\n")
            
            self.merge_units((min_x, min_y), (min_x1, min_y1))
            unique = self.count_unique()
            
        print(self.clusters_raport())
        print(self.board)
            
        
@dataclass(frozen=False)
class District:
    identifier: int = field(default_factory=count().__next__, init=False, repr=True)
    __min_salary: int = 3010
    __max_salary: int = 6156
    latino: int = field(init=False)
    black: int = field(init=False)
    white: int = field(init=False)
    latino_salary: int = field(init=False)
    black_salary: int = field(init=False)
    white_salary: int = field(init=False)
    children: int = field(init=False)
    pets: int = field(init=False)
    population: int = field(init=False)
    
    def __post_init__(self):
        self.latino = random.randint(0,100)
        self.white = random.randint(0,100-self.latino)
        self.black = 100-self.latino-self.white
        self.latino_salary = int(random.randint(self.__min_salary, self.__max_salary) / self.__max_salary * 100)
        self.black_salary = int(random.randint(self.__min_salary, self.__max_salary) / self.__max_salary * 100)
        self.white_salary = int(random.randint(self.__min_salary, self.__max_salary) / self.__max_salary * 100)
        self.children = random.randint(0, 100)
        self.pets = random.randint(0, 100)
        self.population = random.randint(1000, 10000)
        
    def __repr__(self) -> str:
        return str(self.identifier)
    
    def __str__(self) -> str:
        return f"{self.__class__.__name__} id: {self.identifier}, black: {self.black}%, white: {self.white}%, latino: {self.latino}%, latino salary: {self.latino_salary}%, black salary: {self.black_salary}%, white salary: {self.white_salary}%, pets: {self.pets}%, children: {self.children}% population: {self.population}"
       
@dataclass(frozen=False)
class Group(District):
    def __post_init__(self):
        self.units = []
        self.latino: int = 0
        self.black: int = 0
        self.white: int = 0
        self.latino_salary: int = 0
        self.black_salary: int = 0
        self.white_salary: int = 0
        self.children = int = 0
        self.pets = int = 0
        self.population: int = 0
    
    def add_unit(self, unit):
        if unit.__class__.__name__ == 'District':
            self.units.append(unit)
            self.update_group_info(unit)
        elif unit.__class__.__name__ == "Group":
            self.units.extend(unit.units)
            for unit in unit.units:
               self.update_group_info(unit)
               
    def __str__(self) -> str:
        return super().__str__()
        
    def __repr__(self):
        return super().__repr__()
    
    def update_group_info(self, unit) -> None:
        for characteristic in Board.PERCENTAGE_ATTRIBUTES:
            exec(f"self.{characteristic} = int(((self.{characteristic} * self.population / 100 + unit.{characteristic} * unit.population / 100) / (unit.population + self.population) ) * 100) ")
        self.population += unit.population
        
    def group_standard_deviation(self):
        result = {k: [] for k in Board.PERCENTAGE_ATTRIBUTES}
        for unit in self.units:
            for characteristic in Board.PERCENTAGE_ATTRIBUTES:
                result[characteristic].append(eval(f"unit.{characteristic}"))
        for characteristic in Board.PERCENTAGE_ATTRIBUTES:
            result[characteristic] = statistics.stdev(result[characteristic])
        return result



In [20]:
b = Board(5)
print(b.board)
b.agglompartition(12, b.bergman_divergence)


[[126 127 128 129 130]
 [131 132 133 134 135]
 [136 137 138 139 140]
 [141 142 143 144 145]
 [146 147 148 149 150]]
Clusters: 
Amount of units: 12, average cluster information: Group id: 152, black: 10%, white: 15%, latino: 65%, latino salary: 62%, black salary: 63%, white salary: 66%, pets: 59%, children: 24% population: 66823
Standard deviation: {'white': 13.534187911516613, 'latino': 12.629930780299294, 'black': 10.762941470198056, 'white_salary': 13.11112181132931, 'latino_salary': 11.735403570806458, 'black_salary': 14.806837077594398, 'children': 17.3438924714323, 'pets': 25.67674929864477}
District data: District id: 149, black: 0%, white: 36%, latino: 64%, latino salary: 60%, black salary: 83%, white salary: 71%, pets: 78%, children: 27% population: 1994
District data: District id: 145, black: 4%, white: 22%, latino: 74%, latino salary: 84%, black salary: 69%, white salary: 63%, pets: 71%, children: 32% population: 6852
District data: District id: 150, black: 14%, white: 21%, l

In [18]:
b = Board(5)
print(b.board)
b.agglompartition(15, b.kullback_leibler_divergence)

[[66 67 68 69 70]
 [71 72 73 74 75]
 [76 77 78 79 80]
 [81 82 83 84 85]
 [86 87 88 89 90]]
Clusters: 
Amount of units: 4, average cluster information: Group id: 93, black: 27%, white: 33%, latino: 37%, latino salary: 65%, black salary: 61%, white salary: 66%, pets: 50%, children: 61% population: 29074
Standard deviation: {'white': 5.802298395176403, 'latino': 10.739335795724674, 'black': 13.076696830622021, 'white_salary': 11.51810169544733, 'latino_salary': 15.588457268119896, 'black_salary': 11.757976016304847, 'children': 27.60434748368452, 'pets': 24.569628948493843}
District data: District id: 67, black: 31%, white: 25%, latino: 44%, latino salary: 92%, black salary: 51%, white salary: 53%, pets: 85%, children: 66% population: 3989
District data: District id: 73, black: 19%, white: 38%, latino: 43%, latino salary: 56%, black salary: 78%, white salary: 58%, pets: 59%, children: 29% population: 7575
District data: District id: 71, black: 46%, white: 31%, latino: 23%, latino salary: 

  result += pd[characteristic] * np.log2(pd[characteristic]/qd[characteristic])
  result += pd[characteristic] * np.log2(pd[characteristic]/qd[characteristic])
