# Minimum Spanning Trees
### Disjoint Set Union -- For Kruskal's Algorithm
Implementing the disjoint-set forest (union-find) that Kruskal's algorithm relies on. This code is from https://en.wikipedia.org/wiki/Kruskal's_algorithm

In [93]:
class Node(object):
    """A vertex wrapper that serves as an element of the disjoint-set forest.

    Only the vertex label is stored at construction time; the ``parent`` and
    ``rank`` attributes are attached afterwards by ``Make_Set``.
    """

    def __init__(self, val):
        # val: the label of the vertex this node stands for
        self.val = val

    def __repr__(self):
        """Show the node as the string form of its label."""
        label = self.val
        return "{!s}".format(label)
    
# this will take in a node, and create new instance attrs parent and rank
def Make_Set(x):
    """Turn node ``x`` into a singleton set.

    Creates (or resets) two instance attributes on ``x``: ``parent``, which
    points at ``x`` itself so the node is its own representative, and
    ``rank``, which starts at 0.
    """
    x.parent, x.rank = x, 0

def Union(x, y):
    """Merge the sets containing nodes ``x`` and ``y`` (union by rank).

    Both nodes must already carry the ``parent`` and ``rank`` attributes set
    up by ``Make_Set``. Nothing happens when the two nodes already share a
    representative.
    """
    # The lookup helper is named Find_Set; the previous call to ``Find``
    # referred to a name that is never defined.
    x_root = Find_Set(x)
    y_root = Find_Set(y)
    if x_root is y_root:
        return
    # hang the lower-rank root underneath the higher-rank root
    if x_root.rank < y_root.rank:
        x_root.parent = y_root
    elif y_root.rank < x_root.rank:
        y_root.parent = x_root
    else:
        # equal ranks: x_root becomes the root and its rank grows by one
        y_root.parent = x_root
        x_root.rank += 1


def Find_Set(x):
    """Return the representative (root) of the set containing node ``x``.

    Applies path compression: while walking up to the root, every node on the
    path is re-pointed directly at the root, which shortens future lookups.
    """
    if x.parent is not x:
        # recurse into Find_Set itself (not the undefined ``Find``)
        x.parent = Find_Set(x.parent)
    return x.parent

### Kruskal's Algorithm

In [94]:
def MST_Kruskal(G_V, G_E):
    """Print a minimum spanning tree found with Kruskal's algorithm.

    G_V -- iterable of hashable vertex labels
    G_E -- iterable of (u, v, w) edges, where u and v are labels from G_V
           and w is the edge weight

    Prints the total weight of the chosen edges on the first line, followed
    by one "u<TAB>v" line per chosen edge in the order the edges were picked.
    Returns None. If the graph is disconnected, the chosen edges form a
    spanning forest instead of a single tree.
    """
    A = []
    tot_weight = 0
    # one forest node per vertex, keyed by the vertex label
    nodes = {v: Node(v) for v in G_V}
    # give every node its parent and rank attributes
    for node in nodes.values():
        Make_Set(node)
    # sorted() returns a new list, so the caller's edge collection is not
    # reordered as a side effect; it is stable, like list.sort()
    for u, v, w in sorted(G_E, key=lambda t: t[2]):
        # the edge is safe only if its endpoints are in different components
        if Find_Set(nodes[u]) is not Find_Set(nodes[v]):
            A.append((u, v))
            Union(nodes[u], nodes[v])
            tot_weight += w
    # output in the desired format
    print(tot_weight)
    for edge in A:
        print("{u}\t{v}".format(u=edge[0], v=edge[1]))

### Converting Input File and Running Test Cases

In [97]:
from collections import defaultdict
# test cases:11, 113,89, 130
for testcase in range(1, 5):
    with open("test" + str(testcase) + ".in") as file:
        # the first line holds the number of vertices; read it to move past it
        num_vertcies = file.readline()
        # Every remaining non-blank line is an edge "u v w". Blank lines are
        # filtered out instead of blindly dropping the last line, so an input
        # file without a trailing empty line no longer loses its final edge.
        # each element of G_E is (u:int, v:int, w:int)
        G_E = [
            tuple(int(token) for token in line.split())
            for line in file.readlines()
            if line.strip()
        ]
    # unique set of vertices that appear as an endpoint of some edge
    G_V = {t[0] for t in G_E} | {t[1] for t in G_E}
    with open("expected" + str(testcase) + ".out") as exp:
        # the first line of the expected-output file is printed for comparison
        expected = exp.readline()
    print("\nExpected Weight: {0}".format(expected))
    MST_Kruskal(G_V, G_E)


Expected Weight: 11

11
1	5
3	4
1	2
3	2

Expected Weight: 113

113
3	1
2	1

Expected Weight: 89

89
1	5
2	4
4	1
5	3

Expected Weight: 130

130
4	5
4	6
2	4
3	5
1	2


### Prim's Algorithm
*To use a custom predicate in Python's heap implementation, you must insert tuples, where the first element is the key to compare*
1. Convert all the vertices in $V - \{r\}$ to entries of the form (key, label, parent), where the key starts at infinity and the parent starts empty

2. Set **r.key** to 0, so it is at the top of the min-heap, and thus the root of the MST.

3. Insert all the nodes into the min-heap, based on the weight element of the tuple. 

4. While the Min-Heap is not empty
    * Get the vertex with the minimum weight from the min-heap
        * For iteration 1, this is the root
    * Then we update every vertex that is adjacent to the popped vertex and still in the heap, changing its parent and key values whenever the connecting edge is lighter than its current key

In [146]:
from heapq import *

def MST_Prim(G, r):
    """Print a minimum spanning tree of ``G`` grown from root ``r`` (Prim).

    G -- adjacency mapping {u: {v: w, ...}, ...}; G[u] must list every
         neighbour v of u together with the weight w of edge (u, v)
    r -- label of the vertex the tree is grown from

    Prints the total weight on the first line, followed by one
    "parent<TAB>child" line per tree edge in the order the vertices were
    added. Returns None. A vertex that is never reached from ``r`` is popped
    with its initial key of infinity, so the printed total is ``inf`` then.
    """
    # each heap entry is a mutable list [key, label, parent]
    heap = [[float("inf"), v, None] for v in G.keys() if v != r]
    # r.key = 0, so the root is the first entry popped
    heap.append([0, r, None])
    # Entries are modified after insertion, so keep a map from each vertex
    # label to its entry; membership in this map also means "still in Q".
    loc = {entry[1]: entry for entry in heap}
    # Q = G.V
    heapify(heap)
    A = []
    tot_weight = 0
    while heap:
        key, u, parent = heappop(heap)
        # Only the root (and unreached vertices) have no parent. Compare with
        # None explicitly: a truthiness test would also skip every edge whose
        # parent vertex is labelled 0.
        if parent is not None:
            A.append((parent, u))
        tot_weight += key
        # remove u from Q, as it is now part of the tree
        del loc[u]

        for v, w in G[u].items():
            # if v in Q and w(u, v) < v.key
            if v in loc and w < loc[v][0]:
                # v.par = u
                loc[v][2] = u
                # v.key = w(u, v)
                loc[v][0] = w
        # heapq has no decrease-key operation, so restore the heap invariant
        # with an O(N) heapify after the keys were edited in place
        heapify(heap)
    print(tot_weight)
    for edge in A:
        print("{u}\t{v}".format(u=edge[0], v=edge[1]))
                

In [148]:
for testcase in [1,2,3,4]:
    with open("test" + str(testcase) + ".in") as file:
        # the first line holds the number of vertices; read it to move past it
        num_vertcies = file.readline()
        # Every remaining non-blank line is an edge "u v w". Blank lines are
        # filtered out instead of blindly dropping the last line, so an input
        # file without a trailing empty line no longer loses its final edge.
        G_E = [
            tuple(int(token) for token in line.split())
            for line in file.readlines()
            if line.strip()
        ]
    with open("expected" + str(testcase) + ".out") as exp:
        # the first line of the expected-output file is printed for comparison
        expected = exp.readline()
    print("\nExpected Weight: {0}".format(expected))
    # each edge is of the form (u:int, v:int, w:int), so we convert to an
    # adjacency list of the form
    # {
    #  u: {v: w, ...}
    #  v: {u: w, ...}
    # }
    adj_list = defaultdict(dict)
    for u, v, w in G_E:
        # the graph is undirected: record the edge under both endpoints
        adj_list[u][v] = w
        adj_list[v][u] = w
    MST_Prim(adj_list, 1)


Expected Weight: 11

11
1	5
1	2
2	3
3	4

Expected Weight: 113

113
1	3
1	2

Expected Weight: 89

89
1	5
1	4
4	2
5	3

Expected Weight: 130

130
1	2
2	4
4	5
4	6
5	3
