# Alian Dictionary

Given a sorted dictionary (array of words) of an alien language, find order of characters in the language.

EXAMPLES:

    Input: words = ["baa", "abcd", "abca", "cab", "cad"]
    Output: "bdac""

    Input: words = ["z", "x"]
    Output: "zx"

    Input: words = ["z"]
    Output: "z"

    Input: words = ["z", "x", "z"]
    Output: ""
    Reason: Circula relationship.

    Input: words = ["abc", "ab"]
    Output: ""
    Reason: The 2nd word 'ab' is a prefix of the 1st. This is not valid.


NOTE:
 - You may assume all letters are in lowercase.
 - If the order is invalid, return an empty string.
 - There may be multiple valid order of letters, return any one of them is fine.

APPROACHES:
  1. Extract dependency rules from the input.
  2. Putting dependency rules into a graph with letters as nodes and
     dependencies as edges. 
  3. Also track the number of inputs for each node.
  4. Topologically sorting the graph nodes, starting with those without
     any inputs.
 
TECHNIQUES:
  - Topological sortting.

REFERENCE
 - https://www.geeksforgeeks.org/given-sorted-dictionary-find-precedence-characters/
 - https://www.geeksforgeeks.org/topological-sorting/
 - https://en.wikipedia.org/wiki/Topological_sorting#Kahn's_algorithm


In [1]:
from typing import List
from collections import defaultdict, Counter, deque


class Node:
    def __init__(self, val):
        self.val = val
        self.edges = set()
        self.in_degree = 0


class Solution:

    def alienOrder_v1(self, words: List[str]) -> str:
        """Use a Node structure to store the graph data."""
        # Step 0. Create nodes
        nodes = dict()
        for c in set([c for w in words for c in w]):
            nodes[c] = Node(c)

        # Step 1. Build the edges (dependency)
        for w1, w2 in zip(words, words[1:]):
            for c1, c2 in zip(w1, w2):
                if c1 != c2:
                    n1 = nodes[c1]
                    if c2 not in n1.edges:
                        n1.edges.add(c2)
                        nodes[c2].in_degree += 1
                    break
            else:
                if len(w2) < len(w1):
                    return ''

        # Step 2. Topology Sorting
        results = list()
        queue = [n for n in nodes.values() if n.in_degree == 0]
        while queue:
            n = queue.pop(0)
            results.append(n.val)
            for c in n.edges:
                n2 = nodes[c]
                n2.in_degree -= 1
                if n2.in_degree == 0:
                    queue.append(n2)

        # If not all letters are in output, that means there was a cycle and so
        # no valid ordering. Return "" as per the problem description.
        if len(results) < len(nodes):
            return ""

        return ''.join(results)

    def alienOrder_v2(self, words: List[str]) -> str:
        
        #--------------------
        # Build the graph
        #--------------------        
        # Get graph
        graph = defaultdict(set)
        for w1, w2 in zip(words[:-1], words[1:]):
            for c1, c2 in zip(w1, w2):
                if c1 != c2:
                    graph[c1].add(c2)
                    break
            else:
                # If getting out of the loop w/o break,
                if len(w2) < len(w1):
                    return ''

        # Get in-degree
        in_degree = {c:0 for word in words for c in word}
        for k, vals in graph.items():
            for v in vals:
                in_degree[v] += 1
                
        # print(f"- in_degree = {in_degree}")
        # print(f"- graph = {dict(graph)}")
        
        #--------------------
        # Special cases
        #--------------------
        if not in_degree:
            print(f"ERROR - no input.")
            return ''
        if len(in_degree) == 1:
            return list(in_degree.keys())[0]
        
        #--------------------
        # Topological ssort
        #--------------------
        ans = []
        # Create in degrees and graph

        # Find "sources": vertices with indegree 0
        queue = [v for v, count in in_degree.items() if count == 0]
        ans = []
        while queue:
            v = queue.pop(0)
            ans.append(v)
            for c in graph[v]:
                in_degree[c] -= 1
                if in_degree[c] == 0:
                    queue.append(c)

        # Check if we get all of the letters
        if len(ans) != len(in_degree):
            print(f"ERROR - detected cycles.")
            ans = []

        return ''.join(ans)


# ---------------------------
#   Main & Helper Functions
# ---------------------------
def main():
    """Main function"""

    # Test data
    test_data = [
        [["baa", "abcd", "abca", "cab", "cad"], "bdac"],
        [["wrt", "wrf", "er", "ett", "rftt"], "wertf"],
        [["z", "x"], "zx"],
        [["z", "x", "z"], ""],  # circular
        [["za", "zb", "ca", "cb"], "abzc"],
        [["abc", "ab"], ""],    # 2nd word is a prefix of 1st. Thus, invalid
        [["z", "z"], "z"],
        [["zy", "zx"], "zyx"],
        [["ab", "adc"], "abcd"],
        [["ri", "xz", "qxf", "jhsguaw", "dztqrbwbm",
            "dhdqfb", "jdv", "fcgfsilnb", "ooby"], ""],
    ]


    ob1 = Solution()
    for words, expected in test_data:
        print(f"# Input  = {words}")
        print(f"  output v1 = '{ob1.alienOrder_v1(words)}' ('{expected}')")
        print(f"  output v2 = '{ob1.alienOrder_v2(words)}' ('{expected}')")


if __name__ == "__main__":
    main()


# Input  = ['baa', 'abcd', 'abca', 'cab', 'cad']
  output v1 = 'bdac' ('bdac')
  output v2 = 'bdac' ('bdac')
# Input  = ['wrt', 'wrf', 'er', 'ett', 'rftt']
  output v1 = 'wertf' ('wertf')
  output v2 = 'wertf' ('wertf')
# Input  = ['z', 'x']
  output v1 = 'zx' ('zx')
  output v2 = 'zx' ('zx')
# Input  = ['z', 'x', 'z']
  output v1 = '' ('')
ERROR - detected cycles.
  output v2 = '' ('')
# Input  = ['za', 'zb', 'ca', 'cb']
  output v1 = 'zacb' ('abzc')
  output v2 = 'zacb' ('abzc')
# Input  = ['abc', 'ab']
  output v1 = '' ('')
  output v2 = '' ('')
# Input  = ['z', 'z']
  output v1 = 'z' ('z')
  output v2 = 'z' ('z')
# Input  = ['zy', 'zx']
  output v1 = 'yzx' ('zyx')
  output v2 = 'zyx' ('zyx')
# Input  = ['ab', 'adc']
  output v1 = 'bcad' ('abcd')
  output v2 = 'abcd' ('abcd')
# Input  = ['ri', 'xz', 'qxf', 'jhsguaw', 'dztqrbwbm', 'dhdqfb', 'jdv', 'fcgfsilnb', 'ooby']
  output v1 = '' ('')
ERROR - detected cycles.
  output v2 = '' ('')
