In [17]:
import xmltodict
from collections.abc import MutableMapping
import re


In [30]:
def _flatten_dict_generator(d: MutableMapping, parent_key: str = "", sep: str = "."):
    """
    Generator to flatten dictionary recursively

    Args:
        d (dict): Dictionary to flatten
        parent_key (str): String of parent dictionary
        sep (str): String used to seperate keys in flattened dictionary

    Yeilds:
        new_key (str): String of combining parent key and current key
        v (str): returns value of key value
    """

    for k, v in d.items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else k
        if isinstance(v, MutableMapping):
            yield from flatten_dict(v, new_key, sep=sep).items()
        else:
            yield new_key, v


def flatten_dict(d: MutableMapping, parent_key: str = "", sep: str = "."):
    """
    Collapse a nested dictionary into a single-level dictionary.

    The key/value pairs are produced by _flatten_dict_generator and gathered
    into a new dictionary; if the same flattened key is produced more than
    once, the last value wins.

    Args:
        d (dict): Dictionary to flatten
        parent_key (str): Prefix placed in front of every flattened key
        sep (str): String used to separate keys in the flattened dictionary

    Returns:
        dict: flattened dictionary
    """
    flattened = {}
    for flat_key, value in _flatten_dict_generator(d, parent_key, sep):
        flattened[flat_key] = value
    return flattened


In [37]:
class Node:
    """
    Attribute-style view of a single Alteryx tool node.

    The node dictionary is flattened with flatten_dict and every flattened
    key becomes an instance attribute. Flattened keys contain the separator
    (for example "GuiSettings.@Plugin"), so such attributes have to be read
    with getattr() or vars() rather than dotted attribute access.

    Args:
        node (dict): Dictionary that represents the Alteryx tool
    """

    def __init__(self, node: dict):
        flattened = flatten_dict(node)
        for attr_name in flattened:
            setattr(self, attr_name, flattened[attr_name])


class Workflow:
    """
    Builds a Workflow object from the provided Alteryx Workflow.

    This object will contain a flat list of all Nodes of the workflow,
    including the nodes nested inside tool containers.

    Args:
        file_path (str): Location of the workflow to parse
    """

    def __init__(self, file_path: str):
        self.file_path = file_path
        self.nodes = []

    def read_workflow(self):
        """
        Reads the provided alteryx file and builds the node list

        The parsed document is kept on ``self.doc`` and every node found is
        appended to ``self.nodes``.

        Returns:
            Workflow: this instance, so calls can be chained
        """
        # NOTE(review): the file is read with the platform default text
        # encoding - confirm that matches how the workflows are saved.
        with open(self.file_path) as f:
            self.doc = xmltodict.parse(f.read(), dict_constructor=dict)

        document = self.doc["AlteryxDocument"]
        # A document without a <Nodes> element simply has no nodes to collect.
        nodes = document.get("Nodes") if isinstance(document, dict) else None
        if nodes is not None:
            self.collect_nodes(nodes, self.nodes)

        return self

    @staticmethod
    def _as_node_list(entry):
        """Normalize a parsed ``Node`` entry to a list of node dictionaries."""
        if entry is None:
            return []
        if isinstance(entry, list):
            return entry
        # A single child element is parsed to a bare dict rather than a
        # one-element list.
        return [entry]

    def collect_nodes(
        self,
        nodes: dict,
        node_list: list,
        level: int = 0,
        parent_container: "Node | None" = None,
    ):
        """
        Extracts nodes from provided dictionary and appends them to node_list

        Will recursively descend into the ChildNodes of tool containers of the
        Alteryx workflow to ensure capturing of all nodes

        Args:
            nodes (dict): Parsed element holding the "Node" entries of the
                workflow or of a tool container; anything that is not a
                dictionary (e.g. None for an empty element) is ignored
            node_list (list): list the collected Node objects are appended to
            level (int): Level of nesting of the item; only threaded through
                the recursion, it is not stored on the nodes
            parent_container (Node): Node object of parent tool container;
                only threaded through the recursion, it is not stored
        """

        if not isinstance(nodes, dict):
            return

        for node in self._as_node_list(nodes.get("Node")):
            n = Node(node)

            if "GuiSettings.@Plugin" in vars(n):
                plugin = getattr(n, "GuiSettings.@Plugin")
                # The tool type is the last dotted component of the plugin name.
                node_type = plugin.split(".")[-1]
                print(plugin)
                print(node_type)
                setattr(n, "nodeType", node_type)
                if node_type in ("DbFileInput", "DbFileOutput"):
                    # Dump every flattened attribute of file input/output tools.
                    for var in vars(n):
                        print(f"{var} - {getattr(n, var)}")

            node_list.append(n)
            if "ChildNodes" in node:
                print(node["ChildNodes"])
                # ChildNodes has the same shape as the top-level Nodes element
                # ({"Node": ...} or None), so it is passed on unchanged.
                self.collect_nodes(
                    node["ChildNodes"],
                    node_list,
                    level=level + 1,
                    parent_container=n,
                )


In [38]:
if __name__ == "__main__":
    # Accumulator lists; they are created here but not populated in this cell.
    workflow_nodes, input_nodes, output_nodes = [], [], []

    # Parse the sample workflow. The returned Workflow object is not kept;
    # the call is made for the diagnostics printed while collecting nodes.
    Workflow("test_files/Challenge 320 completed.yxmd").read_workflow()

AlteryxGuiToolkit.TextBox.TextBox
TextBox
AlteryxGuiToolkit.TextBox.TextBox
TextBox
AlteryxBasePluginsGui.DbFileInput.DbFileInput
DbFileInput
@ToolID - 4
GuiSettings.@Plugin - AlteryxBasePluginsGui.DbFileInput.DbFileInput
GuiSettings.Position.@x - 78
GuiSettings.Position.@y - 546
Properties.Configuration.Passwords - None
Properties.Configuration.File.@OutputFileName - 
Properties.Configuration.File.@FileFormat - 19
Properties.Configuration.File.@SearchSubDirs - False
Properties.Configuration.File.@RecordLimit - 
Properties.Configuration.File.#text - Female Political Representation_World.yxdb
Properties.Configuration.FormatSpecificOptions - None
Properties.Annotation.@DisplayMode - 0
Properties.Annotation.Name - None
Properties.Annotation.DefaultAnnotationText - Female Political Representation_World.yxdb
Properties.Annotation.Left.@value - False
Properties.MetaInfo.@connection - Output
Properties.MetaInfo.RecordInfo.Field - [{'@name': 'Country Name', '@size': '254', '@source': 'File: C: