Split Cluster into subclasses (#656)

* Add skeleton for Cluster subclasses
* Add constructors in subclasses
* Move to_xml_string to ContainerCluster + update test
* Move to_xml_string to ContentCluster + update test
* Make document_name mandatory for ContentCluster
* Remove type, subclass-specific params. Implement repr. Update doctest
* Move documentation to appropriate subclasses
* Use base constructor in Cluster subclasses
* Generate common XML elements in parent class
* Reorder output in test to match XML generation order
* Minor comment
vespa-engine · Jan 16, 2024 · df81a8b · df81a8b
1 parent 6d863cf
commit df81a8b
Show file tree

Hide file tree

Showing 2 changed files with 128 additions and 92 deletions.
diff --git a/vespa/package.py b/vespa/package.py
@@ -1765,14 +1765,14 @@ def __init__(self,
                  parameters: Optional[List[Parameter]] = None,
                  ) -> None:
         """
-        Specify node resources for a content or container cluster as part of a :class: `Cluster`.
+        Specify node resources for a content or container cluster as part of a :class: `ContainerCluster` or :class: `ContentCluster`.
 
         :param count: Number of nodes in a cluster.
         :param parameters: List of :class: `Parameter`s defining the configuration of the cluster resources.
 
         Example:
 
-        >>> Cluster(id="example_container", type="container",
+        >>> ContainerCluster(id="example_container",
         ...    nodes=Nodes(
         ...        count="2",
         ...        parameters=[
@@ -1782,7 +1782,7 @@ def __init__(self,
         ...        ]
         ...    )
         ... )
-        Cluster(id="example_container", type="container", version="1.0", nodes="Nodes(count="2")")
+        ContainerCluster(id="example_container", version="1.0", nodes="Nodes(count="2")")
         """
         self.count = count
         self.parameters = parameters
@@ -1805,116 +1805,152 @@ def to_xml(self, root) -> ET.Element:
 class Cluster(object):
     def __init__(self,
                  id: str,
-                 type: str,
                  version: str = "1.0",
                  nodes: Optional[Nodes] = None,
-                 components: Optional[List[Component]] = None,
-                 document_name: Optional[str] = None
                  ) -> None:
         """
-        Define the configuration of a container or content cluster.
-
-        If :class: `Cluster` is used, :class: `Component`s must be added to the :class: `Cluster`,
-        rather than to the :class: `ApplicationPackage`, in order to be included in the generated schema.
+        Base class for a cluster configuration. Should not be instantiated directly.
+        Use subclasses :class: `ContainerCluster` or :class: `ContentCluster` instead.
 
         :param id: Cluster id
-        :param type: The type of cluster. Either "container" or "content".
         :param version: Cluster version.
         :param nodes: :class: `Nodes` that specifies node resources.
+        """
+        self.id = id
+        self.version = version
+        self.nodes = nodes
+
+    def __repr__(self) -> str:
+        id = f"id=\"{self.id}\""
+        version = f", version=\"{self.version}\""
+        nodes = f", nodes=\"{self.nodes}\"" if self.nodes else ""
+        return f"{self.__class__.__name__}({id}{version}{nodes}"
+
+    def to_xml(self, root):
+        """Set up XML elements that are used in both container and content clusters."""
+        root.set("id", self.id)
+        root.set("version", self.version)
+
+        if self.nodes:
+            self.nodes.to_xml(root)
+
+
+class ContainerCluster(Cluster):
+    def __init__(self,
+                 id: str,
+                 version: str = "1.0",
+                 nodes: Optional[Nodes] = None,
+                 components: Optional[List[Component]] = None
+                 ) -> None:
+        """
+        Defines the configuration of a container cluster.
+
         :param components: List of :class:`Component` that contains configurations for application components, e.g. embedders.
-        :param document_name: Name of document. Only used in content Cluster
+
+        If :class: `ContainerCluster` is used, any :class: `Component`s must be added to the :class: `ContainerCluster`,
+        rather than to the :class: `ApplicationPackage`, in order to be included in the generated schema.
 
         Example:
 
-        >>> Cluster(id="example_container", type="container",
+        >>> ContainerCluster(id="example_container",
         ...    components=[Component(id="e5", type="hugging-face-embedder",
         ...        parameters=[
         ...            Parameter("transformer-model", {"url": "https://github.com/vespa-engine/sample-apps/raw/master/simple-semantic-search/model/e5-small-v2-int8.onnx"}),
         ...            Parameter("tokenizer-model", {"url": "https://raw.githubusercontent.com/vespa-engine/sample-apps/master/simple-semantic-search/model/tokenizer.json"})
         ...        ]
         ...    )]
         ... )
-        Cluster(id="example_container", type="container", version="1.0", components="[Component(id="e5", type="hugging-face-embedder")]")
-        >>> Cluster(id="example_content", type="content", document_name="doc")
-        Cluster(id="example_content", type="content", version="1.0", document_name="doc")
+        ContainerCluster(id="example_container", version="1.0", components="[Component(id="e5", type="hugging-face-embedder")]")
         """
-        self.id = id
-        self.type = type
-        self.version = version
-        self.nodes = nodes
+        super().__init__(id, version, nodes)
         self.components = components
-        self.document_name = document_name
 
     def __repr__(self) -> str:
-        id = f"id=\"{self.id}\""
-        type = f", type=\"{self.type}\""
-        version = f", version=\"{self.version}\""
-        nodes = f", nodes=\"{self.nodes}\"" if self.nodes else ""
+        base_str = super().__repr__()
         components = f", components=\"{self.components}\"" if self.components else ""
+        return f"{base_str}{components})"
+
+    def to_xml_string(self, indent=1):
+        root = ET.Element("container")
+        super().to_xml(root)
+
+        # Add default elements in container
+        for child in ["search", "document-api", "document-processing"]:
+            ET.SubElement(root, child)
+
+        # Add potential components
+        if self.components:
+            for comp in self.components:
+                comp.to_xml(root)
+
+        # Temporary workaround to get ElementTree to print closing tags.
+        # Otherwise it prints <search/>, etc.
+        # TODO: Find a permanent solution
+        xml_str = minidom.parseString(ET.tostring(root)).toprettyxml(indent=" " * 4)
+        for child in ["search", "document-api", "document-processing"]:
+            xml_str = xml_str.replace(f'<{child}/>', f'<{child}></{child}>')
+
+        # Indent XML and remove opening tag
+        xml_lines = xml_str.strip().split("\n")
+        return "\n".join([xml_lines[1]] + [(" " * 4 * indent) + line for line in xml_lines[2:]])
+
+
+class ContentCluster(Cluster):
+    def __init__(self,
+                 id: str,
+                 document_name: str,
+                 version: str = "1.0",
+                 nodes: Optional[Nodes] = None
+                 ) -> None:
+        """
+        Defines the configuration of a content cluster.
+
+        :param document_name: Name of document.
+
+        Example:
+
+        >>> ContentCluster(id="example_content", document_name="doc")
+        ContentCluster(id="example_content", version="1.0", document_name="doc")
+        """
+        super().__init__(id, version, nodes)
+        self.document_name = document_name
+
+    def __repr__(self) -> str:
+        base_str = super().__repr__()
         document_name = f", document_name=\"{self.document_name}\"" if self.document_name else ""
-        return f"{self.__class__.__name__}({id}{type}{version}{nodes}{components}{document_name})"
+        return f"{base_str}{document_name})"
 
     def to_xml_string(self, indent=1):
-        if self.type == "container":
-            root = ET.Element("container")
-            root.set("id", self.id)
-            root.set("version", self.version)
-
-            # Add default elements in container
-            for child in ["search", "document-api", "document-processing"]:
-                ET.SubElement(root, child)
-
-            # Add potential components
-            if self.components:
-                for comp in self.components:
-                    comp.to_xml(root)
-
-            if self.nodes:
-                self.nodes.to_xml(root)
-
-            # Temporary workaround to get ElementTree to print closing tags.
-            # Otherwise it prints <search/>, etc.
-            # TODO: Find a permanent solution
-            xml_str = minidom.parseString(ET.tostring(root)).toprettyxml(indent=" " * 4)
-            for child in ["search", "document-api", "document-processing"]:
-                xml_str = xml_str.replace(f'<{child}/>', f'<{child}></{child}>')
-
-            # Indent XML and remove opening tag
-            xml_lines = xml_str.strip().split("\n")
-            return "\n".join([xml_lines[1]] + [(" " * 4 * indent) + line for line in xml_lines[2:]])
-        elif self.type == "content":
-            root = ET.Element("content")
-            root.set("id", self.id)
-            root.set("version", self.version)
-
-            ET.SubElement(root, "redundancy").text = "1"
-
-            if self.document_name:
-                documents = ET.SubElement(root, "documents")
-                document = ET.SubElement(documents, "document")
-                document.set("type", self.document_name)
-                document.set("mode", "index")
-            else:
-                raise ValueError("Missing parameter 'document_name' for content Cluster")
+        root = ET.Element("content")
+        super().to_xml(root)
 
+        if not self.nodes:
+            # Use some sensible defaults if the user doesn't pass a Nodes configuration.
+            # The defaults are the ones generated if the Cluster classes are not used at all.
             nodes = ET.SubElement(root, "nodes")
             node = ET.SubElement(nodes, "node")
             node.set("distribution-key", "0")
             node.set("hostalias", "node1")
 
-            # Temporary workaround for expanding tags.
-            # minidom's toprettyxml collapses empty tags, even if short_empty_elements is false in ET.tostring()
-            # Probably need to pretty print the xml ourselves
-            # TODO Find a more permanent solution
-            xml_str = minidom.parseString(ET.tostring(root)).toprettyxml(indent=" " * 4)
-            xml_str = xml_str.replace('<document type="test" mode="index"/>', '<document type="test" mode="index"></document>')
-            xml_str = xml_str.replace('<node distribution-key="0" hostalias="node1"/>', '<node distribution-key="0" hostalias="node1"></node>')
-
-            # Indent XML and remove opening tag
-            xml_lines = xml_str.strip().split("\n")
-            return "\n".join([xml_lines[1]] + [(" " * 4 * indent) + line for line in xml_lines[2:]])
-        else:
-            raise ValueError(f"Invalid Cluster type '{self.type}'. Supported types: 'container', 'content'")
+        ET.SubElement(root, "redundancy").text = "1"
+
+        documents = ET.SubElement(root, "documents")
+        document = ET.SubElement(documents, "document")
+        document.set("type", self.document_name)
+        document.set("mode", "index")
+
+        # Temporary workaround for expanding tags.
+        # minidom's toprettyxml collapses empty tags, even if short_empty_elements is false in ET.tostring()
+        # Probably need to pretty print the xml ourselves
+        # TODO Find a more permanent solution
+        xml_str = minidom.parseString(ET.tostring(root)).toprettyxml(indent=" " * 4)
+        xml_str = xml_str.replace('<document type="test" mode="index"/>', '<document type="test" mode="index"></document>')
+        xml_str = xml_str.replace('<node distribution-key="0" hostalias="node1"/>', '<node distribution-key="0" hostalias="node1"></node>')
+
+        # Indent XML and remove opening tag
+        xml_lines = xml_str.strip().split("\n")
+        return "\n".join([xml_lines[1]] + [(" " * 4 * indent) + line for line in xml_lines[2:]])
+
 
 class ValidationID(Enum):
     """Collection of IDs that can be used in validation-overrides.xml

diff --git a/vespa/test_package.py b/vespa/test_package.py
@@ -21,7 +21,8 @@
     QueryProfile,
     Component,
     Nodes,
-    Cluster,
+    ContentCluster,
+    ContainerCluster,
     Parameter,
     ApplicationPackage,
     AuthClient
@@ -1360,8 +1361,7 @@ def test_alias_to_schema(self) -> None:
 class TestCluster(unittest.TestCase):
     def setUp(self) -> None:
         clusters = [
-            Cluster(type="container",
-                    id="test_container",
+            ContainerCluster(id="test_container",
                     nodes=Nodes(
                         count="1",
                         parameters=[
@@ -1378,7 +1378,7 @@ def setUp(self) -> None:
                                           ])
                                 ]
                     ),
-            Cluster(type="content", id="test_content", document_name="test")
+            ContentCluster(id="test_content", document_name="test")
         ]
 
         self.app_package = ApplicationPackage(name="test", clusters=clusters)
@@ -1388,27 +1388,27 @@ def test_services_to_text(self):
             '<?xml version="1.0" encoding="UTF-8"?>\n'
             '<services version="1.0">\n'
             '    <container id="test_container" version="1.0">\n'
+            '        <nodes count="1">\n'
+            '            <resources vcpu="4.0" memory="16Gb" disk="125Gb">\n'
+            '                <gpu count="1" memory="16Gb"/>\n'
+            '            </resources>\n'
+            '        </nodes>\n'
             '        <search></search>\n'
             '        <document-api></document-api>\n'
             '        <document-processing></document-processing>\n'
             '        <component id="e5" type="hugging-face-embedder">\n'
             '            <transformer-model path="model/model.onnx"/>\n'
             '            <tokenizer-model path="model/tokenizer.json"/>\n'
             '        </component>\n'
-            '        <nodes count="1">\n'
-            '            <resources vcpu="4.0" memory="16Gb" disk="125Gb">\n'
-            '                <gpu count="1" memory="16Gb"/>\n'
-            '            </resources>\n'
-            '        </nodes>\n'
             '    </container>\n'
             '    <content id="test_content" version="1.0">\n'
+            '        <nodes>\n'
+            '            <node distribution-key="0" hostalias="node1"></node>\n'
+            '        </nodes>\n'
             '        <redundancy>1</redundancy>\n'
             '        <documents>\n'
             '            <document type="test" mode="index"></document>\n'
             '        </documents>\n'
-            '        <nodes>\n'
-            '            <node distribution-key="0" hostalias="node1"></node>\n'
-            '        </nodes>\n'
             '    </content>\n'
             '</services>'
         )