Skip to content

Commit

Permalink
Split Cluster into subclasses (#656)
Browse files Browse the repository at this point in the history
* Add skeleton for Cluster subclasses

* Add constructors in subclasses

* Move to_xml_string to ContainerCluster + update test

* Move to_xml_string to ContentCluster + update test

* Make document_name mandatory for ContentCluster

* Remove type, subclass-specific params. Implement repr. Update doctest

* Move documentation to appropriate subclasses

* Use base constructor in Cluster subclasses

* Generate common XML elements in parent class

* Reorder output in test to match XML generation order

* Minor comment
  • Loading branch information
tmaregge committed Jan 16, 2024
1 parent 6d863cf commit df81a8b
Show file tree
Hide file tree
Showing 2 changed files with 128 additions and 92 deletions.
196 changes: 116 additions & 80 deletions vespa/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -1765,14 +1765,14 @@ def __init__(self,
parameters: Optional[List[Parameter]] = None,
) -> None:
"""
Specify node resources for a content or container cluster as part of a :class: `Cluster`.
Specify node resources for a content or container cluster as part of a :class: `ContainerCluster` or :class: `ContentCluster`.
:param count: Number of nodes in a cluster.
:param parameters: List of :class: `Parameter`s defining the configuration of the cluster resources.
Example:
>>> Cluster(id="example_container", type="container",
>>> ContainerCluster(id="example_container",
... nodes=Nodes(
... count="2",
... parameters=[
Expand All @@ -1782,7 +1782,7 @@ def __init__(self,
... ]
... )
... )
Cluster(id="example_container", type="container", version="1.0", nodes="Nodes(count="2")")
ContainerCluster(id="example_container", version="1.0", nodes="Nodes(count="2")")
"""
self.count = count
self.parameters = parameters
Expand All @@ -1805,116 +1805,152 @@ def to_xml(self, root) -> ET.Element:
class Cluster(object):
    def __init__(self,
                 id: str,
                 version: str = "1.0",
                 nodes: Optional[Nodes] = None,
                 ) -> None:
        """
        Base class for a cluster configuration. Should not be instantiated directly.
        Use subclasses :class:`ContainerCluster` or :class:`ContentCluster` instead.

        :param id: Cluster id
        :param version: Cluster version.
        :param nodes: :class:`Nodes` that specifies node resources.
        """
        self.id = id
        self.version = version
        self.nodes = nodes

    def __repr__(self) -> str:
        """
        Return the common prefix of the cluster representation.

        The returned string is intentionally left without a closing parenthesis:
        subclasses append their own fields and the final ")" to it.
        """
        id_str = f"id=\"{self.id}\""
        version_str = f", version=\"{self.version}\""
        nodes_str = f", nodes=\"{self.nodes}\"" if self.nodes else ""
        return f"{self.__class__.__name__}({id_str}{version_str}{nodes_str}"

    def to_xml(self, root):
        """
        Set up XML elements that are used in both container and content clusters.

        :param root: The XML element of the cluster. It receives the ``id`` and
            ``version`` attributes and, if :class:`Nodes` are configured, whatever
            ``Nodes.to_xml`` adds to it.
        """
        root.set("id", self.id)
        root.set("version", self.version)

        if self.nodes:
            self.nodes.to_xml(root)


class ContainerCluster(Cluster):
    def __init__(self,
                 id: str,
                 version: str = "1.0",
                 nodes: Optional[Nodes] = None,
                 components: Optional[List[Component]] = None
                 ) -> None:
        """
        Defines the configuration of a container cluster.

        If :class:`ContainerCluster` is used, any :class:`Component`s must be added to the :class:`ContainerCluster`,
        rather than to the :class:`ApplicationPackage`, in order to be included in the generated schema.

        :param id: Cluster id
        :param version: Cluster version.
        :param nodes: :class:`Nodes` that specifies node resources.
        :param components: List of :class:`Component` that contains configurations for application components, e.g. embedders.

        Example:

        >>> ContainerCluster(id="example_container",
        ...    components=[Component(id="e5", type="hugging-face-embedder",
        ...        parameters=[
        ...            Parameter("transformer-model", {"url": "https://github.com/vespa-engine/sample-apps/raw/master/simple-semantic-search/model/e5-small-v2-int8.onnx"}),
        ...            Parameter("tokenizer-model", {"url": "https://raw.githubusercontent.com/vespa-engine/sample-apps/master/simple-semantic-search/model/tokenizer.json"})
        ...        ]
        ...    )]
        ... )
        ContainerCluster(id="example_container", version="1.0", components="[Component(id="e5", type="hugging-face-embedder")]")
        """
        super().__init__(id, version, nodes)
        self.components = components

    def __repr__(self) -> str:
        """Return the base representation extended with the components, if any."""
        base_str = super().__repr__()
        components = f", components=\"{self.components}\"" if self.components else ""
        return f"{base_str}{components})"

    def to_xml_string(self, indent=1):
        """
        Render the ``<container>`` element of this cluster as an indented XML string.

        :param indent: Number of 4-space indentation levels to prefix every line
            after the first one with.
        :return: The XML of the cluster, without the XML declaration.
        """
        # Elements every container cluster gets, in output order.
        default_children = ("search", "document-api", "document-processing")

        root = ET.Element("container")
        # Common attributes and the optional nodes element come first.
        super().to_xml(root)

        # Add default elements in container
        for child in default_children:
            ET.SubElement(root, child)

        # Add potential components
        if self.components:
            for comp in self.components:
                comp.to_xml(root)

        # Temporary workaround to get ElementTree to print closing tags.
        # Otherwise it prints <search/>, etc.
        # TODO: Find a permanent solution
        xml_str = minidom.parseString(ET.tostring(root)).toprettyxml(indent=" " * 4)
        for child in default_children:
            xml_str = xml_str.replace(f'<{child}/>', f'<{child}></{child}>')

        # Indent XML and remove opening tag (line 0 is the XML declaration)
        xml_lines = xml_str.strip().split("\n")
        return "\n".join([xml_lines[1]] + [(" " * 4 * indent) + line for line in xml_lines[2:]])


class ContentCluster(Cluster):
    def __init__(self,
                 id: str,
                 document_name: str,
                 version: str = "1.0",
                 nodes: Optional[Nodes] = None
                 ) -> None:
        """
        Defines the configuration of a content cluster.

        :param id: Cluster id
        :param document_name: Name of document.
        :param version: Cluster version.
        :param nodes: :class:`Nodes` that specifies node resources.

        Example:

        >>> ContentCluster(id="example_content", document_name="doc")
        ContentCluster(id="example_content", version="1.0", document_name="doc")
        """
        super().__init__(id, version, nodes)
        self.document_name = document_name

    def __repr__(self) -> str:
        """Return the base representation extended with the document name."""
        base_str = super().__repr__()
        document_name = f", document_name=\"{self.document_name}\"" if self.document_name else ""
        return f"{base_str}{document_name})"

    @staticmethod
    def _expand_document_tag(line: str) -> str:
        """Rewrite a self-closing ``<document .../>`` line to use an explicit closing tag."""
        if line.lstrip().startswith("<document ") and line.endswith("/>"):
            return line[:-2] + "></document>"
        return line

    def to_xml_string(self, indent=1):
        """
        Render the ``<content>`` element of this cluster as an indented XML string.

        :param indent: Number of 4-space indentation levels to prefix every line
            after the first one with.
        :return: The XML of the cluster, without the XML declaration.
        """
        root = ET.Element("content")
        # Common attributes and the optional nodes element come first.
        super().to_xml(root)

        if not self.nodes:
            # Use some sensible defaults if the user doesn't pass a Nodes configuration.
            # The defaults are the ones generated if the Cluster classes are not used at all.
            nodes = ET.SubElement(root, "nodes")
            node = ET.SubElement(nodes, "node")
            node.set("distribution-key", "0")
            node.set("hostalias", "node1")

        ET.SubElement(root, "redundancy").text = "1"

        documents = ET.SubElement(root, "documents")
        document = ET.SubElement(documents, "document")
        document.set("type", self.document_name)
        document.set("mode", "index")

        # Temporary workaround for expanding tags.
        # minidom's toprettyxml collapses empty tags, even if short_empty_elements is false in ET.tostring()
        # Probably need to pretty print the xml ourselves
        # TODO Find a more permanent solution
        xml_str = minidom.parseString(ET.tostring(root)).toprettyxml(indent=" " * 4)
        xml_str = xml_str.replace('<node distribution-key="0" hostalias="node1"/>', '<node distribution-key="0" hostalias="node1"></node>')

        # Expand the document tag line by line so it works for any document name,
        # not only for a document called "test".
        xml_lines = [self._expand_document_tag(line) for line in xml_str.strip().split("\n")]

        # Indent XML and remove opening tag (line 0 is the XML declaration)
        return "\n".join([xml_lines[1]] + [(" " * 4 * indent) + line for line in xml_lines[2:]])


class ValidationID(Enum):
"""Collection of IDs that can be used in validation-overrides.xml
Expand Down
24 changes: 12 additions & 12 deletions vespa/test_package.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@
QueryProfile,
Component,
Nodes,
Cluster,
ContentCluster,
ContainerCluster,
Parameter,
ApplicationPackage,
AuthClient
Expand Down Expand Up @@ -1360,8 +1361,7 @@ def test_alias_to_schema(self) -> None:
class TestCluster(unittest.TestCase):
def setUp(self) -> None:
clusters = [
Cluster(type="container",
id="test_container",
ContainerCluster(id="test_container",
nodes=Nodes(
count="1",
parameters=[
Expand All @@ -1378,7 +1378,7 @@ def setUp(self) -> None:
])
]
),
Cluster(type="content", id="test_content", document_name="test")
ContentCluster(id="test_content", document_name="test")
]

self.app_package = ApplicationPackage(name="test", clusters=clusters)
Expand All @@ -1388,27 +1388,27 @@ def test_services_to_text(self):
'<?xml version="1.0" encoding="UTF-8"?>\n'
'<services version="1.0">\n'
' <container id="test_container" version="1.0">\n'
' <nodes count="1">\n'
' <resources vcpu="4.0" memory="16Gb" disk="125Gb">\n'
' <gpu count="1" memory="16Gb"/>\n'
' </resources>\n'
' </nodes>\n'
' <search></search>\n'
' <document-api></document-api>\n'
' <document-processing></document-processing>\n'
' <component id="e5" type="hugging-face-embedder">\n'
' <transformer-model path="model/model.onnx"/>\n'
' <tokenizer-model path="model/tokenizer.json"/>\n'
' </component>\n'
' <nodes count="1">\n'
' <resources vcpu="4.0" memory="16Gb" disk="125Gb">\n'
' <gpu count="1" memory="16Gb"/>\n'
' </resources>\n'
' </nodes>\n'
' </container>\n'
' <content id="test_content" version="1.0">\n'
' <nodes>\n'
' <node distribution-key="0" hostalias="node1"></node>\n'
' </nodes>\n'
' <redundancy>1</redundancy>\n'
' <documents>\n'
' <document type="test" mode="index"></document>\n'
' </documents>\n'
' <nodes>\n'
' <node distribution-key="0" hostalias="node1"></node>\n'
' </nodes>\n'
' </content>\n'
'</services>'
)
Expand Down

0 comments on commit df81a8b

Please sign in to comment.