Skip to content

Commit

Permalink
Use __slots__ for frequently created classes (#153)
Browse files Browse the repository at this point in the history
* Use __slots__ for frequently created classes

* Set requests < 2.32 to fix build

* Comment and isinstance check

* Codespell
  • Loading branch information
kirillgarbar committed May 28, 2024
1 parent 147c6c2 commit 89ef858
Show file tree
Hide file tree
Showing 6 changed files with 91 additions and 29 deletions.
20 changes: 12 additions & 8 deletions ch_backup/backup/deduplication.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,24 @@
from ch_backup.backup_context import BackupContext
from ch_backup.clickhouse.models import Database, FrozenPart
from ch_backup.clickhouse.schema import is_replicated
from ch_backup.util import utcnow
from ch_backup.util import Slotted, utcnow


class PartDedupInfo:
class PartDedupInfo(Slotted):
"""
Information about data part to use for deduplication / creation incremental backups.
"""

__slots__ = (
"backup_path",
"checksum",
"size",
"files",
"tarball",
"disk_name",
"verified",
)

def __init__(
self,
backup_path: str,
Expand All @@ -39,12 +49,6 @@ def __init__(
self.disk_name = disk_name
self.verified = verified

def __repr__(self):
return f"PartDedupInfo({self.__dict__})"

def __eq__(self, other):
return self.__dict__ == other.__dict__


TableDedupInfo = Dict[str, PartDedupInfo]

Expand Down
2 changes: 1 addition & 1 deletion ch_backup/backup/layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -593,7 +593,7 @@ def _target_part_size(self, part: PartMetadata) -> int:
"""
Predicts tar archive size after encryption.
"""
tar_size = calc_tarball_size(part.raw_metadata["files"], part.size)
tar_size = calc_tarball_size(list(part.raw_metadata.files), part.size)
return calc_encrypted_size(
tar_size, self._encryption_chunk_size, self._encryption_metadata_size
)
Expand Down
54 changes: 37 additions & 17 deletions ch_backup/backup/metadata/part_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,43 @@
Backup metadata for ClickHouse data part.
"""

from types import SimpleNamespace
from typing import Optional, Sequence

from ch_backup.clickhouse.models import FrozenPart
from ch_backup.util import Slotted


class PartMetadata(SimpleNamespace):
class RawMetadata(Slotted):
    """
    Raw metadata for ClickHouse data part.

    Plain slotted record holding the per-part values that PartMetadata exposes
    through its properties.
    """

    __slots__ = "checksum", "size", "files", "tarball", "link", "disk_name"

    def __init__(
        self,
        checksum: str,
        size: int,
        files: Sequence[str],
        tarball: bool,
        link: Optional[str] = None,
        disk_name: Optional[str] = None,
    ) -> None:
        """
        Store the given values as-is; no validation or copying is performed.

        `link` and `disk_name` default to None, so they are annotated as Optional
        (an implicit Optional via a `None` default is rejected by strict type checkers).
        """
        self.checksum = checksum
        self.size = size
        self.files = files
        self.tarball = tarball
        # None when no link was supplied.
        self.link = link
        # None when no disk name was supplied.
        self.disk_name = disk_name


class PartMetadata(Slotted):
"""
Backup metadata for ClickHouse data part.
"""

__slots__ = "database", "table", "name", "raw_metadata"

# pylint: disable=too-many-arguments
def __init__(
self,
Expand All @@ -26,60 +52,54 @@ def __init__(
link: str = None,
disk_name: str = None,
) -> None:
super().__init__()
self.database: str = database
self.table: str = table
self.name: str = name
self.raw_metadata: dict = {
"checksum": checksum,
"size": size,
"files": files,
"tarball": tarball,
"link": link,
"disk_name": disk_name,
}
self.raw_metadata: RawMetadata = RawMetadata(
checksum, size, files, tarball, link, disk_name
)

@property
def checksum(self) -> str:
"""
Return data part checksum.
"""
return self.raw_metadata["checksum"]
return self.raw_metadata.checksum

@property
def size(self) -> int:
"""
Return data part size.
"""
return self.raw_metadata["size"]
return self.raw_metadata.size

@property
def files(self) -> Sequence[str]:
"""
Return data part files.
"""
return self.raw_metadata["files"]
return self.raw_metadata.files

@property
def link(self) -> Optional[str]:
"""
For deduplicated data parts it returns link to the source backup (its path). Otherwise None is returned.
"""
return self.raw_metadata["link"]
return self.raw_metadata.link

@property
def disk_name(self) -> str:
"""
Return disk name where part is stored.
"""
return self.raw_metadata.get("disk_name", "default")
return self.raw_metadata.disk_name if self.raw_metadata.disk_name else "default"

@property
def tarball(self) -> bool:
"""
Returns true if part files stored as single tarball.
"""
return self.raw_metadata["tarball"]
return self.raw_metadata.tarball

@classmethod
def load(
Expand Down
14 changes: 13 additions & 1 deletion ch_backup/clickhouse/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from typing import List, Optional, Tuple

import ch_backup.logging
from ch_backup.util import Slotted


class Disk(SimpleNamespace):
Expand Down Expand Up @@ -163,11 +164,22 @@ def set_engine_from_sql(self, db_sql: str) -> None:
self.engine = match.group("engine")


class FrozenPart(SimpleNamespace):
class FrozenPart(Slotted):
"""
Frozen data part.
"""

__slots__ = (
"database",
"table",
"name",
"disk_name",
"path",
"checksum",
"size",
"files",
)

def __init__(
self,
database: str,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,20 @@
from ch_backup.storage.async_pipeline.base_pipeline.handler import Handler
from ch_backup.storage.async_pipeline.stages.types import StageType
from ch_backup.storage.engine.base import PipeLineCompatibleStorageEngine
from ch_backup.util import Slotted


@dataclass
class UploadingPart:
class UploadingPart(Slotted):
"""
Passed between uploading stages.
We could use dataclass(slots=True) from dataclasses once the minimum supported version of Python is >= 3.10.
"""

__slots__ = "data", "upload_id"
data: bytes
upload_id: Optional[str] = None
upload_id: Optional[str]


class StartMultipartUploadStage(Handler):
Expand Down
22 changes: 22 additions & 0 deletions ch_backup/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,3 +444,25 @@ def replace_macros(string: str, macros: dict) -> str:
pattern=r"{([^{}]+)}",
repl=lambda m: macros.get(m.group(1), m.group(0)),
)


class Slotted:
    """
    Base class for objects that keep their attributes in __slots__.

    Declaring __slots__ in every class of a hierarchy denies the creation of the per-instance
    __dict__ and __weakref__. The space saved over using __dict__ can be significant, and attribute
    lookup speed can be significantly improved as well.
    All child classes must declare __slots__.

    Provides __repr__ and __eq__ built from the declared slots. Since __eq__ is defined without
    __hash__, instances are unhashable.
    """

    __slots__ = ()

    def _slot_names(self) -> list:
        """
        Return attribute names declared via __slots__ by every class in the MRO, base classes first.

        `self.__slots__` alone resolves to the most-derived declaration only, which would silently
        drop attributes declared by intermediate base classes.
        """
        names: list = []
        for klass in reversed(type(self).__mro__):
            declared = klass.__dict__.get("__slots__", ())
            # A bare string is a valid declaration of a single slot; do not iterate its characters.
            if isinstance(declared, str):
                declared = (declared,)
            for name in declared:
                # Private names (`__x`) are stored under their mangled form (`_Class__x`).
                if name.startswith("__") and not name.endswith("__"):
                    owner = klass.__name__.lstrip("_")
                    if owner:
                        name = f"_{owner}{name}"
                if name not in names:
                    names.append(name)
        return names

    def __repr__(self) -> str:
        """
        Return `ClassName([...])` listing `name: value` for every slot.

        Slots that have not been assigned yet are shown as `<unset>` instead of raising
        AttributeError, so repr() stays usable on partially initialized objects.
        """
        repr_ = [
            f"{attr}: {getattr(self, attr, '<unset>')}" for attr in self._slot_names()
        ]
        return f"{type(self).__name__}({repr_})"

    def __eq__(self, other: object) -> bool:
        """
        Return True if `other` is an instance of this object's class and all slots compare equal.

        A slot that is unassigned on both objects counts as equal; unassigned on only one side
        counts as different.
        """
        if not isinstance(other, self.__class__):
            return False
        # Fresh sentinel: equal only to itself, so it never matches a real attribute value.
        missing = object()
        for slot in self._slot_names():
            if not getattr(self, slot, missing) == getattr(other, slot, missing):
                return False
        return True

0 comments on commit 89ef858

Please sign in to comment.