Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use __slots__ for frequently created classes #153

Merged
merged 5 commits into from
May 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 12 additions & 8 deletions ch_backup/backup/deduplication.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,24 @@
from ch_backup.backup_context import BackupContext
from ch_backup.clickhouse.models import Database, FrozenPart
from ch_backup.clickhouse.schema import is_replicated
from ch_backup.util import utcnow
from ch_backup.util import Slotted, utcnow


class PartDedupInfo:
class PartDedupInfo(Slotted):
"""
Information about a data part, used for deduplication / creation of incremental backups.
"""

__slots__ = (
"backup_path",
"checksum",
"size",
"files",
"tarball",
"disk_name",
"verified",
)

def __init__(
self,
backup_path: str,
Expand All @@ -39,12 +49,6 @@ def __init__(
self.disk_name = disk_name
self.verified = verified

def __repr__(self):
return f"PartDedupInfo({self.__dict__})"

def __eq__(self, other):
return self.__dict__ == other.__dict__


TableDedupInfo = Dict[str, PartDedupInfo]

Expand Down
2 changes: 1 addition & 1 deletion ch_backup/backup/layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -593,7 +593,7 @@ def _target_part_size(self, part: PartMetadata) -> int:
"""
Predicts tar archive size after encryption.
"""
tar_size = calc_tarball_size(part.raw_metadata["files"], part.size)
tar_size = calc_tarball_size(list(part.raw_metadata.files), part.size)
return calc_encrypted_size(
tar_size, self._encryption_chunk_size, self._encryption_metadata_size
)
Expand Down
54 changes: 37 additions & 17 deletions ch_backup/backup/metadata/part_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,43 @@
Backup metadata for ClickHouse data part.
"""

from types import SimpleNamespace
from typing import Optional, Sequence

from ch_backup.clickhouse.models import FrozenPart
from ch_backup.util import Slotted


class PartMetadata(SimpleNamespace):
class RawMetadata(Slotted):
    """
    Raw metadata for ClickHouse data part.

    Plain attribute holder: the constructor stores every argument unchanged
    in the slot of the same name.

    Args:
        checksum: data part checksum.
        size: data part size.
        files: data part files.
        tarball: whether the part files are stored as a single tarball.
        link: optional link value; None when not provided.
        disk_name: optional disk name; None when not provided.
    """

    __slots__ = "checksum", "size", "files", "tarball", "link", "disk_name"

    def __init__(
        self,
        checksum: str,
        size: int,
        files: Sequence[str],
        tarball: bool,
        # Both trailing arguments default to None, so they are annotated as
        # Optional explicitly instead of relying on implicit Optional.
        link: Optional[str] = None,
        disk_name: Optional[str] = None,
    ) -> None:
        self.checksum = checksum
        self.size = size
        self.files = files
        self.tarball = tarball
        self.link = link
        self.disk_name = disk_name


class PartMetadata(Slotted):
"""
Backup metadata for ClickHouse data part.
"""

__slots__ = "database", "table", "name", "raw_metadata"

# pylint: disable=too-many-arguments
def __init__(
self,
Expand All @@ -26,60 +52,54 @@ def __init__(
link: str = None,
disk_name: str = None,
) -> None:
super().__init__()
self.database: str = database
self.table: str = table
self.name: str = name
self.raw_metadata: dict = {
"checksum": checksum,
"size": size,
"files": files,
"tarball": tarball,
"link": link,
"disk_name": disk_name,
}
self.raw_metadata: RawMetadata = RawMetadata(
checksum, size, files, tarball, link, disk_name
)

@property
def checksum(self) -> str:
"""
Return data part checksum.
"""
return self.raw_metadata["checksum"]
return self.raw_metadata.checksum

@property
def size(self) -> int:
"""
Return data part size.
"""
return self.raw_metadata["size"]
return self.raw_metadata.size

@property
def files(self) -> Sequence[str]:
"""
Return data part files.
"""
return self.raw_metadata["files"]
return self.raw_metadata.files

@property
def link(self) -> Optional[str]:
"""
For deduplicated data parts it returns link to the source backup (its path). Otherwise None is returned.
"""
return self.raw_metadata["link"]
return self.raw_metadata.link

@property
def disk_name(self) -> str:
"""
Return disk name where part is stored.
"""
return self.raw_metadata.get("disk_name", "default")
return self.raw_metadata.disk_name if self.raw_metadata.disk_name else "default"

@property
def tarball(self) -> bool:
"""
Returns true if part files stored as single tarball.
"""
return self.raw_metadata["tarball"]
return self.raw_metadata.tarball

@classmethod
def load(
Expand Down
14 changes: 13 additions & 1 deletion ch_backup/clickhouse/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from typing import List, Optional, Tuple

import ch_backup.logging
from ch_backup.util import Slotted


class Disk(SimpleNamespace):
Expand Down Expand Up @@ -163,11 +164,22 @@ def set_engine_from_sql(self, db_sql: str) -> None:
self.engine = match.group("engine")


class FrozenPart(SimpleNamespace):
class FrozenPart(Slotted):
"""
Frozen data part.
"""

__slots__ = (
"database",
"table",
"name",
"disk_name",
"path",
"checksum",
"size",
"files",
)

def __init__(
self,
database: str,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,20 @@
from ch_backup.storage.async_pipeline.base_pipeline.handler import Handler
from ch_backup.storage.async_pipeline.stages.types import StageType
from ch_backup.storage.engine.base import PipeLineCompatibleStorageEngine
from ch_backup.util import Slotted


@dataclass
class UploadingPart:
class UploadingPart(Slotted):
"""
Passed between uploading stages.

We could use dataclass(slots=True) from dataclasses once the minimum supported version of Python is >= 3.10.
"""

__slots__ = "data", "upload_id"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's add a comment noting that @dataclass supports slots=True in Python >= 3.10

data: bytes
upload_id: Optional[str] = None
upload_id: Optional[str]


class StartMultipartUploadStage(Handler):
Expand Down
22 changes: 22 additions & 0 deletions ch_backup/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,3 +444,25 @@ def replace_macros(string: str, macros: dict) -> str:
pattern=r"{([^{}]+)}",
repl=lambda m: macros.get(m.group(1), m.group(0)),
)


class Slotted:
    """
    Allow to explicitly declare data members and deny the creation of __dict__ and __weakref__.
    The space saved over using __dict__ can be significant. Attribute lookup speed can be significantly improved as well.
    All child classes must declare __slots__.

    Provides __repr__ and __eq__ built from the declared slots, including
    slots declared by intermediate base classes. Because __eq__ is defined
    without __hash__, instances are unhashable unless a subclass defines
    __hash__ itself. Both methods read every slot with getattr, so they raise
    AttributeError if a slot was never assigned.
    """

    __slots__ = ()

    @classmethod
    def _slot_names(cls):
        """
        Return the names of all slots declared along the MRO, base classes first.
        """
        names = []
        for klass in reversed(cls.__mro__):
            declared = klass.__dict__.get("__slots__", ())
            # A bare string declares exactly one slot; iterating it would
            # yield single characters instead of the slot name.
            if isinstance(declared, str):
                declared = (declared,)
            for name in declared:
                if name not in names:
                    names.append(name)
        return tuple(names)

    def __repr__(self):
        repr_ = [f"{attr}: {getattr(self, attr)}" for attr in self._slot_names()]
        return f"{type(self).__name__}({repr_})"

    def __eq__(self, other):
        # Strictness: an object of an unrelated class is never equal.
        if not isinstance(other, self.__class__):
            return False
        return all(
            getattr(self, slot) == getattr(other, slot)
            for slot in self._slot_names()
        )