Skip to content

Commit

Permalink
Merge EditableReportFile into ReportFile
Browse files Browse the repository at this point in the history
The main purpose of `EditableReportFile` was to offer methods to delete labels/sessions, and to keep `present_sessions` and `totals` up to date whenever the file is mutated.

All of these methods have now been moved to `ReportFile`, which has also gained new functionality to ensure that caches are cleared when necessary and recomputed on demand, so that they are always correct.

---

I plan to do the same for `EditableReport` in a follow-up, as it serves a similar purpose: keeping `totals` up to date.
  • Loading branch information
Swatinem committed Feb 27, 2025
1 parent fd58134 commit 676f04d
Showing 2 changed files with 124 additions and 142 deletions.
114 changes: 5 additions & 109 deletions shared/reports/editable.py
Original file line number Diff line number Diff line change
@@ -1,116 +1,13 @@
import dataclasses
import logging
from copy import copy
from typing import List

import sentry_sdk

from shared.reports.resources import Report, ReportFile
from shared.reports.types import EMPTY

log = logging.getLogger(__name__)


class EditableReportFile(ReportFile):
__slots__ = ("_details",)

@classmethod
def from_ReportFile(cls, report_file: ReportFile):
name = report_file.name
editable_file = cls(name)
editable_file._totals = report_file._totals
editable_file._lines = report_file._lines
editable_file._ignore = report_file._ignore
editable_file._details = report_file._details
editable_file.fix_details()
return editable_file

def fix_details(self):
if self._details is None:
self._details = {}
if self._details.get("present_sessions") is not None:
self._details["present_sessions"] = set(
self._details.get("present_sessions")
)

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.fix_details()

@property
def details(self):
if not self._details:
return self._details
if self._details.get("present_sessions") is None:
return self._details
res = copy(self._details)
res["present_sessions"] = sorted(self._details.get("present_sessions"))
return res

def delete_labels(
self, session_ids_to_delete: List[int], label_ids_to_delete: List[int]
):
"""Given a list of session_ids and label_ids to delete
Remove all datapoints that belong to at least 1 session_ids to delete and include
at least 1 of the label_ids to be removed
"""
for index, line in self.lines:
if line.datapoints is not None:
if any(
(
dp.sessionid in session_ids_to_delete
and label_id in label_ids_to_delete
)
for dp in line.datapoints
for label_id in dp.label_ids
):
# Line fits change requirements
new_line = self.line_without_labels(
line, session_ids_to_delete, label_ids_to_delete
)
if new_line == EMPTY:
del self[index]
else:
self[index] = new_line
self._totals = None
self.calculate_present_sessions()

def calculate_present_sessions(self):
all_sessions = set()
for _, line in self.lines:
all_sessions.update(int(s.id) for s in line.sessions)
self._details["present_sessions"] = all_sessions

def merge(self, *args, **kwargs):
res = super().merge(*args, **kwargs)
self.calculate_present_sessions()
return res

def delete_multiple_sessions(self, session_ids_to_delete: set[int]):
if "present_sessions" not in self._details:
self.calculate_present_sessions()
current_sessions = self._details["present_sessions"]

new_sessions = current_sessions.difference(session_ids_to_delete)
if current_sessions == new_sessions:
return # nothing to do

self._details["present_sessions"] = new_sessions
self._totals = None # force a refresh of the on-demand totals

if not new_sessions:
self._lines = [] # no remaining sessions means no line data
return

for index, line in self.lines:
if any(s.id in session_ids_to_delete for s in line.sessions):
new_line = self.line_without_multiple_sessions(
line, session_ids_to_delete
)
if new_line == EMPTY:
del self[index]
else:
self[index] = new_line
EditableReportFile = ReportFile # re-export


class EditableReport(Report):
@@ -124,9 +21,7 @@ def merge(self, new_report, joined=True):
super().merge(new_report, joined)
for file in self:
if isinstance(file, ReportFile):
self._chunks[self._files.get(file.name).file_index] = (
EditableReportFile.from_ReportFile(file)
)
self._chunks[self._files.get(file.name).file_index] = file

def turn_chunks_into_reports(self):
filename_mapping = {
@@ -140,7 +35,7 @@ def turn_chunks_into_reports(self):
if chunk is not None and file_summary is not None:
if isinstance(chunk, ReportFile):
chunk = chunk._lines
report_file = self.file_class(
report_file = ReportFile(
name=filename,
totals=file_summary.file_totals,
lines=chunk,
@@ -216,4 +111,5 @@ def change_sessionid(self, old_id: int, new_id: int):
if point.sessionid == old_id:
point.sessionid = new_id

report_file._details["present_sessions"] = all_sessions
report_file._invalidate_caches()
report_file.__present_sessions = all_sessions
152 changes: 119 additions & 33 deletions shared/reports/resources.py
Original file line number Diff line number Diff line change
@@ -62,13 +62,14 @@ class ReportFile(object):
"_lines",
"_ignore",
"_totals",
"__present_sessions",
]

def __init__(
self,
name,
totals=None,
lines=None,
name: str,
totals: ReportTotals | list | None = None,
lines: list[None | str | ReportLine] | str | None = None,
ignore=None,
):
"""
@@ -82,26 +83,69 @@ def __init__(
{eof:N, lines:[1,10]}
"""
self.name = name
self._details: dict[str, Any] = {}

# lines = [<details dict()>, <Line #1>, ....]
self._lines: list[None | str | ReportLine] = []
if lines:
if isinstance(lines, list):
self._details = None
self._lines = lines

else:
lines = lines.splitlines()
self._details = orjson.loads(lines.pop(0) or "null")
if detailsline := lines.pop(0):
self._details = orjson.loads(detailsline) or {}
self._lines = lines
else:
self._details = {}
self._lines = []

self._ignore = _ignore_to_func(ignore) if ignore else None

# The `_totals` and `__present_sessions` fields are cached values for the
# `totals` and `_present_sessions` properties respectively.
# The values are loaded at initialization time, or calculated from line data on-demand.
# All mutating methods (like `append`, `merge`, etc) will either re-calculate these values
# directly, or clear them so the `@property` accessors re-calculate them when needed.

self._totals: ReportTotals | None = None
if isinstance(totals, ReportTotals):
self._totals = totals
else:
self._totals = ReportTotals(*totals) if totals else None
elif totals:
self._totals = ReportTotals(*totals)

self.__present_sessions: set[int] | None = None
if present_sessions := self._details.get("present_sessions"):
self.__present_sessions = set(present_sessions)

def _invalidate_caches(self):
    """Reset both cached values (`_totals` and `__present_sessions`) to `None`.

    The `totals` and `_present_sessions` property accessors treat an unset
    cache as "needs recomputation", so clearing here makes the next read
    rebuild the value from the current line data.
    """
    self.__present_sessions = None
    self._totals = None

@property
def _present_sessions(self):
    """Return the set of session ids (as `int`) referenced by any line.

    The result is cached in `__present_sessions`; when the cache is `None`
    it is rebuilt by walking `self.lines` and collecting `int(s.id)` for
    every session attached to every line.
    """
    if self.__present_sessions is None:
        # Build the complete set before storing it: if iterating the lines
        # raises part-way through, the cache must stay `None` rather than be
        # left holding a partial set that later reads would trust.
        self.__present_sessions = {
            int(session.id) for _, line in self.lines for session in line.sessions
        }
    return self.__present_sessions

@property
def details(self):
    """Return the details dict with a refreshed `present_sessions` entry.

    The `"present_sessions"` key of `_details` is overwritten in place with
    the sorted list of `_present_sessions` on every access, and the same
    dict object is returned.
    """
    session_list = sorted(self._present_sessions)
    details = self._details
    details["present_sessions"] = session_list
    return details

@property
def totals(self):
    """Return the file totals, computing and caching them when unset.

    A falsy `_totals` is treated as "not cached" and replaced by the result
    of `_process_totals()`.
    """
    if self._totals:
        return self._totals
    self._totals = self._process_totals()
    return self._totals

def _process_totals(self) -> ReportTotals:
    """Compute totals by passing every line of `self.lines` to `get_line_totals`."""
    # `self.lines` yields (line number, line) pairs; only the line is needed.
    line_stream = (entry for _number, entry in self.lines)
    return get_line_totals(line_stream)

def _encode(self) -> str:
    """Serialize the file to text: a details line followed by one line per entry.

    The first line is the `orjson` dump of `self.details`; every element of
    `_lines` follows, each encoded by `_dumps_not_none` and separated by
    newlines. The bytes are decoded to `str` at the end.
    """
    header = orjson.dumps(self.details, option=orjson_option)
    body = b"\n".join(_dumps_not_none(entry) for entry in self._lines)
    # The separator after the header is always emitted, even with no lines.
    encoded = header + b"\n" + body
    return encoded.decode()

def __repr__(self):
try:
@@ -176,6 +220,7 @@ def __setitem__(self, ln, line):
self._lines.extend([EMPTY] * (ln - length))

self._lines[ln - 1] = line
self._invalidate_caches()
return

def __delitem__(self, ln: int):
@@ -190,11 +235,12 @@ def __delitem__(self, ln: int):
self._lines.extend([EMPTY] * (ln - length))

self._lines[ln - 1] = EMPTY
self._invalidate_caches()
return

def __len__(self):
"""Returns count(number of lines with coverage data)"""
return len([_f for _f in self._lines if _f])
return sum(1 for _f in self._lines if _f)

@property
def eof(self):
@@ -268,6 +314,8 @@ def append(self, ln, line):
self._lines[ln - 1] = merge_line(_line, line)
else:
self._lines[ln - 1] = line

self._invalidate_caches()
return True

def merge(self, other_file, joined=True):
@@ -316,28 +364,9 @@ def merge(self, other_file, joined=True):
for before, after in zip_longest(self, other_file)
]

self._totals = None
self._invalidate_caches()
return True

@property
def details(self):
return self._details

def _encode(self) -> str:
details = orjson.dumps(self.details, option=orjson_option)
return (
details + b"\n" + b"\n".join(_dumps_not_none(line) for line in self._lines)
).decode()

@property
def totals(self):
if not self._totals:
self._totals = self._process_totals()
return self._totals

def _process_totals(self) -> ReportTotals:
return get_line_totals(line for _ln, line in self.lines)

def does_diff_adjust_tracked_lines(self, diff, future_file):
for segment in diff["segments"]:
# loop through each line
@@ -385,10 +414,11 @@ def shift_lines_by_diff(self, diff, forward=True) -> None:
except (ValueError, KeyError, TypeError, IndexError):
log.exception("Failed to shift lines by diff")
pass
self._invalidate_caches()

@classmethod
def line_without_labels(
cls, line, session_ids_to_delete: list[int], label_ids_to_delete: list[int]
cls, line, session_ids_to_delete: set[int], label_ids_to_delete: set[int]
):
new_datapoints = (
[
@@ -401,7 +431,7 @@ def line_without_labels(
else None
)
remaining_session_ids = set(dp.sessionid for dp in new_datapoints)
removed_session_ids = set(session_ids_to_delete) - remaining_session_ids
removed_session_ids = session_ids_to_delete - remaining_session_ids
if set(s.id for s in line.sessions) & removed_session_ids:
new_sessions = [s for s in line.sessions if s.id not in removed_session_ids]
else:
@@ -424,6 +454,38 @@ def line_without_labels(
sessions=new_sessions,
)

def delete_labels(
self,
session_ids_to_delete: list[int] | set[int],
label_ids_to_delete: list[int] | set[int],
):
"""
Given a list of session_ids and label_ids to delete, remove all datapoints
that belong to at least 1 session_ids to delete and include at least 1 of the label_ids to be removed.
"""
session_ids_to_delete = set(session_ids_to_delete)
label_ids_to_delete = set(label_ids_to_delete)
for index, line in self.lines:
if line.datapoints is not None:
if any(
(
dp.sessionid in session_ids_to_delete
and label_id in label_ids_to_delete
)
for dp in line.datapoints
for label_id in dp.label_ids
):
# Line fits change requirements
new_line = self.line_without_labels(
line, session_ids_to_delete, label_ids_to_delete
)
if new_line == EMPTY:
del self[index]
else:
self[index] = new_line

self._invalidate_caches()

@classmethod
def line_without_multiple_sessions(
cls, line: ReportLine, session_ids_to_delete: set[int]
@@ -446,6 +508,30 @@ def line_without_multiple_sessions(
datapoints=new_datapoints,
)

def delete_multiple_sessions(self, session_ids_to_delete: set[int]):
    """
    Remove all data belonging to the given sessions from this file.

    Does nothing when none of `session_ids_to_delete` is currently present.
    When no session would remain, all line data is dropped. Otherwise every
    line that references a deleted session is rebuilt through
    `line_without_multiple_sessions`; a line that comes back as `EMPTY` is
    deleted, any other result replaces the line.
    """
    current_sessions = self._present_sessions
    new_sessions = current_sessions.difference(session_ids_to_delete)
    if current_sessions == new_sessions:
        return  # nothing to do

    self._invalidate_caches()

    if not new_sessions:
        self._lines = []  # no remaining sessions means no line data
        return

    for index, line in self.lines:
        if any(s.id in session_ids_to_delete for s in line.sessions):
            new_line = self.line_without_multiple_sessions(
                line, session_ids_to_delete
            )
            if new_line == EMPTY:
                del self[index]
            else:
                self[index] = new_line

    # Re-seed the cache with the sessions that remain. Storing the
    # pre-deletion set here would keep reporting the deleted sessions as
    # present (e.g. through `details`) until the next invalidation.
    self.__present_sessions = new_sessions


def chunks_from_storage_contains_header(chunks: str) -> bool:
try:

0 comments on commit 676f04d

Please sign in to comment.