Skip to content
This repository was archived by the owner on May 5, 2025. It is now read-only.

Commit 676f04d

Browse files
committed
Merge EditableReportFile into ReportFile
The main purpose of the `EditableReportFile` was to offer methods to delete labels/sessions, as well as to properly maintain uptodate versions of `present_sessions` and `totals` when doing mutations on the file. All the methods were now moved to `ReportFile`, which has also gained new functionality related to making sure that caches are cleared when necessary, and re-computed on-demand, so that they are always correct. --- I plan to do a similar thing for `EditableReport` as well in a followup, as it has a similar purpose in relation to maintaining uptodate `totals`.
1 parent fd58134 commit 676f04d

File tree

2 files changed

+124
-142
lines changed

2 files changed

+124
-142
lines changed

shared/reports/editable.py

Lines changed: 5 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -1,116 +1,13 @@
11
import dataclasses
22
import logging
3-
from copy import copy
4-
from typing import List
53

64
import sentry_sdk
75

86
from shared.reports.resources import Report, ReportFile
9-
from shared.reports.types import EMPTY
107

118
log = logging.getLogger(__name__)
129

13-
14-
class EditableReportFile(ReportFile):
15-
__slots__ = ("_details",)
16-
17-
@classmethod
18-
def from_ReportFile(cls, report_file: ReportFile):
19-
name = report_file.name
20-
editable_file = cls(name)
21-
editable_file._totals = report_file._totals
22-
editable_file._lines = report_file._lines
23-
editable_file._ignore = report_file._ignore
24-
editable_file._details = report_file._details
25-
editable_file.fix_details()
26-
return editable_file
27-
28-
def fix_details(self):
29-
if self._details is None:
30-
self._details = {}
31-
if self._details.get("present_sessions") is not None:
32-
self._details["present_sessions"] = set(
33-
self._details.get("present_sessions")
34-
)
35-
36-
def __init__(self, *args, **kwargs):
37-
super().__init__(*args, **kwargs)
38-
self.fix_details()
39-
40-
@property
41-
def details(self):
42-
if not self._details:
43-
return self._details
44-
if self._details.get("present_sessions") is None:
45-
return self._details
46-
res = copy(self._details)
47-
res["present_sessions"] = sorted(self._details.get("present_sessions"))
48-
return res
49-
50-
def delete_labels(
51-
self, session_ids_to_delete: List[int], label_ids_to_delete: List[int]
52-
):
53-
"""Given a list of session_ids and label_ids to delete
54-
Remove all datapoints that belong to at least 1 session_ids to delete and include
55-
at least 1 of the label_ids to be removed
56-
"""
57-
for index, line in self.lines:
58-
if line.datapoints is not None:
59-
if any(
60-
(
61-
dp.sessionid in session_ids_to_delete
62-
and label_id in label_ids_to_delete
63-
)
64-
for dp in line.datapoints
65-
for label_id in dp.label_ids
66-
):
67-
# Line fits change requirements
68-
new_line = self.line_without_labels(
69-
line, session_ids_to_delete, label_ids_to_delete
70-
)
71-
if new_line == EMPTY:
72-
del self[index]
73-
else:
74-
self[index] = new_line
75-
self._totals = None
76-
self.calculate_present_sessions()
77-
78-
def calculate_present_sessions(self):
79-
all_sessions = set()
80-
for _, line in self.lines:
81-
all_sessions.update(int(s.id) for s in line.sessions)
82-
self._details["present_sessions"] = all_sessions
83-
84-
def merge(self, *args, **kwargs):
85-
res = super().merge(*args, **kwargs)
86-
self.calculate_present_sessions()
87-
return res
88-
89-
def delete_multiple_sessions(self, session_ids_to_delete: set[int]):
90-
if "present_sessions" not in self._details:
91-
self.calculate_present_sessions()
92-
current_sessions = self._details["present_sessions"]
93-
94-
new_sessions = current_sessions.difference(session_ids_to_delete)
95-
if current_sessions == new_sessions:
96-
return # nothing to do
97-
98-
self._details["present_sessions"] = new_sessions
99-
self._totals = None # force a refresh of the on-demand totals
100-
101-
if not new_sessions:
102-
self._lines = [] # no remaining sessions means no line data
103-
return
104-
105-
for index, line in self.lines:
106-
if any(s.id in session_ids_to_delete for s in line.sessions):
107-
new_line = self.line_without_multiple_sessions(
108-
line, session_ids_to_delete
109-
)
110-
if new_line == EMPTY:
111-
del self[index]
112-
else:
113-
self[index] = new_line
10+
EditableReportFile = ReportFile # re-export
11411

11512

11613
class EditableReport(Report):
@@ -124,9 +21,7 @@ def merge(self, new_report, joined=True):
12421
super().merge(new_report, joined)
12522
for file in self:
12623
if isinstance(file, ReportFile):
127-
self._chunks[self._files.get(file.name).file_index] = (
128-
EditableReportFile.from_ReportFile(file)
129-
)
24+
self._chunks[self._files.get(file.name).file_index] = file
13025

13126
def turn_chunks_into_reports(self):
13227
filename_mapping = {
@@ -140,7 +35,7 @@ def turn_chunks_into_reports(self):
14035
if chunk is not None and file_summary is not None:
14136
if isinstance(chunk, ReportFile):
14237
chunk = chunk._lines
143-
report_file = self.file_class(
38+
report_file = ReportFile(
14439
name=filename,
14540
totals=file_summary.file_totals,
14641
lines=chunk,
@@ -216,4 +111,5 @@ def change_sessionid(self, old_id: int, new_id: int):
216111
if point.sessionid == old_id:
217112
point.sessionid = new_id
218113

219-
report_file._details["present_sessions"] = all_sessions
114+
report_file._invalidate_caches()
115+
report_file.__present_sessions = all_sessions

shared/reports/resources.py

Lines changed: 119 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -62,13 +62,14 @@ class ReportFile(object):
6262
"_lines",
6363
"_ignore",
6464
"_totals",
65+
"__present_sessions",
6566
]
6667

6768
def __init__(
6869
self,
69-
name,
70-
totals=None,
71-
lines=None,
70+
name: str,
71+
totals: ReportTotals | list | None = None,
72+
lines: list[None | str | ReportLine] | str | None = None,
7273
ignore=None,
7374
):
7475
"""
@@ -82,26 +83,69 @@ def __init__(
8283
{eof:N, lines:[1,10]}
8384
"""
8485
self.name = name
86+
self._details: dict[str, Any] = {}
87+
8588
# lines = [<details dict()>, <Line #1>, ....]
89+
self._lines: list[None | str | ReportLine] = []
8690
if lines:
8791
if isinstance(lines, list):
88-
self._details = None
8992
self._lines = lines
9093

9194
else:
9295
lines = lines.splitlines()
93-
self._details = orjson.loads(lines.pop(0) or "null")
96+
if detailsline := lines.pop(0):
97+
self._details = orjson.loads(detailsline) or {}
9498
self._lines = lines
95-
else:
96-
self._details = {}
97-
self._lines = []
9899

99100
self._ignore = _ignore_to_func(ignore) if ignore else None
100101

102+
# The `_totals` and `__present_sessions` fields are cached values for the
103+
# `totals` and `_present_sessions` properties respectively.
104+
# The values are loaded at initialization time, or calculated from line data on-demand.
105+
# All mutating methods (like `append`, `merge`, etc) will either re-calculate these values
106+
# directly, or clear them so the `@property` accessors re-calculate them when needed.
107+
108+
self._totals: ReportTotals | None = None
101109
if isinstance(totals, ReportTotals):
102110
self._totals = totals
103-
else:
104-
self._totals = ReportTotals(*totals) if totals else None
111+
elif totals:
112+
self._totals = ReportTotals(*totals)
113+
114+
self.__present_sessions: set[int] | None = None
115+
if present_sessions := self._details.get("present_sessions"):
116+
self.__present_sessions = set(present_sessions)
117+
118+
def _invalidate_caches(self):
119+
self._totals = None
120+
self.__present_sessions = None
121+
122+
@property
123+
def _present_sessions(self):
124+
if self.__present_sessions is None:
125+
self.__present_sessions = set()
126+
for _, line in self.lines:
127+
self.__present_sessions.update(int(s.id) for s in line.sessions)
128+
return self.__present_sessions
129+
130+
@property
131+
def details(self):
132+
self._details["present_sessions"] = sorted(self._present_sessions)
133+
return self._details
134+
135+
@property
136+
def totals(self):
137+
if not self._totals:
138+
self._totals = self._process_totals()
139+
return self._totals
140+
141+
def _process_totals(self) -> ReportTotals:
142+
return get_line_totals(line for _ln, line in self.lines)
143+
144+
def _encode(self) -> str:
145+
details = orjson.dumps(self.details, option=orjson_option)
146+
return (
147+
details + b"\n" + b"\n".join(_dumps_not_none(line) for line in self._lines)
148+
).decode()
105149

106150
def __repr__(self):
107151
try:
@@ -176,6 +220,7 @@ def __setitem__(self, ln, line):
176220
self._lines.extend([EMPTY] * (ln - length))
177221

178222
self._lines[ln - 1] = line
223+
self._invalidate_caches()
179224
return
180225

181226
def __delitem__(self, ln: int):
@@ -190,11 +235,12 @@ def __delitem__(self, ln: int):
190235
self._lines.extend([EMPTY] * (ln - length))
191236

192237
self._lines[ln - 1] = EMPTY
238+
self._invalidate_caches()
193239
return
194240

195241
def __len__(self):
196242
"""Returns count(number of lines with coverage data)"""
197-
return len([_f for _f in self._lines if _f])
243+
return sum(1 for _f in self._lines if _f)
198244

199245
@property
200246
def eof(self):
@@ -268,6 +314,8 @@ def append(self, ln, line):
268314
self._lines[ln - 1] = merge_line(_line, line)
269315
else:
270316
self._lines[ln - 1] = line
317+
318+
self._invalidate_caches()
271319
return True
272320

273321
def merge(self, other_file, joined=True):
@@ -316,28 +364,9 @@ def merge(self, other_file, joined=True):
316364
for before, after in zip_longest(self, other_file)
317365
]
318366

319-
self._totals = None
367+
self._invalidate_caches()
320368
return True
321369

322-
@property
323-
def details(self):
324-
return self._details
325-
326-
def _encode(self) -> str:
327-
details = orjson.dumps(self.details, option=orjson_option)
328-
return (
329-
details + b"\n" + b"\n".join(_dumps_not_none(line) for line in self._lines)
330-
).decode()
331-
332-
@property
333-
def totals(self):
334-
if not self._totals:
335-
self._totals = self._process_totals()
336-
return self._totals
337-
338-
def _process_totals(self) -> ReportTotals:
339-
return get_line_totals(line for _ln, line in self.lines)
340-
341370
def does_diff_adjust_tracked_lines(self, diff, future_file):
342371
for segment in diff["segments"]:
343372
# loop through each line
@@ -385,10 +414,11 @@ def shift_lines_by_diff(self, diff, forward=True) -> None:
385414
except (ValueError, KeyError, TypeError, IndexError):
386415
log.exception("Failed to shift lines by diff")
387416
pass
417+
self._invalidate_caches()
388418

389419
@classmethod
390420
def line_without_labels(
391-
cls, line, session_ids_to_delete: list[int], label_ids_to_delete: list[int]
421+
cls, line, session_ids_to_delete: set[int], label_ids_to_delete: set[int]
392422
):
393423
new_datapoints = (
394424
[
@@ -401,7 +431,7 @@ def line_without_labels(
401431
else None
402432
)
403433
remaining_session_ids = set(dp.sessionid for dp in new_datapoints)
404-
removed_session_ids = set(session_ids_to_delete) - remaining_session_ids
434+
removed_session_ids = session_ids_to_delete - remaining_session_ids
405435
if set(s.id for s in line.sessions) & removed_session_ids:
406436
new_sessions = [s for s in line.sessions if s.id not in removed_session_ids]
407437
else:
@@ -424,6 +454,38 @@ def line_without_labels(
424454
sessions=new_sessions,
425455
)
426456

457+
def delete_labels(
458+
self,
459+
session_ids_to_delete: list[int] | set[int],
460+
label_ids_to_delete: list[int] | set[int],
461+
):
462+
"""
463+
Given a list of session_ids and label_ids to delete, remove all datapoints
464+
that belong to at least 1 session_ids to delete and include at least 1 of the label_ids to be removed.
465+
"""
466+
session_ids_to_delete = set(session_ids_to_delete)
467+
label_ids_to_delete = set(label_ids_to_delete)
468+
for index, line in self.lines:
469+
if line.datapoints is not None:
470+
if any(
471+
(
472+
dp.sessionid in session_ids_to_delete
473+
and label_id in label_ids_to_delete
474+
)
475+
for dp in line.datapoints
476+
for label_id in dp.label_ids
477+
):
478+
# Line fits change requirements
479+
new_line = self.line_without_labels(
480+
line, session_ids_to_delete, label_ids_to_delete
481+
)
482+
if new_line == EMPTY:
483+
del self[index]
484+
else:
485+
self[index] = new_line
486+
487+
self._invalidate_caches()
488+
427489
@classmethod
428490
def line_without_multiple_sessions(
429491
cls, line: ReportLine, session_ids_to_delete: set[int]
@@ -446,6 +508,30 @@ def line_without_multiple_sessions(
446508
datapoints=new_datapoints,
447509
)
448510

511+
def delete_multiple_sessions(self, session_ids_to_delete: set[int]):
512+
current_sessions = self._present_sessions
513+
new_sessions = current_sessions.difference(session_ids_to_delete)
514+
if current_sessions == new_sessions:
515+
return # nothing to do
516+
517+
self._invalidate_caches()
518+
519+
if not new_sessions:
520+
self._lines = [] # no remaining sessions means no line data
521+
return
522+
523+
for index, line in self.lines:
524+
if any(s.id in session_ids_to_delete for s in line.sessions):
525+
new_line = self.line_without_multiple_sessions(
526+
line, session_ids_to_delete
527+
)
528+
if new_line == EMPTY:
529+
del self[index]
530+
else:
531+
self[index] = new_line
532+
533+
self.__present_sessions = current_sessions
534+
449535

450536
def chunks_from_storage_contains_header(chunks: str) -> bool:
451537
try:

0 commit comments

Comments
 (0)