Skip to content

Commit

Permalink
Merge EditableReport into Report
Browse files Browse the repository at this point in the history
The primary use case for `EditableReport` was to always keep a list of "partially parsed" `ReportFile`s around, as well as making sure that `totals` (and to some extent `file_totals`) are up to date.

This now moves all the methods from `EditableReport` onto `Report`, and introduces a new method to iterate over "partially parsed" `ReportFile`s, making sure those are being kept around as well.
  • Loading branch information
Swatinem committed Feb 27, 2025
1 parent 676f04d commit 3b3bf9c
Showing 3 changed files with 114 additions and 145 deletions.
116 changes: 3 additions & 113 deletions shared/reports/editable.py
Original file line number Diff line number Diff line change
@@ -1,115 +1,5 @@
import dataclasses
import logging

import sentry_sdk

from shared.reports.resources import Report, ReportFile

log = logging.getLogger(__name__)

EditableReportFile = ReportFile # re-export


class EditableReport(Report):
    """A `Report` whose chunks are eagerly parsed into `ReportFile`s so they can be edited in place.

    On construction every raw chunk is materialized via `turn_chunks_into_reports`,
    and each mutating method keeps the per-file `file_totals` and the cached
    report-level `totals` consistent with the edited contents.
    """

    # Files are materialized with this class (aliased to `ReportFile` above).
    file_class = EditableReportFile

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Parse all chunks up-front so every file is editable in place.
        self.turn_chunks_into_reports()

    def merge(self, new_report, joined=True):
        """Merge `new_report` into this one, then write the merged `ReportFile`s
        back into `self._chunks` so chunks and files stay in sync."""
        super().merge(new_report, joined)
        for file in self:
            if isinstance(file, ReportFile):
                self._chunks[self._files.get(file.name).file_index] = file

    def turn_chunks_into_reports(self):
        """Replace every raw chunk in `self._chunks` with a parsed `ReportFile`.

        Chunks with no matching file entry (and file entries with no chunk)
        are dropped by storing `None` at that index.
        """
        # Invert the name -> summary mapping so chunks can be matched by index.
        filename_mapping = {
            file_summary.file_index: filename
            for filename, file_summary in self._files.items()
        }
        for chunk_index in range(len(self._chunks)):
            filename = filename_mapping.get(chunk_index)
            file_summary = self._files.get(filename)
            chunk = self._chunks[chunk_index]
            if chunk is not None and file_summary is not None:
                if isinstance(chunk, ReportFile):
                    # Re-wrap an already-parsed file from its raw lines.
                    chunk = chunk._lines
                report_file = ReportFile(
                    name=filename,
                    totals=file_summary.file_totals,
                    lines=chunk,
                )
                self._chunks[chunk_index] = report_file
            else:
                # Orphaned chunk or file entry: neutralize this slot.
                self._chunks[chunk_index] = None

    def delete_labels(self, sessionids, labels_to_delete):
        """Remove the given labels (for the given sessions) from every file,
        refreshing per-file totals and dropping files that become empty.

        Returns `sessionids` unchanged for caller convenience.
        """
        self._totals = None  # invalidate the cached aggregate totals
        for file in self._chunks:
            if file is not None:
                file.delete_labels(sessionids, labels_to_delete)
                if file:
                    self._files[file.name] = dataclasses.replace(
                        self._files[file.name],
                        file_totals=file.totals,
                    )
                else:
                    # File has no lines left: remove it from the report.
                    del self[file.name]
        return sessionids

    def delete_multiple_sessions(self, session_ids_to_delete: list[int] | set[int]):
        """Remove all data belonging to the given sessions from every file,
        refreshing per-file totals and dropping files that become empty."""
        session_ids_to_delete = set(session_ids_to_delete)
        self._totals = None  # invalidate the cached aggregate totals
        for sessionid in session_ids_to_delete:
            self.sessions.pop(sessionid)

        for file in self._chunks:
            if file is not None:
                file.delete_multiple_sessions(session_ids_to_delete)
                if file:
                    self._files[file.name] = dataclasses.replace(
                        self._files[file.name],
                        file_totals=file.totals,
                    )
                else:
                    # File has no lines left: remove it from the report.
                    del self[file.name]

    @sentry_sdk.trace
    def change_sessionid(self, old_id: int, new_id: int):
        """
        This changes the session with `old_id` to have `new_id` instead.
        It patches up all the references to that session across all files and line records.
        In particular, it changes the id in all the `LineSession`s and `CoverageDatapoint`s,
        and does the equivalent of `calculate_present_sessions`.
        """
        session = self.sessions[new_id] = self.sessions.pop(old_id)
        session.id = new_id

        report_file: EditableReportFile
        for report_file in self._chunks:
            if report_file is None:
                continue

            all_sessions = set()

            for idx, _line in enumerate(report_file._lines):
                if not _line:
                    continue

                # this turns the line into an actual `ReportLine`
                line = report_file._lines[idx] = report_file._line(_line)

                # NOTE: this rebinds `session`, shadowing the session object
                # fetched at the top of the method.
                for session in line.sessions:
                    if session.id == old_id:
                        session.id = new_id
                    all_sessions.add(session.id)

                if line.datapoints:
                    for point in line.datapoints:
                        if point.sessionid == old_id:
                            point.sessionid = new_id

            report_file._invalidate_caches()
            # NOTE(review): due to Python name mangling inside this class body,
            # this assigns `_EditableReport__present_sessions` on the file —
            # presumably intended to hit `ReportFile`'s own `__present_sessions`
            # cache; confirm against `ReportFile`'s implementation.
            report_file.__present_sessions = all_sessions
# `EditableReport`/`EditableReportFile` have been merged into the plain
# `Report`/`ReportFile` classes; these aliases remain so existing imports of
# this module keep working without patching every call site.
EditableReportFile = ReportFile
EditableReport = Report
139 changes: 110 additions & 29 deletions shared/reports/resources.py
Original file line number Diff line number Diff line change
@@ -5,7 +5,7 @@
from fractions import Fraction
from itertools import filterfalse, zip_longest
from types import GeneratorType
from typing import Any, cast
from typing import Any, Generator, cast

import orjson
import sentry_sdk
@@ -599,7 +599,6 @@ def parse_chunks(chunks: str) -> tuple[list[str], ReportHeader]:


class Report(object):
file_class = ReportFile
_files: dict[str, ReportFileSummary]
_header: ReportHeader

@@ -618,22 +617,57 @@ def __init__(
self.sessions = get_sessions(sessions) if sessions else {}

# ["<json>", ...]
self._chunks: list[str | ReportFile]
self._chunks, self._header = (
parse_chunks(chunks)
if chunks and isinstance(chunks, str)
else (chunks or [], ReportHeader())
)

# <ReportTotals>
self._totals: ReportTotals | None = None
if isinstance(totals, ReportTotals):
self._totals = totals
elif totals:
self._totals = ReportTotals(*migrate_totals(totals))
else:
self._totals = None

self.diff_totals = diff_totals

def _invalidate_caches(self):
    # Drop the cached aggregate totals; the `totals` property lazily
    # recomputes them on next access.
    self._totals = None

@property
def totals(self):
    """Aggregate `ReportTotals` for the whole report, computed lazily and cached."""
    cached = self._totals
    if not cached:
        cached = self._process_totals()
        self._totals = cached
    return cached

def _process_totals(self):
    """Runs through the file network to aggregate totals.

    Returns <ReportTotals>.
    """
    per_file = []
    for filename, data in self._files.items():
        file_totals = data.file_totals
        if file_totals is None:
            # Summary has no precomputed totals: parse the file and ask it.
            file_totals = self.get(filename).totals
        per_file.append(file_totals)

    totals = agg_totals(per_file)
    totals.sessions = len(self.sessions)
    return totals

def _iter_parsed_files(self) -> Generator[ReportFile, None, None]:
    """Yield every file in the report as a parsed `ReportFile`.

    Raw (still-serialized) chunks are parsed on first access and written
    back into `self._chunks`, so repeated iteration does not re-parse.
    """
    for name, summary in self._files.items():
        index = summary.file_index
        chunk = self._chunks[index]
        if isinstance(chunk, ReportFile):
            yield chunk
            continue
        parsed = ReportFile(name=name, totals=summary.file_totals, lines=chunk)
        self._chunks[index] = parsed
        yield parsed

@property
def header(self) -> ReportHeader:
    """The `ReportHeader` parsed from (or supplied with) the chunks data."""
    return self._header
@@ -796,7 +830,7 @@ def get(self, filename, _else=None, bind=False):
lines = None
if isinstance(lines, ReportFile):
return lines
report_file = self.file_class(
report_file = ReportFile(
name=filename,
totals=_file.file_totals,
lines=lines,
@@ -866,29 +900,6 @@ def get_file_totals(self, path: str) -> ReportTotals | None:
else:
return ReportTotals(*totals)

@property
def totals(self):
    """Aggregate `ReportTotals` for the whole report, computed lazily and cached."""
    current = self._totals
    if current:
        return current
    computed = self._process_totals()
    self._totals = computed
    return computed

def _process_totals(self):
    """Runs through the file network to aggregate totals.

    Returns <ReportTotals>.
    """
    per_file = (
        # Fall back to parsing the file when the summary has no totals.
        data.file_totals
        if data.file_totals is not None
        else self.get(filename).totals
        for filename, data in self._files.items()
    )
    totals = agg_totals(per_file)
    totals.sessions = len(self.sessions)
    return totals

def next_session_number(self):
start_number = len(self.sessions)
while start_number in self.sessions or str(start_number) in self.sessions:
@@ -921,7 +932,7 @@ def __iter__(self):
if isinstance(report, ReportFile):
yield report
else:
yield self.file_class(
yield ReportFile(
name=filename,
totals=_file.file_totals,
lines=report,
@@ -1239,6 +1250,76 @@ def _passes_integrity_analysis(self):
return False
return True

def delete_labels(
    self, sessionids: list[int] | set[int], labels_to_delete: list[int] | set[int]
):
    """Remove the given labels (for the given sessions) from every file.

    Per-file totals are refreshed afterwards; files left with no coverage
    are removed from the report entirely. Cached report totals are
    invalidated. Returns `sessionids` unchanged for caller convenience.
    """
    # Materialize the generator first: `_iter_parsed_files` iterates over
    # `self._files`, and `del self[...]` below mutates that mapping, which
    # would otherwise raise "dictionary changed size during iteration".
    for file in list(self._iter_parsed_files()):
        file.delete_labels(sessionids, labels_to_delete)
        if file:
            self._files[file.name] = dataclasses.replace(
                self._files[file.name],
                file_totals=file.totals,
            )
        else:
            # File has no lines left: drop it from the report.
            del self[file.name]

    self._invalidate_caches()
    return sessionids

def delete_multiple_sessions(self, session_ids_to_delete: list[int] | set[int]):
    """Remove all data belonging to the given sessions from every file.

    The sessions are dropped from `self.sessions`, per-file totals are
    refreshed, files left with no coverage are removed from the report,
    and cached report totals are invalidated.
    """
    session_ids_to_delete = set(session_ids_to_delete)
    for sessionid in session_ids_to_delete:
        self.sessions.pop(sessionid)

    # Materialize the generator first: `_iter_parsed_files` iterates over
    # `self._files`, and `del self[...]` below mutates that mapping, which
    # would otherwise raise "dictionary changed size during iteration".
    for file in list(self._iter_parsed_files()):
        file.delete_multiple_sessions(session_ids_to_delete)
        if file:
            self._files[file.name] = dataclasses.replace(
                self._files[file.name],
                file_totals=file.totals,
            )
        else:
            # File has no lines left: drop it from the report.
            del self[file.name]

    self._invalidate_caches()

@sentry_sdk.trace
def change_sessionid(self, old_id: int, new_id: int):
    """
    This changes the session with `old_id` to have `new_id` instead.
    It patches up all the references to that session across all files and line records.
    In particular, it changes the id in all the `LineSession`s and `CoverageDatapoint`s,
    and does the equivalent of `calculate_present_sessions`.
    """
    # Re-key the session object itself first.
    session = self.sessions[new_id] = self.sessions.pop(old_id)
    session.id = new_id

    for file in self._iter_parsed_files():
        all_sessions = set()

        for idx, _line in enumerate(file._lines):
            if not _line:
                continue

            # this turns the line into an actual `ReportLine`
            line = file._lines[idx] = file._line(_line)

            # NOTE: this rebinds `session`, shadowing the session object
            # fetched at the top of the method.
            for session in line.sessions:
                if session.id == old_id:
                    session.id = new_id
                all_sessions.add(session.id)

            if line.datapoints:
                for point in line.datapoints:
                    if point.sessionid == old_id:
                        point.sessionid = new_id

        file._invalidate_caches()
        # NOTE(review): due to Python name mangling inside this class body,
        # this assigns `_Report__present_sessions` on the file — presumably
        # intended to hit `ReportFile`'s own `__present_sessions` cache;
        # confirm against `ReportFile`'s implementation.
        file.__present_sessions = all_sessions

    self._invalidate_caches()


def _ignore_to_func(ignore):
"""Returns a function to determine whether a a line should be saved to the ReportFile
4 changes: 1 addition & 3 deletions tests/benchmarks/test_report.py
Original file line number Diff line number Diff line change
@@ -3,7 +3,6 @@
import zstandard as zstd

from shared.reports.carryforward import generate_carryforward_report
from shared.reports.editable import EditableReport
from shared.reports.readonly import ReadOnlyReport
from shared.reports.resources import Report
from shared.torngit.base import TorngitBaseAdapter
@@ -21,10 +20,9 @@ def read_fixture(name: str) -> bytes:
pytest.param(Report, False, id="Report"),
pytest.param(ReadOnlyReport, False, id="ReadOnlyReport"),
pytest.param(ReadOnlyReport, True, id="Rust ReadOnlyReport"),
pytest.param(EditableReport, False, id="EditableReport"),
]

EDITABLE_VARIANTS = [Report, EditableReport]
EDITABLE_VARIANTS = [Report]


def init_mocks(mocker, should_load_rust) -> tuple[bytes, bytes]:

0 comments on commit 3b3bf9c

Please sign in to comment.