Skip to content

Commit

Permalink
Merge EditableReport into Report
Browse files Browse the repository at this point in the history
The primary use case for `EditableReport` was to always keep a list of "partially parsed" `ReportFile`s around, as well as making sure that `totals` (and to some extent `file_totals`) are up to date.

This now moves all the methods from `EditableReport` onto `Report`, and introduces a new method to iterate over "partially parsed" `ReportFile`s, making sure those are being kept around as well.
  • Loading branch information
Swatinem committed Feb 27, 2025
1 parent 676f04d commit 3b3bf9c
Showing 3 changed files with 114 additions and 145 deletions.
116 changes: 3 additions & 113 deletions shared/reports/editable.py
Original file line number Diff line number Diff line change
@@ -1,115 +1,5 @@
import dataclasses
import logging

import sentry_sdk

from shared.reports.resources import Report, ReportFile

log = logging.getLogger(__name__)

EditableReportFile = ReportFile # re-export


class EditableReport(Report):
    """A `Report` whose chunks are eagerly parsed into `ReportFile`s so they can be edited in place.

    On construction every raw chunk is materialized via `turn_chunks_into_reports`,
    and each mutating method keeps the per-file `file_totals` and the cached
    report-level `totals` consistent with the edited contents.
    """

    # Files are materialized with this class (aliased to `ReportFile` above).
    file_class = EditableReportFile

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Parse all chunks up-front so every file is editable in place.
        self.turn_chunks_into_reports()

    def merge(self, new_report, joined=True):
        """Merge `new_report` into this one, then write the merged `ReportFile`s
        back into `self._chunks` so chunks and files stay in sync."""
        super().merge(new_report, joined)
        for file in self:
            if isinstance(file, ReportFile):
                self._chunks[self._files.get(file.name).file_index] = file

    def turn_chunks_into_reports(self):
        """Replace every raw chunk in `self._chunks` with a parsed `ReportFile`.

        Chunks with no matching file entry (and file entries with no chunk)
        are dropped by storing `None` at that index.
        """
        # Invert the name -> summary mapping so chunks can be matched by index.
        filename_mapping = {
            file_summary.file_index: filename
            for filename, file_summary in self._files.items()
        }
        for chunk_index in range(len(self._chunks)):
            filename = filename_mapping.get(chunk_index)
            file_summary = self._files.get(filename)
            chunk = self._chunks[chunk_index]
            if chunk is not None and file_summary is not None:
                if isinstance(chunk, ReportFile):
                    # Re-wrap an already-parsed file from its raw lines.
                    chunk = chunk._lines
                report_file = ReportFile(
                    name=filename,
                    totals=file_summary.file_totals,
                    lines=chunk,
                )
                self._chunks[chunk_index] = report_file
            else:
                # Orphaned chunk or file entry: neutralize this slot.
                self._chunks[chunk_index] = None

    def delete_labels(self, sessionids, labels_to_delete):
        """Remove the given labels (for the given sessions) from every file,
        refreshing per-file totals and dropping files that become empty.

        Returns `sessionids` unchanged for caller convenience.
        """
        self._totals = None  # invalidate the cached aggregate totals
        for file in self._chunks:
            if file is not None:
                file.delete_labels(sessionids, labels_to_delete)
                if file:
                    self._files[file.name] = dataclasses.replace(
                        self._files[file.name],
                        file_totals=file.totals,
                    )
                else:
                    # File has no lines left: remove it from the report.
                    del self[file.name]
        return sessionids

    def delete_multiple_sessions(self, session_ids_to_delete: list[int] | set[int]):
        """Remove all data belonging to the given sessions from every file,
        refreshing per-file totals and dropping files that become empty."""
        session_ids_to_delete = set(session_ids_to_delete)
        self._totals = None  # invalidate the cached aggregate totals
        for sessionid in session_ids_to_delete:
            self.sessions.pop(sessionid)

        for file in self._chunks:
            if file is not None:
                file.delete_multiple_sessions(session_ids_to_delete)
                if file:
                    self._files[file.name] = dataclasses.replace(
                        self._files[file.name],
                        file_totals=file.totals,
                    )
                else:
                    # File has no lines left: remove it from the report.
                    del self[file.name]

    @sentry_sdk.trace
    def change_sessionid(self, old_id: int, new_id: int):
        """
        This changes the session with `old_id` to have `new_id` instead.
        It patches up all the references to that session across all files and line records.
        In particular, it changes the id in all the `LineSession`s and `CoverageDatapoint`s,
        and does the equivalent of `calculate_present_sessions`.
        """
        session = self.sessions[new_id] = self.sessions.pop(old_id)
        session.id = new_id

        report_file: EditableReportFile
        for report_file in self._chunks:
            if report_file is None:
                continue

            all_sessions = set()

            for idx, _line in enumerate(report_file._lines):
                if not _line:
                    continue

                # this turns the line into an actual `ReportLine`
                line = report_file._lines[idx] = report_file._line(_line)

                # NOTE: this rebinds `session`, shadowing the session object
                # fetched at the top of the method.
                for session in line.sessions:
                    if session.id == old_id:
                        session.id = new_id
                    all_sessions.add(session.id)

                if line.datapoints:
                    for point in line.datapoints:
                        if point.sessionid == old_id:
                            point.sessionid = new_id

            report_file._invalidate_caches()
            # NOTE(review): due to Python name mangling inside this class body,
            # this assigns `_EditableReport__present_sessions` on the file —
            # presumably intended to hit `ReportFile`'s own `__present_sessions`
            # cache; confirm against `ReportFile`'s implementation.
            report_file.__present_sessions = all_sessions
# `EditableReport`/`EditableReportFile` have been merged into the plain
# `Report`/`ReportFile` classes; these aliases remain so existing imports of
# this module keep working without patching every call site.
EditableReportFile = ReportFile
EditableReport = Report
139 changes: 110 additions & 29 deletions shared/reports/resources.py
Original file line number Diff line number Diff line change
@@ -5,7 +5,7 @@
from fractions import Fraction
from itertools import filterfalse, zip_longest
from types import GeneratorType
from typing import Any, cast
from typing import Any, Generator, cast

import orjson
import sentry_sdk
@@ -599,7 +599,6 @@ def parse_chunks(chunks: str) -> tuple[list[str], ReportHeader]:


class Report(object):
file_class = ReportFile
_files: dict[str, ReportFileSummary]
_header: ReportHeader

@@ -618,22 +617,57 @@ def __init__(
self.sessions = get_sessions(sessions) if sessions else {}

# ["<json>", ...]
self._chunks: list[str | ReportFile]
self._chunks, self._header = (
parse_chunks(chunks)
if chunks and isinstance(chunks, str)
else (chunks or [], ReportHeader())
)

# <ReportTotals>
self._totals: ReportTotals | None = None
if isinstance(totals, ReportTotals):
self._totals = totals
elif totals:
self._totals = ReportTotals(*migrate_totals(totals))
else:
self._totals = None

self.diff_totals = diff_totals

def _invalidate_caches(self):
    # Drop the cached aggregate totals; the `totals` property lazily
    # recomputes them on next access.
    self._totals = None

@property
def totals(self):
    """Aggregate `ReportTotals` for the whole report, computed lazily and cached."""
    cached = self._totals
    if not cached:
        cached = self._process_totals()
        self._totals = cached
    return cached

def _process_totals(self):
    """Runs through the file network to aggregate totals.

    Returns <ReportTotals>.
    """
    per_file = []
    for filename, data in self._files.items():
        file_totals = data.file_totals
        if file_totals is None:
            # Summary has no precomputed totals: parse the file and ask it.
            file_totals = self.get(filename).totals
        per_file.append(file_totals)

    totals = agg_totals(per_file)
    totals.sessions = len(self.sessions)
    return totals

def _iter_parsed_files(self) -> Generator[ReportFile, None, None]:
    """Yield every file in the report as a parsed `ReportFile`.

    Raw (still-serialized) chunks are parsed on first access and written
    back into `self._chunks`, so repeated iteration does not re-parse.
    """
    for name, summary in self._files.items():
        index = summary.file_index
        chunk = self._chunks[index]
        if isinstance(chunk, ReportFile):
            yield chunk
            continue
        parsed = ReportFile(name=name, totals=summary.file_totals, lines=chunk)
        self._chunks[index] = parsed
        yield parsed

@property
def header(self) -> ReportHeader:
    """The `ReportHeader` parsed from (or supplied with) the chunks data."""
    return self._header
@@ -796,7 +830,7 @@ def get(self, filename, _else=None, bind=False):
lines = None
if isinstance(lines, ReportFile):
return lines
report_file = self.file_class(
report_file = ReportFile(
name=filename,
totals=_file.file_totals,
lines=lines,
@@ -866,29 +900,6 @@ def get_file_totals(self, path: str) -> ReportTotals | None:
else:
return ReportTotals(*totals)

@property
def totals(self):
    """Aggregate `ReportTotals` for the whole report, computed lazily and cached."""
    current = self._totals
    if current:
        return current
    computed = self._process_totals()
    self._totals = computed
    return computed

def _process_totals(self):
    """Runs through the file network to aggregate totals.

    Returns <ReportTotals>.
    """
    per_file = (
        # Fall back to parsing the file when the summary has no totals.
        data.file_totals
        if data.file_totals is not None
        else self.get(filename).totals
        for filename, data in self._files.items()
    )
    totals = agg_totals(per_file)
    totals.sessions = len(self.sessions)
    return totals

def next_session_number(self):
start_number = len(self.sessions)
while start_number in self.sessions or str(start_number) in self.sessions:
@@ -921,7 +932,7 @@ def __iter__(self):
if isinstance(report, ReportFile):
yield report
else:
yield self.file_class(
yield ReportFile(
name=filename,
totals=_file.file_totals,
lines=report,
@@ -1239,6 +1250,76 @@ def _passes_integrity_analysis(self):
return False
return True

def delete_labels(
    self, sessionids: list[int] | set[int], labels_to_delete: list[int] | set[int]
):
    """Remove the given labels (for the given sessions) from every file.

    Per-file totals are refreshed afterwards; files left with no coverage
    are removed from the report entirely. Cached report totals are
    invalidated. Returns `sessionids` unchanged for caller convenience.
    """
    # Materialize the generator first: `_iter_parsed_files` iterates over
    # `self._files`, and `del self[...]` below mutates that mapping, which
    # would otherwise raise "dictionary changed size during iteration".
    for file in list(self._iter_parsed_files()):
        file.delete_labels(sessionids, labels_to_delete)
        if file:
            self._files[file.name] = dataclasses.replace(
                self._files[file.name],
                file_totals=file.totals,
            )
        else:
            # File has no lines left: drop it from the report.
            del self[file.name]

    self._invalidate_caches()
    return sessionids

def delete_multiple_sessions(self, session_ids_to_delete: list[int] | set[int]):
    """Remove all data belonging to the given sessions from every file.

    The sessions are dropped from `self.sessions`, per-file totals are
    refreshed, files left with no coverage are removed from the report,
    and cached report totals are invalidated.
    """
    session_ids_to_delete = set(session_ids_to_delete)
    for sessionid in session_ids_to_delete:
        self.sessions.pop(sessionid)

    # Materialize the generator first: `_iter_parsed_files` iterates over
    # `self._files`, and `del self[...]` below mutates that mapping, which
    # would otherwise raise "dictionary changed size during iteration".
    for file in list(self._iter_parsed_files()):
        file.delete_multiple_sessions(session_ids_to_delete)
        if file:
            self._files[file.name] = dataclasses.replace(
                self._files[file.name],
                file_totals=file.totals,
            )
        else:
            # File has no lines left: drop it from the report.
            del self[file.name]

    self._invalidate_caches()

@sentry_sdk.trace
def change_sessionid(self, old_id: int, new_id: int):
    """
    This changes the session with `old_id` to have `new_id` instead.
    It patches up all the references to that session across all files and line records.
    In particular, it changes the id in all the `LineSession`s and `CoverageDatapoint`s,
    and does the equivalent of `calculate_present_sessions`.
    """
    # Re-key the session object itself first.
    session = self.sessions[new_id] = self.sessions.pop(old_id)
    session.id = new_id

    for file in self._iter_parsed_files():
        all_sessions = set()

        for idx, _line in enumerate(file._lines):
            if not _line:
                continue

            # this turns the line into an actual `ReportLine`
            line = file._lines[idx] = file._line(_line)

            # NOTE: this rebinds `session`, shadowing the session object
            # fetched at the top of the method.
            for session in line.sessions:
                if session.id == old_id:
                    session.id = new_id
                all_sessions.add(session.id)

            if line.datapoints:
                for point in line.datapoints:
                    if point.sessionid == old_id:
                        point.sessionid = new_id

        file._invalidate_caches()
        # NOTE(review): due to Python name mangling inside this class body,
        # this assigns `_Report__present_sessions` on the file — presumably
        # intended to hit `ReportFile`'s own `__present_sessions` cache;
        # confirm against `ReportFile`'s implementation.
        file.__present_sessions = all_sessions

    self._invalidate_caches()


def _ignore_to_func(ignore):
"""Returns a function to determine whether a a line should be saved to the ReportFile
4 changes: 1 addition & 3 deletions tests/benchmarks/test_report.py
Original file line number Diff line number Diff line change
@@ -3,7 +3,6 @@
import zstandard as zstd

from shared.reports.carryforward import generate_carryforward_report
from shared.reports.editable import EditableReport
from shared.reports.readonly import ReadOnlyReport
from shared.reports.resources import Report
from shared.torngit.base import TorngitBaseAdapter
@@ -21,10 +20,9 @@ def read_fixture(name: str) -> bytes:
pytest.param(Report, False, id="Report"),
pytest.param(ReadOnlyReport, False, id="ReadOnlyReport"),
pytest.param(ReadOnlyReport, True, id="Rust ReadOnlyReport"),
pytest.param(EditableReport, False, id="EditableReport"),
]

EDITABLE_VARIANTS = [Report, EditableReport]
EDITABLE_VARIANTS = [Report]


def init_mocks(mocker, should_load_rust) -> tuple[bytes, bytes]:

0 comments on commit 3b3bf9c

Please sign in to comment.