Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge EditableReport into Report #549

Merged
merged 2 commits into from
Mar 3, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 3 additions & 113 deletions shared/reports/editable.py
Original file line number Diff line number Diff line change
@@ -1,115 +1,5 @@
import dataclasses
import logging

import sentry_sdk

from shared.reports.resources import Report, ReportFile

log = logging.getLogger(__name__)

EditableReportFile = ReportFile # re-export


class EditableReport(Report):
    """A `Report` whose chunks are eagerly parsed into `ReportFile` objects,
    so that edit operations (deleting sessions/labels, renumbering sessions)
    can mutate line records in place.
    """

    file_class = EditableReportFile

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Parse every raw chunk up-front so the edit methods below can assume
        # `self._chunks` holds `ReportFile` instances (or `None`).
        self.turn_chunks_into_reports()

    def merge(self, new_report, joined=True):
        """Merge `new_report` into this one, keeping chunks as `ReportFile`s."""
        super().merge(new_report, joined)
        for file in self:
            if isinstance(file, ReportFile):
                self._chunks[self._files.get(file.name).file_index] = file

    def turn_chunks_into_reports(self):
        """Replace every raw chunk with a parsed `ReportFile`, or `None` when
        the chunk has no matching entry in `self._files`."""
        filename_mapping = {
            file_summary.file_index: filename
            for filename, file_summary in self._files.items()
        }
        for chunk_index in range(len(self._chunks)):
            filename = filename_mapping.get(chunk_index)
            file_summary = self._files.get(filename)
            chunk = self._chunks[chunk_index]
            if chunk is not None and file_summary is not None:
                if isinstance(chunk, ReportFile):
                    # Already parsed; re-wrap its lines with fresh totals.
                    chunk = chunk._lines
                report_file = ReportFile(
                    name=filename,
                    totals=file_summary.file_totals,
                    lines=chunk,
                )
                self._chunks[chunk_index] = report_file
            else:
                self._chunks[chunk_index] = None

    def delete_labels(self, sessionids, labels_to_delete):
        """Remove `labels_to_delete` for the given `sessionids` from every
        file; files left empty are dropped. Returns `sessionids`."""
        self._totals = None
        # BUG FIX: the original called `del self[file.name]` while iterating
        # `self._chunks`, mutating the report mid-iteration. Collect the
        # emptied files first and delete them afterwards.
        files_to_delete = []
        for file in self._chunks:
            if file is None:
                continue
            file.delete_labels(sessionids, labels_to_delete)
            if file:
                self._files[file.name] = dataclasses.replace(
                    self._files[file.name],
                    file_totals=file.totals,
                )
            else:
                files_to_delete.append(file.name)
        for filename in files_to_delete:
            del self[filename]
        return sessionids

    def delete_multiple_sessions(self, session_ids_to_delete: list[int] | set[int]):
        """Remove whole sessions (and their line records) from every file;
        files left empty are dropped."""
        session_ids_to_delete = set(session_ids_to_delete)
        self._totals = None
        for sessionid in session_ids_to_delete:
            self.sessions.pop(sessionid)

        # BUG FIX: defer file deletion instead of mutating during iteration
        # (same issue as in `delete_labels`).
        files_to_delete = []
        for file in self._chunks:
            if file is None:
                continue
            file.delete_multiple_sessions(session_ids_to_delete)
            if file:
                self._files[file.name] = dataclasses.replace(
                    self._files[file.name],
                    file_totals=file.totals,
                )
            else:
                files_to_delete.append(file.name)
        for filename in files_to_delete:
            del self[filename]

    @sentry_sdk.trace
    def change_sessionid(self, old_id: int, new_id: int):
        """
        This changes the session with `old_id` to have `new_id` instead.
        It patches up all the references to that session across all files and line records.

        In particular, it changes the id in all the `LineSession`s and `CoverageDatapoint`s,
        and does the equivalent of `calculate_present_sessions`.
        """
        session = self.sessions[new_id] = self.sessions.pop(old_id)
        session.id = new_id

        report_file: EditableReportFile
        for report_file in self._chunks:
            if report_file is None:
                continue

            all_sessions = set()

            for idx, _line in enumerate(report_file._lines):
                if not _line:
                    continue

                # this turns the line into an actual `ReportLine`
                line = report_file._lines[idx] = report_file._line(_line)

                for session in line.sessions:
                    if session.id == old_id:
                        session.id = new_id
                    all_sessions.add(session.id)

                if line.datapoints:
                    for point in line.datapoints:
                        if point.sessionid == old_id:
                            point.sessionid = new_id

            report_file._invalidate_caches()
            # BUG FIX: `report_file.__present_sessions` written inside this
            # class name-mangles to `_EditableReport__present_sessions` — a
            # dead write that never reaches the file's private cache. Target
            # ReportFile's attribute explicitly.
            # NOTE(review): assumes ReportFile keeps this cache as
            # `self.__present_sessions` — confirm against resources.py.
            report_file._ReportFile__present_sessions = all_sessions
# re-export to avoid having to patch the whole world:
# after the merge of `EditableReport` into `Report`, both editable classes
# are plain aliases for the unified implementations in `resources.py`.
EditableReportFile = ReportFile
EditableReport = Report
145 changes: 116 additions & 29 deletions shared/reports/resources.py
Original file line number Diff line number Diff line change
@@ -5,7 +5,7 @@
from fractions import Fraction
from itertools import filterfalse, zip_longest
from types import GeneratorType
from typing import Any, cast
from typing import Any, Generator, cast

import orjson
import sentry_sdk
@@ -590,7 +590,6 @@


class Report(object):
file_class = ReportFile
_files: dict[str, ReportFileSummary]
_header: ReportHeader

@@ -609,22 +608,57 @@
self.sessions = get_sessions(sessions) if sessions else {}

# ["<json>", ...]
self._chunks: list[str | ReportFile]
self._chunks, self._header = (
parse_chunks(chunks)
if chunks and isinstance(chunks, str)
else (chunks or [], ReportHeader())
)

# <ReportTotals>
self._totals: ReportTotals | None = None
if isinstance(totals, ReportTotals):
self._totals = totals
elif totals:
self._totals = ReportTotals(*migrate_totals(totals))
else:
self._totals = None

self.diff_totals = diff_totals

def _invalidate_caches(self):
    # Drop the memoized aggregate totals; they are recomputed lazily by the
    # `totals` property on next access.
    self._totals = None

@property
def totals(self):
    # Aggregate `ReportTotals` across all files; computed lazily and cached
    # in `_totals` until `_invalidate_caches()` clears it.
    if not self._totals:
        self._totals = self._process_totals()
    return self._totals

def _process_totals(self):
    """Aggregate totals across the whole file network.

    Uses each file's cached `file_totals` when available, otherwise parses
    the file and computes its totals on demand. Returns a `ReportTotals`.
    """
    per_file_totals = (
        summary.file_totals
        if summary.file_totals is not None
        else self.get(filename).totals
        for filename, summary in self._files.items()
    )
    aggregated = agg_totals(per_file_totals)
    # The session count is not derivable from per-file totals.
    aggregated.sessions = len(self.sessions)
    return aggregated

def _iter_parsed_files(self) -> Generator[ReportFile, None, None]:
    """Yield every file in the report as a parsed `ReportFile`.

    Raw chunks are parsed on first access and the parse result is written
    back into `self._chunks`, so repeated iteration does no extra work.
    """
    for filename, file_summary in self._files.items():
        chunk_index = file_summary.file_index
        chunk = self._chunks[chunk_index]
        if isinstance(chunk, ReportFile):
            yield chunk
            continue
        parsed = ReportFile(
            name=filename, totals=file_summary.file_totals, lines=chunk
        )
        self._chunks[chunk_index] = parsed
        yield parsed

@property
def header(self) -> ReportHeader:
return self._header
@@ -787,7 +821,7 @@
lines = None
if isinstance(lines, ReportFile):
return lines
report_file = self.file_class(
report_file = ReportFile(
name=filename,
totals=_file.file_totals,
lines=lines,
@@ -857,29 +891,6 @@
else:
return ReportTotals(*totals)

@property
def totals(self):
    # Lazily computed aggregate `ReportTotals`, cached in `_totals`.
    if not self._totals:
        # reprocess totals
        self._totals = self._process_totals()
    return self._totals

def _process_totals(self):
    """Runs through the file network to aggregate totals
    returns <ReportTotals>
    """

    def _iter_totals():
        for filename, data in self._files.items():
            if data.file_totals is None:
                # No cached per-file totals in the summary; parse the file
                # and compute them on demand.
                yield self.get(filename).totals
            else:
                yield data.file_totals

    totals = agg_totals(_iter_totals())
    # Session count is not derivable from per-file totals; set it directly.
    totals.sessions = len(self.sessions)
    return totals

def next_session_number(self):
start_number = len(self.sessions)
while start_number in self.sessions or str(start_number) in self.sessions:
@@ -912,7 +923,7 @@
if isinstance(report, ReportFile):
yield report
else:
yield self.file_class(
yield ReportFile(
name=filename,
totals=_file.file_totals,
lines=report,
@@ -1230,6 +1241,82 @@
return False
return True

def delete_labels(
    self, sessionids: list[int] | set[int], labels_to_delete: list[int] | set[int]
):
    """Remove `labels_to_delete` for the given `sessionids` from every file.

    Files that end up with no lines are dropped from the report entirely.
    Returns `sessionids` unchanged.
    """
    emptied_files: list[str] = []
    for report_file in self._iter_parsed_files():
        report_file.delete_labels(sessionids, labels_to_delete)
        if not report_file:
            # Deletion deferred: removing files mid-iteration would mutate
            # the structures we are iterating over.
            emptied_files.append(report_file.name)
            continue
        self._files[report_file.name] = dataclasses.replace(
            self._files[report_file.name],
            file_totals=report_file.totals,
        )
    for filename in emptied_files:
        del self[filename]

    self._invalidate_caches()
    return sessionids

def delete_multiple_sessions(self, session_ids_to_delete: list[int] | set[int]):
    """Remove whole sessions (and all their line records) from the report.

    Files that end up with no lines are dropped from the report entirely.
    """
    session_ids_to_delete = set(session_ids_to_delete)
    for sessionid in session_ids_to_delete:
        self.sessions.pop(sessionid)

    emptied_files: list[str] = []
    for report_file in self._iter_parsed_files():
        report_file.delete_multiple_sessions(session_ids_to_delete)
        if not report_file:
            # Deletion deferred: removing files mid-iteration would mutate
            # the structures we are iterating over.
            emptied_files.append(report_file.name)
            continue
        self._files[report_file.name] = dataclasses.replace(
            self._files[report_file.name],
            file_totals=report_file.totals,
        )
    for filename in emptied_files:
        del self[filename]

    self._invalidate_caches()

@sentry_sdk.trace
def change_sessionid(self, old_id: int, new_id: int):
    """
    This changes the session with `old_id` to have `new_id` instead.
    It patches up all the references to that session across all files and line records.

    In particular, it changes the id in all the `LineSession`s and `CoverageDatapoint`s,
    and does the equivalent of `calculate_present_sessions`.
    """
    session = self.sessions[new_id] = self.sessions.pop(old_id)
    session.id = new_id

    for file in self._iter_parsed_files():
        all_sessions = set()

        for idx, _line in enumerate(file._lines):
            if not _line:
                continue

            # this turns the line into an actual `ReportLine`
            line = file._lines[idx] = file._line(_line)

            for session in line.sessions:
                if session.id == old_id:
                    session.id = new_id
                all_sessions.add(session.id)

            if line.datapoints:
                for point in line.datapoints:
                    if point.sessionid == old_id:
                        point.sessionid = new_id

        file._invalidate_caches()
        # BUG FIX: `file.__present_sessions` written inside class `Report`
        # name-mangles to `_Report__present_sessions` — a dead write that
        # never reaches ReportFile's private cache. Target the attribute
        # explicitly so `present_sessions` reflects the renumbered ids.
        # NOTE(review): assumes ReportFile stores this cache as
        # `self.__present_sessions` — confirm.
        file._ReportFile__present_sessions = all_sessions

    self._invalidate_caches()


def _ignore_to_func(ignore):
"""Returns a function to determine whether a a line should be saved to the ReportFile
23 changes: 8 additions & 15 deletions tests/benchmarks/test_report.py
Original file line number Diff line number Diff line change
@@ -3,7 +3,6 @@
import zstandard as zstd

from shared.reports.carryforward import generate_carryforward_report
from shared.reports.editable import EditableReport
from shared.reports.readonly import ReadOnlyReport
from shared.reports.resources import Report
from shared.torngit.base import TorngitBaseAdapter
@@ -21,11 +20,8 @@ def read_fixture(name: str) -> bytes:
pytest.param(Report, False, id="Report"),
pytest.param(ReadOnlyReport, False, id="ReadOnlyReport"),
pytest.param(ReadOnlyReport, True, id="Rust ReadOnlyReport"),
pytest.param(EditableReport, False, id="EditableReport"),
]

EDITABLE_VARIANTS = [Report, EditableReport]


def init_mocks(mocker, should_load_rust) -> tuple[bytes, bytes]:
mocker.patch(
@@ -145,7 +141,7 @@ def test_report_diff_calculation(mocker, do_filter, benchmark):
raw_chunks, raw_report_json = init_mocks(mocker, False)
diff = load_diff()

report = do_parse(Report, raw_report_json, raw_chunks)
report = do_full_parse(Report, raw_report_json, raw_chunks)
if do_filter:
report = report.filter(paths=[".*"], flags=["unit"])

@@ -155,11 +151,10 @@ def bench_fn():
benchmark(bench_fn)


@pytest.mark.parametrize("report_class", EDITABLE_VARIANTS)
def test_report_serialize(report_class, mocker, benchmark):
def test_report_serialize(mocker, benchmark):
raw_chunks, raw_report_json = init_mocks(mocker, False)

report = do_parse(report_class, raw_report_json, raw_chunks)
report = do_parse(Report, raw_report_json, raw_chunks)

def bench_fn():
report.to_database()
@@ -168,24 +163,22 @@ def bench_fn():
benchmark(bench_fn)


@pytest.mark.parametrize("report_class", EDITABLE_VARIANTS)
def test_report_merge(report_class, mocker, benchmark):
def test_report_merge(mocker, benchmark):
raw_chunks, raw_report_json = init_mocks(mocker, False)

report1 = do_parse(report_class, raw_report_json, raw_chunks)
report2 = do_parse(report_class, raw_report_json, raw_chunks)
report2 = do_full_parse(Report, raw_report_json, raw_chunks)

def bench_fn():
report1 = do_parse(Report, raw_report_json, raw_chunks)
report1.merge(report2)

benchmark(bench_fn)


@pytest.mark.parametrize("report_class", EDITABLE_VARIANTS)
def test_report_carryforward(report_class, mocker, benchmark):
def test_report_carryforward(mocker, benchmark):
raw_chunks, raw_report_json = init_mocks(mocker, False)

report = do_parse(report_class, raw_report_json, raw_chunks)
report = do_full_parse(Report, raw_report_json, raw_chunks)

def bench_fn():
generate_carryforward_report(report, paths=[".*"], flags=["unit"])
Loading
Oops, something went wrong.