Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Start parsing the chunks file with serde #31

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Prev Previous commit
Next Next commit
Get closer to the existing parser interface by dealing with report builders
  • Loading branch information
Swatinem committed Nov 19, 2024
commit 6f8af3896e61a2bc5be6ab1023f50dfedb28f6a2
28 changes: 17 additions & 11 deletions core/benches/pyreport.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::{collections::HashMap, hint::black_box};
use std::collections::HashMap;

use codecov_rs::{
parsers::pyreport::{chunks, chunks_serde, report_json},
@@ -130,8 +130,13 @@ fn simple_chunks_serde() {
b"{}\n<<<<< end_of_header >>>>>\n{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n[1, null, [[0, 1]]]\n[1, null, [[0, 1]]]\n",
];

let report_json = report_json::ParsedReportJson {
files: Default::default(),
sessions: Default::default(),
};

for input in chunks {
parse_chunks_file_serde(input)
parse_chunks_file_serde(input, &report_json);
}
}

@@ -142,17 +147,18 @@ fn complex_chunks_serde(bencher: Bencher) {
let chunks =
load_fixture("pyreport/large/worker-c71ddfd4cb1753c7a540e5248c2beaa079fc3341-chunks.txt");

bencher.bench(|| parse_chunks_file_serde(&chunks));
// parsing the chunks depends on having loaded the `report_json`
let report = load_fixture(
"pyreport/large/worker-c71ddfd4cb1753c7a540e5248c2beaa079fc3341-report_json.json",
);
let report_json = parse_report_json(&report);

bencher.bench(|| parse_chunks_file_serde(&chunks, &report_json));
}

fn parse_chunks_file_serde(input: &[u8]) {
let chunks_file = chunks_serde::ChunksFile::new(input).unwrap();
let mut chunks = chunks_file.chunks();
while let Some(mut chunk) = chunks.next_chunk().unwrap() {
while let Some(line) = chunk.next_line().unwrap() {
black_box(line);
}
}
/// Benchmark driver: runs the serde-based chunks parser over `input`,
/// recording results into a fresh in-memory test report builder.
/// Panics if parsing fails, which fails the benchmark loudly.
fn parse_chunks_file_serde(input: &[u8], report_json: &report_json::ParsedReportJson) {
    let mut builder = TestReportBuilder::default();
    let result = chunks_serde::parse_chunks_file(input, report_json, &mut builder);
    result.unwrap();
}

#[track_caller]
5 changes: 5 additions & 0 deletions core/src/error.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use thiserror::Error;

use crate::parsers::pyreport::chunks_serde::ChunksFileParseError;

pub type Result<T, E = CodecovError> = std::result::Result<T, E>;

#[derive(Error, Debug)]
@@ -26,4 +28,7 @@ pub enum CodecovError {
#[cfg(feature = "pyreport")]
#[error("failed to convert sqlite to pyreport: '{0}'")]
PyreportConversionError(String),

#[error(transparent)]
ChunksFileParseError(#[from] ChunksFileParseError),
}
115 changes: 101 additions & 14 deletions core/src/parsers/pyreport/chunks_serde.rs
Original file line number Diff line number Diff line change
@@ -37,10 +37,84 @@ use std::{collections::HashMap, fmt, mem, sync::OnceLock};
use memchr::{memchr, memmem};
use serde::{de, de::IgnoredAny, Deserialize};

use crate::report::pyreport::{CHUNKS_FILE_END_OF_CHUNK, CHUNKS_FILE_HEADER_TERMINATOR};
use super::report_json::ParsedReportJson;
use crate::{
error::CodecovError,
report::{
models,
pyreport::{
types::{self, PyreportCoverage, ReportLine},
CHUNKS_FILE_END_OF_CHUNK, CHUNKS_FILE_HEADER_TERMINATOR,
},
Report, ReportBuilder,
},
};

/// Parses a pyreport chunks file and records its contents into `builder`.
///
/// The chunks file is split into a header (containing the labels index) and a
/// sequence of per-file chunks, each of which is a sequence of line records.
/// Label names from the header are registered as contexts on the builder;
/// every non-empty line record is converted into a [`ReportLine`].
///
/// NOTE(review): `_report_json` is currently unused — presumably it will be
/// needed to translate chunk/session indices into database file/session ids
/// once `save_report_lines` is hooked up (see the TODO below); confirm.
///
/// # Errors
///
/// Returns an error if the chunks file is malformed, or if the builder fails
/// to insert a context.
pub fn parse_chunks_file<B, R>(
    input: &[u8],
    _report_json: &ParsedReportJson,
    builder: &mut B,
) -> Result<(), CodecovError>
where
    B: ReportBuilder<R>,
    R: Report,
{
    let chunks_file = ChunksFile::new(input)?;

    // Register every label from the file header as a context, and remember
    // the mapping from the file-local label index to the builder's context id.
    let mut labels_index = HashMap::with_capacity(chunks_file.labels_index().len());
    for (index, name) in chunks_file.labels_index() {
        let context = builder.insert_context(name)?;
        labels_index.insert(index.clone(), context.id);
    }

    // Buffer of parsed lines for the current chunk; reused (via `clear`)
    // across chunks to avoid reallocating per chunk.
    let mut report_lines = vec![];

    let mut chunks = chunks_file.chunks();
    while let Some(mut chunk) = chunks.next_chunk()? {
        let mut line_no = 0;
        report_lines.clear();
        while let Some(line) = chunk.next_line()? {
            // Incremented before use: line numbers are 1-based. Empty
            // records (`None`) still advance the line counter below.
            line_no += 1;
            if let Some(line) = line {
                // Missing coverage type defaults to `Line` via `Default`.
                let coverage_type = match line.1.unwrap_or_default() {
                    CoverageType::Line => models::CoverageType::Line,
                    CoverageType::Branch => models::CoverageType::Branch,
                    CoverageType::Method => models::CoverageType::Method,
                };
                // Convert each raw session tuple into the richer
                // `types::LineSession`; branch/partial/complexity data is
                // not parsed yet (see TODOs).
                let sessions = line
                    .2
                    .into_iter()
                    .map(|session| types::LineSession {
                        session_id: session.0,
                        coverage: session.1.into(),
                        branches: None,   // TODO
                        partials: None,   // TODO
                        complexity: None, // TODO
                    })
                    .collect();

                let mut report_line = ReportLine {
                    line_no,
                    coverage: line.0.into(),
                    coverage_type,
                    sessions,
                    _messages: None,
                    _complexity: None,
                    datapoints: None, // TODO
                };
                // Repair inconsistencies (e.g. branch coverage recorded
                // under a method coverage type) before storing the line.
                report_line.normalize();
                report_lines.push(report_line);
            }
        }
        // TODO:
        // utils::save_report_lines()?;
    }

    Ok(())
}

#[derive(Debug, thiserror::Error)]
pub enum ParserError {
pub enum ChunksFileParseError {
#[error("unexpected EOF")]
UnexpectedEof,
#[error("unexpected input")]
@@ -53,12 +127,12 @@ pub enum ParserError {
InvalidLineRecord(#[source] serde_json::Error),
}

impl PartialEq for ParserError {
impl PartialEq for ChunksFileParseError {
fn eq(&self, other: &Self) -> bool {
core::mem::discriminant(self) == core::mem::discriminant(other)
}
}
impl Eq for ParserError {}
impl Eq for ChunksFileParseError {}

#[derive(Debug)]
pub struct ChunksFile<'d> {
@@ -67,16 +141,16 @@ pub struct ChunksFile<'d> {
}

impl<'d> ChunksFile<'d> {
pub fn new(mut input: &'d [u8]) -> Result<Self, ParserError> {
pub fn new(mut input: &'d [u8]) -> Result<Self, ChunksFileParseError> {
static HEADER_FINDER: OnceLock<memmem::Finder> = OnceLock::new();
let header_finder =
HEADER_FINDER.get_or_init(|| memmem::Finder::new(CHUNKS_FILE_HEADER_TERMINATOR));

let file_header = if let Some(pos) = header_finder.find(input) {
let header_bytes = &input[..pos];
input = &input[pos + header_finder.needle().len()..];
let file_header: FileHeader =
serde_json::from_slice(header_bytes).map_err(ParserError::InvalidFileHeader)?;
let file_header: FileHeader = serde_json::from_slice(header_bytes)
.map_err(ChunksFileParseError::InvalidFileHeader)?;
file_header
} else {
FileHeader::default()
@@ -99,7 +173,7 @@ pub struct Chunks<'d> {
}

impl<'d> Chunks<'d> {
pub fn next_chunk(&mut self) -> Result<Option<Chunk<'d>>, ParserError> {
pub fn next_chunk(&mut self) -> Result<Option<Chunk<'d>>, ChunksFileParseError> {
if self.input.is_empty() {
return Ok(None);
}
@@ -123,9 +197,10 @@ impl<'d> Chunks<'d> {
}));
}

let header_bytes = next_line(&mut chunk_bytes).ok_or(ParserError::UnexpectedInput)?;
let chunk_header: ChunkHeader =
serde_json::from_slice(header_bytes).map_err(ParserError::InvalidFileHeader)?;
let header_bytes =
next_line(&mut chunk_bytes).ok_or(ChunksFileParseError::UnexpectedInput)?;
let chunk_header: ChunkHeader = serde_json::from_slice(header_bytes)
.map_err(ChunksFileParseError::InvalidFileHeader)?;

Ok(Some(Chunk {
chunk_header,
@@ -144,7 +219,7 @@ impl<'d> Chunk<'d> {
&self.chunk_header.present_sessions
}

pub fn next_line(&mut self) -> Result<Option<Option<LineRecord>>, ParserError> {
pub fn next_line(&mut self) -> Result<Option<Option<LineRecord>>, ChunksFileParseError> {
let Some(line) = next_line(&mut self.input) else {
return Ok(None);
};
@@ -154,7 +229,7 @@ impl<'d> Chunk<'d> {
}

let line_record: LineRecord =
serde_json::from_slice(line).map_err(ParserError::InvalidLineRecord)?;
serde_json::from_slice(line).map_err(ChunksFileParseError::InvalidLineRecord)?;
return Ok(Some(Some(line_record)));
}
}
@@ -217,7 +292,7 @@ pub struct LineRecord(
#[derive(Debug, Clone, PartialEq, Eq, Deserialize)]
pub struct LineSession(
/// session id
u32,
usize,
/// coverage
Coverage,
/// TODO: branches
@@ -260,6 +335,18 @@ pub enum Coverage {
HitCount(u32),
}

/// Converts the serde-parsed [`Coverage`] value into the report-model
/// [`PyreportCoverage`] representation.
///
/// Implemented as `From` rather than `Into` (clippy `from_over_into`): the
/// standard blanket impl derives `Into<PyreportCoverage> for Coverage` from
/// this automatically, so existing `.into()` call sites keep working.
impl From<Coverage> for PyreportCoverage {
    fn from(coverage: Coverage) -> Self {
        match coverage {
            Coverage::Partial => PyreportCoverage::Partial(),
            Coverage::BranchTaken(covered, total) => {
                PyreportCoverage::BranchesTaken { covered, total }
            }
            Coverage::HitCount(hits) => PyreportCoverage::HitCount(hits),
        }
    }
}

impl<'de> Deserialize<'de> for Coverage {
fn deserialize<D>(deserializer: D) -> Result<Coverage, D::Error>
where
17 changes: 17 additions & 0 deletions core/src/report/pyreport/types.rs
Original file line number Diff line number Diff line change
@@ -187,6 +187,23 @@ pub struct ReportLine {
pub datapoints: Option<Option<HashMap<u32, CoverageDatapoint>>>,
}

impl ReportLine {
    /// Repairs inconsistent coverage data in place — for example branch
    /// coverage that was recorded with `CoverageType::Method`.
    pub fn normalize(&mut self) {
        // Normalize the line-level measurement first, so that the corrected
        // coverage type is the one applied to every session below.
        let (coverage, coverage_type) =
            normalize_coverage_measurement(&self.coverage, &self.coverage_type);
        self.coverage = coverage;
        self.coverage_type = coverage_type;

        // Normalize each session's coverage value against the (already
        // corrected) line-level coverage type; the type component of the
        // result is discarded for sessions.
        for session in self.sessions.iter_mut() {
            session.coverage =
                normalize_coverage_measurement(&session.coverage, &self.coverage_type).0;
        }
    }
}

/// Account for some quirks and malformed data. See code comments for details.
pub(crate) fn normalize_coverage_measurement(
coverage: &PyreportCoverage,