Start parsing the chunks file with serde #31

Open · wants to merge 9 commits into main
Implement missing chunks parser features
This should implement everything except for the `complexity` parser.
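
For context, the fourth `simple_chunks` benchmark input in this diff, with its `\n` escapes expanded, shows the rough shape of a chunks file: a file-level header terminated by a `<<<<< end_of_header >>>>>` marker, then per chunk a header line followed by per-line data records, with chunks separated by `<<<<< end_of_chunk >>>>>`:

    {}
    <<<<< end_of_header >>>>>
    {}
    [1, null, [[0, 1]]]

    <<<<< end_of_chunk >>>>>
    {}
    [1, null, [[0, 1]]]
    [1, null, [[0, 1]]]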
Swatinem committed Nov 19, 2024
commit d6f4a47f6fc06c2503c245dc05f0de90fb5bd6e1
core/benches/pyreport.rs: 13 additions & 79 deletions
@@ -1,12 +1,11 @@
 use std::collections::HashMap;
 
 use codecov_rs::{
-    parsers::pyreport::{chunks, chunks_serde, report_json},
-    test_utils::test_report::{TestReport, TestReportBuilder},
+    parsers::pyreport::{chunks_serde, report_json},
+    test_utils::test_report::TestReportBuilder,
 };
 use criterion::{criterion_group, criterion_main, Criterion};
 use test_utils::fixtures::{read_fixture, FixtureFormat::Pyreport, FixtureSize::Large};
-use winnow::Parser as _;
 
 criterion_group!(
     benches,
@@ -55,24 +54,25 @@ fn parse_report_json(input: &[u8]) -> report_json::ParsedReportJson {
 }
 
 fn simple_chunks(c: &mut Criterion) {
-    let chunks = &[
+    let chunks: &[&[u8]] = &[
         // Header and one chunk with an empty line
-        "{}\n<<<<< end_of_header >>>>>\n{}\n",
+        b"{}\n<<<<< end_of_header >>>>>\n{}\n",
         // No header, one chunk with a populated line and an empty line
-        "{}\n[1, null, [[0, 1]]]\n",
+        b"{}\n[1, null, [[0, 1]]]\n",
         // No header, two chunks, the second having just one empty line
-        "{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n",
+        b"{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n",
         // Header, two chunks, the second having multiple data lines and an empty line
-        "{}\n<<<<< end_of_header >>>>>\n{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n[1, null, [[0, 1]]]\n[1, null, [[0, 1]]]\n",
+        b"{}\n<<<<< end_of_header >>>>>\n{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n[1, null, [[0, 1]]]\n[1, null, [[0, 1]]]\n",
     ];
 
     let files = HashMap::from([(0, 0), (1, 1), (2, 2)]);
     let sessions = HashMap::from([(0, 0), (1, 1), (2, 2)]);
 
+    let report_json = report_json::ParsedReportJson { files, sessions };
     c.bench_function("simple_chunks", |b| {
         b.iter(|| {
             for input in chunks {
-                parse_chunks_file(input, files.clone(), sessions.clone())
+                parse_chunks_file_serde(input, report_json.clone());
             }
         })
     });
@@ -87,7 +87,6 @@ fn complex_chunks(c: &mut Criterion) {
         "worker-c71ddfd4cb1753c7a540e5248c2beaa079fc3341-chunks.txt",
     )
     .unwrap();
-    let chunks = std::str::from_utf8(&chunks).unwrap();
 
     // parsing the chunks depends on having loaded the `report_json`
     let report = read_fixture(
@@ -96,79 +95,14 @@ fn complex_chunks(c: &mut Criterion) {
         "worker-c71ddfd4cb1753c7a540e5248c2beaa079fc3341-report_json.json",
     )
     .unwrap();
-    let report_json::ParsedReportJson { files, sessions } = parse_report_json(&report);
+    let report_json = parse_report_json(&report);
 
     c.bench_function("complex_chunks", |b| {
-        b.iter(|| parse_chunks_file(chunks, files.clone(), sessions.clone()))
+        b.iter(|| parse_chunks_file_serde(&chunks, report_json.clone()))
     });
 }
 
-fn parse_chunks_file(input: &str, files: HashMap<usize, i64>, sessions: HashMap<usize, i64>) {
+fn parse_chunks_file_serde(input: &[u8], report_json: report_json::ParsedReportJson) {
     let report_builder = TestReportBuilder::default();
-
-    let chunks_ctx = chunks::ParseCtx::new(report_builder, files, sessions);
-    let mut chunks_stream = chunks::ReportOutputStream::<&str, TestReport, TestReportBuilder> {
-        input,
-        state: chunks_ctx,
-    };
-
-    chunks::parse_chunks_file
-        .parse_next(&mut chunks_stream)
-        .unwrap();
-}
-
-#[divan::bench]
-fn simple_chunks_serde() {
-    let chunks: &[&[u8]] = &[
-        // Header and one chunk with an empty line
-        b"{}\n<<<<< end_of_header >>>>>\n{}\n",
-        // No header, one chunk with a populated line and an empty line
-        b"{}\n[1, null, [[0, 1]]]\n",
-        // No header, two chunks, the second having just one empty line
-        b"{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n",
-        // Header, two chunks, the second having multiple data lines and an empty line
-        b"{}\n<<<<< end_of_header >>>>>\n{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n[1, null, [[0, 1]]]\n[1, null, [[0, 1]]]\n",
-    ];
-
-    let report_json = report_json::ParsedReportJson {
-        files: Default::default(),
-        sessions: Default::default(),
-    };
-
-    for input in chunks {
-        parse_chunks_file_serde(input, &report_json);
-    }
-}
-
-// this is currently <300 ms on my machine
-#[divan::bench(sample_count = 10)]
-fn complex_chunks_serde(bencher: Bencher) {
-    // this is a ~96M `chunks` file
-    let chunks =
-        load_fixture("pyreport/large/worker-c71ddfd4cb1753c7a540e5248c2beaa079fc3341-chunks.txt");
-
-    // parsing the chunks depends on having loaded the `report_json`
-    let report = load_fixture(
-        "pyreport/large/worker-c71ddfd4cb1753c7a540e5248c2beaa079fc3341-report_json.json",
-    );
-    let report_json = parse_report_json(&report);
-
-    bencher.bench(|| parse_chunks_file_serde(&chunks, &report_json));
-}
-
-fn parse_chunks_file_serde(input: &[u8], report_json: &report_json::ParsedReportJson) {
-    let mut report_builder = TestReportBuilder::default();
-    chunks_serde::parse_chunks_file(input, report_json, &mut report_builder).unwrap();
-}
-
-#[track_caller]
-fn load_fixture(path: &str) -> Vec<u8> {
-    let path = format!("./fixtures/{path}");
-    let contents = std::fs::read(path).unwrap();
-
-    if contents.starts_with(b"version https://git-lfs.github.com/spec/v1") {
-        panic!("Fixture has not been pulled from Git LFS");
-    }
-
-    contents
+    chunks_serde::parse_chunks_file(input, report_json, report_builder).unwrap();
 }
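
For readers following along, here is a minimal sketch of how the new serde-based entry point is driven, assembled from the calls visible in this diff. The `codecov_rs` import paths, the `ParsedReportJson` fields, and the `chunks_serde::parse_chunks_file` signature are taken as shown above and may differ in later revisions; the standalone `main` wrapper and the comments are added here only for illustration.

    use std::collections::HashMap;

    use codecov_rs::{
        parsers::pyreport::{chunks_serde, report_json},
        test_utils::test_report::TestReportBuilder,
    };

    fn main() {
        // No header, one chunk with a populated line and an empty line
        // (the second `simple_chunks` input above).
        let input: &[u8] = b"{}\n[1, null, [[0, 1]]]\n";

        // File/session id mappings that the `report_json` parser would normally produce.
        let files = HashMap::from([(0, 0), (1, 1), (2, 2)]);
        let sessions = HashMap::from([(0, 0), (1, 1), (2, 2)]);
        let report_json = report_json::ParsedReportJson { files, sessions };

        // Parse into the in-memory test builder, mirroring the bench helper.
        let report_builder = TestReportBuilder::default();
        chunks_serde::parse_chunks_file(input, report_json, report_builder).unwrap();
    }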