diff --git a/src/sources/kubernetes_logs/mod.rs b/src/sources/kubernetes_logs/mod.rs index f59bc7b236d0a..11e6fb554b692 100644 --- a/src/sources/kubernetes_logs/mod.rs +++ b/src/sources/kubernetes_logs/mod.rs @@ -83,6 +83,11 @@ pub struct Config { #[serde(default = "default_max_read_bytes")] max_read_bytes: usize, + /// The maximum number of bytes a line can contain before being discarded. This protects + /// against malformed lines or tailing incorrect files. + #[serde(default = "default_max_line_bytes")] + max_line_bytes: usize, + /// This value specifies not exactly the globbing, but interval /// between the polling the files to watch from the `paths_provider`. /// This is quite efficient, yet might still create some load of the @@ -154,6 +159,7 @@ struct Source { label_selector: String, exclude_paths: Vec, max_read_bytes: usize, + max_line_bytes: usize, glob_minimum_cooldown: Duration, ingestion_timestamp_field: Option, timezone: TimeZone, @@ -198,6 +204,7 @@ impl Source { label_selector, exclude_paths, max_read_bytes: config.max_read_bytes, + max_line_bytes: config.max_line_bytes, glob_minimum_cooldown, ingestion_timestamp_field: config.ingestion_timestamp_field.clone(), timezone, @@ -218,6 +225,7 @@ impl Source { label_selector, exclude_paths, max_read_bytes, + max_line_bytes, glob_minimum_cooldown, ingestion_timestamp_field, timezone, @@ -246,12 +254,6 @@ impl Source { // TODO: maybe more of the parameters have to be configurable. - // The 16KB is the maximum size of the payload at single line for both - // docker and CRI log formats. - // We take a double of that to account for metadata and padding, and to - // have a power of two rounding. Line splitting is countered at the - // parsers, see the `partial_events_merger` logic. - let max_line_bytes = 32 * 1024; // 32 KiB let file_server = FileServer { // Use our special paths provider. 
paths_provider, @@ -271,8 +273,8 @@ impl Source { // be other, more sound ways for users considering the use of this // option to solve their use case, so take consideration. ignore_before: None, - // Max line length to expect during regular log reads, see the - // explanation above. + // The maximum number of bytes a line can contain before being discarded. This + // protects against malformed lines or tailing incorrect files. max_line_bytes, // Delimiter bytes that is used to read the file line-by-line line_delimiter: Bytes::from("\n"), @@ -426,6 +428,19 @@ fn default_max_read_bytes() -> usize { 2048 } +fn default_max_line_bytes() -> usize { + // NOTE: The below comment documents an incorrect assumption, see + // https://github.com/timberio/vector/issues/6967 + // + // The 16KB is the maximum size of the payload at single line for both + // docker and CRI log formats. + // We take a double of that to account for metadata and padding, and to + // have a power of two rounding. Line splitting is countered at the + // parsers, see the `partial_events_merger` logic. + + 32 * 1024 // 32 KiB +} + fn default_glob_minimum_cooldown_ms() -> usize { 60000 }