From de4c6a8327386b8ae14f17e94eecabe8eed62606 Mon Sep 17 00:00:00 2001 From: Will <43912346+001wwang@users.noreply.github.com> Date: Thu, 14 Oct 2021 09:52:42 -0400 Subject: [PATCH 01/38] chore(buffers): Revert to _bytes naming convention for buffer metrics (#9591) * Update spec with _bytes naming Signed-off-by: 001wwang * Update buffer internal event implementation Signed-off-by: 001wwang * Add buffer_type field to buffer span Signed-off-by: 001wwang * Use cfg guard for setting disk buffer_type Signed-off-by: 001wwang --- docs/specs/buffer.md | 14 ++++++++------ lib/vector-core/buffers/src/internal_events.rs | 4 ++-- lib/vector-core/src/metrics/label_filter.rs | 1 + src/topology/builder.rs | 6 ++++++ 4 files changed, 17 insertions(+), 8 deletions(-) diff --git a/docs/specs/buffer.md b/docs/specs/buffer.md index 42b6e310759c2..ad50c8d761fb4 100644 --- a/docs/specs/buffer.md +++ b/docs/specs/buffer.md @@ -18,7 +18,9 @@ Vector buffers MUST be instrumented for optimal observability and monitoring. Th ### Terms and Definitions -`component_metadata` - Refers to the metadata (component id, component scope, component kind, and component type) of the component associated with the buffer. Buffer metrics MUST be tagged with all or partial `component_metadata` unless specified otherwise. In most cases, these tags are automatically added from tracing span context and do not need to be included as event properties. +- `component_metadata` - Refers to the metadata (component id, component scope, component kind, and component type) of the component associated with the buffer. Buffer metrics MUST be tagged with all or partial `component_metadata` unless specified otherwise. In most cases, these tags are automatically added from tracing span context and do not need to be included as event properties. +- `byte_size` - Refers to the byte size of events from a buffer's perspective. For memory buffers, `byte_size` represents the in-memory byte size of events. For disk buffers, `byte_size` represents the serialized byte size of events. +- `buffer_type` - One of `memory`, `disk`. Buffer metrics MUST be tagged with `buffer_type` unless otherwise specified. ### Events @@ -36,7 +38,7 @@ Vector buffers MUST be instrumented for optimal observability and monitoring. Th * MUST emit the `buffer_max_event_size` gauge (in-memory buffers) if the defined `max_size_events` value is present * MUST emit the `buffer_max_byte_size` gauge (disk buffers) if the defined `max_size_bytes` value is present * MUST emit the `buffer_received_events_total` counter with the defined `initial_events_size` value - * MUST emit the `buffer_received_event_bytes_total` counter with the defined `initial_bytes_size` value + * MUST emit the `buffer_received_bytes_total` counter with the defined `initial_bytes_size` value #### `BufferEventsReceived` @@ -44,10 +46,10 @@ Vector buffers MUST be instrumented for optimal observability and monitoring. 
Th * Properties * `count` - the number of received events - * `byte_size` - the byte size of received events + * `byte_size` - as defined in [Terms and Definitions](#terms-and-definitions) * Metric * MUST increment the `buffer_received_events_total` counter by the defined `count` - * MUST increment the `buffer_received_event_bytes_total` counter by the defined `byte_size` + * MUST increment the `buffer_received_bytes_total` counter by the defined `byte_size` * MUST increment the `buffer_events` gauge by the defined `count` * MUST increment the `buffer_byte_size` gauge by the defined `byte_size` @@ -57,10 +59,10 @@ Vector buffers MUST be instrumented for optimal observability and monitoring. Th * Properties * `count` - the number of sent events - * `byte_size` - the byte size of sent events + * `byte_size` - as defined in [Terms and Definitions](#terms-and-definitions) * Metric * MUST increment the `buffer_sent_events_total` counter by the defined `count` - * MUST increment the `buffer_sent_event_bytes_total` counter by the defined `byte_size` + * MUST increment the `buffer_sent_bytes_total` counter by the defined `byte_size` * MUST decrement the `buffer_events` gauge by the defined `count` * MUST decrement the `buffer_byte_size` gauge by the defined `byte_size` diff --git a/lib/vector-core/buffers/src/internal_events.rs b/lib/vector-core/buffers/src/internal_events.rs index 2f226d142df02..6dc6f6124c9a0 100644 --- a/lib/vector-core/buffers/src/internal_events.rs +++ b/lib/vector-core/buffers/src/internal_events.rs @@ -10,7 +10,7 @@ impl InternalEvent for BufferEventsReceived { #[allow(clippy::cast_precision_loss)] fn emit_metrics(&self) { counter!("buffer_received_events_total", self.count); - counter!("buffer_received_event_bytes_total", self.byte_size as u64); + counter!("buffer_received_bytes_total", self.byte_size as u64); increment_gauge!("buffer_events", self.count as f64); increment_gauge!("buffer_byte_size", self.byte_size as f64); } @@ -25,7 +25,7 @@ impl InternalEvent for BufferEventsSent { #[allow(clippy::cast_precision_loss)] fn emit_metrics(&self) { counter!("buffer_sent_events_total", self.count); - counter!("buffer_sent_event_bytes_total", self.byte_size as u64); + counter!("buffer_sent_bytes_total", self.byte_size as u64); decrement_gauge!("buffer_events", self.count as f64); decrement_gauge!("buffer_byte_size", self.byte_size as f64); } diff --git a/lib/vector-core/src/metrics/label_filter.rs b/lib/vector-core/src/metrics/label_filter.rs index 81b7ab7bb158b..80f99e28c3515 100644 --- a/lib/vector-core/src/metrics/label_filter.rs +++ b/lib/vector-core/src/metrics/label_filter.rs @@ -11,5 +11,6 @@ impl LabelFilter for VectorLabelFilter { || key == "component_type" || key == "component_kind" || key == "component_name" + || key == "buffer_type" } } diff --git a/src/topology/builder.rs b/src/topology/builder.rs index 7434943ca52fd..0dba59a872dd3 100644 --- a/src/topology/builder.rs +++ b/src/topology/builder.rs @@ -361,6 +361,11 @@ pub async fn build_pieces( let (tx, rx, acker) = if let Some(buffer) = buffers.remove(key) { buffer } else { + let buffer_type = match sink.buffer { + buffers::BufferConfig::Memory { .. } => "memory", + #[cfg(feature = "disk-buffer")] + buffers::BufferConfig::Disk { .. 
} => "disk", + }; let buffer_span = error_span!( "sink", component_kind = "sink", @@ -368,6 +373,7 @@ pub async fn build_pieces( component_scope = %key.scope(), component_type = typetag, component_name = %key.id(), + buffer_type = buffer_type, ); let buffer = sink.buffer.build(&config.global.data_dir, key, buffer_span); match buffer { From 0e4ba6505184b30a21c9f67d9aabc757dde1a41c Mon Sep 17 00:00:00 2001 From: Will <43912346+001wwang@users.noreply.github.com> Date: Thu, 14 Oct 2021 15:22:28 -0400 Subject: [PATCH 02/38] fix: Resolve markdown lint error in buffer specification (#9607) Signed-off-by: 001wwang --- docs/specs/buffer.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/specs/buffer.md b/docs/specs/buffer.md index ad50c8d761fb4..91644b22710fb 100644 --- a/docs/specs/buffer.md +++ b/docs/specs/buffer.md @@ -18,9 +18,9 @@ Vector buffers MUST be instrumented for optimal observability and monitoring. Th ### Terms and Definitions -- `component_metadata` - Refers to the metadata (component id, component scope, component kind, and component type) of the component associated with the buffer. Buffer metrics MUST be tagged with all or partial `component_metadata` unless specified otherwise. In most cases, these tags are automatically added from tracing span context and do not need to be included as event properties. -- `byte_size` - Refers to the byte size of events from a buffer's perspective. For memory buffers, `byte_size` represents the in-memory byte size of events. For disk buffers, `byte_size` represents the serialized byte size of events. -- `buffer_type` - One of `memory`, `disk`. Buffer metrics MUST be tagged with `buffer_type` unless otherwise specified. +* `component_metadata` - Refers to the metadata (component id, component scope, component kind, and component type) of the component associated with the buffer. Buffer metrics MUST be tagged with all or partial `component_metadata` unless specified otherwise. In most cases, these tags are automatically added from tracing span context and do not need to be included as event properties. +* `byte_size` - Refers to the byte size of events from a buffer's perspective. For memory buffers, `byte_size` represents the in-memory byte size of events. For disk buffers, `byte_size` represents the serialized byte size of events. +* `buffer_type` - One of `memory`, `disk`. Buffer metrics MUST be tagged with `buffer_type` unless otherwise specified. 
### Events From d0c30e33bfc758ae3e1c066da59325357696af3a Mon Sep 17 00:00:00 2001 From: Bruce Guenter Date: Thu, 14 Oct 2021 14:13:19 -0600 Subject: [PATCH 03/38] chore(unit tests): Unify component/trace test init (#9606) * Rename `components::init` to `init_test` * Add `trace_init` to `component::init_test` Signed-off-by: Bruce Guenter --- src/sinks/datadog/events.rs | 2 +- src/sinks/influxdb/logs.rs | 2 +- src/sinks/logdna.rs | 5 ++--- src/sources/apache_metrics/mod.rs | 2 +- src/sources/file.rs | 2 +- src/sources/heroku_logs.rs | 12 ++---------- src/sources/http.rs | 27 ++------------------------ src/sources/prometheus/remote_write.rs | 4 ++-- src/sources/socket/mod.rs | 14 ++++++------- src/test_util/components.rs | 9 +++++---- 10 files changed, 24 insertions(+), 55 deletions(-) diff --git a/src/sinks/datadog/events.rs b/src/sinks/datadog/events.rs index 333794b1408e4..652c8d53263de 100644 --- a/src/sinks/datadog/events.rs +++ b/src/sinks/datadog/events.rs @@ -326,7 +326,7 @@ mod tests { let (batch, mut receiver) = BatchNotifier::new_with_receiver(); let (expected, events) = random_events_with_stream(100, 10, Some(batch)); - components::init(); + components::init_test(); sink.run(events).await.unwrap(); if batch_status == BatchStatus::Delivered { components::SINK_TESTS.assert(&HTTP_SINK_TAGS); diff --git a/src/sinks/influxdb/logs.rs b/src/sinks/influxdb/logs.rs index e3588ca984da4..1b53fef99c6dc 100644 --- a/src/sinks/influxdb/logs.rs +++ b/src/sinks/influxdb/logs.rs @@ -591,7 +591,7 @@ mod tests { } drop(batch); - components::init(); + components::init_test(); sink.run(stream::iter(events)).await.unwrap(); if batch_status == BatchStatus::Delivered { components::SINK_TESTS.assert(&HTTP_SINK_TAGS); diff --git a/src/sinks/logdna.rs b/src/sinks/logdna.rs index 546b63429b79d..00f0ef0305b9a 100644 --- a/src/sinks/logdna.rs +++ b/src/sinks/logdna.rs @@ -297,7 +297,7 @@ mod tests { config::SinkConfig, sinks::util::test::{build_test_server_status, load_sink}, test_util::components::{self, HTTP_SINK_TAGS}, - test_util::{next_addr, random_lines, trace_init}, + test_util::{next_addr, random_lines}, }; use futures::{channel::mpsc, stream, StreamExt}; use http::{request::Parts, StatusCode}; @@ -357,7 +357,7 @@ mod tests { Vec>, mpsc::Receiver<(Parts, bytes::Bytes)>, ) { - trace_init(); + components::init_test(); let (mut config, cx) = load_sink::( r#" @@ -402,7 +402,6 @@ mod tests { } drop(batch); - components::init(); sink.run(stream::iter(events)).await.unwrap(); if batch_status == BatchStatus::Delivered { components::SINK_TESTS.assert(&HTTP_SINK_TAGS); diff --git a/src/sources/apache_metrics/mod.rs b/src/sources/apache_metrics/mod.rs index b8cf7e12ff973..553b9eee3e12f 100644 --- a/src/sources/apache_metrics/mod.rs +++ b/src/sources/apache_metrics/mod.rs @@ -350,7 +350,7 @@ Scoreboard: ____S_____I______R____I_______KK___D__C__G_L____________W___________ let (tx, rx) = Pipeline::new_test(); - components::init(); + components::init_test(); let source = ApacheMetricsConfig { endpoints: vec![format!("http://foo:bar@{}/metrics", in_addr)], scrape_interval_secs: 1, diff --git a/src/sources/file.rs b/src/sources/file.rs index add6e0d741cc7..e98493966b47a 100644 --- a/src/sources/file.rs +++ b/src/sources/file.rs @@ -1648,7 +1648,7 @@ mod tests { acking_mode: AckingMode, inner: impl Future, ) -> Vec { - components::init(); + components::init_test(); let (tx, rx) = if acking_mode == Acks { let (tx, rx) = Pipeline::new_test_finalize(EventStatus::Delivered); diff --git a/src/sources/heroku_logs.rs 
b/src/sources/heroku_logs.rs index 042f03b3dd8d6..1a0cb948c8073 100644 --- a/src/sources/heroku_logs.rs +++ b/src/sources/heroku_logs.rs @@ -258,9 +258,7 @@ mod tests { use super::{HttpSourceAuthConfig, LogplexConfig}; use crate::{ config::{log_schema, SourceConfig, SourceContext}, - test_util::{ - components, next_addr, random_string, spawn_collect_n, trace_init, wait_for_tcp, - }, + test_util::{components, next_addr, random_string, spawn_collect_n, wait_for_tcp}, Pipeline, }; use chrono::{DateTime, Utc}; @@ -280,7 +278,7 @@ mod tests { status: EventStatus, acknowledgements: bool, ) -> (impl Stream, SocketAddr) { - components::init(); + components::init_test(); let (sender, recv) = Pipeline::new_test_finalize(status); let address = next_addr(); let mut context = SourceContext::new_test(sender); @@ -336,8 +334,6 @@ mod tests { #[tokio::test] async fn logplex_handles_router_log() { - trace_init(); - let auth = make_auth(); let (rx, addr) = source( @@ -383,8 +379,6 @@ mod tests { #[tokio::test] async fn logplex_handles_failures() { - trace_init(); - let auth = make_auth(); let (rx, addr) = source(Some(auth.clone()), vec![], EventStatus::Failed, true).await; @@ -407,8 +401,6 @@ mod tests { #[tokio::test] async fn logplex_ignores_disabled_acknowledgements() { - trace_init(); - let auth = make_auth(); let (rx, addr) = source(Some(auth.clone()), vec![], EventStatus::Failed, false).await; diff --git a/src/sources/http.rs b/src/sources/http.rs index 697fc258db463..717aa4826ed8d 100644 --- a/src/sources/http.rs +++ b/src/sources/http.rs @@ -177,7 +177,7 @@ mod tests { codecs::{DecodingConfig, JsonParserConfig}, config::{log_schema, SourceConfig, SourceContext}, event::{Event, EventStatus, Value}, - test_util::{components, next_addr, spawn_collect_n, trace_init, wait_for_tcp}, + test_util::{components, next_addr, spawn_collect_n, wait_for_tcp}, Pipeline, }; use flate2::{ @@ -206,7 +206,7 @@ mod tests { acknowledgements: bool, decoding: DecodingConfig, ) -> (impl Stream, SocketAddr) { - components::init(); + components::init_test(); let (sender, recv) = Pipeline::new_test_finalize(status); let address = next_addr(); let path = path.to_owned(); @@ -304,8 +304,6 @@ mod tests { #[tokio::test] async fn http_multiline_text() { - trace_init(); - let body = "test body\ntest body 2"; let (rx, addr) = source( @@ -342,8 +340,6 @@ mod tests { #[tokio::test] async fn http_multiline_text2() { - trace_init(); - //same as above test but with a newline at the end let body = "test body\ntest body 2\n"; @@ -381,8 +377,6 @@ mod tests { #[tokio::test] async fn http_json_parsing() { - trace_init(); - let (rx, addr) = source( vec![], vec![], @@ -423,8 +417,6 @@ mod tests { #[tokio::test] async fn http_json_values() { - trace_init(); - let (rx, addr) = source( vec![], vec![], @@ -468,8 +460,6 @@ mod tests { #[tokio::test] async fn http_json_dotted_keys() { - trace_init(); - let (rx, addr) = source( vec![], vec![], @@ -512,8 +502,6 @@ mod tests { #[tokio::test] async fn http_ndjson() { - trace_init(); - let (rx, addr) = source( vec![], vec![], @@ -580,8 +568,6 @@ mod tests { #[tokio::test] async fn http_headers() { - trace_init(); - let mut headers = HeaderMap::new(); headers.insert("User-Agent", "test_client".parse().unwrap()); headers.insert("Upgrade-Insecure-Requests", "false".parse().unwrap()); @@ -624,7 +610,6 @@ mod tests { #[tokio::test] async fn http_query() { - trace_init(); let (rx, addr) = source( vec![], vec![ @@ -663,8 +648,6 @@ mod tests { #[tokio::test] async fn http_gzip_deflate() { - trace_init(); - let 
body = "test body"; let mut encoder = GzEncoder::new(Vec::new(), Compression::default()); @@ -704,7 +687,6 @@ mod tests { #[tokio::test] async fn http_path() { - trace_init(); let (rx, addr) = source( vec![], vec![], @@ -736,7 +718,6 @@ mod tests { #[tokio::test] async fn http_path_no_restriction() { - trace_init(); let (rx, addr) = source( vec![], vec![], @@ -806,8 +787,6 @@ mod tests { #[tokio::test] async fn http_delivery_failure() { - trace_init(); - let (rx, addr) = source( vec![], vec![], @@ -833,8 +812,6 @@ mod tests { #[tokio::test] async fn ignores_disabled_acknowledgements() { - trace_init(); - let (rx, addr) = source( vec![], vec![], diff --git a/src/sources/prometheus/remote_write.rs b/src/sources/prometheus/remote_write.rs index 9115d7d563c76..c3a36ef490983 100644 --- a/src/sources/prometheus/remote_write.rs +++ b/src/sources/prometheus/remote_write.rs @@ -133,7 +133,7 @@ mod test { } async fn receives_metrics(tls: Option) { - components::init(); + components::init_test(); let address = test_util::next_addr(); let (tx, rx) = Pipeline::new_test_finalize(EventStatus::Delivered); @@ -228,7 +228,7 @@ mod integration_tests { #[tokio::test] async fn receive_something() { - components::init(); + components::init_test(); let config = PrometheusRemoteWriteConfig { address: PROMETHEUS_RECEIVE_ADDRESS.parse().unwrap(), auth: None, diff --git a/src/sources/socket/mod.rs b/src/sources/socket/mod.rs index e35ea1a6e2db2..2dbce9311d9da 100644 --- a/src/sources/socket/mod.rs +++ b/src/sources/socket/mod.rs @@ -215,7 +215,7 @@ mod test { //////// TCP TESTS //////// #[tokio::test] async fn tcp_it_includes_host() { - components::init(); + components::init_test(); let (tx, mut rx) = Pipeline::new_test(); let addr = next_addr(); @@ -238,7 +238,7 @@ mod test { #[tokio::test] async fn tcp_it_includes_source_type() { - components::init(); + components::init_test(); let (tx, mut rx) = Pipeline::new_test(); let addr = next_addr(); @@ -264,7 +264,7 @@ mod test { #[tokio::test] async fn tcp_continue_after_long_line() { - components::init(); + components::init_test(); let (tx, mut rx) = Pipeline::new_test(); let addr = next_addr(); @@ -305,7 +305,7 @@ mod test { #[tokio::test] async fn tcp_with_tls() { - components::init(); + components::init_test(); let (tx, mut rx) = Pipeline::new_test(); let addr = next_addr(); @@ -342,7 +342,7 @@ mod test { #[tokio::test] async fn tcp_with_tls_intermediate_ca() { - components::init(); + components::init_test(); let (tx, mut rx) = Pipeline::new_test(); let addr = next_addr(); @@ -391,7 +391,7 @@ mod test { #[tokio::test] async fn tcp_shutdown_simple() { - components::init(); + components::init_test(); let source_id = ComponentKey::from("tcp_shutdown_simple"); let (tx, mut rx) = Pipeline::new_test(); let addr = next_addr(); @@ -427,7 +427,7 @@ mod test { #[tokio::test] async fn tcp_shutdown_infinite_stream() { - components::init(); + components::init_test(); // It's important that the buffer be large enough that the TCP source doesn't have // to block trying to forward its input into the Sender because the channel is full, // otherwise even sending the signal to shut down won't wake it up. diff --git a/src/test_util/components.rs b/src/test_util/components.rs index 69827d6673034..b70b3f326195e 100644 --- a/src/test_util/components.rs +++ b/src/test_util/components.rs @@ -82,7 +82,8 @@ impl ComponentTests { } /// Initialize the necessary bits needed to run a component test specification. 
-pub fn init() { +pub fn init_test() { + super::trace_init(); EVENTS_RECORDED.with(|er| er.borrow_mut().clear()); // Handle multiple initializations. if let Err(error) = metrics::init_test() {
@@ -173,14 +174,14 @@ pub async fn run_sink(sink: VectorSink, events: S, tags: &[&str]) where S: Stream + Send, { - init(); + init_test(); sink.run(events).await.expect("Running sink failed"); SINK_TESTS.assert(tags); } /// Convenience wrapper for running a sink with a single event pub async fn run_sink_event(sink: VectorSink, event: Event, tags: &[&str]) { - init(); + init_test(); run_sink(sink, stream::once(std::future::ready(event)), tags).await }
@@ -198,7 +199,7 @@ pub async fn sink_send_stream(sink: VectorSink, mut events: S, tags: &[&str]) where S: Stream> + Send + Unpin, { - init(); + init_test(); sink.into_sink() .send_all(&mut events) .await

From a7cf0144940afdfbc7f17acff53083d65a113e70 Mon Sep 17 00:00:00 2001
From: Bruce Guenter
Date: Thu, 14 Oct 2021 14:56:57 -0600
Subject: [PATCH 04/38] chore(buffers): Fix compilation error in buffers soak example (#9609)

Signed-off-by: Bruce Guenter
---
lib/vector-core/buffers/examples/soak.rs | 1 +
1 file changed, 1 insertion(+)

diff --git a/lib/vector-core/buffers/examples/soak.rs b/lib/vector-core/buffers/examples/soak.rs
index 9a0544199ae84..048cc6b70fe6e 100644
--- a/lib/vector-core/buffers/examples/soak.rs
+++ b/lib/vector-core/buffers/examples/soak.rs
@@ -122,6 +122,7 @@ async fn main() { let variant = Variant::Memory { max_events: 99999, when_full: WhenFull::DropNewest, + instrument: true, }; let (writer, reader, acker) =

From b287b1de025979aeb89b51a3f1549ad7d3df1717 Mon Sep 17 00:00:00 2001
From: Jesse Szwedko
Date: Thu, 14 Oct 2021 16:20:24 -0500
Subject: [PATCH 05/38] fix(codecs): Reset default max_length for character_delimited framing (#9594)

* fix(codecs): Reset default max_length for character_delimited framing

`CharacterDelimitedCodec` replaced `BytesDelimitedCodec` in bd426029c5 but the default was changed at that time from `usize::max` to `100kib`. This sets the default back to `usize::max`. The 100kib default is then used as the default in the `framing.character_delimited.max_length` config to override this if specified by the user.

Fixes: #9592

Signed-off-by: Jesse Szwedko
---
src/codecs/framers/character_delimited.rs | 6 +++---
src/codecs/framers/newline_delimited.rs | 4 ++--
2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/codecs/framers/character_delimited.rs b/src/codecs/framers/character_delimited.rs
index b2d27f4be5a90..761b3ec7a1722 100644
--- a/src/codecs/framers/character_delimited.rs
+++ b/src/codecs/framers/character_delimited.rs
@@ -48,10 +48,10 @@ pub struct CharacterDelimitedCodec { impl CharacterDelimitedCodec { /// Creates a `CharacterDelimitedCodec` with the specified delimiter. - pub fn new(delimiter: char) -> Self { + pub const fn new(delimiter: char) -> Self { CharacterDelimitedCodec { delimiter, - max_length: crate::serde::default_max_length(), + max_length: usize::MAX, is_discarding: false, next_index: 0, } }
@@ -60,7 +60,7 @@ impl CharacterDelimitedCodec { /// Creates a `CharacterDelimitedCodec` with a maximum frame length limit. /// /// Any frames longer than `max_length` bytes will be discarded entirely. - pub fn new_with_max_length(delimiter: char, max_length: usize) -> Self { + pub const fn new_with_max_length(delimiter: char, max_length: usize) -> Self { CharacterDelimitedCodec { max_length, ..CharacterDelimitedCodec::new(delimiter)
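
(Editorial aside: a minimal sketch contrasting the restored unbounded default with the opt-in cap, using only the two constructors visible in the hunk above; the delimiter, the size value, and the import path are illustrative assumptions.)

```rust
// Import path assumed to be re-exported from this repo's src/codecs module.
use crate::codecs::CharacterDelimitedCodec;

fn framing_examples() {
    // After this patch the default constructor no longer caps frame length
    // (max_length is usize::MAX).
    let _unbounded = CharacterDelimitedCodec::new(',');

    // The former 100 KiB bound is now opt-in, either through this constructor
    // or via framing.character_delimited.max_length in a source config.
    let _capped = CharacterDelimitedCodec::new_with_max_length(',', 102_400);
}
```
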
diff --git a/src/codecs/framers/newline_delimited.rs b/src/codecs/framers/newline_delimited.rs
index 91ea22984bcb8..743a07219d2df 100644
--- a/src/codecs/framers/newline_delimited.rs
+++ b/src/codecs/framers/newline_delimited.rs
@@ -58,14 +58,14 @@ pub struct NewlineDelimitedCodec(CharacterDelimitedCodec); impl NewlineDelimitedCodec { /// Creates a new `NewlineDelimitedCodec`. - pub fn new() -> Self { + pub const fn new() -> Self { Self(CharacterDelimitedCodec::new('\n')) } /// Creates a `NewlineDelimitedCodec` with a maximum frame length limit. /// /// Any frames longer than `max_length` bytes will be discarded entirely. - pub fn new_with_max_length(max_length: usize) -> Self { + pub const fn new_with_max_length(max_length: usize) -> Self { Self(CharacterDelimitedCodec::new_with_max_length( '\n', max_length, ))

From 72545b310c8f362dd692e26120a3f14c257c9617 Mon Sep 17 00:00:00 2001
From: Jesse Szwedko
Date: Thu, 14 Oct 2021 16:22:13 -0500
Subject: [PATCH 06/38] fix(ci): Fix markdown check guard (#9610)

Signed-off-by: Jesse Szwedko
---
.github/workflows/test.yml | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index b057d02dfa68d..084e127cc163b 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -40,7 +40,7 @@ jobs: internal_events: ${{ steps.filter.outputs.internal_events }} helm: ${{ steps.filter.outputs.helm }} cue: ${{ steps.filter.outputs.cue }} - markdown: ${{ steps.filter.outputs.cue }} + markdown: ${{ steps.filter.outputs.markdown }} steps: - uses: actions/checkout@v2.3.4 - uses: dorny/paths-filter@v2
@@ -74,7 +74,7 @@ cue: - 'website/cue/**' markdown: - - '**/**.md' + - '*/**.md' internal_events: - 'src/internal_events/**' helm:

From 85312b5065ac5c7503724cd6450d898775744f68 Mon Sep 17 00:00:00 2001
From: Pablo Sichert
Date: Thu, 14 Oct 2021 23:32:47 +0200
Subject: [PATCH 07/38] fix(codecs): Default framing to `bytes` for message based sources and to `newline_delimited` for stream based sources (#9567)

* Default to `bytes`/`newline_delimited` framing for message/stream based sources
Signed-off-by: Pablo Sichert
* Test newline behavior for TCP, UDP and Unix sockets
Signed-off-by: Pablo Sichert
* Guard imports that are used on Unix only
Signed-off-by: Pablo Sichert
* Test newline behavior for HTTP and use appropriate framing in tests
Signed-off-by: Pablo Sichert
* Update outdated vector configs in tests
Signed-off-by: Pablo Sichert
* Choose smaller default value for `count` that can be parsed by TOML
Signed-off-by: Pablo Sichert
* Use `default_framing_stream_based` on `exec` both for `Scheduled` and `Streaming`
Signed-off-by: Pablo Sichert
* Simplify defaults with struct level `#[serde(default)]` attribute
Signed-off-by: Pablo Sichert
* Use `default_framing_stream_based` on HTTP source
Signed-off-by: Pablo Sichert
* Document framing default on a per-source basis
Signed-off-by: Pablo Sichert
* Remove backticks
Signed-off-by: Jesse Szwedko

Co-authored-by: Jesse Szwedko
---
src/codecs/framers/bytes.rs | 7 +
src/codecs/mod.rs | 35 +---
src/generate.rs | 27 +++
src/serde.rs | 20 +++
src/sources/aws_kinesis_firehose/mod.rs | 20 ++-
src/sources/datadog/agent.rs | 19 +-
src/sources/exec/mod.rs | 22 ++-
src/sources/generator.rs | 33 +++-
src/sources/heroku_logs.rs | 19 +- src/sources/http.rs | 109 +++++++++--- src/sources/kafka.rs | 17 +- src/sources/nats.rs | 31 ++-- src/sources/socket/mod.rs | 164 ++++++++++++------ src/sources/socket/tcp.rs | 19 +- src/sources/socket/udp.rs | 13 +- src/sources/socket/unix.rs | 12 +- src/sources/stdin.rs | 14 +- website/cue/reference/components.cue | 3 +- website/cue/reference/components/sources.cue | 10 +- .../components/sources/datadog_agent.cue | 5 +- .../cue/reference/components/sources/exec.cue | 5 +- .../components/sources/generator.cue | 5 +- .../components/sources/heroku_logs.cue | 5 +- .../cue/reference/components/sources/http.cue | 5 +- .../reference/components/sources/kafka.cue | 5 +- .../cue/reference/components/sources/nats.cue | 5 +- .../reference/components/sources/socket.cue | 5 +- .../reference/components/sources/stdin.cue | 5 +- 28 files changed, 444 insertions(+), 195 deletions(-) diff --git a/src/codecs/framers/bytes.rs b/src/codecs/framers/bytes.rs index 5659c7ae34ceb..c6dd5348e79e6 100644 --- a/src/codecs/framers/bytes.rs +++ b/src/codecs/framers/bytes.rs @@ -7,6 +7,13 @@ use tokio_util::codec::Decoder; #[derive(Debug, Clone, Default, Deserialize, Serialize)] pub struct BytesDecoderConfig; +impl BytesDecoderConfig { + /// Creates a new `BytesDecoderConfig`. + pub const fn new() -> Self { + Self + } +} + #[typetag::serde(name = "bytes")] impl FramingConfig for BytesDecoderConfig { fn build(&self) -> crate::Result { diff --git a/src/codecs/mod.rs b/src/codecs/mod.rs index ac0896284e809..0c89d451c0c0d 100644 --- a/src/codecs/mod.rs +++ b/src/codecs/mod.rs @@ -126,45 +126,28 @@ impl tokio_util::codec::Decoder for Decoder { } /// Config used to build a `Decoder`. -/// -/// Usually used in source configs via `#[serde(flatten)]`. -#[derive(Debug, Clone, Default, Deserialize, Serialize)] -#[serde(default)] +#[derive(Debug, Clone, Deserialize, Serialize)] pub struct DecodingConfig { /// The framing config. - framing: Option>, + framing: Box, /// The decoding config. - decoding: Option>, + decoding: Box, } impl DecodingConfig { /// Creates a new `DecodingConfig` with the provided `FramingConfig` and /// `ParserConfig`. - pub fn new( - framing: Option>, - decoding: Option>, - ) -> Self { + pub fn new(framing: Box, decoding: Box) -> Self { Self { framing, decoding } } /// Builds a `Decoder` from the provided configuration. - /// - /// Fails if any of the provided `framing` or `decoding` configs fail to - /// build. pub fn build(&self) -> crate::Result { - // Build the framer or use a newline delimited decoder if not provided. - let framer: BoxedFramer = self - .framing - .as_ref() - .map(|config| config.build()) - .unwrap_or_else(|| NewlineDelimitedDecoderConfig::new().build())?; - - // Build the parser or use a plain bytes parser if not provided. - let parser: BoxedParser = self - .decoding - .as_ref() - .map(|config| config.build()) - .unwrap_or_else(|| BytesParserConfig::new().build())?; + // Build the framer. + let framer: BoxedFramer = self.framing.build()?; + + // Build the parser. 
+ let parser: BoxedParser = self.decoding.build()?; Ok(Decoder::new(framer, parser)) } diff --git a/src/generate.rs b/src/generate.rs index 2cfcd18b276a7..76f18d883660e 100644 --- a/src/generate.rs +++ b/src/generate.rs @@ -441,6 +441,15 @@ mod tests { max_length = 102400 type = "stdin" + [sources.source0.decoding] + codec = "bytes" + + [sources.source0.framing] + method = "newline_delimited" + + [sources.source0.framing.newline_delimited] + max_length = 102400 + [transforms.transform0] inputs = ["source0"] drop_field = true @@ -474,6 +483,15 @@ mod tests { max_length = 102400 type = "stdin" + [sources.source0.decoding] + codec = "bytes" + + [sources.source0.framing] + method = "newline_delimited" + + [sources.source0.framing.newline_delimited] + max_length = 102400 + [transforms.transform0] inputs = ["source0"] drop_field = true @@ -507,6 +525,15 @@ mod tests { max_length = 102400 type = "stdin" + [sources.source0.decoding] + codec = "bytes" + + [sources.source0.framing] + method = "newline_delimited" + + [sources.source0.framing.newline_delimited] + max_length = 102400 + [sinks.sink0] inputs = ["source0"] target = "stdout" diff --git a/src/serde.rs b/src/serde.rs index a41901846be95..19359465b7f02 100644 --- a/src/serde.rs +++ b/src/serde.rs @@ -1,3 +1,8 @@ +#[cfg(feature = "codecs")] +use crate::codecs::{ + BytesDecoderConfig, BytesParserConfig, FramingConfig, NewlineDelimitedDecoderConfig, + ParserConfig, +}; use indexmap::map::IndexMap; use serde::{de, Deserialize, Serialize}; use std::fmt; @@ -19,6 +24,21 @@ pub fn default_max_length() -> usize { bytesize::kib(100u64) as usize } +#[cfg(feature = "codecs")] +pub fn default_framing_message_based() -> Box { + Box::new(BytesDecoderConfig::new()) +} + +#[cfg(feature = "codecs")] +pub fn default_framing_stream_based() -> Box { + Box::new(NewlineDelimitedDecoderConfig::new()) +} + +#[cfg(feature = "codecs")] +pub fn default_decoding() -> Box { + Box::new(BytesParserConfig::new()) +} + pub fn to_string(value: impl serde::Serialize) -> String { let value = serde_json::to_value(value).unwrap(); value.as_str().unwrap().into() diff --git a/src/sources/aws_kinesis_firehose/mod.rs b/src/sources/aws_kinesis_firehose/mod.rs index 3280001018aed..ecb01e84b0b2f 100644 --- a/src/sources/aws_kinesis_firehose/mod.rs +++ b/src/sources/aws_kinesis_firehose/mod.rs @@ -1,6 +1,7 @@ use crate::{ - codecs::DecodingConfig, + codecs::{DecodingConfig, FramingConfig, ParserConfig}, config::{DataType, GenerateConfig, Resource, SourceConfig, SourceContext, SourceDescription}, + serde::{default_decoding, default_framing_message_based}, tls::{MaybeTlsSettings, TlsConfig}, }; use futures::FutureExt; @@ -19,8 +20,10 @@ pub struct AwsKinesisFirehoseConfig { access_key: Option, tls: Option, record_compression: Option, - #[serde(default)] - decoding: DecodingConfig, + #[serde(default = "default_framing_message_based")] + framing: Box, + #[serde(default = "default_decoding")] + decoding: Box, } #[derive(Derivative, Copy, Clone, Debug, Deserialize, Serialize, PartialEq)] @@ -47,10 +50,12 @@ impl fmt::Display for Compression { #[typetag::serde(name = "aws_kinesis_firehose")] impl SourceConfig for AwsKinesisFirehoseConfig { async fn build(&self, cx: SourceContext) -> crate::Result { + let decoder = DecodingConfig::new(self.framing.clone(), self.decoding.clone()).build()?; + let svc = filters::firehose( self.access_key.clone(), self.record_compression.unwrap_or_default(), - self.decoding.build()?, + decoder, cx.out, ); @@ -94,7 +99,8 @@ impl GenerateConfig for 
AwsKinesisFirehoseConfig { access_key: None, tls: None, record_compression: None, - decoding: Default::default(), + framing: default_framing_message_based(), + decoding: default_decoding(), }) .unwrap() } @@ -104,7 +110,6 @@ impl GenerateConfig for AwsKinesisFirehoseConfig { mod tests { use super::*; use crate::{ - codecs::BytesDecoderConfig, event::Event, log_event, test_util::{collect_ready, next_addr, wait_for_tcp}, @@ -138,7 +143,8 @@ mod tests { tls: None, access_key, record_compression, - decoding: DecodingConfig::new(Some(Box::new(BytesDecoderConfig)), None), + framing: default_framing_message_based(), + decoding: default_decoding(), } .build(SourceContext::new_test(sender)) .await diff --git a/src/sources/datadog/agent.rs b/src/sources/datadog/agent.rs index 99f7597e79e90..f44a140a23882 100644 --- a/src/sources/datadog/agent.rs +++ b/src/sources/datadog/agent.rs @@ -1,11 +1,12 @@ use crate::{ - codecs::{self, DecodingConfig}, + codecs::{self, DecodingConfig, FramingConfig, ParserConfig}, config::{ log_schema, DataType, GenerateConfig, Resource, SourceConfig, SourceContext, SourceDescription, }, event::Event, internal_events::HttpDecompressError, + serde::{default_decoding, default_framing_message_based}, sources::{ self, util::{ErrorMessage, TcpError}, @@ -43,8 +44,10 @@ pub struct DatadogAgentConfig { tls: Option, #[serde(default = "crate::serde::default_true")] store_api_key: bool, - #[serde(flatten, default)] - decoding: DecodingConfig, + #[serde(default = "default_framing_message_based")] + framing: Box, + #[serde(default = "default_decoding")] + decoding: Box, } inventory::submit! { @@ -63,7 +66,8 @@ impl GenerateConfig for DatadogAgentConfig { address: "0.0.0.0:8080".parse().unwrap(), tls: None, store_api_key: true, - decoding: Default::default(), + framing: default_framing_message_based(), + decoding: default_decoding(), }) .unwrap() } @@ -73,7 +77,8 @@ impl GenerateConfig for DatadogAgentConfig { #[typetag::serde(name = "datadog_agent")] impl SourceConfig for DatadogAgentConfig { async fn build(&self, cx: SourceContext) -> crate::Result { - let source = DatadogAgentSource::new(self.store_api_key, self.decoding.build()?); + let decoder = DecodingConfig::new(self.framing.clone(), self.decoding.clone()).build()?; + let source = DatadogAgentSource::new(self.store_api_key, decoder); let tls = MaybeTlsSettings::from_config(&self.tls, true)?; let listener = tls.bind(&self.address).await?; @@ -358,6 +363,7 @@ mod tests { codecs::{self, BytesCodec, BytesParser}, config::{log_schema, SourceConfig, SourceContext}, event::{Event, EventStatus}, + serde::{default_decoding, default_framing_message_based}, sources::datadog::agent::DatadogAgentSource, test_util::{next_addr, spawn_collect_n, trace_init, wait_for_tcp}, Pipeline, @@ -434,7 +440,8 @@ mod tests { address, tls: None, store_api_key, - decoding: Default::default(), + framing: default_framing_message_based(), + decoding: default_decoding(), } .build(context) .await diff --git a/src/sources/exec/mod.rs b/src/sources/exec/mod.rs index 2f89a4b235136..082c87474b8a5 100644 --- a/src/sources/exec/mod.rs +++ b/src/sources/exec/mod.rs @@ -1,9 +1,10 @@ use crate::{ async_read::VecAsyncReadExt, - codecs::{self, DecodingConfig}, + codecs::{self, DecodingConfig, FramingConfig, ParserConfig}, config::{log_schema, DataType, SourceConfig, SourceContext, SourceDescription}, event::Event, internal_events::{ExecCommandExecuted, ExecEventsReceived, ExecFailed, ExecTimeout}, + serde::{default_decoding, default_framing_stream_based}, 
shutdown::ShutdownSignal, sources::util::TcpError, Pipeline, @@ -38,8 +39,10 @@ pub struct ExecConfig { pub include_stderr: bool, #[serde(default = "default_maximum_buffer_size")] pub maximum_buffer_size_bytes: usize, - #[serde(default, flatten)] - pub decoding: DecodingConfig, + #[serde(default = "default_framing_stream_based")] + framing: Box, + #[serde(default = "default_decoding")] + decoding: Box, } // TODO: Would be nice to combine the scheduled and streaming config with the mode enum once @@ -87,7 +90,8 @@ impl Default for ExecConfig { working_directory: None, include_stderr: default_include_stderr(), maximum_buffer_size_bytes: default_maximum_buffer_size(), - decoding: Default::default(), + framing: default_framing_stream_based(), + decoding: default_decoding(), } } } @@ -173,10 +177,11 @@ impl SourceConfig for ExecConfig { async fn build(&self, cx: SourceContext) -> crate::Result { self.validate()?; let hostname = get_hostname(); - let decoder = self.decoding.build()?; + let decoder = DecodingConfig::new(self.framing.clone(), self.decoding.clone()).build()?; match &self.mode { Mode::Scheduled => { let exec_interval_secs = self.exec_interval_secs_or_default(); + Ok(Box::pin(run_scheduled( self.clone(), hostname, @@ -189,6 +194,7 @@ impl SourceConfig for ExecConfig { Mode::Streaming => { let respawn_on_exit = self.respawn_on_exit_or_default(); let respawn_interval_secs = self.respawn_interval_secs_or_default(); + Ok(Box::pin(run_streaming( self.clone(), hostname, @@ -587,7 +593,8 @@ mod tests { working_directory: Some(PathBuf::from("/tmp")), include_stderr: default_include_stderr(), maximum_buffer_size_bytes: default_maximum_buffer_size(), - decoding: Default::default(), + framing: default_framing_stream_based(), + decoding: default_decoding(), }; let command = build_command(&config); @@ -698,7 +705,8 @@ mod tests { working_directory: None, include_stderr: default_include_stderr(), maximum_buffer_size_bytes: default_maximum_buffer_size(), - decoding: Default::default(), + framing: default_framing_stream_based(), + decoding: default_decoding(), } } } diff --git a/src/sources/generator.rs b/src/sources/generator.rs index 2d5d6635420c1..556719438d1e4 100644 --- a/src/sources/generator.rs +++ b/src/sources/generator.rs @@ -1,7 +1,8 @@ use crate::{ - codecs::{self, DecodingConfig}, + codecs::{self, DecodingConfig, FramingConfig, ParserConfig}, config::{DataType, SourceConfig, SourceContext, SourceDescription}, internal_events::GeneratorEventProcessed, + serde::{default_decoding, default_framing_message_based}, shutdown::ShutdownSignal, sources::util::TcpError, Pipeline, @@ -15,22 +16,31 @@ use std::task::Poll; use tokio::time::{self, Duration}; use tokio_util::codec::FramedRead; -#[derive(Clone, Debug, Default, Deserialize, Serialize)] +#[derive(Clone, Debug, Derivative, Deserialize, Serialize)] +#[derivative(Default)] +#[serde(default)] pub struct GeneratorConfig { - #[serde(alias = "batch_interval", default = "default_interval")] + #[serde(alias = "batch_interval")] + #[derivative(Default(value = "default_interval()"))] interval: f64, - #[serde(default = "usize::max_value")] + #[derivative(Default(value = "default_count()"))] count: usize, #[serde(flatten)] format: OutputFormat, - #[serde(flatten, default)] - decoding: DecodingConfig, + #[derivative(Default(value = "default_framing_message_based()"))] + framing: Box, + #[derivative(Default(value = "default_decoding()"))] + decoding: Box, } const fn default_interval() -> f64 { 1.0 } +const fn default_count() -> usize { + isize::MAX as 
usize +} + #[derive(Debug, PartialEq, Snafu)] pub enum GeneratorConfigError { #[snafu(display("A non-empty list of lines is required for the shuffle format"))] @@ -109,7 +119,8 @@ impl GeneratorConfig { lines, sequence: false, }, - decoding: Default::default(), + framing: default_framing_message_based(), + decoding: default_decoding(), } } } @@ -178,11 +189,12 @@ impl_generate_config_from_default!(GeneratorConfig); impl SourceConfig for GeneratorConfig { async fn build(&self, cx: SourceContext) -> crate::Result { self.format.validate()?; + let decoder = DecodingConfig::new(self.framing.clone(), self.decoding.clone()).build()?; Ok(Box::pin(generator_source( self.interval, self.count, self.format.clone(), - self.decoding.build()?, + decoder, cx.shutdown, cx.out, ))) @@ -212,11 +224,14 @@ mod tests { async fn runit(config: &str) -> mpsc::Receiver { let (tx, rx) = Pipeline::new_test(); let config: GeneratorConfig = toml::from_str(config).unwrap(); + let decoder = DecodingConfig::new(default_framing_message_based(), default_decoding()) + .build() + .unwrap(); generator_source( config.interval, config.count, config.format, - config.decoding.build().unwrap(), + decoder, ShutdownSignal::noop(), tx, ) diff --git a/src/sources/heroku_logs.rs b/src/sources/heroku_logs.rs index 1a0cb948c8073..2d8f448bd5770 100644 --- a/src/sources/heroku_logs.rs +++ b/src/sources/heroku_logs.rs @@ -1,11 +1,12 @@ use crate::{ - codecs::{self, DecodingConfig}, + codecs::{self, DecodingConfig, FramingConfig, ParserConfig}, config::{ log_schema, DataType, GenerateConfig, Resource, SourceConfig, SourceContext, SourceDescription, }, event::Event, internal_events::{HerokuLogplexRequestReadError, HerokuLogplexRequestReceived}, + serde::{default_decoding, default_framing_message_based}, sources::util::{ add_query_parameters, ErrorMessage, HttpSource, HttpSourceAuthConfig, TcpError, }, @@ -32,8 +33,10 @@ pub struct LogplexConfig { query_parameters: Vec, tls: Option, auth: Option, - #[serde(flatten, default)] - decoding: DecodingConfig, + #[serde(default = "default_framing_message_based")] + framing: Box, + #[serde(default = "default_decoding")] + decoding: Box, } inventory::submit! 
{ @@ -51,7 +54,8 @@ impl GenerateConfig for LogplexConfig { query_parameters: Vec::new(), tls: None, auth: None, - decoding: Default::default(), + framing: default_framing_message_based(), + decoding: default_decoding(), }) .unwrap() } @@ -81,9 +85,10 @@ impl HttpSource for LogplexSource { #[typetag::serde(name = "heroku_logs")] impl SourceConfig for LogplexConfig { async fn build(&self, cx: SourceContext) -> crate::Result { + let decoder = DecodingConfig::new(self.framing.clone(), self.decoding.clone()).build()?; let source = LogplexSource { query_parameters: self.query_parameters.clone(), - decoder: self.decoding.build()?, + decoder, }; source.run(self.address, "events", true, &self.tls, &self.auth, cx) } @@ -258,6 +263,7 @@ mod tests { use super::{HttpSourceAuthConfig, LogplexConfig}; use crate::{ config::{log_schema, SourceConfig, SourceContext}, + serde::{default_decoding, default_framing_message_based}, test_util::{components, next_addr, random_string, spawn_collect_n, wait_for_tcp}, Pipeline, }; @@ -289,7 +295,8 @@ mod tests { query_parameters, tls: None, auth, - decoding: Default::default(), + framing: default_framing_message_based(), + decoding: default_decoding(), } .build(context) .await diff --git a/src/sources/http.rs b/src/sources/http.rs index 717aa4826ed8d..fac44ecae2ef5 100644 --- a/src/sources/http.rs +++ b/src/sources/http.rs @@ -1,10 +1,11 @@ use crate::{ - codecs::{self, DecodingConfig}, + codecs::{self, DecodingConfig, FramingConfig, ParserConfig}, config::{ log_schema, DataType, GenerateConfig, Resource, SourceConfig, SourceContext, SourceDescription, }, event::{Event, Value}, + serde::{default_decoding, default_framing_stream_based}, sources::util::{add_query_parameters, ErrorMessage, HttpSource, HttpSourceAuthConfig}, tls::TlsConfig, }; @@ -31,8 +32,10 @@ pub struct SimpleHttpConfig { path: String, #[serde(default = "default_path_key")] path_key: String, - #[serde(flatten, default)] - decoding: DecodingConfig, + #[serde(default = "default_framing_stream_based")] + framing: Box, + #[serde(default = "default_decoding")] + decoding: Box, } inventory::submit! 
{ @@ -50,7 +53,8 @@ impl GenerateConfig for SimpleHttpConfig { path_key: "path".to_string(), path: "/".to_string(), strict_path: true, - decoding: DecodingConfig::default(), + framing: default_framing_stream_based(), + decoding: default_decoding(), }) .unwrap() } @@ -120,11 +124,12 @@ impl HttpSource for SimpleHttpSource { #[typetag::serde(name = "http")] impl SourceConfig for SimpleHttpConfig { async fn build(&self, cx: SourceContext) -> crate::Result { + let decoder = DecodingConfig::new(self.framing.clone(), self.decoding.clone()).build()?; let source = SimpleHttpSource { headers: self.headers.clone(), query_parameters: self.query_parameters.clone(), path_key: self.path_key.clone(), - decoder: self.decoding.build()?, + decoder, }; source.run( self.address, @@ -174,10 +179,11 @@ fn add_headers(events: &mut [Event], headers_config: &[String], headers: HeaderM mod tests { use super::SimpleHttpConfig; use crate::{ - codecs::{DecodingConfig, JsonParserConfig}, + codecs::{BytesDecoderConfig, FramingConfig, JsonParserConfig, ParserConfig}, config::{log_schema, SourceConfig, SourceContext}, event::{Event, EventStatus, Value}, - test_util::{components, next_addr, spawn_collect_n, wait_for_tcp}, + serde::{default_decoding, default_framing_stream_based}, + test_util::{components, next_addr, spawn_collect_n, trace_init, wait_for_tcp}, Pipeline, }; use flate2::{ @@ -196,16 +202,17 @@ mod tests { crate::test_util::test_generate_config::(); } - async fn source( + async fn source<'a>( headers: Vec, query_parameters: Vec, - path_key: &str, - path: &str, + path_key: &'a str, + path: &'a str, strict_path: bool, status: EventStatus, acknowledgements: bool, - decoding: DecodingConfig, - ) -> (impl Stream, SocketAddr) { + framing: Option>, + decoding: Option>, + ) -> (impl Stream + 'a, SocketAddr) { components::init_test(); let (sender, recv) = Pipeline::new_test_finalize(status); let address = next_addr(); @@ -223,7 +230,8 @@ mod tests { strict_path, path_key, path, - decoding, + framing: framing.unwrap_or_else(default_framing_stream_based), + decoding: decoding.unwrap_or_else(default_decoding), } .build(context) .await @@ -314,7 +322,8 @@ mod tests { true, EventStatus::Delivered, true, - DecodingConfig::default(), + None, + None, ) .await; @@ -351,7 +360,8 @@ mod tests { true, EventStatus::Delivered, true, - DecodingConfig::default(), + None, + None, ) .await; @@ -375,6 +385,39 @@ mod tests { } } + #[tokio::test] + async fn http_bytes_codec_preserves_newlines() { + trace_init(); + + let body = "foo\nbar"; + + let (rx, addr) = source( + vec![], + vec![], + "http_path", + "/", + true, + EventStatus::Delivered, + true, + Some(Box::new(BytesDecoderConfig::new())), + None, + ) + .await; + + let mut events = spawn_ok_collect_n(send(addr, body), rx, 1).await; + + assert_eq!(events.len(), 1); + + { + let event = events.remove(0); + let log = event.as_log(); + assert_eq!(log[log_schema().message_key()], "foo\nbar".into()); + assert!(log.get(log_schema().timestamp_key()).is_some()); + assert_eq!(log[log_schema().source_type_key()], "http".into()); + assert_eq!(log["http_path"], "/".into()); + } + } + #[tokio::test] async fn http_json_parsing() { let (rx, addr) = source( @@ -385,7 +428,8 @@ mod tests { true, EventStatus::Delivered, true, - DecodingConfig::new(None, Some(Box::new(JsonParserConfig::new()))), + None, + Some(Box::new(JsonParserConfig::new())), ) .await; @@ -425,7 +469,8 @@ mod tests { true, EventStatus::Delivered, true, - DecodingConfig::new(None, Some(Box::new(JsonParserConfig::new()))), + None, + 
Some(Box::new(JsonParserConfig::new())), ) .await; @@ -468,7 +513,8 @@ mod tests { true, EventStatus::Delivered, true, - DecodingConfig::new(None, Some(Box::new(JsonParserConfig::new()))), + None, + Some(Box::new(JsonParserConfig::new())), ) .await; @@ -510,7 +556,8 @@ mod tests { true, EventStatus::Delivered, true, - DecodingConfig::new(None, Some(Box::new(JsonParserConfig::new()))), + None, + Some(Box::new(JsonParserConfig::new())), ) .await; @@ -584,7 +631,8 @@ mod tests { true, EventStatus::Delivered, true, - DecodingConfig::new(None, Some(Box::new(JsonParserConfig::new()))), + None, + Some(Box::new(JsonParserConfig::new())), ) .await; @@ -622,7 +670,8 @@ mod tests { true, EventStatus::Delivered, true, - DecodingConfig::new(None, Some(Box::new(JsonParserConfig::new()))), + None, + Some(Box::new(JsonParserConfig::new())), ) .await; @@ -669,7 +718,8 @@ mod tests { true, EventStatus::Delivered, true, - DecodingConfig::default(), + None, + None, ) .await; @@ -695,7 +745,8 @@ mod tests { true, EventStatus::Delivered, true, - DecodingConfig::new(None, Some(Box::new(JsonParserConfig::new()))), + None, + Some(Box::new(JsonParserConfig::new())), ) .await; @@ -726,7 +777,8 @@ mod tests { false, EventStatus::Delivered, true, - DecodingConfig::new(None, Some(Box::new(JsonParserConfig::new()))), + None, + Some(Box::new(JsonParserConfig::new())), ) .await; @@ -775,7 +827,8 @@ mod tests { true, EventStatus::Delivered, true, - DecodingConfig::new(None, Some(Box::new(JsonParserConfig::new()))), + None, + Some(Box::new(JsonParserConfig::new())), ) .await; @@ -795,7 +848,8 @@ mod tests { true, EventStatus::Failed, true, - DecodingConfig::default(), + None, + None, ) .await; @@ -820,7 +874,8 @@ mod tests { true, EventStatus::Failed, false, - DecodingConfig::default(), + None, + None, ) .await; diff --git a/src/sources/kafka.rs b/src/sources/kafka.rs index cedac1f7a57fb..11f151bf240a0 100644 --- a/src/sources/kafka.rs +++ b/src/sources/kafka.rs @@ -1,10 +1,11 @@ use super::util::finalizer::OrderedFinalizer; use crate::{ - codecs::{self, DecodingConfig}, + codecs::{self, DecodingConfig, FramingConfig, ParserConfig}, config::{log_schema, DataType, SourceConfig, SourceContext, SourceDescription}, event::{BatchNotifier, Event, Value}, internal_events::{KafkaEventFailed, KafkaEventReceived, KafkaOffsetUpdateFailed}, kafka::{KafkaAuthConfig, KafkaStatisticsContext}, + serde::{default_decoding, default_framing_message_based}, shutdown::ShutdownSignal, sources::util::TcpError, Pipeline, @@ -36,7 +37,8 @@ enum BuildError { KafkaSubscribeError { source: rdkafka::error::KafkaError }, } -#[derive(Clone, Debug, Default, Deserialize, Serialize)] +#[derive(Clone, Debug, Derivative, Deserialize, Serialize)] +#[derivative(Default)] #[serde(deny_unknown_fields)] pub struct KafkaSourceConfig { bootstrap_servers: String, @@ -65,8 +67,12 @@ pub struct KafkaSourceConfig { librdkafka_options: Option>, #[serde(flatten)] auth: KafkaAuthConfig, - #[serde(flatten)] - decoding: DecodingConfig, + #[serde(default = "default_framing_message_based")] + #[derivative(Default(value = "default_framing_message_based()"))] + framing: Box, + #[serde(default = "default_decoding")] + #[derivative(Default(value = "default_decoding()"))] + decoding: Box, } const fn default_session_timeout_ms() -> u64 { @@ -120,6 +126,7 @@ impl_generate_config_from_default!(KafkaSourceConfig); impl SourceConfig for KafkaSourceConfig { async fn build(&self, cx: SourceContext) -> crate::Result { let consumer = create_consumer(self)?; + let decoder = 
DecodingConfig::new(self.framing.clone(), self.decoding.clone()).build()?; Ok(Box::pin(kafka_source( consumer, @@ -128,7 +135,7 @@ impl SourceConfig for KafkaSourceConfig { self.partition_key.clone(), self.offset_key.clone(), self.headers_key.clone(), - self.decoding.build()?, + decoder, cx.shutdown, cx.out, cx.acknowledgements, diff --git a/src/sources/nats.rs b/src/sources/nats.rs index d709907c336ce..656b854d9f9af 100644 --- a/src/sources/nats.rs +++ b/src/sources/nats.rs @@ -1,10 +1,11 @@ use crate::{ - codecs::{self, DecodingConfig}, + codecs::{self, DecodingConfig, FramingConfig, ParserConfig}, config::{ log_schema, DataType, GenerateConfig, SourceConfig, SourceContext, SourceDescription, }, event::Event, internal_events::NatsEventsReceived, + serde::{default_decoding, default_framing_message_based}, shutdown::ShutdownSignal, sources::util::TcpError, Pipeline, @@ -24,7 +25,8 @@ enum BuildError { NatsSubscribeError { source: std::io::Error }, } -#[derive(Clone, Debug, Default, Deserialize, Serialize)] +#[derive(Clone, Debug, Derivative, Deserialize, Serialize)] +#[derivative(Default)] #[serde(deny_unknown_fields)] pub struct NatsSourceConfig { url: String, @@ -32,8 +34,12 @@ pub struct NatsSourceConfig { connection_name: String, subject: String, queue: Option, - #[serde(flatten, default)] - decoding: DecodingConfig, + #[serde(default = "default_framing_message_based")] + #[derivative(Default(value = "default_framing_message_based()"))] + framing: Box, + #[serde(default = "default_decoding")] + #[derivative(Default(value = "default_decoding()"))] + decoding: Box, } inventory::submit! { @@ -57,11 +63,12 @@ impl GenerateConfig for NatsSourceConfig { impl SourceConfig for NatsSourceConfig { async fn build(&self, cx: SourceContext) -> crate::Result { let (connection, subscription) = create_subscription(self).await?; + let decoder = DecodingConfig::new(self.framing.clone(), self.decoding.clone()).build()?; Ok(Box::pin(nats_source( connection, subscription, - self.decoding.build()?, + decoder, cx.shutdown, cx.out, ))) @@ -197,20 +204,18 @@ mod integration_tests { subject: subject.clone(), url: "nats://127.0.0.1:4222".to_owned(), queue: None, - decoding: Default::default(), + framing: default_framing_message_based(), + decoding: default_decoding(), }; let (nc, sub) = create_subscription(&conf).await.unwrap(); let nc_pub = nc.clone(); let (tx, rx) = Pipeline::new_test(); - tokio::spawn(nats_source( - nc, - sub, - conf.decoding.build().unwrap(), - ShutdownSignal::noop(), - tx, - )); + let decoder = DecodingConfig::new(conf.framing.clone(), conf.decoding.clone()) + .build() + .unwrap(); + tokio::spawn(nats_source(nc, sub, decoder, ShutdownSignal::noop(), tx)); let msg = "my message"; nc_pub.publish(&subject, msg).await.unwrap(); diff --git a/src/sources/socket/mod.rs b/src/sources/socket/mod.rs index 2dbce9311d9da..0b1d015a67f7b 100644 --- a/src/sources/socket/mod.rs +++ b/src/sources/socket/mod.rs @@ -3,7 +3,10 @@ mod udp; #[cfg(unix)] mod unix; +#[cfg(unix)] +use crate::serde::{default_framing_message_based, default_framing_stream_based}; use crate::{ + codecs::DecodingConfig, config::{ log_schema, DataType, GenerateConfig, Resource, SourceConfig, SourceContext, SourceDescription, @@ -80,7 +83,9 @@ impl SourceConfig for SocketConfig { async fn build(&self, cx: SourceContext) -> crate::Result { match self.mode.clone() { Mode::Tcp(config) => { - let decoder = config.decoding().build()?; + let decoder = + DecodingConfig::new(config.framing().clone(), config.decoding().clone()) + .build()?; let 
tcp = tcp::RawTcpSource::new(config.clone(), decoder); let tls = MaybeTlsSettings::from_config(config.tls(), true)?; tcp.run( @@ -98,7 +103,9 @@ impl SourceConfig for SocketConfig { .host_key() .clone() .unwrap_or_else(|| log_schema().host_key().to_string()); - let decoder = config.decoding().build()?; + let decoder = + DecodingConfig::new(config.framing().clone(), config.decoding().clone()) + .build()?; Ok(udp::udp( config.address(), host_key, @@ -113,7 +120,11 @@ impl SourceConfig for SocketConfig { let host_key = config .host_key .unwrap_or_else(|| log_schema().host_key().to_string()); - let decoder = config.decoding.build()?; + let decoder = DecodingConfig::new( + config.framing.unwrap_or_else(default_framing_message_based), + config.decoding.clone(), + ) + .build()?; Ok(unix::unix_datagram( config.path, config.max_length, @@ -128,7 +139,11 @@ impl SourceConfig for SocketConfig { let host_key = config .host_key .unwrap_or_else(|| log_schema().host_key().to_string()); - let decoder = config.decoding.build()?; + let decoder = DecodingConfig::new( + config.framing.unwrap_or_else(default_framing_stream_based), + config.decoding.clone(), + ) + .build()?; Ok(unix::unix_stream( config.path, host_key, @@ -164,7 +179,7 @@ impl SourceConfig for SocketConfig { mod test { use super::{tcp::TcpConfig, udp::UdpConfig, SocketConfig}; use crate::{ - codecs::{DecodingConfig, NewlineDelimitedDecoderConfig}, + codecs::NewlineDelimitedDecoderConfig, config::{ log_schema, ComponentKey, GlobalOptions, SinkContext, SourceConfig, SourceContext, }, @@ -180,6 +195,8 @@ mod test { Pipeline, }; use bytes::Bytes; + #[cfg(unix)] + use futures::channel::mpsc::Receiver; use futures::{stream, StreamExt}; use std::{ net::{SocketAddr, UdpSocket}, @@ -236,6 +253,29 @@ mod test { SOURCE_TESTS.assert(&TCP_SOURCE_TAGS); } + #[tokio::test] + async fn tcp_splits_on_newline() { + let (tx, rx) = Pipeline::new_test(); + let addr = next_addr(); + + let server = SocketConfig::from(TcpConfig::from_address(addr.into())) + .build(SourceContext::new_test(tx)) + .await + .unwrap(); + tokio::spawn(server); + + wait_for_tcp(addr).await; + send_lines(addr, vec!["foo\nbar".to_owned()].into_iter()) + .await + .unwrap(); + + let events = collect_n(rx, 2).await; + + assert_eq!(events.len(), 2); + assert_eq!(events[0].as_log()[log_schema().message_key()], "foo".into()); + assert_eq!(events[1].as_log()[log_schema().message_key()], "bar".into()); + } + #[tokio::test] async fn tcp_it_includes_source_type() { components::init_test(); @@ -269,11 +309,8 @@ mod test { let addr = next_addr(); let mut config = TcpConfig::from_address(addr.into()); - config.set_decoding(DecodingConfig::new( - Some(Box::new( - NewlineDelimitedDecoderConfig::new_with_max_length(10), - )), - None, + config.set_framing(Box::new( + NewlineDelimitedDecoderConfig::new_with_max_length(10), )); let server = SocketConfig::from(config) @@ -575,20 +612,16 @@ mod test { } #[tokio::test] - async fn udp_multiple_messages() { + async fn udp_message_preserves_newline() { let (tx, rx) = Pipeline::new_test(); let address = init_udp(tx).await; - send_lines_udp(address, vec!["test\ntest2".to_string()]); - let events = collect_n(rx, 2).await; + send_lines_udp(address, vec!["foo\nbar".to_string()]); + let events = collect_n(rx, 1).await; assert_eq!( events[0].as_log()[log_schema().message_key()], - "test".into() - ); - assert_eq!( - events[1].as_log()[log_schema().message_key()], - "test2".into() + "foo\nbar".into() ); } @@ -742,42 +775,13 @@ mod test { } #[cfg(unix)] - async fn 
unix_message(stream: bool) { - let (tx, rx) = Pipeline::new_test(); - let path = init_unix(tx, stream).await; - - unix_send_lines(stream, path, &["test"]).await; - - let events = collect_n(rx, 1).await; - - assert_eq!(1, events.len()); - assert_eq!( - events[0].as_log()[log_schema().message_key()], - "test".into() - ); - assert_eq!( - events[0].as_log()[log_schema().source_type_key()], - "socket".into() - ); - } - - #[cfg(unix)] - async fn unix_multiple_messages(stream: bool) { + async fn unix_message(message: &str, stream: bool) -> Receiver { let (tx, rx) = Pipeline::new_test(); let path = init_unix(tx, stream).await; - unix_send_lines(stream, path, &["test\ntest2"]).await; - let events = collect_n(rx, 2).await; + unix_send_lines(stream, path, &[message]).await; - assert_eq!(2, events.len()); - assert_eq!( - events[0].as_log()[log_schema().message_key()], - "test".into() - ); - assert_eq!( - events[1].as_log()[log_schema().message_key()], - "test2".into() - ); + rx } #[cfg(unix)] @@ -818,7 +822,7 @@ mod test { socket.connect(path).unwrap(); for line in lines { - socket.send(format!("{}\n", line).as_bytes()).await.unwrap(); + socket.send(line.as_bytes()).await.unwrap(); } socket.shutdown(std::net::Shutdown::Both).unwrap(); } @@ -826,13 +830,35 @@ mod test { #[cfg(unix)] #[tokio::test] async fn unix_datagram_message() { - unix_message(false).await + let rx = unix_message("test", false).await; + let events = collect_n(rx, 1).await; + + assert_eq!(events.len(), 1); + assert_eq!( + events[0].as_log()[log_schema().message_key()], + "test".into() + ); + assert_eq!( + events[0].as_log()[log_schema().source_type_key()], + "socket".into() + ); } #[cfg(unix)] #[tokio::test] - async fn unix_datagram_multiple_messages() { - unix_multiple_messages(false).await + async fn unix_datagram_message_preserves_newline() { + let rx = unix_message("foo\nbar", false).await; + let events = collect_n(rx, 1).await; + + assert_eq!(events.len(), 1); + assert_eq!( + events[0].as_log()[log_schema().message_key()], + "foo\nbar".into() + ); + assert_eq!( + events[0].as_log()[log_schema().source_type_key()], + "socket".into() + ); } #[cfg(unix)] @@ -865,13 +891,37 @@ mod test { #[cfg(unix)] #[tokio::test] async fn unix_stream_message() { - unix_message(true).await + let rx = unix_message("test", true).await; + let events = collect_n(rx, 1).await; + + assert_eq!(1, events.len()); + assert_eq!( + events[0].as_log()[log_schema().message_key()], + "test".into() + ); + assert_eq!( + events[0].as_log()[log_schema().source_type_key()], + "socket".into() + ); } #[cfg(unix)] #[tokio::test] - async fn unix_stream_multiple_messages() { - unix_multiple_messages(true).await + async fn unix_stream_message_splits_on_newline() { + let rx = unix_message("foo\nbar", true).await; + let events = collect_n(rx, 2).await; + + assert_eq!(events.len(), 2); + assert_eq!(events[0].as_log()[log_schema().message_key()], "foo".into()); + assert_eq!( + events[0].as_log()[log_schema().source_type_key()], + "socket".into() + ); + assert_eq!(events[1].as_log()[log_schema().message_key()], "bar".into()); + assert_eq!( + events[1].as_log()[log_schema().source_type_key()], + "socket".into() + ); } #[cfg(unix)] diff --git a/src/sources/socket/tcp.rs b/src/sources/socket/tcp.rs index 856c74642f6f3..793e68d693cfb 100644 --- a/src/sources/socket/tcp.rs +++ b/src/sources/socket/tcp.rs @@ -1,7 +1,8 @@ use crate::{ - codecs::{self, DecodingConfig}, + codecs::{self, FramingConfig, ParserConfig}, event::Event, internal_events::{SocketEventsReceived, SocketMode}, + 
serde::{default_decoding, default_framing_stream_based}, sources::util::{SocketListenAddr, TcpSource}, tcp::TcpKeepaliveConfig, tls::TlsConfig, @@ -26,9 +27,12 @@ pub struct TcpConfig { tls: Option, #[get_copy = "pub"] receive_buffer_bytes: Option, - #[serde(flatten, default)] + #[serde(default = "default_framing_stream_based")] #[getset(get = "pub", set = "pub")] - decoding: DecodingConfig, + framing: Box, + #[serde(default = "default_decoding")] + #[getset(get = "pub", set = "pub")] + decoding: Box, } const fn default_shutdown_timeout_secs() -> u64 { @@ -36,14 +40,15 @@ const fn default_shutdown_timeout_secs() -> u64 { } impl TcpConfig { - pub const fn new( + pub fn new( address: SocketListenAddr, keepalive: Option, shutdown_timeout_secs: u64, host_key: Option, tls: Option, receive_buffer_bytes: Option, - decoding: DecodingConfig, + framing: Box, + decoding: Box, ) -> Self { Self { address, @@ -52,6 +57,7 @@ impl TcpConfig { host_key, tls, receive_buffer_bytes, + framing, decoding, } } @@ -64,7 +70,8 @@ impl TcpConfig { host_key: None, tls: None, receive_buffer_bytes: None, - decoding: DecodingConfig::default(), + framing: default_framing_stream_based(), + decoding: default_decoding(), } } } diff --git a/src/sources/socket/udp.rs b/src/sources/socket/udp.rs index fe151738e6d2b..2b0e4e64bea07 100644 --- a/src/sources/socket/udp.rs +++ b/src/sources/socket/udp.rs @@ -1,7 +1,8 @@ use crate::{ - codecs::{Decoder, DecodingConfig}, + codecs::{Decoder, FramingConfig, ParserConfig}, event::Event, internal_events::{SocketEventsReceived, SocketMode}, + serde::{default_decoding, default_framing_message_based}, shutdown::ShutdownSignal, sources::{util::TcpError, Source}, udp, Pipeline, @@ -28,9 +29,12 @@ pub struct UdpConfig { host_key: Option, #[get_copy = "pub"] receive_buffer_bytes: Option, - #[serde(flatten, default)] + #[serde(default = "default_framing_message_based")] #[get = "pub"] - decoding: DecodingConfig, + framing: Box, + #[serde(default = "default_decoding")] + #[get = "pub"] + decoding: Box, } impl UdpConfig { @@ -40,7 +44,8 @@ impl UdpConfig { max_length: crate::serde::default_max_length(), host_key: None, receive_buffer_bytes: None, - decoding: Default::default(), + framing: default_framing_message_based(), + decoding: default_decoding(), } } } diff --git a/src/sources/socket/unix.rs b/src/sources/socket/unix.rs index c8542508a80e7..050b298ace970 100644 --- a/src/sources/socket/unix.rs +++ b/src/sources/socket/unix.rs @@ -1,7 +1,8 @@ use crate::{ - codecs::{Decoder, DecodingConfig}, + codecs::{Decoder, FramingConfig, ParserConfig}, event::Event, internal_events::{SocketEventsReceived, SocketMode}, + serde::default_decoding, shutdown::ShutdownSignal, sources::{ util::{build_unix_datagram_source, build_unix_stream_source}, @@ -20,8 +21,10 @@ pub struct UnixConfig { #[serde(default = "crate::serde::default_max_length")] pub max_length: usize, pub host_key: Option, - #[serde(flatten, default)] - pub decoding: DecodingConfig, + #[serde(default)] + pub framing: Option>, + #[serde(default = "default_decoding")] + pub decoding: Box, } impl UnixConfig { @@ -30,7 +33,8 @@ impl UnixConfig { path, max_length: crate::serde::default_max_length(), host_key: None, - decoding: Default::default(), + framing: None, + decoding: default_decoding(), } } } diff --git a/src/sources/stdin.rs b/src/sources/stdin.rs index d89ee2b039bdf..1ccf71e803520 100644 --- a/src/sources/stdin.rs +++ b/src/sources/stdin.rs @@ -1,7 +1,8 @@ use crate::{ - codecs::DecodingConfig, + codecs::{DecodingConfig, FramingConfig, 
ParserConfig}, config::{log_schema, DataType, Resource, SourceConfig, SourceContext, SourceDescription}, internal_events::StdinEventsReceived, + serde::{default_decoding, default_framing_stream_based}, shutdown::ShutdownSignal, sources::util::TcpError, Pipeline, @@ -19,8 +20,10 @@ pub struct StdinConfig { #[serde(default = "crate::serde::default_max_length")] pub max_length: usize, pub host_key: Option, - #[serde(flatten)] - pub decoding: DecodingConfig, + #[serde(default = "default_framing_stream_based")] + pub framing: Box, + #[serde(default = "default_decoding")] + pub decoding: Box, } impl Default for StdinConfig { @@ -28,7 +31,8 @@ impl Default for StdinConfig { StdinConfig { max_length: crate::serde::default_max_length(), host_key: Default::default(), - decoding: Default::default(), + framing: default_framing_stream_based(), + decoding: default_decoding(), } } } @@ -77,7 +81,7 @@ where .host_key .unwrap_or_else(|| log_schema().host_key().to_string()); let hostname = crate::get_hostname().ok(); - let decoder = config.decoding.build()?; + let decoder = DecodingConfig::new(config.framing.clone(), config.decoding.clone()).build()?; let (mut sender, receiver) = mpsc::channel(1024); diff --git a/website/cue/reference/components.cue b/website/cue/reference/components.cue index 87bb88445f499..d25df11622098 100644 --- a/website/cue/reference/components.cue +++ b/website/cue/reference/components.cue @@ -269,7 +269,8 @@ components: { } #FeaturesCodecs: { - enabled: bool + enabled: bool + default_framing: string } #FeaturesEncoding: { diff --git a/website/cue/reference/components/sources.cue b/website/cue/reference/components/sources.cue index 3943b39aa481c..bf06c9708534c 100644 --- a/website/cue/reference/components/sources.cue +++ b/website/cue/reference/components/sources.cue @@ -80,10 +80,12 @@ components: sources: [Name=string]: { type: object: options: { method: { description: "The framing method." - required: true + required: false + common: true type: string: { + default: features.codecs.default_framing enum: { - bytes: "Byte frames are passed through as-is according to the underlying I/O boundaries (e.g. split between packets for UDP or between segments for TCP)." + bytes: "Byte frames are passed through as-is according to the underlying I/O boundaries (e.g. split between messages or stream segments)." character_delimited: "Byte frames which are delimited by a chosen character." length_delimited: "Byte frames whose length is encoded in a header." newline_delimited: "Byte frames which are delimited by a newline character." @@ -162,8 +164,10 @@ components: sources: [Name=string]: { type: object: options: { codec: { description: "The decoding method." - required: true + required: false + common: true type: string: { + default: "bytes" enum: { bytes: "Events containing the byte frame as-is." json: "Events being parsed from a JSON string." 
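
A note on the pattern running through the source diffs above: each source now carries a `framing` option and a `decoding` (codec) option as two separate fields and composes them into one decoder when the source is built. A minimal sketch of that shape, assuming the `DecodingConfig`, `FramingConfig`, `ParserConfig`, `default_framing_message_based`, and `default_decoding` items visible in the diffs; the `ExampleSourceConfig` type itself is hypothetical and not part of the tree:

use serde::{Deserialize, Serialize};

use crate::{
    codecs::{Decoder, DecodingConfig, FramingConfig, ParserConfig},
    serde::{default_decoding, default_framing_message_based},
};

// Hypothetical source config showing the shared shape: `framing` controls how
// the byte stream is cut into frames, `decoding` controls how each frame is
// parsed into events.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct ExampleSourceConfig {
    #[serde(default = "default_framing_message_based")]
    framing: Box<dyn FramingConfig>,
    #[serde(default = "default_decoding")]
    decoding: Box<dyn ParserConfig>,
}

impl ExampleSourceConfig {
    fn build_decoder(&self) -> crate::Result<Decoder> {
        // The same composition step the kafka, nats, and socket sources use.
        DecodingConfig::new(self.framing.clone(), self.decoding.clone()).build()
    }
}

Splitting the two halves is what lets each source pick its own default framing (message-based for packet transports, stream-based for TCP and stdin) while sharing one set of codecs, which is exactly what the `default_framing` entries in the cue docs below record.
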
diff --git a/website/cue/reference/components/sources/datadog_agent.cue b/website/cue/reference/components/sources/datadog_agent.cue index b6c3f58894821..e2d8683ac6e47 100644 --- a/website/cue/reference/components/sources/datadog_agent.cue +++ b/website/cue/reference/components/sources/datadog_agent.cue @@ -21,7 +21,10 @@ components: sources: datadog_agent: { features: { multiline: enabled: false - codecs: enabled: true + codecs: { + enabled: true + default_framing: "bytes" + } receive: { from: { service: services.datadog_agent diff --git a/website/cue/reference/components/sources/exec.cue b/website/cue/reference/components/sources/exec.cue index f896556d2509c..f642343bb7aad 100644 --- a/website/cue/reference/components/sources/exec.cue +++ b/website/cue/reference/components/sources/exec.cue @@ -14,7 +14,10 @@ components: sources: exec: { features: { multiline: enabled: false - codecs: enabled: true + codecs: { + enabled: true + default_framing: "newline_delimited" + } receive: { from: { service: services.exec diff --git a/website/cue/reference/components/sources/generator.cue b/website/cue/reference/components/sources/generator.cue index 5697e433437fe..18acb7a7ff4e0 100644 --- a/website/cue/reference/components/sources/generator.cue +++ b/website/cue/reference/components/sources/generator.cue @@ -18,7 +18,10 @@ components: sources: generator: { features: { multiline: enabled: false - codecs: enabled: true + codecs: { + enabled: true + default_framing: "bytes" + } generate: {} } diff --git a/website/cue/reference/components/sources/heroku_logs.cue b/website/cue/reference/components/sources/heroku_logs.cue index b6a43752046c3..dce5f02c69699 100644 --- a/website/cue/reference/components/sources/heroku_logs.cue +++ b/website/cue/reference/components/sources/heroku_logs.cue @@ -22,7 +22,10 @@ components: sources: heroku_logs: { features: { multiline: enabled: false - codecs: enabled: true + codecs: { + enabled: true + default_framing: "bytes" + } receive: { from: { service: services.heroku diff --git a/website/cue/reference/components/sources/http.cue b/website/cue/reference/components/sources/http.cue index 17fa2f8fea20d..cd4c7e58d4f3a 100644 --- a/website/cue/reference/components/sources/http.cue +++ b/website/cue/reference/components/sources/http.cue @@ -16,7 +16,10 @@ components: sources: http: { features: { multiline: enabled: false - codecs: enabled: true + codecs: { + enabled: true + default_framing: "newline_delimited" + } receive: { from: { service: services.http diff --git a/website/cue/reference/components/sources/kafka.cue b/website/cue/reference/components/sources/kafka.cue index 92cf8ca126471..95a3968d2fa7a 100644 --- a/website/cue/reference/components/sources/kafka.cue +++ b/website/cue/reference/components/sources/kafka.cue @@ -16,7 +16,10 @@ components: sources: kafka: { from: components._kafka.features.collect.from } multiline: enabled: false - codecs: enabled: true + codecs: { + enabled: true + default_framing: "bytes" + } } classes: { diff --git a/website/cue/reference/components/sources/nats.cue b/website/cue/reference/components/sources/nats.cue index f1b5ac94beb17..ebc4f21e506e1 100644 --- a/website/cue/reference/components/sources/nats.cue +++ b/website/cue/reference/components/sources/nats.cue @@ -9,7 +9,10 @@ components: sources: nats: { from: components._nats.features.collect.from } multiline: enabled: false - codecs: enabled: true + codecs: { + enabled: true + default_framing: "bytes" + } } classes: { diff --git a/website/cue/reference/components/sources/socket.cue 
b/website/cue/reference/components/sources/socket.cue
index bc338375f234d..87f83b2e0d339 100644
--- a/website/cue/reference/components/sources/socket.cue
+++ b/website/cue/reference/components/sources/socket.cue
@@ -16,7 +16,10 @@ components: sources: socket: {
 	features: {
 		multiline: enabled: false
-		codecs: enabled: true
+		codecs: {
+			enabled: true
+			default_framing: "`newline_delimited` for TCP and Unix stream, `bytes` for UDP and Unix datagram"
+		}
 		receive: {
 			from: {
 				service: services.socket_client
diff --git a/website/cue/reference/components/sources/stdin.cue b/website/cue/reference/components/sources/stdin.cue
index 2d8f8a758859c..046bf75024baa 100644
--- a/website/cue/reference/components/sources/stdin.cue
+++ b/website/cue/reference/components/sources/stdin.cue
@@ -14,7 +14,10 @@ components: sources: stdin: {
 	features: {
 		multiline: enabled: false
-		codecs: enabled: true
+		codecs: {
+			enabled: true
+			default_framing: "newline_delimited"
+		}
 		receive: {
 			from: {
 				service: services.stdin

From 05b8a034f18cd0a4124710786dbaa4bb511be08f Mon Sep 17 00:00:00 2001
From: Jesse Szwedko
Date: Thu, 14 Oct 2021 19:09:08 -0500
Subject: [PATCH 08/38] fix(codecs): Update bytes codec to actually read full message (#9613)

Previously, it would return whatever was in the buffer when `decode` was
called, but there may still be more bytes to read. Instead, we just return
`Ok(None)`, signaling to the caller that it should read more, until we hit
EOF and then return the full payload.

Fixes: #9564

Signed-off-by: Jesse Szwedko
---
 src/codecs/framers/bytes.rs | 27 +++++++++++----------------
 1 file changed, 11 insertions(+), 16 deletions(-)

diff --git a/src/codecs/framers/bytes.rs b/src/codecs/framers/bytes.rs
index c6dd5348e79e6..daa3b767664c0 100644
--- a/src/codecs/framers/bytes.rs
+++ b/src/codecs/framers/bytes.rs
@@ -48,27 +48,18 @@ impl Decoder for BytesCodec {
     type Item = Bytes;
     type Error = BoxedFramingError;
 
-    fn decode(&mut self, src: &mut BytesMut) -> Result<Option<Self::Item>, Self::Error> {
-        // We don't support emitting empty frames in stream based decoding,
-        // since this will currently result in an infinite loop when using
-        // `FramedRead`.
-        self.flushed = true;
-        if src.is_empty() {
-            Ok(None)
-        } else {
-            let frame = src.split();
-            Ok(Some(frame.freeze()))
-        }
+    fn decode(&mut self, _src: &mut BytesMut) -> Result<Option<Self::Item>, Self::Error> {
+        self.flushed = false;
+        Ok(None)
     }
 
     fn decode_eof(&mut self, src: &mut BytesMut) -> Result<Option<Self::Item>, Self::Error> {
-        if !self.flushed {
+        if self.flushed && src.is_empty() {
+            Ok(None)
+        } else {
             self.flushed = true;
             let frame = src.split();
             Ok(Some(frame.freeze()))
-        } else {
-            self.flushed = false;
-            Ok(None)
         }
     }
 }
@@ -84,7 +75,11 @@ mod tests {
         let mut input = BytesMut::from("some bytes");
         let mut decoder = BytesCodec::new();
 
-        assert_eq!(decoder.decode(&mut input).unwrap().unwrap(), "some bytes");
+        assert_eq!(decoder.decode(&mut input).unwrap(), None);
+        assert_eq!(
+            decoder.decode_eof(&mut input).unwrap().unwrap(),
+            "some bytes"
+        );
         assert_eq!(decoder.decode(&mut input).unwrap(), None);
     }
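
For orientation, the fix above leans on the two-phase contract of `tokio_util`'s `Decoder` trait: `decode` is called while more input may still arrive, and `decode_eof` is called once the stream is exhausted. A toy driver loop, written here purely as an illustration and not taken from the tree:

use bytes::BytesMut;
use tokio_util::codec::Decoder;

// Toy driver: feed chunks through `decode` while more input may arrive,
// then ask `decode_eof` to flush once the stream is exhausted.
fn read_all<D: Decoder>(decoder: &mut D, chunks: &[&[u8]]) -> Result<Vec<D::Item>, D::Error> {
    let mut buf = BytesMut::new();
    let mut frames = Vec::new();
    for chunk in chunks {
        buf.extend_from_slice(chunk);
        // With the fixed `BytesCodec`, this inner loop never yields a frame...
        while let Some(frame) = decoder.decode(&mut buf)? {
            frames.push(frame);
        }
    }
    // ...so the whole accumulated payload is emitted here as a single frame.
    while let Some(frame) = decoder.decode_eof(&mut buf)? {
        frames.push(frame);
    }
    Ok(frames)
}
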
From 9f68c3ecd0acde9fd247a7fc06a43bdb51c09d65 Mon Sep 17 00:00:00 2001
From: Jesse Szwedko
Date: Thu, 14 Oct 2021 22:08:17 -0500
Subject: [PATCH 09/38] fix(sources): Re-add max_length configuration (#9621)

This re-wires up `max_length`, which was inadvertently dropped during the
source codec work. Of the places it appears:

* `stdin`: it was actually non-functional since v0.6.0, so it seems unlikely
  anyone was actually using it. I left it for now, but we can drop it in
  v0.18.0.
* `socket`:
  * `udp`: I rewired this by switching it back closer to its original
    implementation (which mirrors `unix_datagram`) where the `recv_from` is
    executed first and then that is fed through the decoder. This should
    restore the previous functionality. The previous functionality, by way of
    `recv_from`, is actually just to silently drop any bytes that don't fit
    in the buffer, so the docs have been updated. We could rethink this in
    future releases.
  * `unix_datagram`: Was still wired up as before.
  * `unix_stream`: Wired back up to default the max length for the
    `NewlineDelimitedDecoderConfig` when specified. If both framing and it
    are specified, returns an error.
  * `tcp`: Wired back up to default the max length for the
    `NewlineDelimitedDecoderConfig` when specified. If both framing and it
    are specified, returns an error.
* `syslog`: Correctly passed through to socket config.

I think we can remove the `max_length` config in a future release after
deprecating it for 0.18.0.

Signed-off-by: Jesse Szwedko
---
 src/sources/socket/mod.rs                   | 56 ++++++++---
 src/sources/socket/tcp.rs                   | 14 ++-
 src/sources/socket/udp.rs                   | 99 ++++++++++++-------
 src/sources/socket/unix.rs                  |  5 +-
 .../reference/components/sources/socket.cue |  7 +-
 .../reference/components/sources/syslog.cue |  2 +-
 6 files changed, 121 insertions(+), 62 deletions(-)

diff --git a/src/sources/socket/mod.rs b/src/sources/socket/mod.rs
index 0b1d015a67f7b..4b3dc0dc7d07c 100644
--- a/src/sources/socket/mod.rs
+++ b/src/sources/socket/mod.rs
@@ -4,9 +4,9 @@ mod udp;
 mod unix;
 
 #[cfg(unix)]
-use crate::serde::{default_framing_message_based, default_framing_stream_based};
+use crate::serde::default_framing_message_based;
 use crate::{
-    codecs::DecodingConfig,
+    codecs::{DecodingConfig, NewlineDelimitedDecoderConfig},
     config::{
         log_schema, DataType, GenerateConfig, Resource, SourceConfig, SourceContext,
         SourceDescription,
     },
@@ -83,9 +83,23 @@ impl SourceConfig for SocketConfig {
     async fn build(&self, cx: SourceContext) -> crate::Result {
         match self.mode.clone() {
             Mode::Tcp(config) => {
-                let decoder =
-                    DecodingConfig::new(config.framing().clone(), config.decoding().clone())
-                        .build()?;
+                if config.framing().is_some() && config.max_length().is_some() {
+                    return Err("Using `max_length` is deprecated and does not have any effect when framing is provided. 
Configure `max_length` on the framing config instead.".into()); + } + + let max_length = config + .max_length() + .unwrap_or_else(crate::serde::default_max_length); + + let framing = match config.framing().as_ref() { + Some(framing) => framing.clone(), + None => Box::new(NewlineDelimitedDecoderConfig::new_with_max_length( + max_length, + )), + }; + + let decoder = DecodingConfig::new(framing, config.decoding().clone()).build()?; + let tcp = tcp::RawTcpSource::new(config.clone(), decoder); let tls = MaybeTlsSettings::from_config(config.tls(), true)?; tcp.run( @@ -108,6 +122,7 @@ impl SourceConfig for SocketConfig { .build()?; Ok(udp::udp( config.address(), + config.max_length(), host_key, config.receive_buffer_bytes(), decoder, @@ -127,7 +142,9 @@ impl SourceConfig for SocketConfig { .build()?; Ok(unix::unix_datagram( config.path, - config.max_length, + config + .max_length + .unwrap_or_else(crate::serde::default_max_length), host_key, decoder, cx.shutdown, @@ -136,14 +153,26 @@ impl SourceConfig for SocketConfig { } #[cfg(unix)] Mode::UnixStream(config) => { + if config.framing.is_some() && config.max_length.is_some() { + return Err("Using `max_length` is deprecated and does not have any effect when framing is provided. Configure `max_length` on the framing config instead.".into()); + } + + let max_length = config + .max_length + .unwrap_or_else(crate::serde::default_max_length); + + let framing = match config.framing.as_ref() { + Some(framing) => framing.clone(), + None => Box::new(NewlineDelimitedDecoderConfig::new_with_max_length( + max_length, + )), + }; + + let decoder = DecodingConfig::new(framing, config.decoding.clone()).build()?; + let host_key = config .host_key .unwrap_or_else(|| log_schema().host_key().to_string()); - let decoder = DecodingConfig::new( - config.framing.unwrap_or_else(default_framing_stream_based), - config.decoding.clone(), - ) - .build()?; Ok(unix::unix_stream( config.path, host_key, @@ -309,9 +338,10 @@ mod test { let addr = next_addr(); let mut config = TcpConfig::from_address(addr.into()); - config.set_framing(Box::new( + config.set_max_length(None); + config.set_framing(Some(Box::new( NewlineDelimitedDecoderConfig::new_with_max_length(10), - )); + ))); let server = SocketConfig::from(config) .build(SourceContext::new_test(tx)) diff --git a/src/sources/socket/tcp.rs b/src/sources/socket/tcp.rs index 793e68d693cfb..6599b711aa68f 100644 --- a/src/sources/socket/tcp.rs +++ b/src/sources/socket/tcp.rs @@ -2,7 +2,7 @@ use crate::{ codecs::{self, FramingConfig, ParserConfig}, event::Event, internal_events::{SocketEventsReceived, SocketMode}, - serde::{default_decoding, default_framing_stream_based}, + serde::default_decoding, sources::util::{SocketListenAddr, TcpSource}, tcp::TcpKeepaliveConfig, tls::TlsConfig, @@ -18,6 +18,8 @@ pub struct TcpConfig { address: SocketListenAddr, #[get_copy = "pub"] keepalive: Option, + #[getset(get_copy = "pub", set = "pub")] + max_length: Option, #[serde(default = "default_shutdown_timeout_secs")] #[getset(get_copy = "pub", set = "pub")] shutdown_timeout_secs: u64, @@ -27,9 +29,8 @@ pub struct TcpConfig { tls: Option, #[get_copy = "pub"] receive_buffer_bytes: Option, - #[serde(default = "default_framing_stream_based")] #[getset(get = "pub", set = "pub")] - framing: Box, + framing: Option>, #[serde(default = "default_decoding")] #[getset(get = "pub", set = "pub")] decoding: Box, @@ -43,16 +44,18 @@ impl TcpConfig { pub fn new( address: SocketListenAddr, keepalive: Option, + max_length: Option, shutdown_timeout_secs: u64, host_key: 
Option, tls: Option, receive_buffer_bytes: Option, - framing: Box, + framing: Option>, decoding: Box, ) -> Self { Self { address, keepalive, + max_length, shutdown_timeout_secs, host_key, tls, @@ -66,11 +69,12 @@ impl TcpConfig { Self { address, keepalive: None, + max_length: Some(crate::serde::default_max_length()), shutdown_timeout_secs: default_shutdown_timeout_secs(), host_key: None, tls: None, receive_buffer_bytes: None, - framing: default_framing_stream_based(), + framing: None, decoding: default_decoding(), } } diff --git a/src/sources/socket/udp.rs b/src/sources/socket/udp.rs index 2b0e4e64bea07..c00676ceee45d 100644 --- a/src/sources/socket/udp.rs +++ b/src/sources/socket/udp.rs @@ -1,20 +1,19 @@ use crate::{ - codecs::{Decoder, FramingConfig, ParserConfig}, + codecs::{self, Decoder, FramingConfig, ParserConfig}, event::Event, - internal_events::{SocketEventsReceived, SocketMode}, + internal_events::{SocketEventsReceived, SocketMode, SocketReceiveError}, serde::{default_decoding, default_framing_message_based}, shutdown::ShutdownSignal, sources::{util::TcpError, Source}, udp, Pipeline, }; -use async_stream::stream; -use bytes::Bytes; +use bytes::{Bytes, BytesMut}; use futures::{SinkExt, StreamExt}; use getset::{CopyGetters, Getters}; use serde::{Deserialize, Serialize}; use std::net::SocketAddr; use tokio::net::UdpSocket; -use tokio_util::udp::UdpFramed; +use tokio_util::codec::FramedRead; /// UDP processes messages per packet, where messages are separated by newline. #[derive(Deserialize, Serialize, Debug, Clone, Getters, CopyGetters)] @@ -52,10 +51,11 @@ impl UdpConfig { pub fn udp( address: SocketAddr, + max_length: usize, host_key: String, receive_buffer_bytes: Option, decoder: Decoder, - shutdown: ShutdownSignal, + mut shutdown: ShutdownSignal, out: Pipeline, ) -> Source { let mut out = out.sink_map_err(|error| error!(message = "Error sending event.", %error)); @@ -71,45 +71,72 @@ pub fn udp( } } + let max_length = if let Some(receive_buffer_bytes) = receive_buffer_bytes { + std::cmp::min(max_length, receive_buffer_bytes) + } else { + max_length + }; + info!(message = "Listening.", address = %address); - let mut stream = UdpFramed::new(socket, decoder).take_until(shutdown); - (stream! { - loop { - match stream.next().await { - Some(Ok(((events, byte_size), received_from))) => { - emit!(&SocketEventsReceived { + let mut buf = BytesMut::with_capacity(max_length); + loop { + buf.resize(max_length, 0); + tokio::select! { + recv = socket.recv_from(&mut buf) => { + let (byte_size, address) = recv.map_err(|error| { + let error = codecs::Error::FramingError(error.into()); + emit!(&SocketReceiveError { mode: SocketMode::Udp, - byte_size, - count: events.len() - }); + error: &error + }) + })?; - for mut event in events { - if let Event::Log(ref mut log) = event { - log.insert( - crate::config::log_schema().source_type_key(), - Bytes::from("socket"), - ); + let payload = buf.split_to(byte_size); - log.insert(host_key.clone(), received_from.to_string()); - } + let mut stream = FramedRead::new(payload.as_ref(), decoder.clone()); - yield event; - } - } - Some(Err(error)) => { - // Error is logged by `crate::codecs::Decoder`, no - // further handling is needed here. 
- if !error.can_continue() { - break; + loop { + match stream.next().await { + Some(Ok((events, byte_size))) => { + emit!(&SocketEventsReceived { + mode: SocketMode::Udp, + byte_size, + count: events.len() + }); + + for mut event in events { + if let Event::Log(ref mut log) = event { + log.insert( + crate::config::log_schema().source_type_key(), + Bytes::from("socket"), + ); + + log.insert(host_key.clone(), address.to_string()); + } + + tokio::select!{ + result = out.send(event) => {match result { + Ok(()) => { }, + Err(()) => return Ok(()), + }} + _ = &mut shutdown => return Ok(()), + } + } + } + Some(Err(error)) => { + // Error is logged by `crate::codecs::Decoder`, no + // further handling is needed here. + if !error.can_continue() { + break; + } + } + None => break, } } - None => break, } + _ = &mut shutdown => return Ok(()), } - }) - .map(Ok) - .forward(&mut out) - .await + } }) } diff --git a/src/sources/socket/unix.rs b/src/sources/socket/unix.rs index 050b298ace970..b286b8c6321c1 100644 --- a/src/sources/socket/unix.rs +++ b/src/sources/socket/unix.rs @@ -18,8 +18,7 @@ use std::path::PathBuf; #[serde(deny_unknown_fields)] pub struct UnixConfig { pub path: PathBuf, - #[serde(default = "crate::serde::default_max_length")] - pub max_length: usize, + pub max_length: Option, pub host_key: Option, #[serde(default)] pub framing: Option>, @@ -31,7 +30,7 @@ impl UnixConfig { pub fn new(path: PathBuf) -> Self { Self { path, - max_length: crate::serde::default_max_length(), + max_length: Some(crate::serde::default_max_length()), host_key: None, framing: None, decoding: default_decoding(), diff --git a/website/cue/reference/components/sources/socket.cue b/website/cue/reference/components/sources/socket.cue index 87f83b2e0d339..9effd985d8ccd 100644 --- a/website/cue/reference/components/sources/socket.cue +++ b/website/cue/reference/components/sources/socket.cue @@ -90,10 +90,9 @@ components: sources: socket: { } } max_length: { - common: true - description: "The maximum bytes size of incoming messages before they are discarded." - relevant_when: "mode = `unix_datagram`" - required: false + common: true + description: "The maximum buffer size of incoming messages. Messages larger than this are truncated." + required: false warnings: [] type: uint: { default: 102400 diff --git a/website/cue/reference/components/sources/syslog.cue b/website/cue/reference/components/sources/syslog.cue index 544c40b7b9ec2..fcda08733d500 100644 --- a/website/cue/reference/components/sources/syslog.cue +++ b/website/cue/reference/components/sources/syslog.cue @@ -73,7 +73,7 @@ components: sources: syslog: { } max_length: { common: true - description: "The maximum bytes size of incoming messages before they are discarded." + description: "The maximum buffer size of incoming messages. Messages larger than this are truncated." 
required: false warnings: [] type: uint: { From 6aa05d0c2b7c29b9e495198295bb43ae908d3dc5 Mon Sep 17 00:00:00 2001 From: Stephen Wakely Date: Fri, 15 Oct 2021 09:23:09 +0100 Subject: [PATCH 10/38] fix(enriching): parse the schema columns using Conversion (#9583) * Parse the schema columns using Conversion Signed-off-by: Stephen Wakely * Trim schema types Signed-off-by: Stephen Wakely * Allow spaces around | in schema type Signed-off-by: Stephen Wakely * Clippy Signed-off-by: Stephen Wakely --- lib/shared/src/conversion.rs | 18 +++-- src/enrichment_tables/file.rs | 142 ++++++++++++++++++++++++++-------- 2 files changed, 118 insertions(+), 42 deletions(-) diff --git a/lib/shared/src/conversion.rs b/lib/shared/src/conversion.rs index 31c447d6863ed..b98bd70538f74 100644 --- a/lib/shared/src/conversion.rs +++ b/lib/shared/src/conversion.rs @@ -89,14 +89,16 @@ impl Conversion { /// * `"timestamp|FORMAT"` => Timestamp using the given format pub fn parse(s: impl AsRef, tz: TimeZone) -> Result { let s = s.as_ref(); - match s { - "asis" | "bytes" | "string" => Ok(Self::Bytes), - "integer" | "int" => Ok(Self::Integer), - "float" => Ok(Self::Float), - "bool" | "boolean" => Ok(Self::Boolean), - "timestamp" => Ok(Self::Timestamp(tz)), - _ if s.starts_with("timestamp|") => { - let fmt = &s[10..]; + let mut split = s.splitn(2, '|').map(|segment| segment.trim()); + match (split.next(), split.next()) { + (Some("asis"), None) | (Some("bytes"), None) | (Some("string"), None) => { + Ok(Self::Bytes) + } + (Some("integer"), None) | (Some("int"), None) => Ok(Self::Integer), + (Some("float"), None) => Ok(Self::Float), + (Some("bool"), None) | (Some("boolean"), None) => Ok(Self::Boolean), + (Some("timestamp"), None) => Ok(Self::Timestamp(tz)), + (Some("timestamp"), Some(fmt)) => { // DateTime can only convert timestamps without // time zones, and DateTime can only // convert with tone zones, so this has to distinguish diff --git a/src/enrichment_tables/file.rs b/src/enrichment_tables/file.rs index 4bc7122ecccd8..436b1e8b92d70 100644 --- a/src/enrichment_tables/file.rs +++ b/src/enrichment_tables/file.rs @@ -37,22 +37,11 @@ struct FileC { encoding: Encoding, } -#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] -#[serde(rename_all = "snake_case")] -enum SchemaType { - String, - Date, - DateTime, - Integer, - Float, - Boolean, -} - #[derive(Serialize, Deserialize, Default, Debug, Clone, PartialEq)] pub struct FileConfig { file: FileC, #[serde(default)] - schema: HashMap, + schema: HashMap, } const fn default_delimiter() -> char { @@ -70,30 +59,50 @@ impl FileConfig { use chrono::TimeZone; Ok(match self.schema.get(column) { - Some(SchemaType::Date) => Value::Timestamp( - chrono::FixedOffset::east(0) - .from_utc_datetime( - &chrono::NaiveDate::parse_from_str(value, "%Y-%m-%d") + Some(format) => { + let mut split = format.splitn(2, '|').map(|segment| segment.trim()); + + match (split.next(), split.next()) { + (Some("date"), None) => Value::Timestamp( + chrono::FixedOffset::east(0) + .from_utc_datetime( + &chrono::NaiveDate::parse_from_str(value, "%Y-%m-%d") + .map_err(|_| { + format!( + "unable to parse date {} found in row {}", + value, row + ) + })? + .and_hms(0, 0, 0), + ) + .into(), + ), + (Some("date"), Some(format)) => Value::Timestamp( + chrono::FixedOffset::east(0) + .from_utc_datetime( + &chrono::NaiveDate::parse_from_str(value, format) + .map_err(|_| { + format!( + "unable to parse date {} found in row {}", + value, row + ) + })? 
+ .and_hms(0, 0, 0), + ) + .into(), + ), + _ => { + let conversion = + Conversion::parse(format, timezone).map_err(|err| err.to_string())?; + conversion + .convert(Bytes::copy_from_slice(value.as_bytes())) .map_err(|_| { - format!("unable to parse date {} found in row {}", value, row) + format!("unable to parse {} found in row {}", value, row) })? - .and_hms(0, 0, 0), - ) - .into(), - ), - Some(SchemaType::DateTime) => Conversion::Timestamp(timezone) - .convert(Bytes::copy_from_slice(value.as_bytes())) - .map_err(|_| format!("unable to parse datetime {} found in row {}", value, row))?, - Some(SchemaType::Integer) => Conversion::Integer - .convert(Bytes::copy_from_slice(value.as_bytes())) - .map_err(|_| format!("unable to parse integer {} found in row {}", value, row))?, - Some(SchemaType::Float) => Conversion::Boolean - .convert(Bytes::copy_from_slice(value.as_bytes())) - .map_err(|_| format!("unable to parse integer {} found in row {}", value, row))?, - Some(SchemaType::Boolean) => Conversion::Boolean - .convert(Bytes::copy_from_slice(value.as_bytes())) - .map_err(|_| format!("unable to parse integer {} found in row {}", value, row))?, - Some(SchemaType::String) | None => value.into(), + } + } + } + None => value.into(), }) } @@ -520,6 +529,71 @@ mod tests { use chrono::TimeZone; use shared::btreemap; + #[test] + fn parse_column() { + let mut schema = HashMap::new(); + schema.insert("col1".to_string(), " string ".to_string()); + schema.insert("col2".to_string(), " date ".to_string()); + schema.insert("col3".to_string(), "date|%m/%d/%Y".to_string()); + schema.insert("col3-spaces".to_string(), "date | %m %d %Y".to_string()); + schema.insert("col4".to_string(), "timestamp|%+".to_string()); + schema.insert("col4-spaces".to_string(), "timestamp | %+".to_string()); + schema.insert("col5".to_string(), "int".to_string()); + let config = FileConfig { + file: Default::default(), + schema, + }; + + assert_eq!( + Ok(Value::from("zork")), + config.parse_column(Default::default(), "col1", 1, "zork") + ); + + assert_eq!( + Ok(Value::from(chrono::Utc.ymd(2020, 3, 5).and_hms(0, 0, 0))), + config.parse_column(Default::default(), "col2", 1, "2020-03-05") + ); + + assert_eq!( + Ok(Value::from(chrono::Utc.ymd(2020, 3, 5).and_hms(0, 0, 0))), + config.parse_column(Default::default(), "col3", 1, "03/05/2020") + ); + + assert_eq!( + Ok(Value::from(chrono::Utc.ymd(2020, 3, 5).and_hms(0, 0, 0))), + config.parse_column(Default::default(), "col3-spaces", 1, "03 05 2020") + ); + + assert_eq!( + Ok(Value::from( + chrono::Utc.ymd(2001, 7, 7).and_hms_micro(15, 4, 0, 26490) + )), + config.parse_column( + Default::default(), + "col4", + 1, + "2001-07-08T00:34:00.026490+09:30" + ) + ); + + assert_eq!( + Ok(Value::from( + chrono::Utc.ymd(2001, 7, 7).and_hms_micro(15, 4, 0, 26490) + )), + config.parse_column( + Default::default(), + "col4-spaces", + 1, + "2001-07-08T00:34:00.026490+09:30" + ) + ); + + assert_eq!( + Ok(Value::from(42)), + config.parse_column(Default::default(), "col5", 1, "42") + ); + } + #[test] fn seahash() { // Ensure we can separate fields to create a distinct hash. 
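
The `splitn(2, '|')` plus `trim` idiom above is what lets schema entries like `timestamp | %+` and `date | %m %d %Y` tolerate whitespace around the delimiter. A standalone sketch of just that parsing step (the `split_type_spec` helper is hypothetical, extracted here for illustration):

// Standalone sketch of the type-spec parsing used by `Conversion::parse` and
// `FileConfig::parse_column`: split on the first `|` and trim both halves.
fn split_type_spec(s: &str) -> (Option<&str>, Option<&str>) {
    let mut split = s.splitn(2, '|').map(|segment| segment.trim());
    (split.next(), split.next())
}

fn main() {
    assert_eq!(split_type_spec("int"), (Some("int"), None));
    assert_eq!(split_type_spec("timestamp|%+"), (Some("timestamp"), Some("%+")));
    // Spaces around the delimiter are now tolerated:
    assert_eq!(
        split_type_spec("date | %m %d %Y"),
        (Some("date"), Some("%m %d %Y"))
    );
}
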
From 44a0fd48979f2a4899674a35c791b7c20f5373c9 Mon Sep 17 00:00:00 2001 From: Danny Browning Date: Fri, 15 Oct 2021 03:56:51 -0600 Subject: [PATCH 11/38] fix(enriching): Fixes enrichment tables in test (again) (#9612) Signed-off-by: dbcfd --- src/config/unit_test.rs | 2 ++ src/topology/builder.rs | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/config/unit_test.rs b/src/config/unit_test.rs index e8c58eb0a57c3..b65b76fe9b1e8 100644 --- a/src/config/unit_test.rs +++ b/src/config/unit_test.rs @@ -575,6 +575,8 @@ async fn build_unit_test( ); } + enrichment_tables.finish_load(); + if !errors.is_empty() { Err(errors) } else { diff --git a/src/topology/builder.rs b/src/topology/builder.rs index 0dba59a872dd3..60c0c66c9564c 100644 --- a/src/topology/builder.rs +++ b/src/topology/builder.rs @@ -488,7 +488,7 @@ pub async fn build_pieces( // We should have all the data for the enrichment tables loaded now, so switch them over to // readonly. - ENRICHMENT_TABLES.finish_load(); + enrichment_tables.finish_load(); let mut finalized_outputs = HashMap::new(); for (id, output) in outputs { @@ -507,7 +507,7 @@ pub async fn build_pieces( healthchecks, shutdown_coordinator, detach_triggers, - enrichment_tables: ENRICHMENT_TABLES.clone(), + enrichment_tables: enrichment_tables.clone(), }; Ok(pieces) From 58dfee0443511c4e51378cb33e22408cbe705556 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 15 Oct 2021 09:57:13 -0500 Subject: [PATCH 12/38] chore(deps): bump cached from 0.25.0 to 0.25.1 (#9630) Bumps [cached](https://github.com/jaemk/cached) from 0.25.0 to 0.25.1. - [Release notes](https://github.com/jaemk/cached/releases) - [Changelog](https://github.com/jaemk/cached/blob/master/CHANGELOG.md) - [Commits](https://github.com/jaemk/cached/commits) --- updated-dependencies: - dependency-name: cached dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 54 +++++---------------------------------- lib/vrl/stdlib/Cargo.toml | 2 +- 2 files changed, 7 insertions(+), 49 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6da7b33eb54a9..d909ec7c17af0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -876,28 +876,27 @@ checksum = "631ae5198c9be5e753e5cc215e1bd73c2b466a3565173db433f52bb9d3e66dba" [[package]] name = "cached" -version = "0.25.0" +version = "0.25.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b99e696f7b2696ed5eae0d462a9eeafaea111d99e39b2c8ceb418afe1013bcfc" +checksum = "14d3b04f85a6ef9fe543b2564ec8630bdf3363aa9bf664a1bfc85033e7350aaf" dependencies = [ "async-mutex", "async-trait", "cached_proc_macro", "cached_proc_macro_types", "futures 0.3.17", - "hashbrown 0.9.1", + "hashbrown 0.11.2", "once_cell", ] [[package]] name = "cached_proc_macro" -version = "0.6.1" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25a685ba39b57a91a53d149dcbef854f50fbe204d1ff6081ea0bec3529a0c456" +checksum = "4230b8d9f5db741004bfaef172c5b2dbf0eb94f105204cc6147a220080daaa85" dependencies = [ - "async-mutex", "cached_proc_macro_types", - "darling 0.10.2", + "darling 0.13.0", "quote 1.0.9", "syn 1.0.76", ] @@ -1508,16 +1507,6 @@ dependencies = [ "zeroize", ] -[[package]] -name = "darling" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d706e75d87e35569db781a9b5e2416cff1236a47ed380831f959382ccd5f858" -dependencies = [ - "darling_core 0.10.2", - "darling_macro 0.10.2", -] - [[package]] name = "darling" version = "0.12.4" @@ -1538,20 +1527,6 @@ dependencies = [ "darling_macro 0.13.0", ] -[[package]] -name = "darling_core" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0c960ae2da4de88a91b2d920c2a7233b400bc33cb28453a2987822d8392519b" -dependencies = [ - "fnv", - "ident_case", - "proc-macro2 1.0.29", - "quote 1.0.9", - "strsim 0.9.3", - "syn 1.0.76", -] - [[package]] name = "darling_core" version = "0.12.4" @@ -1580,17 +1555,6 @@ dependencies = [ "syn 1.0.76", ] -[[package]] -name = "darling_macro" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b5a2f4ac4969822c62224815d069952656cadc7084fdca9751e6d959189b72" -dependencies = [ - "darling_core 0.10.2", - "quote 1.0.9", - "syn 1.0.76", -] - [[package]] name = "darling_macro" version = "0.12.4" @@ -6440,12 +6404,6 @@ version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" -[[package]] -name = "strsim" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6446ced80d6c486436db5c078dde11a9f73d42b57fb273121e160b84f63d894c" - [[package]] name = "strsim" version = "0.10.0" diff --git a/lib/vrl/stdlib/Cargo.toml b/lib/vrl/stdlib/Cargo.toml index fe429082617c0..a333a1bf86e95 100644 --- a/lib/vrl/stdlib/Cargo.toml +++ b/lib/vrl/stdlib/Cargo.toml @@ -41,7 +41,7 @@ uuid = { version = "0.8", features = ["v4"], optional = true } roxmltree = { version = "0.14.1", optional = true } woothee = { version = "0.12.1", optional = true } uaparser = { version = "0.4.0", optional = true } -cached = { version = "0.25.0", optional = true } +cached = { version = "0.25.1", optional = true } [dev-dependencies] anyhow = "1" From 
468dd2680962d5dc6aa69ccc5f467e1536bae8fa Mon Sep 17 00:00:00 2001 From: Stephen Wakely Date: Fri, 15 Oct 2021 17:35:07 +0100 Subject: [PATCH 13/38] enhancement(enriching): added behaviour test for enrichment tables (#9633) * Added behaviour test for enrichment tables Signed-off-by: Stephen Wakely * spaces Signed-off-by: Stephen Wakely --- tests/behavior/transforms/remap.toml | 51 ++++++++++++++++++++++++++++ tests/data/enrichment.csv | 8 +++++ 2 files changed, 59 insertions(+) create mode 100644 tests/data/enrichment.csv diff --git a/tests/behavior/transforms/remap.toml b/tests/behavior/transforms/remap.toml index f034f78f2c9ee..449be178bfef4 100644 --- a/tests/behavior/transforms/remap.toml +++ b/tests/behavior/transforms/remap.toml @@ -2204,3 +2204,54 @@ source = ''' .host == "localhost" ''' + +[enrichment_tables.test] +type = "file" +file.path = "./tests/data/enrichment.csv" +file.encoding.type = "csv" +schema.id = "integer" + +[transforms.remap_function_get_enrichment_table_row] + inputs = [] + type = "remap" + source = ''' + . = get_enrichment_table_record!("test", { "id": .id }) + ''' +[[tests]] + name = "remap_function_get_enrichment_table_row" + [tests.input] + insert_at = "remap_function_get_enrichment_table_row" + type = "log" + [tests.input.log_fields] + id = 3 + [[tests.outputs]] + extract_from = "remap_function_get_enrichment_table_row" + [[tests.outputs.conditions]] + "type" = "vrl" + source = ''' + .tree == "yew" + ''' + +[transforms.remap_function_find_enrichment_table_rows] + inputs = [] + type = "remap" + source = ''' + .trees = find_enrichment_table_records!("test", { "tree": .tree }) + ''' +[[tests]] + name = "remap_function_find_enrichment_table_rows" + [tests.input] + insert_at = "remap_function_find_enrichment_table_rows" + type = "log" + [tests.input.log_fields] + tree = "yew" + [[tests.outputs]] + extract_from = "remap_function_find_enrichment_table_rows" + [[tests.outputs.conditions]] + "type" = "vrl" + source = ''' + .trees == [{"id": 3, "tree": "yew"}, + {"id": 5, "tree": "yew"}, + {"id": 7, "tree": "yew"}] + ''' + diff --git a/tests/data/enrichment.csv b/tests/data/enrichment.csv new file mode 100644 index 0000000000000..0ebc631c3bc21 --- /dev/null +++ b/tests/data/enrichment.csv @@ -0,0 +1,8 @@ +id,tree +1,oak +2,ash +3,yew +4,beech +5,yew +6,beech +7,yew From b1f83d5d8ccc1827295f0a11ba11653f75ff9eff Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 15 Oct 2021 11:53:36 -0500 Subject: [PATCH 14/38] chore(deps): bump redis from 0.21.2 to 0.21.3 (#9635) Bumps [redis](https://github.com/mitsuhiko/redis-rs) from 0.21.2 to 0.21.3. - [Release notes](https://github.com/mitsuhiko/redis-rs/releases) - [Changelog](https://github.com/mitsuhiko/redis-rs/blob/master/CHANGELOG.md) - [Commits](https://github.com/mitsuhiko/redis-rs/compare/0.21.2...0.21.3) --- updated-dependencies: - dependency-name: redis dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot]
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
---
 Cargo.lock | 4 ++--
 Cargo.toml | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index d909ec7c17af0..8c7a9cd37bf5b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -5179,9 +5179,9 @@ dependencies = [
 
 [[package]]
 name = "redis"
-version = "0.21.2"
+version = "0.21.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "202c5bf92cad3d57605c366e644a7fbf305a83f19754fc66678c6265dcc9b8b4"
+checksum = "dd71bdb3d0d6e9183e675c977f652fbf8abc3b63fcb722e9abb42f82ef839b65"
 dependencies = [
  "arc-swap",
  "async-trait",
diff --git a/Cargo.toml b/Cargo.toml
index 125dc13d3a492..e92ade7e8474f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -254,7 +254,7 @@ pulsar = { version = "4.1", default-features = false, features = ["tokio-runtime
 rand = { version = "0.8.4", default-features = false, features = ["small_rng"] }
 rand_distr = { version = "0.4.2", default-features = false }
 rdkafka = { version = "0.26.0", default-features = false, features = ["tokio", "libz", "ssl", "zstd"], optional = true }
-redis = { version = "0.21.2", default-features = false, features = ["connection-manager", "tokio-comp", "tokio-native-tls-comp"], optional = true }
+redis = { version = "0.21.3", default-features = false, features = ["connection-manager", "tokio-comp", "tokio-native-tls-comp"], optional = true }
 regex = { version = "1.5.4", default-features = false, features = ["std", "perf"] }
 seahash = { version = "4.1.0", default-features = false, optional = true }
 semver = { version = "1.0.4", default-features = false, features = ["serde", "std"], optional = true }

From 86753fc29c9c0a56be703ae27580599aba258df8 Mon Sep 17 00:00:00 2001
From: Jesse Szwedko
Date: Fri, 15 Oct 2021 12:57:18 -0500
Subject: [PATCH 15/38] fix(codecs): Drop default max framing length (#9625)

* fix(codecs): Drop default max framing length

This matches existing behavior pre-codec work. That is, sources that
previously used tokio codecs for framing either used `max_length` from the
config or did not specify a max length. I think this will more closely match
expectations, especially since the default was relatively small (100 KiB).

Signed-off-by: Jesse Szwedko
---
 src/codecs/framers/character_delimited.rs    | 20 +++++++++-----
 src/codecs/framers/newline_delimited.rs      | 28 +++++++++++++-------
 src/codecs/framers/octet_counting.rs         | 22 +++++++++------
 src/generate.rs                              |  9 -------
 website/cue/reference/components/sources.cue |  6 ++---
 5 files changed, 48 insertions(+), 37 deletions(-)
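
The mechanical change is the same for all three framers below: `max_length` becomes an `Option<usize>`, and leaving it unset now means "no limit" rather than the old 100 KiB default. A self-contained sketch of the resulting serde shape, using stand-in types rather than Vector's and assuming the `toml` crate (the real code uses a `skip_serializing_if_default` helper where this sketch uses `Option::is_none`):

use serde::{Deserialize, Serialize};

// Stand-in for the framer options below: an optional limit that is omitted
// from serialized output when unset.
#[derive(Debug, Default, PartialEq, Deserialize, Serialize)]
struct FramerOptions {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    max_length: Option<usize>,
}

fn main() {
    // Omitted in the user's config => `None` => the unbounded constructor.
    let opts: FramerOptions = toml::from_str("").unwrap();
    assert_eq!(opts, FramerOptions { max_length: None });

    // `None` is skipped on the way out, which is why the `vector generate`
    // snapshots in this patch no longer contain a `max_length` line.
    assert_eq!(toml::to_string(&opts).unwrap(), "");
}
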
diff --git a/src/codecs/framers/character_delimited.rs b/src/codecs/framers/character_delimited.rs
index 761b3ec7a1722..f391421564f76 100644
--- a/src/codecs/framers/character_delimited.rs
+++ b/src/codecs/framers/character_delimited.rs
@@ -11,24 +11,30 @@ pub struct CharacterDelimitedDecoderConfig {
 }
 
 /// Options for building a `CharacterDelimitedCodec`.
-#[derive(Debug, Clone, Deserialize, Serialize)]
+#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
 pub struct CharacterDelimitedDecoderOptions {
     /// The character that delimits byte sequences.
     delimiter: char,
     /// The maximum length of the byte buffer.
     ///
     /// This length does *not* include the trailing delimiter.
-    #[serde(default = "crate::serde::default_max_length")]
-    max_length: usize,
+    #[serde(skip_serializing_if = "crate::serde::skip_serializing_if_default")]
+    max_length: Option<usize>,
 }
 
 #[typetag::serde(name = "character_delimited")]
 impl FramingConfig for CharacterDelimitedDecoderConfig {
     fn build(&self) -> crate::Result<BoxedFramer> {
-        Ok(Box::new(CharacterDelimitedCodec::new_with_max_length(
-            self.character_delimited.delimiter,
-            self.character_delimited.max_length,
-        )))
+        if let Some(max_length) = self.character_delimited.max_length {
+            Ok(Box::new(CharacterDelimitedCodec::new_with_max_length(
+                self.character_delimited.delimiter,
+                max_length,
+            )))
+        } else {
+            Ok(Box::new(CharacterDelimitedCodec::new(
+                self.character_delimited.delimiter,
+            )))
+        }
     }
 }
diff --git a/src/codecs/framers/newline_delimited.rs b/src/codecs/framers/newline_delimited.rs
index 743a07219d2df..8a4267e069506 100644
--- a/src/codecs/framers/newline_delimited.rs
+++ b/src/codecs/framers/newline_delimited.rs
@@ -4,28 +4,32 @@
 use serde::{Deserialize, Serialize};
 use tokio_util::codec::Decoder;
 
 /// Config used to build a `NewlineDelimitedCodec`.
-#[derive(Debug, Clone, Default, Deserialize, Serialize)]
+#[derive(Debug, Clone, Default, Deserialize, Serialize, PartialEq)]
 pub struct NewlineDelimitedDecoderConfig {
-    #[serde(default)]
+    #[serde(
+        default,
+        skip_serializing_if = "crate::serde::skip_serializing_if_default"
+    )]
     newline_delimited: NewlineDelimitedDecoderOptions,
 }
 
 /// Options for building a `NewlineDelimitedCodec`.
-#[derive(Debug, Clone, Derivative, Deserialize, Serialize)]
+#[derive(Debug, Clone, Derivative, Deserialize, Serialize, PartialEq)]
 #[derivative(Default)]
 pub struct NewlineDelimitedDecoderOptions {
     /// The maximum length of the byte buffer.
     ///
     /// This length does *not* include the trailing delimiter.
-    #[serde(default = "crate::serde::default_max_length")]
-    #[derivative(Default(value = "crate::serde::default_max_length()"))]
-    max_length: usize,
+    #[serde(skip_serializing_if = "crate::serde::skip_serializing_if_default")]
+    max_length: Option<usize>,
 }
 
 impl NewlineDelimitedDecoderOptions {
     /// Creates a `NewlineDelimitedDecoderOptions` with a maximum frame length limit.
     pub const fn new_with_max_length(max_length: usize) -> Self {
-        Self { max_length }
+        Self {
+            max_length: Some(max_length),
+        }
     }
 }
 
@@ -46,9 +50,13 @@ impl NewlineDelimitedDecoderConfig {
 #[typetag::serde(name = "newline_delimited")]
 impl FramingConfig for NewlineDelimitedDecoderConfig {
     fn build(&self) -> crate::Result<BoxedFramer> {
-        Ok(Box::new(NewlineDelimitedCodec::new_with_max_length(
-            self.newline_delimited.max_length,
-        )))
+        if let Some(max_length) = self.newline_delimited.max_length {
+            Ok(Box::new(NewlineDelimitedCodec::new_with_max_length(
+                max_length,
+            )))
+        } else {
+            Ok(Box::new(NewlineDelimitedCodec::new()))
+        }
     }
 }
diff --git a/src/codecs/framers/octet_counting.rs b/src/codecs/framers/octet_counting.rs
index 0dfe831e1c770..b367a728c0cfb 100644
--- a/src/codecs/framers/octet_counting.rs
+++ b/src/codecs/framers/octet_counting.rs
@@ -7,25 +7,31 @@
 use tokio_util::codec::{LinesCodec, LinesCodecError};
 
 /// Config used to build a `OctetCountingCodec`.
 #[derive(Debug, Clone, Default, Deserialize, Serialize)]
 pub struct OctetCountingDecoderConfig {
-    #[serde(default)]
+    #[serde(
+        default,
+        skip_serializing_if = "crate::serde::skip_serializing_if_default"
+    )]
     octet_counting: OctetCountingDecoderOptions,
 }
 
 /// Options for building a `OctetCountingCodec`.
-#[derive(Debug, Clone, Derivative, Deserialize, Serialize)] +#[derive(Debug, Clone, Derivative, Deserialize, Serialize, PartialEq)] #[derivative(Default)] pub struct OctetCountingDecoderOptions { - #[serde(default = "crate::serde::default_max_length")] - #[derivative(Default(value = "crate::serde::default_max_length()"))] - max_length: usize, + #[serde(skip_serializing_if = "crate::serde::skip_serializing_if_default")] + max_length: Option, } #[typetag::serde(name = "octet_counting")] impl FramingConfig for OctetCountingDecoderConfig { fn build(&self) -> crate::Result { - Ok(Box::new(OctetCountingCodec::new_with_max_length( - self.octet_counting.max_length, - ))) + if let Some(max_length) = self.octet_counting.max_length { + Ok(Box::new(OctetCountingCodec::new_with_max_length( + max_length, + ))) + } else { + Ok(Box::new(OctetCountingCodec::new())) + } } } diff --git a/src/generate.rs b/src/generate.rs index 76f18d883660e..b4df99e4ca81f 100644 --- a/src/generate.rs +++ b/src/generate.rs @@ -447,9 +447,6 @@ mod tests { [sources.source0.framing] method = "newline_delimited" - [sources.source0.framing.newline_delimited] - max_length = 102400 - [transforms.transform0] inputs = ["source0"] drop_field = true @@ -489,9 +486,6 @@ mod tests { [sources.source0.framing] method = "newline_delimited" - [sources.source0.framing.newline_delimited] - max_length = 102400 - [transforms.transform0] inputs = ["source0"] drop_field = true @@ -531,9 +525,6 @@ mod tests { [sources.source0.framing] method = "newline_delimited" - [sources.source0.framing.newline_delimited] - max_length = 102400 - [sinks.sink0] inputs = ["source0"] target = "stdout" diff --git a/website/cue/reference/components/sources.cue b/website/cue/reference/components/sources.cue index bf06c9708534c..1942599f6749d 100644 --- a/website/cue/reference/components/sources.cue +++ b/website/cue/reference/components/sources.cue @@ -112,7 +112,7 @@ components: sources: [Name=string]: { required: false common: false type: uint: { - default: 102400 + default: null examples: [65535, 102400] unit: "bytes" } @@ -130,7 +130,7 @@ components: sources: [Name=string]: { required: false common: false type: uint: { - default: 102400 + default: null examples: [65535, 102400] unit: "bytes" } @@ -148,7 +148,7 @@ components: sources: [Name=string]: { required: false common: false type: uint: { - default: 102400 + default: null examples: [65535, 102400] unit: "bytes" } From 560e3ab2102e0d71bf824f12abd728f0c13236ed Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 15 Oct 2021 18:11:51 +0000 Subject: [PATCH 16/38] chore(deps): bump tower from 0.4.8 to 0.4.9 (#9638) Bumps [tower](https://github.com/tower-rs/tower) from 0.4.8 to 0.4.9. - [Release notes](https://github.com/tower-rs/tower/releases) - [Commits](https://github.com/tower-rs/tower/compare/tower-0.4.8...tower-0.4.9) --- updated-dependencies: - dependency-name: tower dependency-type: direct:production update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Cargo.lock | 5 +++-- Cargo.toml | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8c7a9cd37bf5b..98c0dbf9d8c33 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6964,14 +6964,15 @@ dependencies = [ [[package]] name = "tower" -version = "0.4.8" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f60422bc7fefa2f3ec70359b8ff1caff59d785877eb70595904605bcc412470f" +checksum = "d15a6b60cdff0cb039d81d3b37f8bc3d7e53dca09069aae3ef2502ca4834fe30" dependencies = [ "futures-core", "futures-util", "indexmap", "pin-project 1.0.8", + "pin-project-lite", "rand 0.8.4", "slab", "tokio", diff --git a/Cargo.toml b/Cargo.toml index e92ade7e8474f..e187d5cbb8666 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -150,7 +150,7 @@ azure_storage = { git = "https://github.com/Azure/azure-sdk-for-rust.git", rev = reqwest = { version = "0.11", optional = true } # Tower -tower = { version = "0.4.8", default-features = false, features = ["buffer", "limit", "retry", "timeout", "util"] } +tower = { version = "0.4.9", default-features = false, features = ["buffer", "limit", "retry", "timeout", "util"] } tower-layer = { version = "0.3.1", default-features = false } # Serde From 6fa232458ee375720163575052bd2045beb13689 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 15 Oct 2021 18:26:34 +0000 Subject: [PATCH 17/38] chore(ci): bump actions/checkout from 2.3.4 to 2.3.5 (#9639) Bumps [actions/checkout](https://github.com/actions/checkout) from 2.3.4 to 2.3.5. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v2.3.4...v2.3.5) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/baseline.yml | 10 ++++---- .github/workflows/benches.yml | 2 +- .github/workflows/environment.yml | 2 +- .github/workflows/install-sh.yml | 2 +- .github/workflows/integration-test.yml | 2 +- .github/workflows/k8s_e2e.yml | 4 +-- .github/workflows/metrics.yml | 2 +- .github/workflows/nightly.yml | 32 ++++++++++++------------ .github/workflows/release.yml | 34 +++++++++++++------------- .github/workflows/test-harness.yml | 2 +- .github/workflows/test.yml | 20 +++++++-------- 11 files changed, 56 insertions(+), 56 deletions(-) diff --git a/.github/workflows/baseline.yml b/.github/workflows/baseline.yml index bb2b3f23f486d..cc6d9daccad11 100644 --- a/.github/workflows/baseline.yml +++ b/.github/workflows/baseline.yml @@ -13,7 +13,7 @@ jobs: name: "Release Build (optimized)" runs-on: [self-hosted, linux, x64, general] steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - run: sudo -E bash scripts/environment/bootstrap-ubuntu-20.04.sh - run: bash scripts/environment/prepare.sh - run: cargo clean @@ -28,7 +28,7 @@ jobs: # with full LTO / single codegen unit. 
PROFILE: debug steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - run: sudo -E bash scripts/environment/bootstrap-ubuntu-20.04.sh - run: bash scripts/environment/prepare.sh - run: cargo clean @@ -38,7 +38,7 @@ jobs: name: "Debug Build" runs-on: [self-hosted, linux, x64, general] steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - run: sudo -E bash scripts/environment/bootstrap-ubuntu-20.04.sh - run: bash scripts/environment/prepare.sh - run: cargo clean @@ -48,7 +48,7 @@ jobs: name: "Debug Rebuild" runs-on: [self-hosted, linux, x64, general] steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - run: sudo -E bash scripts/environment/bootstrap-ubuntu-20.04.sh - run: bash scripts/environment/prepare.sh - run: cargo clean @@ -60,7 +60,7 @@ jobs: name: "Cargo Check" runs-on: [self-hosted, linux, x64, general] steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - run: sudo -E bash scripts/environment/bootstrap-ubuntu-20.04.sh - run: bash scripts/environment/prepare.sh - run: cargo clean diff --git a/.github/workflows/benches.yml b/.github/workflows/benches.yml index c261d25a66244..af54ae80d9669 100644 --- a/.github/workflows/benches.yml +++ b/.github/workflows/benches.yml @@ -56,7 +56,7 @@ jobs: matrix: target: [bench, bench-remap-functions, bench-remap, bench-languages, bench-metrics, bench-dnstap, bench-transform] steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - uses: actions/cache@v2.1.6 with: path: | diff --git a/.github/workflows/environment.yml b/.github/workflows/environment.yml index 12bcc75e4ae70..fb419721e83a1 100644 --- a/.github/workflows/environment.yml +++ b/.github/workflows/environment.yml @@ -16,7 +16,7 @@ jobs: runs-on: ubuntu-20.04 steps: - name: Checkout - uses: actions/checkout@v2.3.4 + uses: actions/checkout@v2.3.5 - name: Set up QEMU uses: docker/setup-qemu-action@v1.2.0 - name: Set up Docker Buildx diff --git a/.github/workflows/install-sh.yml b/.github/workflows/install-sh.yml index 87ef73e8fb731..cb211539ee11d 100644 --- a/.github/workflows/install-sh.yml +++ b/.github/workflows/install-sh.yml @@ -14,7 +14,7 @@ jobs: sync-install: runs-on: ubuntu-18.04 steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - run: sudo apt-get install --yes python3-setuptools python3.6-dev - run: pip3 install awscli --upgrade --user - env: diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml index 9434cf172d559..60d64e0784472 100644 --- a/.github/workflows/integration-test.yml +++ b/.github/workflows/integration-test.yml @@ -78,7 +78,7 @@ jobs: - test: 'splunk' - test: 'dnstap' steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v2.3.5 - run: make ci-sweep - uses: actions/cache@v2.1.4 with: diff --git a/.github/workflows/k8s_e2e.yml b/.github/workflows/k8s_e2e.yml index 19b072fbe0a4a..3983f2c457410 100644 --- a/.github/workflows/k8s_e2e.yml +++ b/.github/workflows/k8s_e2e.yml @@ -59,7 +59,7 @@ jobs: CARGO_PROFILE_RELEASE_OPT_LEVEL: 0 CARGO_PROFILE_RELEASE_CODEGEN_UNITS: 256 steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - run: make ci-sweep - uses: actions/cache@v2.1.6 with: @@ -148,7 +148,7 @@ jobs: fail-fast: false steps: - name: Checkout - uses: actions/checkout@v2.3.4 + uses: actions/checkout@v2.3.5 - uses: actions/download-artifact@v2 with: name: e2e-test-deb-package diff --git a/.github/workflows/metrics.yml b/.github/workflows/metrics.yml index 
4361603dcc5f1..461ccf5de8746 100644 --- a/.github/workflows/metrics.yml +++ b/.github/workflows/metrics.yml @@ -20,7 +20,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout Action - uses: actions/checkout@v2 + uses: actions/checkout@v2.3.5 with: repository: timberio/gh-actions-workflow-metrics ref: main diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 3e1651f6eed63..c59f14a19d0e4 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -17,7 +17,7 @@ jobs: build-x86_64-unknown-linux-musl-packages: runs-on: [self-hosted, linux, x64, general] steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - run: sudo -E bash scripts/environment/bootstrap-ubuntu-20.04.sh - run: bash scripts/environment/prepare.sh - run: echo VECTOR_VERSION=$(make version) >> $GITHUB_ENV @@ -33,7 +33,7 @@ jobs: build-x86_64-unknown-linux-gnu-debug-tarball: runs-on: [self-hosted, linux, x64, general] steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - run: sudo -E bash scripts/environment/bootstrap-ubuntu-20.04.sh - run: bash scripts/environment/prepare.sh - run: echo VECTOR_VERSION=$(make version) >> $GITHUB_ENV @@ -56,7 +56,7 @@ jobs: build-x86_64-unknown-linux-gnu-packages: runs-on: [self-hosted, linux, x64, general] steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - run: sudo -E bash scripts/environment/bootstrap-ubuntu-20.04.sh - run: bash scripts/environment/prepare.sh - run: echo VECTOR_VERSION=$(make version) >> $GITHUB_ENV @@ -80,7 +80,7 @@ jobs: build-aarch64-unknown-linux-musl-packages: runs-on: [self-hosted, linux, x64, general] steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - run: sudo -E bash scripts/environment/bootstrap-ubuntu-20.04.sh - run: bash scripts/environment/prepare.sh - run: echo VECTOR_VERSION=$(make version) >> $GITHUB_ENV @@ -97,7 +97,7 @@ jobs: build-aarch64-unknown-linux-gnu-packages: runs-on: [self-hosted, linux, x64, general] steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - run: sudo -E bash scripts/environment/bootstrap-ubuntu-20.04.sh - run: bash scripts/environment/prepare.sh - run: echo VECTOR_VERSION=$(make version) >> $GITHUB_ENV @@ -122,7 +122,7 @@ jobs: build-armv7-unknown-linux-gnueabihf-packages: runs-on: [self-hosted, linux, x64, general] steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - run: sudo -E bash scripts/environment/bootstrap-ubuntu-20.04.sh - run: bash scripts/environment/prepare.sh - run: echo VECTOR_VERSION=$(make version) >> $GITHUB_ENV @@ -146,7 +146,7 @@ jobs: build-armv7-unknown-linux-musleabihf-packages: runs-on: [self-hosted, linux, x64, general] steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - run: sudo -E bash scripts/environment/bootstrap-ubuntu-20.04.sh - run: bash scripts/environment/prepare.sh - run: echo VECTOR_VERSION=$(make version) >> $GITHUB_ENV @@ -162,7 +162,7 @@ jobs: build-x86_64-apple-darwin-packages: runs-on: macos-10.15 steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - name: "Upgrade bash" run: brew install bash - name: "Install realpath dependency" @@ -193,7 +193,7 @@ jobs: env: RUSTFLAGS: "-D warnings -Ctarget-feature=+crt-static" steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - name: "Add Vector version" run: echo VECTOR_VERSION=$(make version) >> $GITHUB_ENV shell: bash @@ -278,7 +278,7 @@ jobs: systemd \ make - name: checkout - uses: 
actions/checkout@v2.3.4 + uses: actions/checkout@v2.3.5 - run: echo VECTOR_VERSION=$(make version) >> $GITHUB_ENV - uses: actions/download-artifact@v2 with: @@ -313,7 +313,7 @@ jobs: tar \ make - name: checkout - uses: actions/checkout@v2.3.4 + uses: actions/checkout@v2.3.5 - run: echo VECTOR_VERSION=$(make version) >> $GITHUB_ENV - uses: actions/download-artifact@v2 with: @@ -334,7 +334,7 @@ jobs: - build-x86_64-apple-darwin-packages steps: - name: checkout - uses: actions/checkout@v2.3.4 + uses: actions/checkout@v2.3.5 - run: echo VECTOR_VERSION=$(make version) >> $GITHUB_ENV - uses: actions/download-artifact@v2 with: @@ -354,7 +354,7 @@ jobs: - build-armv7-unknown-linux-gnueabihf-packages - deb-verify steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - run: echo VECTOR_VERSION=$(make version) >> $GITHUB_ENV - name: Login to DockerHub uses: docker/login-action@v1.10.0 @@ -416,7 +416,7 @@ jobs: - rpm-verify - osx-verify steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - run: echo VECTOR_VERSION=$(make version) >> $GITHUB_ENV - uses: actions/download-artifact@v2 with: @@ -496,7 +496,7 @@ jobs: - deb-verify - rpm-verify steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - run: echo VECTOR_VERSION=$(make version) >> $GITHUB_ENV - uses: actions/download-artifact@v2 with: @@ -608,7 +608,7 @@ jobs: # appear before the image it refers to. - release-docker steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - name: Install Helm run: scripts/environment/setup-helm.sh - name: Release Helm Chart diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 668a4d70a13b7..0c2f8a3284bc8 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -19,7 +19,7 @@ jobs: build-x86_64-unknown-linux-musl-packages: runs-on: [self-hosted, linux, x64, general] steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - run: sudo -E bash scripts/environment/bootstrap-ubuntu-20.04.sh - run: bash scripts/environment/prepare.sh - run: echo VECTOR_VERSION=$(make version) >> $GITHUB_ENV @@ -35,7 +35,7 @@ jobs: build-x86_64-unknown-linux-gnu-packages: runs-on: [self-hosted, linux, x64, general] steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - run: sudo -E bash scripts/environment/bootstrap-ubuntu-20.04.sh - run: bash scripts/environment/prepare.sh - run: echo VECTOR_VERSION=$(make version) >> $GITHUB_ENV @@ -59,7 +59,7 @@ jobs: build-aarch64-unknown-linux-musl-packages: runs-on: [self-hosted, linux, x64, general] steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - run: sudo -E bash scripts/environment/bootstrap-ubuntu-20.04.sh - run: bash scripts/environment/prepare.sh - run: echo VECTOR_VERSION=$(make version) >> $GITHUB_ENV @@ -76,7 +76,7 @@ jobs: build-aarch64-unknown-linux-gnu-packages: runs-on: [self-hosted, linux, x64, general] steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - run: sudo -E bash scripts/environment/bootstrap-ubuntu-20.04.sh - run: bash scripts/environment/prepare.sh - run: echo VECTOR_VERSION=$(make version) >> $GITHUB_ENV @@ -101,7 +101,7 @@ jobs: build-armv7-unknown-linux-gnueabihf-packages: runs-on: [self-hosted, linux, x64, general] steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - run: sudo -E bash scripts/environment/bootstrap-ubuntu-20.04.sh - run: bash scripts/environment/prepare.sh - run: echo VECTOR_VERSION=$(make version) >> 
$GITHUB_ENV @@ -125,7 +125,7 @@ jobs: build-armv7-unknown-linux-musleabihf-packages: runs-on: [self-hosted, linux, x64, general] steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - run: sudo -E bash scripts/environment/bootstrap-ubuntu-20.04.sh - run: bash scripts/environment/prepare.sh - run: echo VECTOR_VERSION=$(make version) >> $GITHUB_ENV @@ -141,7 +141,7 @@ jobs: build-x86_64-apple-darwin-packages: runs-on: macos-10.15 steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - name: "Upgrade bash" run: brew install bash - name: "Install realpath dependency" @@ -172,7 +172,7 @@ jobs: env: RUSTFLAGS: "-D warnings -Ctarget-feature=+crt-static" steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - name: "Add Vector version" run: echo VECTOR_VERSION=$(make version) >> $GITHUB_ENV shell: bash @@ -257,7 +257,7 @@ jobs: systemd \ make - name: checkout - uses: actions/checkout@v2.3.4 + uses: actions/checkout@v2.3.5 - run: echo VECTOR_VERSION=$(make version) >> $GITHUB_ENV - uses: actions/download-artifact@v2 with: @@ -292,7 +292,7 @@ jobs: tar \ make - name: checkout - uses: actions/checkout@v2.3.4 + uses: actions/checkout@v2.3.5 - run: echo VECTOR_VERSION=$(make version) >> $GITHUB_ENV - uses: actions/download-artifact@v2 with: @@ -313,7 +313,7 @@ jobs: - build-x86_64-apple-darwin-packages steps: - name: checkout - uses: actions/checkout@v2.3.4 + uses: actions/checkout@v2.3.5 - run: echo VECTOR_VERSION=$(make version) >> $GITHUB_ENV - uses: actions/download-artifact@v2 with: @@ -333,7 +333,7 @@ jobs: - build-armv7-unknown-linux-gnueabihf-packages - deb-verify steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - run: echo VECTOR_VERSION=$(make version) >> $GITHUB_ENV - name: Login to DockerHub uses: docker/login-action@v1.10.0 @@ -394,7 +394,7 @@ jobs: - rpm-verify - osx-verify steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - run: echo VECTOR_VERSION=$(make version) >> $GITHUB_ENV - uses: actions/download-artifact@v2 with: @@ -476,7 +476,7 @@ jobs: - rpm-verify - osx-verify steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - run: echo VECTOR_VERSION=$(make version) >> $GITHUB_ENV - uses: actions/download-artifact@v2 with: @@ -550,7 +550,7 @@ jobs: needs: - release-s3 steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - run: echo VECTOR_VERSION=$(make version) >> $GITHUB_ENV - run: sudo -E bash scripts/environment/bootstrap-ubuntu-20.04.sh - run: bash scripts/environment/prepare.sh @@ -567,7 +567,7 @@ jobs: - deb-verify - rpm-verify steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - run: echo VECTOR_VERSION=$(make version) >> $GITHUB_ENV - uses: actions/download-artifact@v2 with: @@ -679,7 +679,7 @@ jobs: # appear before the image it refers to. 
- release-docker steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - name: Install Helm run: scripts/environment/setup-helm.sh - name: Release Helm Chart diff --git a/.github/workflows/test-harness.yml b/.github/workflows/test-harness.yml index e5063ebe4d5e2..7a9c693d9b6d5 100644 --- a/.github/workflows/test-harness.yml +++ b/.github/workflows/test-harness.yml @@ -55,7 +55,7 @@ jobs: # Clone vector source and build .deb - name: Clone the PR branch - uses: actions/checkout@v2.3.4 + uses: actions/checkout@v2.3.5 with: lfs: true ref: '${{ steps.pr-info.outputs.head_sha }}' diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 084e127cc163b..243e280cab160 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -42,7 +42,7 @@ jobs: cue: ${{ steps.filter.outputs.cue }} markdown: ${{ steps.filter.outputs.markdown }} steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - uses: dorny/paths-filter@v2 id: filter with: @@ -90,7 +90,7 @@ jobs: needs: changes if: ${{ needs.changes.outputs.source == 'true' }} steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - uses: actions/cache@v2.1.6 name: Cache Cargo registry + index with: @@ -126,7 +126,7 @@ jobs: needs: changes if: ${{ needs.changes.outputs.source == 'true' }} steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - run: make ci-sweep - uses: actions/cache@v2.1.6 name: Cache Cargo registry + index @@ -176,7 +176,7 @@ jobs: # This helps us avoid adopting dependencies that aren't compatible with other architectures. if: ${{ needs.changes.outputs.dependencies == 'true' }} steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - run: make ci-sweep - uses: actions/cache@v2.1.6 name: Cache Cargo registry + index @@ -217,7 +217,7 @@ jobs: needs: changes if: ${{ needs.changes.outputs.source == 'true' }} steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - uses: actions/cache@v2.1.6 name: Cache Cargo registry + index with: @@ -243,7 +243,7 @@ jobs: needs: changes if: ${{ needs.changes.outputs.source == 'true' }} steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - run: choco install llvm - run: .\scripts\environment\bootstrap-windows-2019.ps1 # We always make sure to stop any previous sccache process before starting it fresh, that @@ -267,7 +267,7 @@ jobs: needs: changes if: ${{ needs.changes.outputs.source == 'true' || needs.changes.outputs.cue == 'true' }} steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - run: sudo -E bash scripts/environment/bootstrap-ubuntu-20.04.sh - run: bash scripts/environment/prepare.sh # We always make sure to stop any previous sccache process before starting it fresh, that @@ -291,7 +291,7 @@ jobs: needs: changes if: ${{ needs.changes.outputs.source == 'true' }} steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - run: sudo -E bash scripts/environment/bootstrap-ubuntu-20.04.sh - run: bash scripts/environment/prepare.sh - run: echo "::add-matcher::.github/matchers/rust.json" @@ -316,7 +316,7 @@ jobs: needs: changes if: ${{ needs.changes.outputs.source == 'true' }} steps: - - uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 - run: sudo -E bash scripts/environment/bootstrap-ubuntu-20.04.sh - run: bash scripts/environment/prepare.sh - run: echo "::add-matcher::.github/matchers/rust.json" @@ -341,7 +341,7 @@ jobs: container: timberio/ci_image needs: changes steps: - - 
uses: actions/checkout@v2.3.4 + - uses: actions/checkout@v2.3.5 with: # check-version needs tags fetch-depth: 0 # fetch everything From 23231a4694d46bed2ae799631ad4c7d63489e7f1 Mon Sep 17 00:00:00 2001 From: Will <43912346+001wwang@users.noreply.github.com> Date: Fri, 15 Oct 2021 15:06:52 -0400 Subject: [PATCH 18/38] enhancement(buffers): Instrument buffer max size and disk buffer initial size (#9584) * Add BufferCreated event for max size metric Signed-off-by: 001wwang * Record initial size of disk buffers Signed-off-by: 001wwang * Update buffer spec to reflect implementation Signed-off-by: 001wwang * Remove BufferMaxSize enum in favor of option arguments Signed-off-by: 001wwang * Fix clippy and test error Signed-off-by: 001wwang * Rephrase requirement for buffer initial size emitting Signed-off-by: 001wwang * Add scope section to explain global context Signed-off-by: 001wwang * Use more accurate max size in test Signed-off-by: 001wwang --- docs/specs/buffer.md | 24 +++++++++---------- .../buffers/src/buffer_usage_data.rs | 20 ++++++++++++++-- .../buffers/src/disk/leveldb_buffer/mod.rs | 18 ++++++++++---- .../buffers/src/internal_events.rs | 19 ++++++++++++++- lib/vector-core/buffers/src/lib.rs | 5 ++-- lib/vector-core/buffers/src/test/mod.rs | 2 +- 6 files changed, 64 insertions(+), 24 deletions(-) diff --git a/docs/specs/buffer.md b/docs/specs/buffer.md index 91644b22710fb..a126f0b18ec38 100644 --- a/docs/specs/buffer.md +++ b/docs/specs/buffer.md @@ -12,13 +12,16 @@ interpreted as described in [RFC 2119]. +## Scope + +This specification addresses direct buffer development and does not cover aspects that buffers inherit "for free". For example, this specification does not cover global context, such as component_id, that all buffers receive in their telemetry by nature of being attached to a Vector component. + ## Instrumentation Vector buffers MUST be instrumented for optimal observability and monitoring. This is required to drive various interfaces that Vector users depend on to manage Vector installations in mission critical production environments. This section extends the [Instrumentation Specification]. -### Terms and Definitions +### Terms And Definitions -* `component_metadata` - Refers to the metadata (component id, component scope, component kind, and component type) of the component associated with the buffer. Buffer metrics MUST be tagged with all or partial `component_metadata` unless specified otherwise. In most cases, these tags are automatically added from tracing span context and do not need to be included as event properties. * `byte_size` - Refers to the byte size of events from a buffer's perspective. For memory buffers, `byte_size` represents the in-memory byte size of events. For disk buffers, `byte_size` represents the serialized byte size of events. * `buffer_type` - One of `memory`, `disk`. Buffer metrics MUST be tagged with `buffer_type` unless otherwise specified. @@ -26,23 +29,18 @@ Vector buffers MUST be instrumented for optimal observability and monitoring. Th #### `BufferCreated` -*All buffers* MUST emit a `BufferCreated` event immediately upon creation. To avoid stale metrics, this event MUST be regularly emitted at an interval. +*All buffers* MUST emit a `BufferCreated` event upon creation. To avoid stale metrics, this event MUST be regularly emitted at an interval. 
* Properties - * `max_size_bytes` - the max size of the buffer in bytes - * `max_size_events` - the max size of the buffer in number of events - * `initial_events_size` - the number of events in the buffer at creation - * `initial_bytes_size` - the byte size of the buffer at creation - * `component_metadata` - as defined in [Terms and Definitions](#terms-and-definitions) + * `max_size_bytes` - the max size of the buffer in bytes if relevant + * `max_size_events` - the max size of the buffer in number of events if relevant * Metric * MUST emit the `buffer_max_event_size` gauge (in-memory buffers) if the defined `max_size_events` value is present * MUST emit the `buffer_max_byte_size` gauge (disk buffers) if the defined `max_size_bytes` value is present - * MUST emit the `buffer_received_events_total` counter with the defined `initial_events_size` value - * MUST emit the `buffer_received_bytes_total` counter with the defined `initial_bytes_size` value #### `BufferEventsReceived` -*All buffers* MUST emit an `BufferEventsReceived` event immediately after receiving one or more Vector events. +*All buffers* MUST emit a `BufferEventsReceived` event after receiving one or more Vector events. *All buffers* MUST emit a `BufferEventsReceived` event upon startup if there are existing events in the buffer. * Properties * `count` - the number of received events @@ -55,7 +53,7 @@ Vector buffers MUST be instrumented for optimal observability and monitoring. Th #### `BufferEventsSent` -*All buffers* MUST emit an `BufferEventsSent` event immediately after sending one or more Vector events. +*All buffers* MUST emit a `BufferEventsSent` event after sending one or more Vector events. * Properties * `count` - the number of sent events @@ -68,7 +66,7 @@ Vector buffers MUST be instrumented for optimal observability and monitoring. Th #### `EventsDropped` -*All buffers* MUST emit an `EventsDropped` event immediately after dropping one or more Vector events. +*All buffers* MUST emit an `EventsDropped` event after dropping one or more Vector events. 
* Properties * `count` - the number of dropped events diff --git a/lib/vector-core/buffers/src/buffer_usage_data.rs b/lib/vector-core/buffers/src/buffer_usage_data.rs index 8c823db1c2827..901cc58126def 100644 --- a/lib/vector-core/buffers/src/buffer_usage_data.rs +++ b/lib/vector-core/buffers/src/buffer_usage_data.rs @@ -4,7 +4,9 @@ use std::{sync::Arc, time::Duration}; use tokio::time::interval; use tracing::{Instrument, Span}; -use crate::internal_events::{BufferEventsReceived, BufferEventsSent, EventsDropped}; +use crate::internal_events::{ + BufferCreated, BufferEventsReceived, BufferEventsSent, EventsDropped, +}; use crate::WhenFull; pub struct BufferUsageData { @@ -13,10 +15,17 @@ pub struct BufferUsageData { sent_event_count: AtomicU64, sent_byte_size: AtomicUsize, dropped_event_count: Option<AtomicU64>, + max_size_bytes: Option<usize>, + max_size_events: Option<usize>, } impl BufferUsageData { - pub fn new(when_full: WhenFull, span: Span) -> Arc<Self> { + pub fn new( + when_full: WhenFull, + span: Span, + max_size_bytes: Option<usize>, + max_size_events: Option<usize>, + ) -> Arc<Self> { let dropped_event_count = match when_full { WhenFull::Block => None, WhenFull::DropNewest => Some(AtomicU64::new(0)), @@ -28,6 +37,8 @@ impl BufferUsageData { sent_event_count: AtomicU64::new(0), sent_byte_size: AtomicUsize::new(0), dropped_event_count, + max_size_bytes, + max_size_events, }); let usage_data = buffer_usage_data.clone(); @@ -37,6 +48,11 @@ impl BufferUsageData { loop { interval.tick().await; + emit(&BufferCreated { + max_size_bytes: usage_data.max_size_bytes, + max_size_events: usage_data.max_size_events, + }); + emit(&BufferEventsReceived { count: usage_data.received_event_count.swap(0, Ordering::Relaxed), byte_size: usage_data.received_byte_size.swap(0, Ordering::Relaxed), diff --git a/lib/vector-core/buffers/src/disk/leveldb_buffer/mod.rs b/lib/vector-core/buffers/src/disk/leveldb_buffer/mod.rs index ae3e58a24bfb2..02a885aa82ded 100644 --- a/lib/vector-core/buffers/src/disk/leveldb_buffer/mod.rs +++ b/lib/vector-core/buffers/src/disk/leveldb_buffer/mod.rs @@ -34,7 +34,7 @@ pub struct Buffer<T> { phantom: PhantomData<T>, } -/// Read the byte size of the database +/// Read the byte size and item size of the database /// /// There is a mismatch between leveldb's mechanism and vector's. While vector /// would prefer to keep as little in-memory as possible leveldb, being a /// This function does not solve the problem -- leveldb will still map 1000 /// files if it wants -- but we at least avoid forcing this to happen at the /// start of vector.
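Before the diff of the function itself, a minimal standalone sketch of the scan the revised `db_initial_size` below performs (a hypothetical free function; it assumes the `leveldb` and `db-key` crate APIs this module already uses):

```rust
use leveldb::database::Database;
use leveldb::iterator::Iterable;
use leveldb::options::ReadOptions;

// Walk every stored value exactly once, accumulating the record count
// alongside the total serialized byte size, so a disk buffer can report
// what it already holds at startup. Mirrors the loop in the diff below.
fn scan_existing<K: db_key::Key>(db: &Database<K>) -> (usize, u64) {
    let mut byte_size: usize = 0; // serialized bytes across all records
    let mut item_size: u64 = 0; // number of records (events)
    for value in db.value_iter(ReadOptions::new()) {
        item_size += 1;
        byte_size += value.len();
    }
    (byte_size, item_size)
}
```

Counting items in the same pass costs nothing extra, since the byte-size scan already has to touch every value.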
-fn db_initial_size(path: &Path) -> Result<usize, DataDirError> { +fn db_initial_size(path: &Path) -> Result<(usize, u64), DataDirError> { let mut options = Options::new(); options.create_if_missing = true; let db: Database<Key> = Database::open(path, options).with_context(|| Open { data_dir: path.parent().expect("always a parent"), })?; - Ok(db.value_iter(ReadOptions::new()).map(|v| v.len()).sum()) + let mut item_size = 0; + let mut byte_size = 0; + for v in db.value_iter(ReadOptions::new()) { + item_size += 1; + byte_size += v.len(); + } + Ok((byte_size, item_size)) } impl<T> Buffer<T> where @@ -82,7 +88,9 @@ where let max_uncompacted_size = max_size / MAX_UNCOMPACTED_DENOMINATOR; let max_size = max_size - max_uncompacted_size; - let initial_size = db_initial_size(path)?; + let (initial_byte_size, initial_item_size) = db_initial_size(path)?; + buffer_usage_data + .increment_received_event_count_and_byte_size(initial_item_size, initial_byte_size); let mut options = Options::new(); options.create_if_missing = true; @@ -101,7 +109,7 @@ where tail = if iter.valid() { iter.key().0 + 1 } else { 0 }; } - let current_size = Arc::new(AtomicUsize::new(initial_size)); + let current_size = Arc::new(AtomicUsize::new(initial_byte_size)); let write_notifier = Arc::new(AtomicWaker::new()); diff --git a/lib/vector-core/buffers/src/internal_events.rs b/lib/vector-core/buffers/src/internal_events.rs index 6dc6f6124c9a0..9bac2cd225789 100644 --- a/lib/vector-core/buffers/src/internal_events.rs +++ b/lib/vector-core/buffers/src/internal_events.rs @@ -1,5 +1,5 @@ use core_common::internal_event::InternalEvent; -use metrics::{counter, decrement_gauge, increment_gauge}; +use metrics::{counter, decrement_gauge, gauge, increment_gauge}; pub struct BufferEventsReceived { pub count: u64, @@ -40,3 +40,20 @@ impl InternalEvent for EventsDropped { counter!("buffer_discarded_events_total", self.count); } } + +pub struct BufferCreated { + pub max_size_events: Option<usize>, + pub max_size_bytes: Option<usize>, +} + +impl InternalEvent for BufferCreated { + #[allow(clippy::cast_precision_loss)] + fn emit_metrics(&self) { + if let Some(max_size) = self.max_size_events { + gauge!("buffer_max_event_size", max_size as f64); + } + if let Some(max_size) = self.max_size_bytes { + gauge!("buffer_max_byte_size", max_size as f64); + } + } +} diff --git a/lib/vector-core/buffers/src/lib.rs b/lib/vector-core/buffers/src/lib.rs index 30a836b85a18f..60bd12da9415b 100644 --- a/lib/vector-core/buffers/src/lib.rs +++ b/lib/vector-core/buffers/src/lib.rs @@ -71,7 +71,7 @@ where ..
} => { let buffer_dir = format!("{}_buffer", id); - let buffer_usage_data = BufferUsageData::new(when_full, span); + let buffer_usage_data = BufferUsageData::new(when_full, span, Some(max_size), None); let (tx, rx, acker) = disk::open(&data_dir, &buffer_dir, max_size, buffer_usage_data.clone()) .map_err(|error| error.to_string())?; @@ -86,7 +86,8 @@ } => { let (tx, rx) = mpsc::channel(max_events); if instrument { - let buffer_usage_data = BufferUsageData::new(when_full, span); + let buffer_usage_data = + BufferUsageData::new(when_full, span, None, Some(max_events)); let tx = BufferInputCloner::Memory(tx, when_full, Some(buffer_usage_data.clone())); let rx = rx.inspect(move |item: &T| { buffer_usage_data.increment_sent_event_count_and_byte_size(1, item.size_of()); diff --git a/lib/vector-core/buffers/src/test/mod.rs b/lib/vector-core/buffers/src/test/mod.rs index 7da0b38454c87..405375accb333 100644 --- a/lib/vector-core/buffers/src/test/mod.rs +++ b/lib/vector-core/buffers/src/test/mod.rs @@ -18,7 +18,7 @@ async fn drop_when_full() { let mut tx = Box::pin(DropWhenFull::new( tx, - BufferUsageData::new(WhenFull::DropNewest, Span::none()), + BufferUsageData::new(WhenFull::DropNewest, Span::none(), None, Some(2)), )); assert_eq!(tx.as_mut().poll_ready(cx), Poll::Ready(Ok(()))); From 147bc5fbf0811681acec8a0e3190b6abcef34691 Mon Sep 17 00:00:00 2001 From: Jesse Szwedko Date: Fri, 15 Oct 2021 14:09:30 -0500 Subject: [PATCH 19/38] fix(codecs, http source): Re-add support for encoding (#9640) * fix(codecs, http source): Re-add support for encoding This was mistakenly dropped in aa219e23cdd8ba77e19cce2d04ff345b1da6fa7d Signed-off-by: Jesse Szwedko --- src/sources/http.rs | 66 +++++++++++++++---- src/sources/util/body_decoding.rs | 12 ++++ src/sources/util/mod.rs | 4 ++ .../cue/reference/components/sources/http.cue | 15 +++++ 4 files changed, 85 insertions(+), 12 deletions(-) create mode 100644 src/sources/util/body_decoding.rs diff --git a/src/sources/http.rs b/src/sources/http.rs index fac44ecae2ef5..5a04db8d9c7aa 100644 --- a/src/sources/http.rs +++ b/src/sources/http.rs @@ -1,12 +1,17 @@ use crate::{ - codecs::{self, DecodingConfig, FramingConfig, ParserConfig}, + codecs::{ + self, BytesDecoderConfig, BytesParserConfig, DecodingConfig, FramingConfig, + JsonParserConfig, NewlineDelimitedDecoderConfig, ParserConfig, + }, config::{ log_schema, DataType, GenerateConfig, Resource, SourceConfig, SourceContext, SourceDescription, }, event::{Event, Value}, serde::{default_decoding, default_framing_stream_based}, - sources::util::{add_query_parameters, ErrorMessage, HttpSource, HttpSourceAuthConfig}, + sources::util::{ + add_query_parameters, Encoding, ErrorMessage, HttpSource, HttpSourceAuthConfig, + }, tls::TlsConfig, }; use bytes::{Bytes, BytesMut}; @@ -21,6 +26,8 @@ pub struct SimpleHttpConfig { address: SocketAddr, #[serde(default)] + encoding: Option<Encoding>, + #[serde(default)] headers: Vec<String>, #[serde(default)] query_parameters: Vec<String>, @@ -32,10 +39,8 @@ pub struct SimpleHttpConfig { path: String, #[serde(default = "default_path_key")] path_key: String, - #[serde(default = "default_framing_stream_based")] - framing: Box<dyn FramingConfig>, - #[serde(default = "default_decoding")] - decoding: Box<dyn ParserConfig>, + framing: Option<Box<dyn FramingConfig>>, + decoding: Option<Box<dyn ParserConfig>>, } inventory::submit!
{ @@ -46,6 +51,7 @@ impl GenerateConfig for SimpleHttpConfig { fn generate_config() -> toml::Value { toml::Value::try_from(Self { address: "0.0.0.0:8080".parse().unwrap(), + encoding: None, headers: Vec::new(), query_parameters: Vec::new(), tls: None, @@ -53,8 +59,8 @@ impl GenerateConfig for SimpleHttpConfig { path_key: "path".to_string(), path: "/".to_string(), strict_path: true, - framing: default_framing_stream_based(), - decoding: default_decoding(), + framing: Some(default_framing_stream_based()), + decoding: Some(default_decoding()), }) .unwrap() } @@ -124,7 +130,43 @@ impl HttpSource for SimpleHttpSource { #[typetag::serde(name = "http")] impl SourceConfig for SimpleHttpConfig { async fn build(&self, cx: SourceContext) -> crate::Result { - let decoder = DecodingConfig::new(self.framing.clone(), self.decoding.clone()).build()?; + if self.encoding.is_some() && (self.framing.is_some() || self.decoding.is_some()) { + return Err("Using `encoding` is deprecated and does not have any effect when `decoding` or `framing` is provided. Configure `framing` and `decoding` instead.".into()); + } + + let (framing, decoding) = if let Some(encoding) = self.encoding { + match encoding { + Encoding::Text => ( + Box::new(NewlineDelimitedDecoderConfig::new()) as Box<dyn FramingConfig>, + Box::new(BytesParserConfig::new()) as Box<dyn ParserConfig>, + ), + Encoding::Json => ( + Box::new(BytesDecoderConfig::new()) as Box<dyn FramingConfig>, + Box::new(JsonParserConfig::new()) as Box<dyn ParserConfig>, + ), + Encoding::Ndjson => ( + Box::new(NewlineDelimitedDecoderConfig::new()) as Box<dyn FramingConfig>, + Box::new(JsonParserConfig::new()) as Box<dyn ParserConfig>, + ), + Encoding::Binary => ( + Box::new(BytesDecoderConfig::new()) as Box<dyn FramingConfig>, + Box::new(BytesParserConfig::new()) as Box<dyn ParserConfig>, + ), + } + } else { + ( + match self.framing.as_ref() { + Some(framing) => framing.clone(), + None => default_framing_stream_based(), + } as Box<dyn FramingConfig>, + match self.decoding.as_ref() { + Some(decoding) => decoding.clone(), + None => default_decoding(), + } as Box<dyn ParserConfig>, + ) + }; + + let decoder = DecodingConfig::new(framing, decoding).build()?; let source = SimpleHttpSource { headers: self.headers.clone(), query_parameters: self.query_parameters.clone(), @@ -182,7 +224,6 @@ mod tests { codecs::{BytesDecoderConfig, FramingConfig, JsonParserConfig, ParserConfig}, config::{log_schema, SourceConfig, SourceContext}, event::{Event, EventStatus, Value}, - serde::{default_decoding, default_framing_stream_based}, test_util::{components, next_addr, spawn_collect_n, trace_init, wait_for_tcp}, Pipeline, }; @@ -224,14 +265,15 @@ SimpleHttpConfig { address, headers, + encoding: None, query_parameters, tls: None, auth: None, strict_path, path_key, path, - framing: framing.unwrap_or_else(default_framing_stream_based), - decoding: decoding.unwrap_or_else(default_decoding), + framing, + decoding, } .build(context) .await diff --git a/src/sources/util/body_decoding.rs b/src/sources/util/body_decoding.rs new file mode 100644 index 0000000000000..1027c68b8f949 --- /dev/null +++ b/src/sources/util/body_decoding.rs @@ -0,0 +1,12 @@ +use serde::{Deserialize, Serialize}; + +#[derive(Deserialize, Serialize, Debug, Eq, PartialEq, Clone, Derivative, Copy)] +#[serde(rename_all = "snake_case")] +#[derivative(Default)] +pub enum Encoding { + #[derivative(Default)] + Text, + Ndjson, + Json, + Binary, +} diff --git a/src/sources/util/mod.rs b/src/sources/util/mod.rs index c113b8a26cf38..ca7f4664b5938 100644 --- a/src/sources/util/mod.rs +++ b/src/sources/util/mod.rs @@ -1,3 +1,5 @@ +#[cfg(any(feature = "sources-http"))] +mod body_decoding; mod encoding_config;
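As a side note on the fallback in `SimpleHttpConfig::build` above, a condensed sketch of the mapping (a hypothetical free function returning config-style labels in place of the boxed `FramingConfig`/`ParserConfig` values the real code constructs):

```rust
#[derive(Clone, Copy)]
enum Encoding {
    Text,
    Ndjson,
    Json,
    Binary,
}

// Each deprecated `encoding` value picks a (framing, decoding) pair; with no
// `encoding` set, the stream-based defaults apply (newline-delimited framing
// and plain bytes decoding).
fn framing_and_decoding(encoding: Option<Encoding>) -> (&'static str, &'static str) {
    match encoding {
        Some(Encoding::Text) => ("newline_delimited", "bytes"),
        Some(Encoding::Json) => ("bytes", "json"),
        Some(Encoding::Ndjson) => ("newline_delimited", "json"),
        Some(Encoding::Binary) => ("bytes", "bytes"),
        None => ("newline_delimited", "bytes"),
    }
}
```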
#[cfg(any(feature = "sources-file", feature = "sources-kafka"))] pub mod finalizer; @@ -24,6 +26,8 @@ mod unix_datagram; #[cfg(all(unix, feature = "sources-utils-unix"))] mod unix_stream; +#[cfg(any(feature = "sources-http"))] +pub use self::body_decoding::Encoding; #[cfg(feature = "sources-utils-http-query")] pub use self::http::add_query_parameters; #[cfg(any( diff --git a/website/cue/reference/components/sources/http.cue b/website/cue/reference/components/sources/http.cue index cd4c7e58d4f3a..40439f4f48a26 100644 --- a/website/cue/reference/components/sources/http.cue +++ b/website/cue/reference/components/sources/http.cue @@ -73,6 +73,21 @@ components: sources: http: { syntax: "literal" } } + encoding: { + common: true + description: "The expected encoding of received data. Note that for `json` and `ndjson` encodings, the fields of the JSON objects are output as separate fields." + required: false + type: string: { + default: "text" + enum: { + text: "Newline-delimited text, with each line forming a message." + ndjson: "Newline-delimited JSON objects, where each line must contain a JSON object." + json: "Array of JSON objects, which must be a JSON array containing JSON objects." + binary: "Binary or text, whole http request body is considered as one message." + } + syntax: "literal" + } + } headers: { common: false description: "A list of HTTP headers to include in the log event. These will override any values included in the JSON payload with conflicting names." From 1ada00235d4aedd125f19b70c0e5b52ae1ca2be8 Mon Sep 17 00:00:00 2001 From: Bruce Guenter Date: Fri, 15 Oct 2021 14:17:50 -0600 Subject: [PATCH 20/38] chore: Simplify calling size_of over vectors (#9627) Signed-off-by: Bruce Guenter --- lib/vector-core/core-common/src/byte_size_of.rs | 2 +- lib/vector-core/src/event/test/size_of.rs | 2 +- lib/vector-core/src/event/value.rs | 6 ++---- src/sinks/sematext/metrics.rs | 2 +- src/sources/apache_metrics/mod.rs | 3 +-- src/sources/util/http/prelude.rs | 2 +- src/topology/builder.rs | 7 +++---- 7 files changed, 10 insertions(+), 14 deletions(-) diff --git a/lib/vector-core/core-common/src/byte_size_of.rs b/lib/vector-core/core-common/src/byte_size_of.rs index 2dd8412bea7eb..18dab24e5bf8f 100644 --- a/lib/vector-core/core-common/src/byte_size_of.rs +++ b/lib/vector-core/core-common/src/byte_size_of.rs @@ -104,7 +104,7 @@ impl ByteSizeOf for Value { match self { Value::Null | Value::Bool(_) | Value::Number(_) => 0, Value::String(s) => s.len(), - Value::Array(a) => a.iter().map(ByteSizeOf::size_of).sum(), + Value::Array(a) => a.size_of(), Value::Object(o) => o.iter().map(|(k, v)| k.size_of() + v.size_of()).sum(), } } diff --git a/lib/vector-core/src/event/test/size_of.rs b/lib/vector-core/src/event/test/size_of.rs index f011469c30608..aaa196b160f52 100644 --- a/lib/vector-core/src/event/test/size_of.rs +++ b/lib/vector-core/src/event/test/size_of.rs @@ -115,7 +115,7 @@ fn log_operation_maintains_size() { let new_value_sz = value.size_of(); let old_value_sz = log_event.get_flat(&key).map_or(0, |x| x.size_of()); if !log_event.contains(&key) { - current_size += key.len(); + current_size += key.size_of(); } log_event.insert_flat(&key, value); current_size -= old_value_sz; diff --git a/lib/vector-core/src/event/value.rs b/lib/vector-core/src/event/value.rs index a65d6b9a8d275..608476ca5d229 100644 --- a/lib/vector-core/src/event/value.rs +++ b/lib/vector-core/src/event/value.rs @@ -103,10 +103,8 @@ impl ByteSizeOf for Value { fn allocated_bytes(&self) -> usize { match self { 
Value::Bytes(bytes) => bytes.len(), - Value::Map(map) => map - .iter() - .fold(0, |acc, (k, v)| acc + k.len() + v.size_of()), + Value::Map(map) => map.size_of(), + Value::Array(arr) => arr.size_of(), _ => 0, } } diff --git a/src/sinks/sematext/metrics.rs b/src/sinks/sematext/metrics.rs index d051cae71d375..6ec31b05cb144 100644 --- a/src/sinks/sematext/metrics.rs +++ b/src/sinks/sematext/metrics.rs @@ -221,7 +221,7 @@ fn encode_events( metrics: Vec<Metric>, ) -> EncodedEvent { let mut output = String::new(); - let byte_size = metrics.iter().map(|metric| metric.size_of()).sum(); + let byte_size = metrics.size_of(); for metric in metrics.into_iter() { let (series, data, _metadata) = metric.into_parts(); let namespace = series diff --git a/src/sources/apache_metrics/mod.rs b/src/sources/apache_metrics/mod.rs index 553b9eee3e12f..964dd8341faa0 100644 --- a/src/sources/apache_metrics/mod.rs +++ b/src/sources/apache_metrics/mod.rs @@ -216,9 +216,8 @@ fn apache_metrics( }) .collect::<Vec<_>>(); - let byte_size = metrics.iter().map(ByteSizeOf::size_of).sum(); emit!(&ApacheMetricsEventsReceived { - byte_size, + byte_size: metrics.size_of(), count: metrics.len(), endpoint: &sanitized_url, }); diff --git a/src/sources/util/http/prelude.rs b/src/sources/util/http/prelude.rs index 48304754e51b4..2ecef98081289 100644 --- a/src/sources/util/http/prelude.rs +++ b/src/sources/util/http/prelude.rs @@ -101,7 +101,7 @@ pub trait HttpSource: Clone + Send + Sync + 'static { .map(|events| { emit!(&HttpEventsReceived { count: events.len(), - byte_size: events.iter().map(|event| event.size_of()).sum(), + byte_size: events.size_of(), http_path, protocol, }); diff --git a/src/topology/builder.rs b/src/topology/builder.rs index 60c0c66c9564c..0860881313cc1 100644 --- a/src/topology/builder.rs +++ b/src/topology/builder.rs @@ -229,7 +229,7 @@ pub async fn build_pieces( .inspect(|events| { emit!(&EventsReceived { count: events.len(), - byte_size: events.iter().map(|e| e.size_of()).sum(), + byte_size: events.size_of(), }); }) .flat_map(move |events| { @@ -242,7 +242,7 @@ } emit!(&EventsSent { count: output.len(), - byte_size: output.iter().map(|event| event.size_of()).sum(), + byte_size: output.size_of(), }); stream::iter(output.into_iter()).map(Ok) }) @@ -278,8 +278,7 @@ // TODO: account for error outputs separately?
emit!(&EventsSent { count: buf.len() + err_buf.len(), - byte_size: buf.iter().map(|event| event.size_of()).sum::<usize>() - + err_buf.iter().map(|event| event.size_of()).sum::<usize>(), + byte_size: buf.size_of() + err_buf.size_of(), }); for event in buf { From e3b3fe14cab2bc1149fe5981a598ff250cd6be01 Mon Sep 17 00:00:00 2001 From: Will <43912346+001wwang@users.noreply.github.com> Date: Fri, 15 Oct 2021 17:16:42 -0400 Subject: [PATCH 21/38] enhancement(observability, splunk_hec source): Instrument splunk_hec source according to component spec (#9586) * Update SplunkHecEventsReceived to match spec requirements Signed-off-by: 001wwang * Add component_received_event_bytes_total to splunk_hec source cue Signed-off-by: 001wwang * Update splunk hec error events to match spec requirements Signed-off-by: 001wwang * Add component_errors_total to splunk_hec source cue Signed-off-by: 001wwang * Add SplunkHecBytesReceived event Signed-off-by: 001wwang * Add component_received_bytes_total to splunk_hec source cue Signed-off-by: 001wwang * Add metric tags to component_received_bytes_total Signed-off-by: 001wwang * Enable check-events on splunk_hec internal events Signed-off-by: 001wwang * Sort splunk_hec cue metrics alphabetically Signed-off-by: 001wwang * Replace SplunkHecEventsReceived with EventsReceived Signed-off-by: 001wwang * Replace SplunkHecBytesReceived with HttpBytesReceived Signed-off-by: 001wwang * Update SplunkEventEncodeError Signed-off-by: 001wwang * Rename HTTP_SOURCE_TAGS const to HTTP_POLL_SOURCE_TAGS Signed-off-by: 001wwang * Add component instrumentation test assertions and HTTP_PUSH_SOURCE_TAGS const Signed-off-by: 001wwang * Fix formatting errors Signed-off-by: 001wwang * Resolve cfg compilation error Signed-off-by: 001wwang * Rename POLL to PUSH for SOURCE_TAGS Signed-off-by: 001wwang * Consolidate protocol determination logic Signed-off-by: 001wwang * Fix unused code error Signed-off-by: 001wwang * Update components::init to init_test Signed-off-by: 001wwang * Remove unused trace_init import Signed-off-by: 001wwang * Refactor get_protocol to http_protocol_name method Signed-off-by: 001wwang --- src/internal_events/mod.rs | 3 +- src/internal_events/splunk_hec.rs | 32 +++-- src/sinks/prometheus/exporter.rs | 5 +- src/sources/apache_metrics/mod.rs | 4 +- src/sources/prometheus/remote_write.rs | 5 +- src/sources/splunk_hec.rs | 110 +++++++++--------- src/sources/util/http/prelude.rs | 5 +- src/test_util/components.rs | 7 +- src/tls/settings.rs | 7 ++ .../components/sources/internal_metrics.cue | 1 + .../components/sources/splunk_hec.cue | 11 +- 11 files changed, 98 insertions(+), 92 deletions(-) diff --git a/src/internal_events/mod.rs b/src/internal_events/mod.rs index 358a59b3d107e..40dc13d9feaee 100644 --- a/src/internal_events/mod.rs +++ b/src/internal_events/mod.rs @@ -212,7 +212,8 @@ pub(crate) use self::host_metrics::*; feature = "sources-utils-http", feature = "sources-utils-http-encoding", feature = "sinks-http", - feature = "sources-datadog" + feature = "sources-datadog", + feature = "sources-splunk_hec", ))] pub(crate) use self::http::*; #[cfg(all(unix, feature = "sources-journald"))] diff --git a/src/internal_events/splunk_hec.rs b/src/internal_events/splunk_hec.rs index c938eddbad535..43d7f639e33f0 100644 --- a/src/internal_events/splunk_hec.rs +++ b/src/internal_events/splunk_hec.rs @@ -1,5 +1,3 @@ -// ## skip check-events ## - use crate::event::metric::{MetricKind, MetricValue}; use metrics::counter; use serde_json::Error; @@ -29,11 +27,14 @@ impl InternalEvent for
SplunkEventEncodeError { error!( message = "Error encoding Splunk HEC event to JSON.", error = ?self.error, + error_type = "encode_failed", + stage = "processing", internal_log_rate_secs = 30, ); } fn emit_metrics(&self) { + counter!("component_errors_total", 1, "error_type" => "encode_failed", "stage" => "processing"); counter!("encode_errors_total", 1); } } @@ -65,20 +66,6 @@ mod source { use metrics::counter; use vector_core::internal_event::InternalEvent; - #[derive(Debug)] - pub struct SplunkHecEventReceived; - - impl InternalEvent for SplunkHecEventReceived { - fn emit_logs(&self) { - trace!(message = "Received one event."); - } - - fn emit_metrics(&self) { - counter!("component_received_events_total", 1); - counter!("events_in_total", 1); - } - } - #[derive(Debug)] pub struct SplunkHecRequestReceived<'a> { pub path: &'a str, @@ -99,20 +86,24 @@ mod source { } #[derive(Debug)] - pub struct SplunkHecRequestBodyInvalid { + pub struct SplunkHecRequestBodyInvalidError { pub error: std::io::Error, } - impl InternalEvent for SplunkHecRequestBodyInvalid { + impl InternalEvent for SplunkHecRequestBodyInvalidError { fn emit_logs(&self) { error!( message = "Invalid request body.", error = ?self.error, + error_type = "parse_failed", + stage = "processing", internal_log_rate_secs = 10 ); } - fn emit_metrics(&self) {} + fn emit_metrics(&self) { + counter!("component_errors_total", 1, "error_type" => "parse_failed", "stage" => "processing") + } } #[derive(Debug)] @@ -125,12 +116,15 @@ mod source { error!( message = "Error processing request.", error = ?self.error, + error_type = "http_error", + stage = "receiving", internal_log_rate_secs = 10 ); } fn emit_metrics(&self) { counter!("http_request_errors_total", 1); + counter!("component_errors_total", 1, "error_type" => "http_error", "stage" => "receiving") } } } diff --git a/src/sinks/prometheus/exporter.rs b/src/sinks/prometheus/exporter.rs index 2eeb8ae3dc782..e36d7e45a486e 100644 --- a/src/sinks/prometheus/exporter.rs +++ b/src/sinks/prometheus/exporter.rs @@ -492,10 +492,7 @@ mod tests { trace_init(); let client_settings = MaybeTlsSettings::from_config(&tls_config, false).unwrap(); - let proto = match &tls_config { - Some(_) => "https", - None => "http", - }; + let proto = client_settings.http_protocol_name(); let address = next_addr(); let config = PrometheusExporterConfig { diff --git a/src/sources/apache_metrics/mod.rs b/src/sources/apache_metrics/mod.rs index 964dd8341faa0..f6c2f18e48a48 100644 --- a/src/sources/apache_metrics/mod.rs +++ b/src/sources/apache_metrics/mod.rs @@ -274,7 +274,7 @@ mod test { use super::*; use crate::{ config::SourceConfig, - test_util::components::{self, HTTP_SOURCE_TAGS, SOURCE_TESTS}, + test_util::components::{self, HTTP_PULL_SOURCE_TAGS, SOURCE_TESTS}, test_util::{collect_ready, next_addr, wait_for_tcp}, Error, }; @@ -368,7 +368,7 @@ Scoreboard: ____S_____I______R____I_______KK___D__C__G_L____________W___________ .map(|e| e.into_metric()) .collect::>(); - SOURCE_TESTS.assert(&HTTP_SOURCE_TAGS); + SOURCE_TESTS.assert(&HTTP_PULL_SOURCE_TAGS); match metrics.iter().find(|m| m.name() == "up") { Some(m) => { diff --git a/src/sources/prometheus/remote_write.rs b/src/sources/prometheus/remote_write.rs index c3a36ef490983..f136ee0edbd63 100644 --- a/src/sources/prometheus/remote_write.rs +++ b/src/sources/prometheus/remote_write.rs @@ -111,6 +111,7 @@ mod test { config::{SinkConfig, SinkContext}, sinks::prometheus::remote_write::RemoteWriteConfig, test_util::{self, components}, + tls::MaybeTlsSettings, Pipeline, }; 
use chrono::{SubsecRound as _, Utc}; @@ -137,7 +138,9 @@ mod test { let address = test_util::next_addr(); let (tx, rx) = Pipeline::new_test_finalize(EventStatus::Delivered); - let proto = if tls.is_none() { "http" } else { "https" }; + let proto = MaybeTlsSettings::from_config(&tls, true) + .unwrap() + .http_protocol_name(); let source = PrometheusRemoteWriteConfig { address, auth: None, diff --git a/src/sources/splunk_hec.rs b/src/sources/splunk_hec.rs index 6e01ff6593b57..81771410141a6 100644 --- a/src/sources/splunk_hec.rs +++ b/src/sources/splunk_hec.rs @@ -2,7 +2,7 @@ use crate::{ config::{log_schema, DataType, Resource, SourceConfig, SourceContext, SourceDescription}, event::{Event, LogEvent, Value}, internal_events::{ - SplunkHecEventReceived, SplunkHecRequestBodyInvalid, SplunkHecRequestError, + EventsReceived, HttpBytesReceived, SplunkHecRequestBodyInvalidError, SplunkHecRequestError, SplunkHecRequestReceived, }, tls::{MaybeTlsSettings, TlsConfig}, @@ -22,6 +22,7 @@ use std::{ io::Read, net::{Ipv4Addr, SocketAddr}, }; +use vector_core::ByteSizeOf; use warp::{filters::BoxedFilter, path, reject::Rejection, reply::Response, Filter, Reply}; @@ -80,7 +81,8 @@ fn default_socket_address() -> SocketAddr { #[typetag::serde(name = "splunk_hec")] impl SourceConfig for SplunkConfig { async fn build(&self, cx: SourceContext) -> crate::Result { - let source = SplunkSource::new(self); + let tls = MaybeTlsSettings::from_config(&self.tls, true)?; + let source = SplunkSource::new(self, tls.http_protocol_name()); let event_service = source.event_service(cx.out.clone()); let raw_service = source.raw_service(cx.out); @@ -108,7 +110,6 @@ impl SourceConfig for SplunkConfig { ) .or_else(finish_err); - let tls = MaybeTlsSettings::from_config(&self.tls, true)?; let listener = tls.bind(&self.address).await?; let shutdown = cx.shutdown; @@ -141,10 +142,11 @@ impl SourceConfig for SplunkConfig { /// Shared data for responding to requests. 
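The `protocol` value threaded through `SplunkSource` below comes from the `MaybeTlsSettings::http_protocol_name` helper added later in this patch (see the `src/tls/settings.rs` hunk); a simplified sketch of that consolidation, with a stand-in enum for the real settings type:

```rust
// Stand-in for vector's MaybeTls settings; the real variants carry the raw
// and TLS configuration data respectively.
enum MaybeTlsSettings {
    Raw(()),
    Tls(()),
}

impl MaybeTlsSettings {
    // One shared place to derive the `protocol` metric tag, replacing the
    // ad-hoc `match`/`if tls.is_none()` checks at each call site.
    const fn http_protocol_name(&self) -> &'static str {
        match self {
            MaybeTlsSettings::Raw(_) => "http",
            MaybeTlsSettings::Tls(_) => "https",
        }
    }
}
```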
struct SplunkSource { valid_credentials: Vec<String>, + protocol: &'static str, } impl SplunkSource { - fn new(config: &SplunkConfig) -> Self { + fn new(config: &SplunkConfig, protocol: &'static str) -> Self { let valid_tokens = config .valid_tokens .iter() @@ -154,6 +156,7 @@ impl SplunkSource { valid_credentials: valid_tokens .map(|token| format!("Splunk {}", token)) .collect(), + protocol, } } @@ -166,6 +169,7 @@ impl SplunkSource { .and(splunk_channel_query_param) .map(|header: Option<String>, query_param| header.or(query_param)); + let protocol = self.protocol; warp::post() .and(path!("event").or(path!("event" / "1.0"))) .and(self.authorization()) @@ -174,6 +178,7 @@ impl SplunkSource { .and(warp::header::optional::<String>("X-Forwarded-For")) .and(self.gzip()) .and(warp::body::bytes()) + .and(warp::path::full()) .and_then( move |_, _, @@ -181,10 +186,16 @@ impl SplunkSource { remote: Option<SocketAddr>, xff: Option<String>, gzip: bool, - body: Bytes| { + body: Bytes, + path: warp::path::FullPath| { let mut out = out .clone() .sink_map_err(|_| Rejection::from(ApiError::ServerShutdown)); + emit!(&HttpBytesReceived { + byte_size: body.len(), + http_path: path.as_str(), + protocol, + }); async move { let reader: Box<dyn Read> = if gzip { Box::new(MultiGzDecoder::new(body.reader())) @@ -226,6 +237,7 @@ impl SplunkSource { .ok_or_else(|| Rejection::from(ApiError::MissingChannel)) }); + let protocol = self.protocol; warp::post() .and(path!("raw" / "1.0").or(path!("raw"))) .and(self.authorization()) @@ -234,6 +246,7 @@ impl SplunkSource { .and(warp::header::optional::<String>("X-Forwarded-For")) .and(self.gzip()) .and(warp::body::bytes()) + .and(warp::path::full()) .and_then( move |_, _, @@ -241,8 +254,14 @@ impl SplunkSource { remote: Option<SocketAddr>, xff: Option<String>, gzip: bool, - body: Bytes| { + body: Bytes, + path: warp::path::FullPath| { let out = out.clone(); + emit!(&HttpBytesReceived { + byte_size: body.len(), + http_path: path.as_str(), + protocol, + }); async move { let event = future::ready(raw_event(body, gzip, channel, remote, xff)); futures::stream::once(event) @@ -470,7 +489,10 @@ impl<'de, R: JsonRead<'de>> EventIterator<'de, R> { de.extract(log, &mut json); } - emit!(&SplunkHecEventReceived); + emit!(&EventsReceived { + count: 1, + byte_size: event.size_of(), + }); self.events += 1; Ok(event) @@ -491,7 +513,7 @@ impl<'de, R: JsonRead<'de>> Iterator for EventIterator<'de, R> { } } Some(Err(error)) => { - emit!(&SplunkHecRequestBodyInvalid { + emit!(&SplunkHecRequestBodyInvalidError { error: error.into() }); Some(Err( @@ -599,7 +621,7 @@ fn raw_event( Ok(0) => return Err(ApiError::NoData.into()), Ok(_) => Value::from(Bytes::from(data)), Err(error) => { - emit!(&SplunkHecRequestBodyInvalid { error }); + emit!(&SplunkHecRequestBodyInvalidError { error }); return Err(ApiError::InvalidDataFormat { event: 0 }.into()); } } @@ -634,7 +656,10 @@ fn raw_event( .as_mut_log() .try_insert(log_schema().source_type_key(), Bytes::from("splunk_hec")); - emit!(&SplunkHecEventReceived); + emit!(&EventsReceived { + count: 1, + byte_size: event.size_of(), + }); Ok(event) } @@ -772,7 +797,11 @@ mod tests { util::{encoding::EncodingConfig, BatchConfig, Compression, TowerRequestConfig}, Healthcheck, VectorSink, }, - test_util::{collect_n, next_addr, trace_init, wait_for_tcp}, + test_util::{ + collect_n, + components::{self, HTTP_PUSH_SOURCE_TAGS, SOURCE_TESTS}, + next_addr, wait_for_tcp, + }, Pipeline, }; use chrono::{TimeZone, Utc}; @@ -796,6 +825,7 @@ mod tests { token: Option<String>, valid_tokens: Option<&[&str]>, ) -> (mpsc::Receiver<Event>, SocketAddr) { + components::init_test();
let (sender, recv) = Pipeline::new_test(); let address = next_addr(); let valid_tokens = @@ -865,6 +895,7 @@ mod tests { }); let events = collect_n(source, n).await; + SOURCE_TESTS.assert(&HTTP_PUSH_SOURCE_TAGS); assert_eq!(n, events.len()); events @@ -925,8 +956,6 @@ mod tests { #[tokio::test] async fn no_compression_text_event() { - trace_init(); - let message = "gzip_text_event"; let (sink, source) = start(Encoding::Text, Compression::None).await; @@ -942,8 +971,6 @@ mod tests { #[tokio::test] async fn one_simple_text_event() { - trace_init(); - let message = "one_simple_text_event"; let (sink, source) = start(Encoding::Text, Compression::gzip_default()).await; @@ -959,8 +986,6 @@ mod tests { #[tokio::test] async fn multiple_simple_text_event() { - trace_init(); - let n = 200; let (sink, source) = start(Encoding::Text, Compression::None).await; @@ -981,8 +1006,6 @@ mod tests { #[tokio::test] async fn one_simple_json_event() { - trace_init(); - let message = "one_simple_json_event"; let (sink, source) = start(Encoding::Json, Compression::gzip_default()).await; @@ -998,8 +1021,6 @@ mod tests { #[tokio::test] async fn multiple_simple_json_event() { - trace_init(); - let n = 200; let (sink, source) = start(Encoding::Json, Compression::gzip_default()).await; @@ -1020,8 +1041,6 @@ mod tests { #[tokio::test] async fn json_event() { - trace_init(); - let (sink, source) = start(Encoding::Json, Compression::gzip_default()).await; let mut event = Event::new_empty_log(); @@ -1041,8 +1060,6 @@ mod tests { #[tokio::test] async fn line_to_message() { - trace_init(); - let (sink, source) = start(Encoding::Json, Compression::gzip_default()).await; let mut event = Event::new_empty_log(); @@ -1055,14 +1072,13 @@ mod tests { #[tokio::test] async fn raw() { - trace_init(); - let message = "raw"; let (source, address) = source().await; assert_eq!(200, post(address, "services/collector/raw", message).await); let event = collect_n(source, 1).await.remove(0); + SOURCE_TESTS.assert(&HTTP_PUSH_SOURCE_TAGS); assert_eq!(event.as_log()[log_schema().message_key()], message.into()); assert_eq!(event.as_log()[&super::CHANNEL], "channel".into()); assert!(event.as_log().get(log_schema().timestamp_key()).is_some()); @@ -1074,8 +1090,6 @@ mod tests { #[tokio::test] async fn channel_header() { - trace_init(); - let message = "raw"; let (source, address) = source().await; @@ -1090,13 +1104,12 @@ mod tests { ); let event = collect_n(source, 1).await.remove(0); + SOURCE_TESTS.assert(&HTTP_PUSH_SOURCE_TAGS); assert_eq!(event.as_log()[&super::CHANNEL], "guid".into()); } #[tokio::test] async fn xff_header_raw() { - trace_init(); - let message = "raw"; let (source, address) = source().await; @@ -1111,14 +1124,13 @@ mod tests { ); let event = collect_n(source, 1).await.remove(0); + SOURCE_TESTS.assert(&HTTP_PUSH_SOURCE_TAGS); assert_eq!(event.as_log()[log_schema().host_key()], "10.0.0.1".into()); } // Test helps to illustrate that a payload's `host` value should override an x-forwarded-for header #[tokio::test] async fn xff_header_event_with_host_field() { - trace_init(); - let message = r#"{"event":"first", "host": "10.1.0.2"}"#; let (source, address) = source().await; @@ -1133,14 +1145,13 @@ mod tests { ); let event = collect_n(source, 1).await.remove(0); + SOURCE_TESTS.assert(&HTTP_PUSH_SOURCE_TAGS); assert_eq!(event.as_log()[log_schema().host_key()], "10.1.0.2".into()); } // Test helps to illustrate that a payload's `host` value should override an x-forwarded-for header #[tokio::test] async fn 
xff_header_event_without_host_field() { - trace_init(); - let message = r#"{"event":"first", "color": "blue"}"#; let (source, address) = source().await; @@ -1155,13 +1166,12 @@ mod tests { ); let event = collect_n(source, 1).await.remove(0); + SOURCE_TESTS.assert(&HTTP_PUSH_SOURCE_TAGS); assert_eq!(event.as_log()[log_schema().host_key()], "10.0.0.1".into()); } #[tokio::test] async fn channel_query_param() { - trace_init(); - let message = "raw"; let (source, address) = source().await; @@ -1176,13 +1186,12 @@ mod tests { ); let event = collect_n(source, 1).await.remove(0); + SOURCE_TESTS.assert(&HTTP_PUSH_SOURCE_TAGS); assert_eq!(event.as_log()[&super::CHANNEL], "guid".into()); } #[tokio::test] async fn no_data() { - trace_init(); - let (_source, address) = source().await; assert_eq!(400, post(address, "services/collector/event", "").await); @@ -1190,8 +1199,6 @@ mod tests { #[tokio::test] async fn invalid_token() { - trace_init(); - let (_source, address) = source().await; let opts = SendWithOpts { channel: Some(Channel::Header("channel")), @@ -1206,8 +1213,6 @@ mod tests { #[tokio::test] async fn secondary_token() { - trace_init(); - let message = r#"{"event":"first", "color": "blue"}"#; let (_source, address) = source_with(None, Some(VALID_TOKENS)).await; let options = SendWithOpts { @@ -1226,12 +1231,11 @@ mod tests { ) .await ); + SOURCE_TESTS.assert(&HTTP_PUSH_SOURCE_TAGS); } #[tokio::test] async fn no_authorization() { - trace_init(); - let message = "no_authorization"; let (source, address) = source_with(None, None).await; let (sink, health) = sink(address, Encoding::Text, Compression::gzip_default()).await; @@ -1239,13 +1243,12 @@ mod tests { let event = channel_n(vec![message], sink, source).await.remove(0); + SOURCE_TESTS.assert(&HTTP_PUSH_SOURCE_TAGS); assert_eq!(event.as_log()[log_schema().message_key()], message.into()); } #[tokio::test] async fn partial() { - trace_init(); - let message = r#"{"event":"first"}{"event":"second""#; let (source, address) = source().await; @@ -1255,6 +1258,7 @@ mod tests { ); let event = collect_n(source, 1).await.remove(0); + SOURCE_TESTS.assert(&HTTP_PUSH_SOURCE_TAGS); assert_eq!(event.as_log()[log_schema().message_key()], "first".into()); assert!(event.as_log().get(log_schema().timestamp_key()).is_some()); assert_eq!( @@ -1265,8 +1269,6 @@ mod tests { #[tokio::test] async fn handles_newlines() { - trace_init(); - let message = r#" {"event":"first"} "#; @@ -1278,6 +1280,7 @@ mod tests { ); let event = collect_n(source, 1).await.remove(0); + SOURCE_TESTS.assert(&HTTP_PUSH_SOURCE_TAGS); assert_eq!(event.as_log()[log_schema().message_key()], "first".into()); assert!(event.as_log().get(log_schema().timestamp_key()).is_some()); assert_eq!( @@ -1288,8 +1291,6 @@ mod tests { #[tokio::test] async fn handles_spaces() { - trace_init(); - let message = r#" {"event":"first"} "#; let (source, address) = source().await; @@ -1299,6 +1300,7 @@ mod tests { ); let event = collect_n(source, 1).await.remove(0); + SOURCE_TESTS.assert(&HTTP_PUSH_SOURCE_TAGS); assert_eq!(event.as_log()[log_schema().message_key()], "first".into()); assert!(event.as_log().get(log_schema().timestamp_key()).is_some()); assert_eq!( @@ -1309,8 +1311,6 @@ mod tests { #[tokio::test] async fn default() { - trace_init(); - let message = r#"{"event":"first","source":"main"}{"event":"second"}{"event":"third","source":"secondary"}"#; let (source, address) = source().await; @@ -1321,6 +1321,7 @@ mod tests { let events = collect_n(source, 3).await; + SOURCE_TESTS.assert(&HTTP_PUSH_SOURCE_TAGS); 
assert_eq!( events[0].as_log()[log_schema().message_key()], "first".into() @@ -1379,13 +1380,12 @@ mod tests { /// https://github.com/seanmonstar/warp/pull/713 #[tokio::test] async fn host_test() { - trace_init(); - let message = "for the host"; let (sink, source) = start(Encoding::Text, Compression::gzip_default()).await; let event = channel_n(vec![message], sink, source).await.remove(0); + SOURCE_TESTS.assert(&HTTP_PUSH_SOURCE_TAGS); assert_eq!(event.as_log()[log_schema().message_key()], message.into()); assert!(event.as_log().get(log_schema().host_key()).is_none()); } diff --git a/src/sources/util/http/prelude.rs b/src/sources/util/http/prelude.rs index 2ecef98081289..4aaf85863d5d7 100644 --- a/src/sources/util/http/prelude.rs +++ b/src/sources/util/http/prelude.rs @@ -42,10 +42,7 @@ pub trait HttpSource: Clone + Send + Sync + 'static { cx: SourceContext, ) -> crate::Result<crate::sources::Source> { let tls = MaybeTlsSettings::from_config(tls, true)?; - let protocol = match &tls { - MaybeTlsSettings::Raw { .. } => "http", - MaybeTlsSettings::Tls { .. } => "https", - }; + let protocol = tls.http_protocol_name(); let auth = HttpSourceAuth::try_from(auth.as_ref())?; let path = path.to_owned(); let out = cx.out; diff --git a/src/test_util/components.rs b/src/test_util/components.rs index b70b3f326195e..e2d683f791b1b 100644 --- a/src/test_util/components.rs +++ b/src/test_util/components.rs @@ -19,8 +19,11 @@ thread_local!( static EVENTS_RECORDED: RefCell<HashSet<String>> = RefCell::new(Default::default()); ); -/// The standard set of tags for sources that communicate over HTTP. -pub const HTTP_SOURCE_TAGS: [&str; 2] = ["endpoint", "protocol"]; +/// The standard set of tags for sources that poll connections over HTTP. +pub const HTTP_PULL_SOURCE_TAGS: [&str; 2] = ["endpoint", "protocol"]; + +/// The standard set of tags for sources that accept connections over HTTP. +pub const HTTP_PUSH_SOURCE_TAGS: [&str; 2] = ["http_path", "protocol"]; /// The standard set of tags for all `TcpSource`-based sources. pub const TCP_SOURCE_TAGS: [&str; 2] = ["peer_addr", "protocol"]; diff --git a/src/tls/settings.rs b/src/tls/settings.rs index a1a67bfd5a407..07e5fea6c27ca 100644 --- a/src/tls/settings.rs +++ b/src/tls/settings.rs @@ -383,6 +383,13 @@ impl MaybeTlsSettings { } } } + + pub const fn http_protocol_name(&self) -> &'static str { + match self { + MaybeTls::Raw(_) => "http", + MaybeTls::Tls(_) => "https", + } + } } impl From for MaybeTlsSettings { diff --git a/website/cue/reference/components/sources/internal_metrics.cue b/website/cue/reference/components/sources/internal_metrics.cue index 29dede6c76623..9c6dfec1a2409 100644 --- a/website/cue/reference/components/sources/internal_metrics.cue +++ b/website/cue/reference/components/sources/internal_metrics.cue @@ -1148,6 +1148,7 @@ components: sources: internal_metrics: { required: true enum: { "delete_failed": "The file deletion failed." + "encode_failed": "The encode operation failed." "field_missing": "The event field was missing." "glob_failed": "The glob pattern match operation failed." "http_error": "The HTTP request resulted in an error code."
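The `http_protocol_name` helper added to `MaybeTlsSettings` in `src/tls/settings.rs` above centralizes a lookup that HTTP-based sources previously open-coded. Below is a minimal, self-contained sketch of the pattern; the generic `MaybeTls` enum here is a simplified stand-in assumed for illustration, not Vector's real definition:

```rust
// Illustrative sketch only: a reduced `MaybeTls` carrying the const lookup from this patch.
#[allow(dead_code)]
enum MaybeTls<R, T> {
    Raw(R), // plain (non-TLS) connection settings
    Tls(T), // TLS-enabled settings
}

impl<R, T> MaybeTls<R, T> {
    // Mirrors the helper added to `MaybeTlsSettings`: cheap, const-evaluable,
    // and usable anywhere a `protocol` tag needs to be emitted.
    const fn http_protocol_name(&self) -> &'static str {
        match self {
            MaybeTls::Raw(_) => "http",
            MaybeTls::Tls(_) => "https",
        }
    }
}

fn main() {
    let settings: MaybeTls<(), ()> = MaybeTls::Raw(());
    assert_eq!(settings.http_protocol_name(), "http");
}
```

With this in place, any `HttpSource` implementation derives its `protocol` tag from a single call, as the `prelude.rs` hunk above now does.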
diff --git a/website/cue/reference/components/sources/splunk_hec.cue b/website/cue/reference/components/sources/splunk_hec.cue index b064f4f99f011..f7ebac6d1b5c3 100644 --- a/website/cue/reference/components/sources/splunk_hec.cue +++ b/website/cue/reference/components/sources/splunk_hec.cue @@ -112,9 +112,12 @@ components: sources: splunk_hec: { } telemetry: metrics: { - events_in_total: components.sources.internal_metrics.output.metrics.events_in_total - http_request_errors_total: components.sources.internal_metrics.output.metrics.http_request_errors_total - component_received_events_total: components.sources.internal_metrics.output.metrics.component_received_events_total - requests_received_total: components.sources.internal_metrics.output.metrics.requests_received_total + component_errors_total: components.sources.internal_metrics.output.metrics.component_errors_total + component_received_bytes_total: components.sources.internal_metrics.output.metrics.component_received_bytes_total + component_received_event_bytes_total: components.sources.internal_metrics.output.metrics.component_received_event_bytes_total + component_received_events_total: components.sources.internal_metrics.output.metrics.component_received_events_total + events_in_total: components.sources.internal_metrics.output.metrics.events_in_total + http_request_errors_total: components.sources.internal_metrics.output.metrics.http_request_errors_total + requests_received_total: components.sources.internal_metrics.output.metrics.requests_received_total } } From a0ca04c30316fa9771ab142ab08a2e52c2b76cdd Mon Sep 17 00:00:00 2001 From: Pablo Sichert Date: Sat, 16 Oct 2021 03:38:26 +0200 Subject: [PATCH 22/38] fix(codecs): Add precedence for event data over metadata (#9641) * Add precedence for event data over metadata Signed-off-by: Pablo Sichert * Fix `try_insert_flat` and test `*_insert_*` functions Signed-off-by: Pablo Sichert * Do not include timestamp in `bytes` parser Signed-off-by: Pablo Sichert * Take precendence for datadog timestamp rather than new timestamp Signed-off-by: Pablo Sichert * Fix `bytes` parser test assertion Signed-off-by: Pablo Sichert * Explicitly add timestamp in HTTP source Signed-off-by: Pablo Sichert * Add source type and timestamp to all sources Signed-off-by: Pablo Sichert --- lib/vector-core/src/event/log_event.rs | 119 ++++++++++++++++++- src/codecs/parsers/bytes.rs | 12 +- src/sources/aws_kinesis_firehose/handlers.rs | 10 +- src/sources/aws_kinesis_firehose/mod.rs | 2 + src/sources/aws_s3/sqs.rs | 1 + src/sources/datadog/agent.rs | 16 +-- src/sources/exec/mod.rs | 17 ++- src/sources/fluent.rs | 2 +- src/sources/generator.rs | 13 +- src/sources/heroku_logs.rs | 14 ++- src/sources/http.rs | 15 +-- src/sources/internal_logs.rs | 4 + src/sources/kafka.rs | 14 +-- src/sources/kubernetes_logs/mod.rs | 14 +-- src/sources/logstash.rs | 7 +- src/sources/nats.rs | 8 +- src/sources/socket/tcp.rs | 14 ++- src/sources/socket/udp.rs | 13 +- src/sources/socket/unix.rs | 12 +- src/sources/stdin.rs | 8 +- 20 files changed, 225 insertions(+), 90 deletions(-) diff --git a/lib/vector-core/src/event/log_event.rs b/lib/vector-core/src/event/log_event.rs index b480fd3e74313..fd8a2d830240c 100644 --- a/lib/vector-core/src/event/log_event.rs +++ b/lib/vector-core/src/event/log_event.rs @@ -113,6 +113,14 @@ impl LogEvent { util::log::insert(self.as_map_mut(), key.as_ref(), value.into()) } + #[instrument(level = "trace", skip(self, key), fields(key = %key.as_ref()))] + pub fn try_insert(&mut self, key: impl AsRef, value: 
impl Into + Debug) { + let key = key.as_ref(); + if !self.contains(key) { + self.insert(key, value); + } + } + #[instrument(level = "trace", skip(self, key), fields(key = ?key))] pub fn insert_path(&mut self, key: Vec, value: V) -> Option where @@ -149,19 +157,19 @@ impl LogEvent { /// pathing information in the key. It will insert over the top of any value /// that exists in the map already. #[instrument(level = "trace", skip(self, key), fields(key = %key))] - pub fn insert_flat(&mut self, key: K, value: V) + pub fn insert_flat(&mut self, key: K, value: V) -> Option where K: Into + Display, V: Into + Debug, { - self.as_map_mut().insert(key.into(), value.into()); + self.as_map_mut().insert(key.into(), value.into()) } #[instrument(level = "trace", skip(self, key), fields(key = %key.as_ref()))] - pub fn try_insert(&mut self, key: impl AsRef, value: impl Into + Debug) { + pub fn try_insert_flat(&mut self, key: impl AsRef, value: impl Into + Debug) { let key = key.as_ref(); - if !self.contains(key) { - self.insert(key, value); + if !self.as_map().contains_key(key) { + self.insert_flat(key, value); } } @@ -549,6 +557,107 @@ mod test { assert_eq!(expected_fields, actual_fields); } + #[test] + fn insert() { + let mut log = LogEvent::default(); + + let old = log.insert("foo", "foo"); + + assert_eq!(log.get("foo"), Some(&"foo".into())); + assert_eq!(old, None); + } + + #[test] + fn insert_existing() { + let mut log = LogEvent::default(); + log.insert("foo", "foo"); + + let old = log.insert("foo", "bar"); + + assert_eq!(log.get("foo"), Some(&"bar".into())); + assert_eq!(old, Some("foo".into())); + } + + #[test] + fn try_insert() { + let mut log = LogEvent::default(); + + log.try_insert("foo", "foo"); + + assert_eq!(log.get("foo"), Some(&"foo".into())); + } + + #[test] + fn try_insert_existing() { + let mut log = LogEvent::default(); + log.insert("foo", "foo"); + + log.try_insert("foo", "bar"); + + assert_eq!(log.get("foo"), Some(&"foo".into())); + } + + #[test] + fn try_insert_dotted() { + let mut log = LogEvent::default(); + + log.try_insert("foo.bar", "foo"); + + assert_eq!(log.get("foo.bar"), Some(&"foo".into())); + assert_eq!(log.get_flat("foo.bar"), None); + } + + #[test] + fn try_insert_existing_dotted() { + let mut log = LogEvent::default(); + log.insert("foo.bar", "foo"); + + log.try_insert("foo.bar", "bar"); + + assert_eq!(log.get("foo.bar"), Some(&"foo".into())); + assert_eq!(log.get_flat("foo.bar"), None); + } + + #[test] + fn try_insert_flat() { + let mut log = LogEvent::default(); + + log.try_insert_flat("foo", "foo"); + + assert_eq!(log.get_flat("foo"), Some(&"foo".into())); + } + + #[test] + fn try_insert_flat_existing() { + let mut log = LogEvent::default(); + log.insert_flat("foo", "foo"); + + log.try_insert_flat("foo", "bar"); + + assert_eq!(log.get_flat("foo"), Some(&"foo".into())); + } + + #[test] + fn try_insert_flat_dotted() { + let mut log = LogEvent::default(); + + log.try_insert_flat("foo.bar", "foo"); + + assert_eq!(log.get_flat("foo.bar"), Some(&"foo".into())); + assert_eq!(log.get("foo.bar"), None); + } + + #[test] + fn try_insert_flat_existing_dotted() { + let mut log = LogEvent::default(); + log.insert_flat("foo.bar", "foo"); + + log.try_insert_flat("foo.bar", "bar"); + + assert_eq!(log.get_flat("foo.bar"), Some(&"foo".into())); + assert_eq!(log.get("foo.bar"), None); + } + // This test iterates over the `tests/data/fixtures/log_event` folder and: // // * Ensures the EventLog parsed from bytes and turned into a diff --git a/src/codecs/parsers/bytes.rs 
b/src/codecs/parsers/bytes.rs index 3a2f871397b02..a08fdb6b9b76e 100644 --- a/src/codecs/parsers/bytes.rs +++ b/src/codecs/parsers/bytes.rs @@ -1,6 +1,7 @@ use crate::{ codecs::{BoxedParser, Parser, ParserConfig}, - event::Event, + config::log_schema, + event::{Event, LogEvent}, }; use bytes::Bytes; use serde::{Deserialize, Serialize}; @@ -40,11 +41,9 @@ impl BytesParser { impl Parser for BytesParser { fn parse(&self, bytes: Bytes) -> crate::Result> { - // Currently, the `From` implementation from `Bytes` to `Event` adds a - // timestamp to the event. This is not strictly related to parsing and - // should probably better be done explicitly in another place. However, - // many parts in Vector rely on this behavior, so we just leave it here. - Ok(smallvec![bytes.into()]) + let mut log = LogEvent::default(); + log.insert(log_schema().message_key(), bytes); + Ok(smallvec![log.into()]) } } @@ -65,7 +64,6 @@ mod tests { let event = events.next().unwrap(); let log = event.as_log(); assert_eq!(log[log_schema().message_key()], "foo".into()); - assert!(log.get(log_schema().timestamp_key()).is_some()); } assert_eq!(events.next(), None); diff --git a/src/sources/aws_kinesis_firehose/handlers.rs b/src/sources/aws_kinesis_firehose/handlers.rs index 31401ad4ce6a5..c5a130e096f81 100644 --- a/src/sources/aws_kinesis_firehose/handlers.rs +++ b/src/sources/aws_kinesis_firehose/handlers.rs @@ -47,9 +47,13 @@ pub async fn firehose( for mut event in events { if let Event::Log(ref mut log) = event { - log.insert(log_schema().timestamp_key(), request.timestamp); - log.insert("request_id", request_id.to_string()); - log.insert("source_arn", source_arn.to_string()); + log.try_insert( + log_schema().source_type_key(), + Bytes::from("aws_kinesis_firehose"), + ); + log.try_insert(log_schema().timestamp_key(), request.timestamp); + log.try_insert_flat("request_id", request_id.to_string()); + log.try_insert_flat("source_arn", source_arn.to_string()); } out.send(event) diff --git a/src/sources/aws_kinesis_firehose/mod.rs b/src/sources/aws_kinesis_firehose/mod.rs index ecb01e84b0b2f..0e6c07b3dad48 100644 --- a/src/sources/aws_kinesis_firehose/mod.rs +++ b/src/sources/aws_kinesis_firehose/mod.rs @@ -347,6 +347,7 @@ mod tests { assert_event_data_eq!( events, vec![log_event! { + "source_type" => Bytes::from("aws_kinesis_firehose"), "timestamp" => timestamp.trunc_subsecs(3), // AWS sends timestamps as ms "message"=> Bytes::from(expected), "request_id" => request_id, @@ -410,6 +411,7 @@ mod tests { assert_event_data_eq!( events, vec![log_event! 
{ + "source_type" => Bytes::from("aws_kinesis_firehose"), "timestamp" => timestamp.trunc_subsecs(3), // AWS sends timestamps as ms "message"=> record, "request_id" => request_id, diff --git a/src/sources/aws_s3/sqs.rs b/src/sources/aws_s3/sqs.rs index 8921e08eba225..c87411e3d51bc 100644 --- a/src/sources/aws_s3/sqs.rs +++ b/src/sources/aws_s3/sqs.rs @@ -445,6 +445,7 @@ impl IngestorProcess { log.insert_flat("bucket", bucket_name.clone()); log.insert_flat("object", object_key.clone()); log.insert_flat("region", aws_region.clone()); + log.insert_flat(log_schema().source_type_key(), Bytes::from("aws_s3")); log.insert_flat(log_schema().timestamp_key(), timestamp); if let Some(metadata) = &metadata { diff --git a/src/sources/datadog/agent.rs b/src/sources/datadog/agent.rs index f44a140a23882..7371c27ffd7e3 100644 --- a/src/sources/datadog/agent.rs +++ b/src/sources/datadog/agent.rs @@ -265,17 +265,17 @@ impl DatadogAgentSource { Ok(Some((events, _byte_size))) => { for mut event in events { if let Event::Log(ref mut log) = event { - log.insert_flat(self.log_schema_timestamp_key, now); - log.insert_flat( + log.try_insert_flat("status", message.status.clone()); + log.try_insert_flat("timestamp", message.timestamp); + log.try_insert_flat("hostname", message.hostname.clone()); + log.try_insert_flat("service", message.service.clone()); + log.try_insert_flat("ddsource", message.ddsource.clone()); + log.try_insert_flat("ddtags", message.ddtags.clone()); + log.try_insert_flat( self.log_schema_source_type_key, Bytes::from("datadog_agent"), ); - log.insert_flat("status", message.status.clone()); - log.insert_flat("timestamp", message.timestamp); - log.insert_flat("hostname", message.hostname.clone()); - log.insert_flat("service", message.service.clone()); - log.insert_flat("ddsource", message.ddsource.clone()); - log.insert_flat("ddtags", message.ddtags.clone()); + log.try_insert_flat(self.log_schema_timestamp_key, now); if let Some(k) = &api_key { log.metadata_mut().set_datadog_api_key(Some(Arc::clone(k))); } diff --git a/src/sources/exec/mod.rs b/src/sources/exec/mod.rs index 082c87474b8a5..7e5e805fee682 100644 --- a/src/sources/exec/mod.rs +++ b/src/sources/exec/mod.rs @@ -465,32 +465,29 @@ fn handle_event( event: &mut Event, ) { if let Event::Log(log) = event { - let timestamp_key = log_schema().timestamp_key(); - if !log.contains(timestamp_key) { - // Add timestamp - log.insert(timestamp_key, Utc::now()); - } + // Add timestamp + log.try_insert(log_schema().timestamp_key(), Utc::now()); // Add source type - log.insert(log_schema().source_type_key(), Bytes::from(EXEC)); + log.try_insert(log_schema().source_type_key(), Bytes::from(EXEC)); // Add data stream of stdin or stderr (if needed) if let Some(data_stream) = data_stream { - log.insert(STREAM_KEY, data_stream.clone()); + log.try_insert_flat(STREAM_KEY, data_stream.clone()); } // Add pid (if needed) if let Some(pid) = pid { - log.insert(PID_KEY, pid as i64); + log.try_insert_flat(PID_KEY, pid as i64); } // Add hostname (if needed) if let Some(hostname) = hostname { - log.insert(log_schema().host_key(), hostname.clone()); + log.try_insert(log_schema().host_key(), hostname.clone()); } // Add command - log.insert(COMMAND_KEY, config.command.clone()); + log.try_insert_flat(COMMAND_KEY, config.command.clone()); } } diff --git a/src/sources/fluent.rs b/src/sources/fluent.rs index 42d344292c9f6..7ef9fc360db34 100644 --- a/src/sources/fluent.rs +++ b/src/sources/fluent.rs @@ -372,7 +372,7 @@ impl From for LogEvent { log.insert(log_schema().timestamp_key(), 
timestamp); log.insert("tag", tag); for (key, value) in record.into_iter() { - log.insert_flat(key, value) + log.insert_flat(key, value); } log } diff --git a/src/sources/generator.rs b/src/sources/generator.rs index 556719438d1e4..c1c7e4499c894 100644 --- a/src/sources/generator.rs +++ b/src/sources/generator.rs @@ -1,12 +1,14 @@ use crate::{ codecs::{self, DecodingConfig, FramingConfig, ParserConfig}, - config::{DataType, SourceConfig, SourceContext, SourceDescription}, + config::{log_schema, DataType, SourceConfig, SourceContext, SourceDescription}, internal_events::GeneratorEventProcessed, serde::{default_decoding, default_framing_message_based}, shutdown::ShutdownSignal, sources::util::TcpError, Pipeline, }; +use bytes::Bytes; +use chrono::Utc; use fakedata::logs::*; use futures::{SinkExt, StreamExt}; use rand::seq::SliceRandom; @@ -156,7 +158,14 @@ async fn generator_source( while let Some(next) = stream.next().await { match next { Ok((events, _byte_size)) => { - for event in events { + let now = Utc::now(); + + for mut event in events { + let log = event.as_mut_log(); + + log.try_insert(log_schema().source_type_key(), Bytes::from("generator")); + log.try_insert(log_schema().timestamp_key(), now); + out.send(event) .await .map_err(|_: crate::pipeline::ClosedError| { diff --git a/src/sources/heroku_logs.rs b/src/sources/heroku_logs.rs index 2d8f448bd5770..a7d97e3b9d7de 100644 --- a/src/sources/heroku_logs.rs +++ b/src/sources/heroku_logs.rs @@ -218,13 +218,13 @@ fn line_to_events(mut decoder: codecs::Decoder, line: String) -> SmallVec<[Event for mut event in decoded { if let Event::Log(ref mut log) = event { if let Ok(ts) = timestamp.parse::>() { - log.insert(log_schema().timestamp_key(), ts); + log.try_insert(log_schema().timestamp_key(), ts); } - log.insert(log_schema().host_key(), hostname.to_owned()); + log.try_insert(log_schema().host_key(), hostname.to_owned()); - log.insert("app_name", app_name.to_owned()); - log.insert("proc_id", proc_id.to_owned()); + log.try_insert_flat("app_name", app_name.to_owned()); + log.try_insert_flat("proc_id", proc_id.to_owned()); } events.push(event); @@ -248,10 +248,12 @@ fn line_to_events(mut decoder: codecs::Decoder, line: String) -> SmallVec<[Event events.push(Event::from(line)) }; + let now = Utc::now(); + for event in &mut events { if let Event::Log(log) = event { - // Add source type - log.insert_flat(log_schema().source_type_key(), Bytes::from("heroku_logs")); + log.try_insert(log_schema().source_type_key(), Bytes::from("heroku_logs")); + log.try_insert(log_schema().timestamp_key(), now); } } diff --git a/src/sources/http.rs b/src/sources/http.rs index 5a04db8d9c7aa..a67e8d2256d9c 100644 --- a/src/sources/http.rs +++ b/src/sources/http.rs @@ -15,6 +15,7 @@ use crate::{ tls::TlsConfig, }; use bytes::{Bytes, BytesMut}; +use chrono::Utc; use http::StatusCode; use serde::{Deserialize, Serialize}; use std::{collections::HashMap, net::SocketAddr}; @@ -114,12 +115,12 @@ impl HttpSource for SimpleHttpSource { add_query_parameters(&mut events, &self.query_parameters, query_parameters); add_path(&mut events, self.path_key.as_str(), request_path); - // Add source type - let key_source_type = log_schema().source_type_key(); + let now = Utc::now(); for event in &mut events { - event - .as_mut_log() - .try_insert(key_source_type, Bytes::from("http")); + let log = event.as_mut_log(); + + log.try_insert(log_schema().source_type_key(), Bytes::from("http")); + log.try_insert(log_schema().timestamp_key(), now); } Ok(events) @@ -200,7 +201,7 @@ fn 
add_path(events: &mut [Event], key: &str, path: &str) { for event in events.iter_mut() { event .as_mut_log() - .insert(key, Value::from(path.to_string())); + .try_insert(key, Value::from(path.to_string())); } } @@ -209,7 +210,7 @@ fn add_headers(events: &mut [Event], headers_config: &[String], headers: HeaderM let value = headers.get(header_name).map(HeaderValue::as_bytes); for event in events.iter_mut() { - event.as_mut_log().insert( + event.as_mut_log().try_insert_flat( header_name as &str, Value::from(value.map(Bytes::copy_from_slice)), ); diff --git a/src/sources/internal_logs.rs b/src/sources/internal_logs.rs index 73f2ff5c9ba03..7d232bd4dd0cf 100644 --- a/src/sources/internal_logs.rs +++ b/src/sources/internal_logs.rs @@ -4,6 +4,8 @@ use crate::{ shutdown::ShutdownSignal, trace, Pipeline, }; +use bytes::Bytes; +use chrono::Utc; use futures::{stream, SinkExt, StreamExt}; use serde::{Deserialize, Serialize}; use tokio::sync::broadcast::error::RecvError; @@ -62,6 +64,8 @@ async fn run( log.insert(host_key.clone(), hostname.to_owned()); } log.insert(pid_key.clone(), pid); + log.try_insert(log_schema().source_type_key(), Bytes::from("internal_logs")); + log.try_insert(log_schema().timestamp_key(), Utc::now()); Ok(Event::from(log)) })) .await?; diff --git a/src/sources/kafka.rs b/src/sources/kafka.rs index 11f151bf240a0..46b49f2a6bae3 100644 --- a/src/sources/kafka.rs +++ b/src/sources/kafka.rs @@ -228,13 +228,13 @@ async fn kafka_source( Ok((mut events, _)) => { let mut event = events.pop().expect("event must exist"); if let Event::Log(ref mut log) = event { - log.insert(schema.source_type_key(), Bytes::from("kafka")); - log.insert(schema.timestamp_key(), timestamp); - log.insert(key_field, msg_key.clone()); - log.insert(topic_key, Value::from(msg_topic.clone())); - log.insert(partition_key, Value::from(msg_partition)); - log.insert(offset_key, Value::from(msg_offset)); - log.insert(headers_key, Value::from(headers_map.clone())); + log.try_insert(schema.source_type_key(), Bytes::from("kafka")); + log.try_insert(schema.timestamp_key(), timestamp); + log.try_insert(key_field, msg_key.clone()); + log.try_insert(topic_key, Value::from(msg_topic.clone())); + log.try_insert(partition_key, Value::from(msg_partition)); + log.try_insert(offset_key, Value::from(msg_offset)); + log.try_insert(headers_key, Value::from(headers_map.clone())); } Some(Some(Ok(event))) diff --git a/src/sources/kubernetes_logs/mod.rs b/src/sources/kubernetes_logs/mod.rs index 89bb23975ca04..99b910e3daaf4 100644 --- a/src/sources/kubernetes_logs/mod.rs +++ b/src/sources/kubernetes_logs/mod.rs @@ -14,14 +14,15 @@ use crate::kubernetes as k8s; use crate::kubernetes::hash_value::HashKey; use crate::{ config::{ - ComponentKey, DataType, GenerateConfig, GlobalOptions, ProxyConfig, SourceConfig, - SourceContext, SourceDescription, + log_schema, ComponentKey, DataType, GenerateConfig, GlobalOptions, ProxyConfig, + SourceConfig, SourceContext, SourceDescription, }, shutdown::ShutdownSignal, sources, transforms::{FunctionTransform, TaskTransform}, }; use bytes::Bytes; +use chrono::Utc; use file_source::{ Checkpointer, FileServer, FileServerShutdown, FingerprintStrategy, Fingerprinter, Line, ReadFrom, @@ -494,19 +495,18 @@ fn create_event(line: Bytes, file: &str, ingestion_timestamp_field: Option<&str> let mut event = LogEvent::from(line); // Add source type. 
- event.insert( - crate::config::log_schema().source_type_key(), - COMPONENT_ID.to_owned(), - ); + event.insert(log_schema().source_type_key(), COMPONENT_ID.to_owned()); // Add file. event.insert(FILE_KEY, file.to_owned()); // Add ingestion timestamp if requested. if let Some(ingestion_timestamp_field) = ingestion_timestamp_field { - event.insert(ingestion_timestamp_field, chrono::Utc::now()); + event.insert(ingestion_timestamp_field, Utc::now()); } + event.try_insert(log_schema().timestamp_key(), Utc::now()); + event.into() } diff --git a/src/sources/logstash.rs b/src/sources/logstash.rs index ca766a555b8eb..f6bdcba61aed4 100644 --- a/src/sources/logstash.rs +++ b/src/sources/logstash.rs @@ -105,13 +105,11 @@ impl TcpSource for LogstashSource { let now = Value::from(chrono::Utc::now()); for event in events { let log = event.as_mut_log(); - if log.get(log_schema().host_key()).is_none() { - log.insert(log_schema().host_key(), host.clone()); - } + log.try_insert(log_schema().source_type_key(), "logstash"); if log.get(log_schema().timestamp_key()).is_none() { // Attempt to parse @timestamp if it exists; otherwise set to receipt time. let timestamp = log - .get("@timestamp") + .get_flat("@timestamp") .and_then(|timestamp| { self.timestamp_converter .convert::(timestamp.as_bytes()) @@ -120,6 +118,7 @@ impl TcpSource for LogstashSource { .unwrap_or_else(|| now.clone()); log.insert(log_schema().timestamp_key(), timestamp); } + log.try_insert(log_schema().host_key(), host.clone()); } } } diff --git a/src/sources/nats.rs b/src/sources/nats.rs index 656b854d9f9af..1fcb6176208b0 100644 --- a/src/sources/nats.rs +++ b/src/sources/nats.rs @@ -136,12 +136,12 @@ async fn nats_source( count: events.len() }); + let now = Utc::now(); + for mut event in events { if let Event::Log(ref mut log) = event { - log.insert(log_schema().timestamp_key(), Utc::now()); - - // Add source type - log.insert(log_schema().source_type_key(), Bytes::from("nats")); + log.try_insert(log_schema().source_type_key(), Bytes::from("nats")); + log.try_insert(log_schema().timestamp_key(), now); } out.send(event) diff --git a/src/sources/socket/tcp.rs b/src/sources/socket/tcp.rs index 6599b711aa68f..26aa13b36981f 100644 --- a/src/sources/socket/tcp.rs +++ b/src/sources/socket/tcp.rs @@ -1,5 +1,6 @@ use crate::{ codecs::{self, FramingConfig, ParserConfig}, + config::log_schema, event::Event, internal_events::{SocketEventsReceived, SocketMode}, serde::default_decoding, @@ -8,6 +9,7 @@ use crate::{ tls::TlsConfig, }; use bytes::Bytes; +use chrono::Utc; use getset::{CopyGetters, Getters, Setters}; use serde::{Deserialize, Serialize}; use smallvec::SmallVec; @@ -108,17 +110,17 @@ impl TcpSource for RawTcpSource { count: events.len() }); + let now = Utc::now(); + for event in events { if let Event::Log(ref mut log) = event { - log.insert( - crate::config::log_schema().source_type_key(), - Bytes::from("socket"), - ); + log.try_insert(log_schema().source_type_key(), Bytes::from("socket")); + log.try_insert(log_schema().timestamp_key(), now); let host_key = (self.config.host_key.clone()) - .unwrap_or_else(|| crate::config::log_schema().host_key().to_string()); + .unwrap_or_else(|| log_schema().host_key().to_string()); - log.insert(host_key, host.clone()); + log.try_insert(host_key, host.clone()); } } } diff --git a/src/sources/socket/udp.rs b/src/sources/socket/udp.rs index c00676ceee45d..05ef65ac69ee0 100644 --- a/src/sources/socket/udp.rs +++ b/src/sources/socket/udp.rs @@ -1,5 +1,6 @@ use crate::{ codecs::{self, Decoder, FramingConfig, 
ParserConfig}, + config::log_schema, event::Event, internal_events::{SocketEventsReceived, SocketMode, SocketReceiveError}, serde::{default_decoding, default_framing_message_based}, @@ -8,6 +9,7 @@ use crate::{ udp, Pipeline, }; use bytes::{Bytes, BytesMut}; +use chrono::Utc; use futures::{SinkExt, StreamExt}; use getset::{CopyGetters, Getters}; use serde::{Deserialize, Serialize}; @@ -105,14 +107,13 @@ pub fn udp( count: events.len() }); + let now = Utc::now(); + for mut event in events { if let Event::Log(ref mut log) = event { - log.insert( - crate::config::log_schema().source_type_key(), - Bytes::from("socket"), - ); - - log.insert(host_key.clone(), address.to_string()); + log.try_insert(log_schema().source_type_key(), Bytes::from("socket")); + log.try_insert(log_schema().timestamp_key(), now); + log.try_insert(host_key.clone(), address.to_string()); } tokio::select!{ diff --git a/src/sources/socket/unix.rs b/src/sources/socket/unix.rs index b286b8c6321c1..cb2092f9214b2 100644 --- a/src/sources/socket/unix.rs +++ b/src/sources/socket/unix.rs @@ -1,5 +1,6 @@ use crate::{ codecs::{Decoder, FramingConfig, ParserConfig}, + config::log_schema, event::Event, internal_events::{SocketEventsReceived, SocketMode}, serde::default_decoding, @@ -11,6 +12,7 @@ use crate::{ Pipeline, }; use bytes::Bytes; +use chrono::Utc; use serde::{Deserialize, Serialize}; use std::path::PathBuf; @@ -52,16 +54,16 @@ fn handle_events( count: events.len() }); + let now = Utc::now(); + for event in events { let log = event.as_mut_log(); - log.insert( - crate::config::log_schema().source_type_key(), - Bytes::from("socket"), - ); + log.try_insert(log_schema().source_type_key(), Bytes::from("socket")); + log.try_insert(log_schema().timestamp_key(), now); if let Some(ref host) = received_from { - log.insert(host_key, host.clone()); + log.try_insert(host_key, host.clone()); } } } diff --git a/src/sources/stdin.rs b/src/sources/stdin.rs index 1ccf71e803520..6a38b3feb4f74 100644 --- a/src/sources/stdin.rs +++ b/src/sources/stdin.rs @@ -9,6 +9,7 @@ use crate::{ }; use async_stream::stream; use bytes::Bytes; +use chrono::Utc; use futures::{channel::mpsc, executor, SinkExt, StreamExt}; use serde::{Deserialize, Serialize}; use std::{io, thread}; @@ -125,13 +126,16 @@ where count: events.len() }); + let now = Utc::now(); + for mut event in events { let log = event.as_mut_log(); - log.insert(log_schema().source_type_key(), Bytes::from("stdin")); + log.try_insert(log_schema().source_type_key(), Bytes::from("stdin")); + log.try_insert(log_schema().timestamp_key(), now); if let Some(hostname) = &hostname { - log.insert(&host_key, hostname.clone()); + log.try_insert(&host_key, hostname.clone()); } yield event; From 7e3275bc2f882adbaa1ac04adaaa56e2968d93d4 Mon Sep 17 00:00:00 2001 From: Fred Moyer Date: Sat, 16 Oct 2021 08:14:59 -0700 Subject: [PATCH 23/38] Ruy => Ruby (#9649) Signed-off-by: Fred Moyer --- website/cue/reference/components/sources/file.cue | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/cue/reference/components/sources/file.cue b/website/cue/reference/components/sources/file.cue index 0c6246e93c552..8b7dbb3904155 100644 --- a/website/cue/reference/components/sources/file.cue +++ b/website/cue/reference/components/sources/file.cue @@ -437,7 +437,7 @@ components: sources: file: { """ sub_sections: [ { - title: "Example 1: Ruy Exceptions" + title: "Example 1: Ruby Exceptions" body: #""" Ruby exceptions, when logged, consist of multiple lines: From 806e2a381f77ebf4d85d83f3e46b061f0fc41d99 Mon Sep 
17 00:00:00 2001 From: Luc Perkins Date: Mon, 18 Oct 2021 07:56:53 -0700 Subject: [PATCH 24/38] Fix dark mode SVG issue (#9653) Signed-off-by: Luc Perkins --- website/assets/sass/unpurged.sass | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/website/assets/sass/unpurged.sass b/website/assets/sass/unpurged.sass index 92e8a91318d75..cb9e5e7596e3c 100644 --- a/website/assets/sass/unpurged.sass +++ b/website/assets/sass/unpurged.sass @@ -42,11 +42,14 @@ svg html &.dark svg - [fill="#000000"] + [fill="#000000"], + [fill="black"] fill: $dark-mode-gray - [stroke="#000000"] + [stroke="#000000"], + [stroke="black"] stroke: $dark-mode-gray - [stop-color="#000000"] + [stop-color="#000000"], + [stop-color="black"] stop-color: $dark-mode-gray [fill="#FFFFFF"], [fill="#FFF"] From 0b8cd826ffb08ef274ef7cf2707fe609ac29edd6 Mon Sep 17 00:00:00 2001 From: Luc Perkins Date: Mon, 18 Oct 2021 07:57:42 -0700 Subject: [PATCH 25/38] Update dark mode highlight color (#9654) Signed-off-by: Luc Perkins --- website/tailwind.config.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/website/tailwind.config.js b/website/tailwind.config.js index e47eaa968f49d..8db256c4c47fe 100644 --- a/website/tailwind.config.js +++ b/website/tailwind.config.js @@ -113,9 +113,8 @@ module.exports = { 'border-bottom-color': theme('colors.gray.700'), }, code: { - color: theme('colors.primary', 'currentColor'), + color: theme('colors.gray.100', 'currentColor'), '&:not([class^="language-"])': { - color: theme('colors.gray.100'), 'background-color': theme('colors.gray.700'), } }, From 1d33a3a5414e175c6303a2d9661da7934f5ff0b4 Mon Sep 17 00:00:00 2001 From: Luc Perkins Date: Mon, 18 Oct 2021 08:00:11 -0700 Subject: [PATCH 26/38] Convert VRL code highlight alias to coffee (#9655) Signed-off-by: Luc Perkins --- website/content/en/docs/reference/vrl/_index.md | 2 +- .../reference/components/transforms/add_fields.cue | 2 +- .../cue/reference/components/transforms/add_tags.cue | 2 +- .../components/transforms/ansi_stripper.cue | 2 +- .../cue/reference/components/transforms/coercer.cue | 2 +- .../cue/reference/components/transforms/concat.cue | 2 +- .../reference/components/transforms/grok_parser.cue | 2 +- .../reference/components/transforms/json_parser.cue | 2 +- .../components/transforms/key_value_parser.cue | 2 +- .../components/transforms/logfmt_parser.cue | 2 +- .../reference/components/transforms/regex_parser.cue | 2 +- .../components/transforms/remove_fields.cue | 2 +- .../reference/components/transforms/remove_tags.cue | 2 +- .../components/transforms/rename_fields.cue | 2 +- .../cue/reference/components/transforms/split.cue | 2 +- .../reference/components/transforms/tokenizer.cue | 2 +- website/cue/reference/remap/concepts/event.cue | 4 ++-- .../651_unnecessary_error_coalescing_operation.cue | 2 +- .../cue/reference/remap/expressions/assignment.cue | 8 ++++---- .../reference/remap/expressions/function_call.cue | 12 ++++++------ website/cue/reference/remap/expressions/path.cue | 8 ++++---- 21 files changed, 33 insertions(+), 33 deletions(-) diff --git a/website/content/en/docs/reference/vrl/_index.md b/website/content/en/docs/reference/vrl/_index.md index 9daa9e79b2090..9f29e59eed050 100644 --- a/website/content/en/docs/reference/vrl/_index.md +++ b/website/content/en/docs/reference/vrl/_index.md @@ -92,7 +92,7 @@ Conditions can also be more multifaceted. 
This condition would filter out all events for which the `severity` field is `"info"`, the `status_code` field is greater than or equal to 400, and the `host` field isn't set: -```vrl +```coffee condition = '.severity != "info" && .status_code < 400 && exists(.host)' ``` diff --git a/website/cue/reference/components/transforms/add_fields.cue b/website/cue/reference/components/transforms/add_fields.cue index b21b5d3972f7e..2a8a1542e923d 100644 --- a/website/cue/reference/components/transforms/add_fields.cue +++ b/website/cue/reference/components/transforms/add_fields.cue @@ -31,7 +31,7 @@ components: transforms: add_fields: { """ \(add_fields._remap_deprecation_notice) - ```vrl + ```coffee .severity = "crit" .status = 200 .success_codes = [200, 201, 202, 204] diff --git a/website/cue/reference/components/transforms/add_tags.cue b/website/cue/reference/components/transforms/add_tags.cue index 396987dce7477..12db7ea3bbf9b 100644 --- a/website/cue/reference/components/transforms/add_tags.cue +++ b/website/cue/reference/components/transforms/add_tags.cue @@ -31,7 +31,7 @@ components: transforms: add_tags: { """ \(add_tags._remap_deprecation_notice) - ```vrl + ```coffee #".tag = "value""# ``` """, diff --git a/website/cue/reference/components/transforms/ansi_stripper.cue b/website/cue/reference/components/transforms/ansi_stripper.cue index 1f3a84f5a4f2b..e9d4f019a5108 100644 --- a/website/cue/reference/components/transforms/ansi_stripper.cue +++ b/website/cue/reference/components/transforms/ansi_stripper.cue @@ -34,7 +34,7 @@ components: transforms: ansi_stripper: { """ \(ansi_stripper._remap_deprecation_notice) - ```vrl + ```coffee .message = strip_ansi_escape_codes(.message) ``` """, diff --git a/website/cue/reference/components/transforms/coercer.cue b/website/cue/reference/components/transforms/coercer.cue index ff174bf6df238..a1158db730253 100644 --- a/website/cue/reference/components/transforms/coercer.cue +++ b/website/cue/reference/components/transforms/coercer.cue @@ -34,7 +34,7 @@ components: transforms: coercer: { """ \(coercer._remap_deprecation_notice) - ```vrl + ```coffee .bool = to_bool("false") .float = to_float("1.0") .int = to_int("1") diff --git a/website/cue/reference/components/transforms/concat.cue b/website/cue/reference/components/transforms/concat.cue index d4a33b1362cae..6d451d5bdf27a 100644 --- a/website/cue/reference/components/transforms/concat.cue +++ b/website/cue/reference/components/transforms/concat.cue @@ -34,7 +34,7 @@ components: transforms: concat: { """ \(concat._remap_deprecation_notice) - ```vrl + ```coffee .message = "The severity level is " + .level ``` """, diff --git a/website/cue/reference/components/transforms/grok_parser.cue b/website/cue/reference/components/transforms/grok_parser.cue index 4a258db01e884..59c19a2257724 100644 --- a/website/cue/reference/components/transforms/grok_parser.cue +++ b/website/cue/reference/components/transforms/grok_parser.cue @@ -40,7 +40,7 @@ components: transforms: grok_parser: { """ \(grok_parser._remap_deprecation_notice) - ```vrl + ```coffee .message = parse_grok(.message, "%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:level} %{GREEDYDATA:message}") ``` """, diff --git a/website/cue/reference/components/transforms/json_parser.cue b/website/cue/reference/components/transforms/json_parser.cue index 08b521b7f6d16..dd6fa5850eba6 100644 --- a/website/cue/reference/components/transforms/json_parser.cue +++ b/website/cue/reference/components/transforms/json_parser.cue @@ -40,7 +40,7 @@ components: transforms: json_parser: {
""" \(json_parser._remap_deprecation_notice) - ```vrl + ```coffee .message = parse_json(.message) ``` """, diff --git a/website/cue/reference/components/transforms/key_value_parser.cue b/website/cue/reference/components/transforms/key_value_parser.cue index a5b0977b97ae9..eb7182b6cf4b9 100644 --- a/website/cue/reference/components/transforms/key_value_parser.cue +++ b/website/cue/reference/components/transforms/key_value_parser.cue @@ -40,7 +40,7 @@ components: transforms: key_value_parser: { """ \(key_value_parser._remap_deprecation_notice) - ```vrl + ```coffee .message = parse_key_value(.message) ``` """, diff --git a/website/cue/reference/components/transforms/logfmt_parser.cue b/website/cue/reference/components/transforms/logfmt_parser.cue index 479048ca2cf15..1cf41c4c8b677 100644 --- a/website/cue/reference/components/transforms/logfmt_parser.cue +++ b/website/cue/reference/components/transforms/logfmt_parser.cue @@ -40,7 +40,7 @@ components: transforms: logfmt_parser: { """ \(logfmt_parser._remap_deprecation_notice) - ```vrl + ```coffee .message = parse_key_value(.message) ``` """, diff --git a/website/cue/reference/components/transforms/regex_parser.cue b/website/cue/reference/components/transforms/regex_parser.cue index edc7ec6341c5c..79c848ed6dba0 100644 --- a/website/cue/reference/components/transforms/regex_parser.cue +++ b/website/cue/reference/components/transforms/regex_parser.cue @@ -40,7 +40,7 @@ components: transforms: regex_parser: { """ \(regex_parser._remap_deprecation_notice) - ```vrl + ```coffee .message = parse_regex(.message, r'(?P.*?) group') ``` """, diff --git a/website/cue/reference/components/transforms/remove_fields.cue b/website/cue/reference/components/transforms/remove_fields.cue index d023beecce1d4..51962fad43a4b 100644 --- a/website/cue/reference/components/transforms/remove_fields.cue +++ b/website/cue/reference/components/transforms/remove_fields.cue @@ -34,7 +34,7 @@ components: transforms: remove_fields: { """ \(remove_fields._remap_deprecation_notice) - ```vrl + ```coffee del(.level) ``` """, diff --git a/website/cue/reference/components/transforms/remove_tags.cue b/website/cue/reference/components/transforms/remove_tags.cue index 4d6e06ac86f25..fbc93904e18a4 100644 --- a/website/cue/reference/components/transforms/remove_tags.cue +++ b/website/cue/reference/components/transforms/remove_tags.cue @@ -34,7 +34,7 @@ components: transforms: remove_tags: { """ \(remove_tags._remap_deprecation_notice) - ```vrl + ```coffee del(.tag) ``` """, diff --git a/website/cue/reference/components/transforms/rename_fields.cue b/website/cue/reference/components/transforms/rename_fields.cue index 33d8280df3921..c82aaca96b464 100644 --- a/website/cue/reference/components/transforms/rename_fields.cue +++ b/website/cue/reference/components/transforms/rename_fields.cue @@ -34,7 +34,7 @@ components: transforms: rename_fields: { """ \(rename_fields._remap_deprecation_notice) - ```vrl + ```coffee .new_name = del(.old_name) ``` """, diff --git a/website/cue/reference/components/transforms/split.cue b/website/cue/reference/components/transforms/split.cue index 3e7daea09200f..21a43b927916e 100644 --- a/website/cue/reference/components/transforms/split.cue +++ b/website/cue/reference/components/transforms/split.cue @@ -34,7 +34,7 @@ components: transforms: split: { """ \(split._remap_deprecation_notice) - ```vrl + ```coffee .message = split(.message) ``` """, diff --git a/website/cue/reference/components/transforms/tokenizer.cue 
b/website/cue/reference/components/transforms/tokenizer.cue index e88f1b474422c..2f85a02869c67 100644 --- a/website/cue/reference/components/transforms/tokenizer.cue +++ b/website/cue/reference/components/transforms/tokenizer.cue @@ -40,7 +40,7 @@ components: transforms: tokenizer: { """ \(tokenizer._remap_deprecation_notice) - ```vrl + ```coffee .message = parse_tokens(.message) ``` """, diff --git a/website/cue/reference/remap/concepts/event.cue b/website/cue/reference/remap/concepts/event.cue index 7d1c5dbe966fb..822919348ec9c 100644 --- a/website/cue/reference/remap/concepts/event.cue +++ b/website/cue/reference/remap/concepts/event.cue @@ -4,7 +4,7 @@ remap: concepts: event: { VRL programs operate on observability [events](\(urls.vector_data_model)). This VRL program, for example, adds a field to a log event: - ```vrl + ```coffee .new_field = "new value" ``` @@ -18,7 +18,7 @@ remap: concepts: event: { This expression, for example... - ```vrl + ```coffee . = ["hello", 1, true, { "foo": "bar" }] ``` diff --git a/website/cue/reference/remap/errors/651_unnecessary_error_coalescing_operation.cue b/website/cue/reference/remap/errors/651_unnecessary_error_coalescing_operation.cue index 8ffe52af00ebf..de91e8aaa99bf 100644 --- a/website/cue/reference/remap/errors/651_unnecessary_error_coalescing_operation.cue +++ b/website/cue/reference/remap/errors/651_unnecessary_error_coalescing_operation.cue @@ -10,7 +10,7 @@ remap: errors: "651": { Error coalescing operations are useful when you want to specify what happens if an operation fails. Here's an example: - ```vrl + ```coffee result = op1 ?? op2 ``` diff --git a/website/cue/reference/remap/expressions/assignment.cue b/website/cue/reference/remap/expressions/assignment.cue index 681cfe85ad5b3..80201b96e0b22 100644 --- a/website/cue/reference/remap/expressions/assignment.cue +++ b/website/cue/reference/remap/expressions/assignment.cue @@ -36,20 +36,20 @@ remap: expressions: assignment: { "=": """ Simple assignment operator. Assigns the result from the right-hand side to the left-hand side: - ```vrl + ```coffee .field = "value" ``` """ "|=": """ Object merge assignment operator. Assigns the result of the right-hand side, merged with the left-hand side, to the left-hand side: - ```vrl + ```coffee .field |= {"foo": "bar"} ``` This is equivalent to using the `merge` function: - ```vrl + ```coffee .field = merge(.field, {"foo": "bar"}) ``` @@ -59,7 +59,7 @@ remap: expressions: assignment: { Assigns _only_ if the right-hand side doesn't error. This is useful when invoking fallible functions on the right-hand side: - ```vrl + ```coffee .structured ??= parse_json(.message) ``` """ diff --git a/website/cue/reference/remap/expressions/function_call.cue b/website/cue/reference/remap/expressions/function_call.cue index 1eb2ce837d7a8..893f1ac51ddc0 100644 --- a/website/cue/reference/remap/expressions/function_call.cue +++ b/website/cue/reference/remap/expressions/function_call.cue @@ -28,13 +28,13 @@ remap: expressions: function_call: { `abort` represents a literal `!` that can optionally be used with fallible functions to abort the program when the function fails: - ```vrl + ```coffee result = f!() ``` Otherwise, errors must be handled: - ```vrl + ```coffee result, err = f() ``` @@ -55,7 +55,7 @@ remap: expressions: function_call: { _All_ function arguments in VRL are assigned names, including required leading arguments. 
Named arguments are suffixed with a colon (`:`), with the value proceeding the name: - ```vrl + ```coffee argument_name: "value" argument_name: (1 + 2) ``` @@ -69,7 +69,7 @@ remap: expressions: function_call: { Function calls support nameless positional arguments. Arguments must be supplied in the order they are documented: - ```vrl + ```coffee f(1, 2) ``` """ @@ -79,13 +79,13 @@ remap: expressions: function_call: { description: """ Function arguments enforce type safety when the type of the value supplied is known: - ```vrl + ```coffee round("not a number") # fails at compile time ``` If the type of the value is not known, you need to handle the potential argument error: - ```vrl + ```coffee number = int(.message) ?? 0 round(number) ``` diff --git a/website/cue/reference/remap/expressions/path.cue b/website/cue/reference/remap/expressions/path.cue index 349cf1bd16292..4fe9b19311169 100644 --- a/website/cue/reference/remap/expressions/path.cue +++ b/website/cue/reference/remap/expressions/path.cue @@ -33,7 +33,7 @@ remap: expressions: path: { description: """ Array elements can be accessed by their index: - ```vrl + ```coffee .array[0] ``` """ @@ -45,7 +45,7 @@ remap: expressions: path: { particularly useful when working with [externally tagged](\(urls.externally_tagged_representation)) data: - ```vrl + ```coffee .grand_parent.(parent1 | parent2).child ``` """ @@ -61,7 +61,7 @@ remap: expressions: path: { description: """ Nested object values are accessed by delimiting each ancestor path with `.`: - ```vrl + ```coffee .parent.child ``` """ @@ -78,7 +78,7 @@ remap: expressions: path: { Path segments can be quoted to include special characters, such as spaces, periods, and others: - ```vrl + ```coffee ."parent.key.with.special \"characters\"".child ``` """# From 8d4af020f031b49237f14ff38e929162a72bcb02 Mon Sep 17 00:00:00 2001 From: Luc Perkins Date: Mon, 18 Oct 2021 08:01:01 -0700 Subject: [PATCH 27/38] Update errors array (#9664) Signed-off-by: Luc Perkins --- lib/vrl/cli/src/repl.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/vrl/cli/src/repl.rs b/lib/vrl/cli/src/repl.rs index 6b40873abc4aa..bd23715e1bac5 100644 --- a/lib/vrl/cli/src/repl.rs +++ b/lib/vrl/cli/src/repl.rs @@ -15,9 +15,9 @@ use vrl::{diagnostic::Formatter, state, value, Runtime, Target, Value}; // Create a list of all possible error values for potential docs lookup lazy_static! 
{ static ref ERRORS: Vec<String> = [ - 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 203, 204, 205, 206, 207, 208, 209, - 601, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 400, 401, - 601, 620, 630, 640, 650, 660 + 100, 101, 102, 103, 104, 105, 106, 107, 108, 110, 203, 204, 205, 206, 207, 208, 209, 300, + 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 400, 401, 402, 403, + 601, 620, 630, 640, 650, 651, 652, 660, 701 ] .iter() .map(|i| i.to_string()) From 5741defd2f25cc77123a18be46d32f8e1ae69925 Mon Sep 17 00:00:00 2001 From: Luc Perkins Date: Mon, 18 Oct 2021 08:02:16 -0700 Subject: [PATCH 28/38] Fix community page template issue (#9657) Signed-off-by: Luc Perkins --- website/layouts/community/section.html | 52 ++++++++++++--------- website/layouts/partials/content.html | 4 +- website/layouts/partials/inner-content.html | 3 ++ 3 files changed, 34 insertions(+), 25 deletions(-) create mode 100644 website/layouts/partials/inner-content.html diff --git a/website/layouts/community/section.html b/website/layouts/community/section.html index 6a81d45ce5485..13df951566179 100644 --- a/website/layouts/community/section.html +++ b/website/layouts/community/section.html @@ -5,35 +5,41 @@ {{ define "main" }} {{ $buttons := .Params.buttons }} {{ $faq := .Params.faq }} -
- {{ partial "hero.html" . }} +
+
+
+ {{ partial "hero.html" . }} +
- + + + {{ end }} +
-
- - {{ partial "heading.html" (dict "text" "FAQ" "level" 2) }} - +
+ + {{ partial "heading.html" (dict "text" "FAQ" "level" 2) }} + - {{ partial "content.html" . }} +
+ {{ partial "inner-content.html" . }} +
+
{{ end }} \ No newline at end of file diff --git a/website/layouts/partials/content.html b/website/layouts/partials/content.html index 721d6b733672d..70cc9d6c4aaf7 100644 --- a/website/layouts/partials/content.html +++ b/website/layouts/partials/content.html @@ -22,7 +22,7 @@

-
- {{ .Content }} +
+ {{ partial "inner-content.html" . }}
\ No newline at end of file diff --git a/website/layouts/partials/inner-content.html b/website/layouts/partials/inner-content.html new file mode 100644 index 0000000000000..ba2f3dcfda9b5 --- /dev/null +++ b/website/layouts/partials/inner-content.html @@ -0,0 +1,3 @@ +
+ {{ .Content }} +
\ No newline at end of file From aeb3629d76732203cd964415e2418b68125738ad Mon Sep 17 00:00:00 2001 From: Luc Perkins Date: Mon, 18 Oct 2021 08:02:58 -0700 Subject: [PATCH 29/38] fix(external docs): Remove TOC from component list pages (#9658) * Remove TOC from component list pages Signed-off-by: Luc Perkins * Update columns Signed-off-by: Luc Perkins --- website/content/en/docs/reference/configuration/sinks/_index.md | 1 + .../content/en/docs/reference/configuration/sources/_index.md | 1 + .../en/docs/reference/configuration/transforms/_index.md | 1 + website/layouts/shortcodes/components.html | 2 +- 4 files changed, 4 insertions(+), 1 deletion(-) diff --git a/website/content/en/docs/reference/configuration/sinks/_index.md b/website/content/en/docs/reference/configuration/sinks/_index.md index 90bfad3fb6cee..e8762bba0262b 100644 --- a/website/content/en/docs/reference/configuration/sinks/_index.md +++ b/website/content/en/docs/reference/configuration/sinks/_index.md @@ -6,6 +6,7 @@ component_type: sinks weight: 3 tags: ["components", "sinks"] aliases: ["/sinks"] +no_toc: true --- ## Available sinks diff --git a/website/content/en/docs/reference/configuration/sources/_index.md b/website/content/en/docs/reference/configuration/sources/_index.md index dd27292a966a9..9e1e768bab876 100644 --- a/website/content/en/docs/reference/configuration/sources/_index.md +++ b/website/content/en/docs/reference/configuration/sources/_index.md @@ -7,6 +7,7 @@ layout: components weight: 1 tags: ["components", "sources"] aliases: ["/sources"] +no_toc: true --- Vector enables you to take in observability data from a wide variety of **sources**. diff --git a/website/content/en/docs/reference/configuration/transforms/_index.md b/website/content/en/docs/reference/configuration/transforms/_index.md index d6e2511c377e6..e253e9d8b90a7 100644 --- a/website/content/en/docs/reference/configuration/transforms/_index.md +++ b/website/content/en/docs/reference/configuration/transforms/_index.md @@ -7,6 +7,7 @@ layout: components weight: 2 tags: ["components", "transforms"] aliases: ["/transforms"] +no_toc: true --- ## Available transforms diff --git a/website/layouts/shortcodes/components.html b/website/layouts/shortcodes/components.html index 0f6d21e8394aa..49bcc04f57a8c 100644 --- a/website/layouts/shortcodes/components.html +++ b/website/layouts/shortcodes/components.html @@ -1,7 +1,7 @@ {{ $kind := .Get 0 }} {{ $componentsOfType := where site.RegularPages ".Params.kind" $kind }}
-
+
{{ range $componentsOfType }} {{ .Render "component-card" }} {{ end }} From 2a2e9c4e6f5a8f881bf15922eaec81514d68825d Mon Sep 17 00:00:00 2001 From: Jesse Szwedko Date: Mon, 18 Oct 2021 10:59:16 -0500 Subject: [PATCH 30/38] fix(observability): Remove duplicate counter emit `events_out_total` (#9668) This is emitted by the `Acker` too so was double counting for sinks instrumented by `EventsSent`. Signed-off-by: Jesse Szwedko --- src/internal_events/common.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/internal_events/common.rs b/src/internal_events/common.rs index b8edbe61aec85..1af6036b1400d 100644 --- a/src/internal_events/common.rs +++ b/src/internal_events/common.rs @@ -63,7 +63,7 @@ impl InternalEvent for EventsSent { fn emit_metrics(&self) { if self.count > 0 { - counter!("events_out_total", self.count as u64); + // events_out_total is emitted by `Acker` counter!("component_sent_events_total", self.count as u64); counter!("component_sent_event_bytes_total", self.byte_size as u64); } From 6683e2713d4cffd55978ea7ad8b34354381a70ac Mon Sep 17 00:00:00 2001 From: Spencer Gilbert Date: Mon, 18 Oct 2021 12:29:38 -0400 Subject: [PATCH 31/38] docs(vrl): Fix split return type (#9672) --- website/cue/reference/remap/functions/split.cue | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/cue/reference/remap/functions/split.cue b/website/cue/reference/remap/functions/split.cue index f2cb566ef01ab..c68fbc981a676 100644 --- a/website/cue/reference/remap/functions/split.cue +++ b/website/cue/reference/remap/functions/split.cue @@ -28,7 +28,7 @@ remap: functions: split: { ] internal_failure_reasons: [] return: { - types: ["string"] + types: ["array"] rules: [ "If `limit` is specified, the remainder of the string is returned unsplit after `limit` has been reached.", ] From f36d456ae2fe6dcf105cb128918c922a70deda71 Mon Sep 17 00:00:00 2001 From: Jesse Szwedko Date: Mon, 18 Oct 2021 11:36:29 -0500 Subject: [PATCH 32/38] chore(deps): Ignore RUSTSEC-2020-0071 (#9674) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have a number of transitive dependencies on v0.1 of time which has no fixed version: ``` ➜ vector git:(ignore-RUSTSEC-2020-0071) ✗ cargo tree -i -p time:0.1.44 time v0.1.44 ├── chrono v0.4.19 │ ├── async-graphql v2.10.0 │ │ ├── async-graphql-warp v2.10.0 │ │ │ └── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ │ ├── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ │ └── vector_core v0.1.0 (/Users/jesse.szwedko/workspace/vector/lib/vector-core) │ │ └── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ ├── azure_core v0.1.0 (https://github.com/Azure/azure-sdk-for-rust.git?rev=16bcf0ab1bb6e380d966a69d314de1e99ede553a#16bcf0ab) │ │ ├── azure_storage v0.1.0 (https://github.com/Azure/azure-sdk-for-rust.git?rev=16bcf0ab1bb6e380d966a69d314de1e99ede553a#16bcf0ab) │ │ │ └── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ │ │ [dev-dependencies] │ │ │ └── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ │ └── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ │ [dev-dependencies] │ │ └── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ ├── azure_storage v0.1.0 (https://github.com/Azure/azure-sdk-for-rust.git?rev=16bcf0ab1bb6e380d966a69d314de1e99ede553a#16bcf0ab) (*) │ ├── bollard v0.11.0 │ │ └── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ ├── bollard-stubs v1.41.0 │ │ └── bollard v0.11.0 (*) │ ├── bson v2.0.1 │ │ └── mongodb v2.0.1 │ │ └── vector 
v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ ├── chrono-tz v0.6.0 │ │ └── shared v0.1.0 (/Users/jesse.szwedko/workspace/vector/lib/shared) │ │ ├── enrichment v0.1.0 (/Users/jesse.szwedko/workspace/vector/lib/enrichment) │ │ │ ├── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ │ │ └── vector_core v0.1.0 (/Users/jesse.szwedko/workspace/vector/lib/vector-core) (*) │ │ ├── prometheus-parser v0.1.0 (/Users/jesse.szwedko/workspace/vector/lib/prometheus-parser) │ │ │ └── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ │ ├── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ │ ├── vector_core v0.1.0 (/Users/jesse.szwedko/workspace/vector/lib/vector-core) (*) │ │ ├── vrl v0.1.0 (/Users/jesse.szwedko/workspace/vector/lib/vrl/core) │ │ │ ├── enrichment v0.1.0 (/Users/jesse.szwedko/workspace/vector/lib/enrichment) (*) │ │ │ ├── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ │ │ ├── vector_core v0.1.0 (/Users/jesse.szwedko/workspace/vector/lib/vector-core) (*) │ │ │ ├── vrl-cli v0.1.0 (/Users/jesse.szwedko/workspace/vector/lib/vrl/cli) │ │ │ │ └── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ │ │ └── vrl-stdlib v0.1.0 (/Users/jesse.szwedko/workspace/vector/lib/vrl/stdlib) │ │ │ ├── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ │ │ └── vrl-cli v0.1.0 (/Users/jesse.szwedko/workspace/vector/lib/vrl/cli) (*) │ │ ├── vrl-cli v0.1.0 (/Users/jesse.szwedko/workspace/vector/lib/vrl/cli) (*) │ │ ├── vrl-compiler v0.1.0 (/Users/jesse.szwedko/workspace/vector/lib/vrl/compiler) │ │ │ ├── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ │ │ └── vrl v0.1.0 (/Users/jesse.szwedko/workspace/vector/lib/vrl/core) (*) │ │ └── vrl-stdlib v0.1.0 (/Users/jesse.szwedko/workspace/vector/lib/vrl/stdlib) (*) │ ├── enrichment v0.1.0 (/Users/jesse.szwedko/workspace/vector/lib/enrichment) (*) │ ├── fakedata v0.1.0 (/Users/jesse.szwedko/workspace/vector/lib/fakedata) │ │ └── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ ├── file-source v0.1.0 (/Users/jesse.szwedko/workspace/vector/lib/file-source) │ │ └── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ ├── k8s-openapi v0.13.1 │ │ └── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ ├── mongodb v2.0.1 (*) │ ├── oauth2 v4.1.0 │ │ └── azure_core v0.1.0 (https://github.com/Azure/azure-sdk-for-rust.git?rev=16bcf0ab1bb6e380d966a69d314de1e99ede553a#16bcf0ab) (*) │ ├── postgres-types v0.2.2 │ │ └── tokio-postgres v0.7.3 │ │ ├── postgres-openssl v0.5.0 │ │ │ └── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ │ └── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ ├── pulsar v4.1.1 │ │ └── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ ├── rusoto_credential v0.47.0 │ │ ├── rusoto_core v0.47.0 │ │ │ ├── rusoto_cloudwatch v0.47.0 │ │ │ │ └── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ │ │ ├── rusoto_firehose v0.47.0 │ │ │ │ └── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ │ │ ├── rusoto_kinesis v0.47.0 │ │ │ │ └── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ │ │ ├── rusoto_logs v0.47.0 │ │ │ │ └── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ │ │ ├── rusoto_s3 v0.47.0 │ │ │ │ └── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ │ │ ├── rusoto_sqs v0.47.0 │ │ │ │ └── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ │ │ ├── rusoto_sts v0.47.0 │ │ │ │ └── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ │ │ └── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ │ ├── rusoto_signature v0.47.0 │ │ │ 
├── rusoto_core v0.47.0 (*) │ │ │ └── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ │ └── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ ├── rusoto_signature v0.47.0 (*) │ ├── rusoto_sts v0.47.0 (*) │ ├── shared v0.1.0 (/Users/jesse.szwedko/workspace/vector/lib/shared) (*) │ ├── simple_asn1 v0.4.1 │ │ └── jsonwebtoken v7.2.0 │ │ └── gouth v0.2.1 │ │ └── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ ├── syslog_loose v0.15.0 │ │ ├── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ │ └── vrl-stdlib v0.1.0 (/Users/jesse.szwedko/workspace/vector/lib/vrl/stdlib) (*) │ ├── tracing-subscriber v0.2.25 │ │ ├── metrics-tracing-context v0.8.0 │ │ │ ├── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ │ │ └── vector_core v0.1.0 (/Users/jesse.szwedko/workspace/vector/lib/vector-core) (*) │ │ ├── tracing-limit v0.1.0 (/Users/jesse.szwedko/workspace/vector/lib/tracing-limit) │ │ │ └── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ │ ├── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ │ └── vector_core v0.1.0 (/Users/jesse.szwedko/workspace/vector/lib/vector-core) (*) │ ├── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ ├── vector-api-client v0.1.2 (/Users/jesse.szwedko/workspace/vector/lib/vector-api-client) │ │ └── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ ├── vector_core v0.1.0 (/Users/jesse.szwedko/workspace/vector/lib/vector-core) (*) │ ├── vrl-compiler v0.1.0 (/Users/jesse.szwedko/workspace/vector/lib/vrl/compiler) (*) │ └── vrl-stdlib v0.1.0 (/Users/jesse.szwedko/workspace/vector/lib/vrl/stdlib) (*) ├── headers v0.3.4 │ ├── hyper-proxy v0.9.1 │ │ ├── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ │ └── vector_core v0.1.0 (/Users/jesse.szwedko/workspace/vector/lib/vector-core) (*) │ ├── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) │ └── warp v0.3.1 │ ├── async-graphql-warp v2.10.0 (*) │ └── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) └── syslog v5.0.0 └── vector v0.18.0 (/Users/jesse.szwedko/workspace/vector) ``` Signed-off-by: Jesse Szwedko --- deny.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/deny.toml b/deny.toml index b57a500294b45..ddf524d693a8a 100644 --- a/deny.toml +++ b/deny.toml @@ -42,4 +42,8 @@ ignore = [ # stdweb is unmaintained # https://github.com/timberio/vector/issues/5585 "RUSTSEC-2020-0056", + + # Potential segfault in the time crate + # https://github.com/vectordotdev/vector/issues/9673 + "RUSTSEC-2020-0071", ] From f0e98742076f407100410e08a946bbc963ef4a5e Mon Sep 17 00:00:00 2001 From: Luc Perkins Date: Mon, 18 Oct 2021 12:00:30 -0700 Subject: [PATCH 33/38] fix(external docs): Update CLI docs template (#9676) * Update CLI docs template Signed-off-by: Luc Perkins * Fix CUE error Signed-off-by: Luc Perkins * Note whether args are required Signed-off-by: Luc Perkins --- website/content/en/docs/reference/cli.md | 4 - website/cue/reference/cli.cue | 63 ++++----- website/layouts/shortcodes/cli/commands.html | 132 +++++++++++++++---- 3 files changed, 138 insertions(+), 61 deletions(-) diff --git a/website/content/en/docs/reference/cli.md b/website/content/en/docs/reference/cli.md index 367e08adec50e..a9c13d5768fe3 100644 --- a/website/content/en/docs/reference/cli.md +++ b/website/content/en/docs/reference/cli.md @@ -6,10 +6,6 @@ weight: 4 Vector is a single binary that be managed through a command line interface. This page documents all of the available flags, options, subcommands, and environment variables. 
-```shell -vector [FLAGS] [OPTIONS] [SUBCOMMAND] [ARGS] -``` - ## Commands {{< cli/commands >}} diff --git a/website/cue/reference/cli.cue b/website/cue/reference/cli.cue index f870ec662d019..195b793990348 100644 --- a/website/cue/reference/cli.cue +++ b/website/cue/reference/cli.cue @@ -5,7 +5,7 @@ package metadata _default_flags: { "help": { _short: "h" - description: "Prints help information" + description: "Prints help information " } "version": { _short: "V" @@ -17,14 +17,11 @@ _config_options: { "config": { _short: "c" description: """ - Read configuration from one or more files. Wildcard paths are - supported. If zero files are specified the default config path - `/etc/vector/vector.toml` will be targeted. - TOML, YAML and JSON file formats are supported. - The format to interpret the file with is determined from - the file extension (.toml, .yaml, .json). - We will fallback to TOML if we are unable to detect - a supported format. + Read configuration from one or more files. Wildcard paths are supported. If no files are + specified the default config path `/etc/vector/vector.toml` is targeted. TOML, YAML and + JSON file formats are supported. The format to interpret the file with is determined from + the file extension (`.toml`, `.yaml`, `.json`). Vector falls back to TOML if it can't + detect a supported format. """ type: "string" default: "/etc/vector/vector.toml" @@ -32,33 +29,33 @@ _config_options: { } "config-dir": { description: """ - Read configuration from files in one or more directories. File - format is detected from the file name. Files not ending in .toml, - .json, .yaml, or .yml will be ignored. + Read configuration from files in one or more directories. The file format is detected + from the file name. Files not ending in `.toml`, `.json`, `.yaml`, or `.yml` are + ignored. """ type: "string" env_var: "VECTOR_CONFIG_DIR" } "config-toml": { description: """ - Read configuration from one or more files. Wildcard paths are - supported. TOML file format is assumed. + Read configuration from one or more files. Wildcard paths are supported. TOML file + format is assumed. """ type: "string" env_var: "VECTOR_CONFIG_TOML" } "config-json": { description: """ - Read configuration from one or more files. Wildcard paths are - supported. JSON file format is assumed. + Read configuration from one or more files. Wildcard paths are supported. JSON file + format is assumed. """ type: "string" env_var: "VECTOR_CONFIG_JSON" } "config-yaml": { description: """ - Read configuration from one or more files. Wildcard paths are - supported. YAML file format is assumed. + Read configuration from one or more files. Wildcard paths are supported. YAML file + format is assumed. """ type: "string" env_var: "VECTOR_CONFIG_YAML" @@ -68,6 +65,7 @@ _config_options: { cli: { #Args: [Arg=string]: { description: !="" + required: bool | *false name: Arg type: #ArgType default?: string | [...string] @@ -78,6 +76,7 @@ cli: { #Commands: [Command=string]: { description: !="" name: Command + example?: string flags?: #Flags options?: #Options args?: #Args @@ -140,8 +139,7 @@ cli: { "quiet": { _short: "q" description: """ - Reduce detail of internal logging. Repeat to reduce further. Overrides - `--verbose` + Reduce detail of internal logging. Repeat to reduce further. Overrides `--verbose`. """ } "require-healthy": { @@ -151,7 +149,7 @@ cli: { } "verbose": { _short: "v" - description: "Enable more detailed logging. Repeat to reduce further. Overrides `--verbose`" + description: "Enable more detailed logging. 
Repeat to increase further. Overrides `--quiet`."
 	}
 	"watch-config": {
 		_short: "w"
@@ -165,23 +163,22 @@
 		"color": {
 			description: "Control when ANSI terminal formatting is used."
 			default: "auto"
 			enum: {
-				always: "Enable ANSI terminal formatting always."
-				auto: "Detect ANSI terminal formatting and enable if supported."
-				never: "Disable ANSI terminal formatting."
+				always: "Always enable ANSI terminal formatting"
+				auto: "Detect ANSI terminal formatting and enable if supported"
+				never: "Disable ANSI terminal formatting"
 			}
 			env_var: "VECTOR_COLOR"
 		}
 		"threads": {
 			_short: "t"
 			description: """
-				Number of threads to use for processing (default is number of
-				available cores)
+				The number of threads to use for processing (the default is the number of available cores)
 				"""
 			type: "integer"
 			env_var: "VECTOR_THREADS"
 		}
 		"log-format": {
-			description: "Set the logging format [default: text]"
+			description: "Set the logging format"
 			default: "text"
 			enum: {
 				json: "Output Vector's logs as JSON."
 				text: "Output Vector's logs as text."
 			}
 		}
 	}
 	commands: {
 		"graph": {
 			description: """
-				Generate a visual representation of topologies. The output is in the [DOT format](\(urls.dot_format))
+				Generate a visual representation of topologies. The output is in the [DOT format](\(urls.dot_format)),
 				which can be rendered using [GraphViz](\(urls.graphviz)).
-
-				Example:
-
-				```shell
-				vector graph --config /etc/vector/vector.toml | dot -Tsvg > graph.svg
-				```
-
 				You can also visualize the output online at [webgraphviz.com](http://www.webgraphviz.com/).
 				"""
+			example: "vector graph --config /etc/vector/vector.toml | dot -Tsvg > graph.svg"
+
 			options: _config_options
 		}
 		"generate": {
@@ -341,7 +334,7 @@ cli: {
 				components: {
 					type: "list"
 					description: """
-						Components to observe (comma-separated; accepts glob patterns).
+						Components to observe (comma-separated; accepts glob patterns).
 						"""
 					default: "*"
 				}
diff --git a/website/layouts/shortcodes/cli/commands.html b/website/layouts/shortcodes/cli/commands.html
index 346bb31c83469..11f877df01199 100644
--- a/website/layouts/shortcodes/cli/commands.html
+++ b/website/layouts/shortcodes/cli/commands.html
@@ -6,8 +6,6 @@

{{ $cli.name }}

-
-    {{ partial "badge.html" (dict "word" "root" "color" "blue") }}
@@ -15,12 +13,16 @@

The root command (no subcommand) starts Vector.

+
+ {{ highlight "vector [FLAGS] [OPTIONS] [SUBCOMMAND] [ARGS]" "shell" "" }} +
+
- {{ template "table" (dict "title" "Flags" "items" $cli.flags "cmd" "vector") }} + {{ template "options-table" (dict "title" "Flags" "items" $cli.flags "cmd" "vector") }}
- {{ template "table" (dict "title" "Options" "items" $cli.options "cmd" "vector") }} + {{ template "options-table" (dict "title" "Options" "items" $cli.options "cmd" "vector") }}

@@ -29,33 +31,119 @@

{{ range $k, $v := $cli.commands }}
-

- {{ $k }} -

+ {{ partial "heading.html" (dict "text" (printf "vector %s" $k) "level" 3 "mono" true "id" $k) }}
- {{ with $v.description }} -
- {{ . | markdownify }} -
- {{ end }} +
+ {{ with $v.description }} +
+ {{ . | markdownify }} +
+ {{ end }} - {{ with $v.flags }} -
- {{ template "table" (dict "title" "Flags" "items" . "cmd" $k) }} -
- {{ end }} +
+ {{ template "usage" $v }} +
- {{ with $v.options }} -
- {{ template "table" (dict "title" "Options" "items" . "cmd" $k) }} + {{ with $v.example }} +
+ + {{ partial "heading.html" (dict "text" "Example" "level" 4 "toc_hide" true) }} + + +
+ {{ highlight . "shell" "" }} +
+
+ {{ end }} + + {{ with $v.flags }} +
+ {{ template "options-table" (dict "title" "Flags" "items" . "cmd" $k) }} +
+ {{ end }} + + {{ with $v.options }} +
+ {{ template "options-table" (dict "title" "Options" "items" . "cmd" $k) }} +
+ {{ end }} + + {{ with $v.args }} +
+ {{ template "args-table" (dict "title" "Arguments" "items" . "cmd" $k) }} +
+ {{ end }}
- {{ end }} +
{{ end }}
-{{ define "table" }} +{{ define "usage" }} +{{ $s := printf "vector %s" .name }} +{{ if .flags }} +{{ $s = printf "%s [FLAGS]" $s }} +{{ end }} +{{ if .options }} +{{ $s = printf "%s [OPTIONS]" $s }} +{{ end }} +{{ if .args }} +{{ $s = printf "%s [ARGUMENTS]" $s }} +{{ end }} +{{ highlight $s "shell" "" }} +{{ end }} + +{{ define "args-table" }} + + + + + + + + + + + + {{ range $k, $v := .items }} + + + + + + + + {{ end }} + +
+ {{ .title }} + + Required + + Description + + Type + + Default +
+ {{ $k }} + + {{ $v.required }} + + {{ $v.description | markdownify }} + + {{ $v.type }} + + {{ with $v.default }} + {{ . }} + {{ else }} + none + {{ end }} +
+{{ end }} + +{{ define "options-table" }} {{ $cmd := .cmd }} From 71b3d6a48a58dec4c39f7a2f4a3918d606fa387e Mon Sep 17 00:00:00 2001 From: Luc Perkins Date: Mon, 18 Oct 2021 12:30:32 -0700 Subject: [PATCH 34/38] fix(external docs): Upgrade download template (#9656) * Fix discrepancies between nightly/stable Signed-off-by: Luc Perkins * Streamline download template Signed-off-by: Luc Perkins * Remove unnecessary Alpine storage functions Signed-off-by: Luc Perkins * Enable routing scheme for download page Signed-off-by: Luc Perkins * Streamline JS routing logic Signed-off-by: Luc Perkins * Remove hash logic Signed-off-by: Luc Perkins --- website/assets/js/app.js | 19 +- website/layouts/download/section.html | 183 +++++++++--------- .../partials/download/release-toggle.html | 25 --- .../partials/download/version-selector.html | 20 +- 4 files changed, 102 insertions(+), 145 deletions(-) delete mode 100644 website/layouts/partials/download/release-toggle.html diff --git a/website/assets/js/app.js b/website/assets/js/app.js index 06ce167de9c22..d4bb1c3f834cf 100644 --- a/website/assets/js/app.js +++ b/website/assets/js/app.js @@ -77,6 +77,9 @@ const manageState = () => { this.versionBackup = v; } }, + isVersion(v) { + return this.version === v; + }, notLatest() { return this.version != '{{ $latest }}'; }, @@ -87,16 +90,6 @@ const manageState = () => { toggleDarkMode() { this.dark = !this.dark; }, - // Toggle between stable and nightly - toggleRelease() { - if (this.release === 'stable') { - this.release = 'nightly'; - this.setVersion('nightly'); - } else if (this.release === 'nightly') { - this.release = 'stable'; - this.setVersion(this.versionBackup); - } - }, // Switch the banner on and off toggleBanner() { this.banner = !this.banner; @@ -105,12 +98,6 @@ const manageState = () => { isNightly() { return this.release === 'nightly'; }, - isStable() { - return this.release === 'stable'; - }, - isCurrent(version) { - return this.version === version; - }, }, useLocalStorage); } diff --git a/website/layouts/download/section.html b/website/layouts/download/section.html index 6021671f73104..17a6f8c86fe33 100644 --- a/website/layouts/download/section.html +++ b/website/layouts/download/section.html @@ -1,5 +1,3 @@ -{{/* TODO: find a way to generate less HTML for displaying the download info */}} - {{ define "title" }} Download | {{ site.Title }} {{ end }} @@ -11,117 +9,112 @@
{{ partial "hero.html" . }} +
-
-
- {{ partial "download/release-toggle.html" . }} - - {{ partial "download/version-selector.html" . }} +
+
+ {{ partial "download/version-selector.html" . }} - {{ template "latest-toggler" }} -
+ {{ template "latest-toggler" }}
+
- {{/* Stable releases */}} +
{{ range $k, $v := $releases }} -
-
- - Date {{ $v.date }} - - - - License MPL 2.0 - - - - {{ $k }} release notes - -
+ {{ template "release-header" (dict "version" $k "info" $v) }} + {{ end }} -
-
-
-
- {{ partial "heading.html" (dict "text" "Downloads" "level" 2) }} -
- -
- {{ partial "download/download-matrix.html" (dict "version" $k) }} -
-
- -
-
- {{ partial "heading.html" (dict "text" "Other methods" "level" 2) }} -
- -
- {{ partial "download/links.html" . }} -
-
-
-
-
+ {{ template "release-header" (dict "version" "nightly") }} +
+ +
+ {{ range $k, $v := $releases }} + {{ template "release-body" (dict "version" $k "info" $v) }} {{ end }} - {{/* Nightly release */}} -
-
- - Date - + {{ template "release-body" (dict "version" "nightly") }} +
+
+{{ end }} - - License MPL 2.0 - -
+{{ define "release-header" }} +{{ $v := .version }} +{{ $isNightly := eq $v "nightly" }} +
+
+ + Date {{ if $isNightly }} + + {{ else }} + {{ .info.date }} + {{ end }} + + + + License MPL 2.0 + + + {{ if not $isNightly }} + + {{ $v }} release notes + + {{ end }} +
+
+{{ end }} + +{{ define "release-body" }} +{{ $v := .version }} +
+
+
+
+
+ {{ partial "heading.html" (dict "text" "Downloads" "level" 2) }} +
-
-
-
- {{ template "download-heading" (dict "text" "Download") }} - -
- {{ partial "download/download-matrix.html" (dict "version" "nightly") }} -
- -
- {{/* Heroicon: outline/exclamation */}} - - - - - - - - Nightly versions contain bleeding-edge changes that may contain bugs. Proceed with caution. - -
-
- -
- {{ template "download-heading" (dict "text" "Other methods") }} - -
- {{ partial "download/links.html" . }} -
-
+
+ {{ partial "download/download-matrix.html" (dict "version" $v) }}
+ + {{ template "nightly-warning" }}
+ + {{ template "methods" }}
{{ end }} -{{ define "download-heading" }} -

- {{ .text }} -

-{{ end }} - {{ define "latest-toggler" }} - +{{ end }} + +{{ define "methods" }} +
+
+ {{ partial "heading.html" (dict "text" "Other methods" "level" 2) }} +
+ +
+ {{ partial "download/links.html" . }} +
+
+{{ end }} + +{{ define "nightly-warning" }} +
+ {{/* Heroicon: outline/exclamation */}} + + + + + + + + Nightly versions contain bleeding-edge changes that may contain bugs. Proceed with caution. + +
{{ end }} \ No newline at end of file diff --git a/website/layouts/partials/download/release-toggle.html b/website/layouts/partials/download/release-toggle.html deleted file mode 100644 index 4aab7291536e9..0000000000000 --- a/website/layouts/partials/download/release-toggle.html +++ /dev/null @@ -1,25 +0,0 @@ -
- - - - - {{/* Heroicon: outline/shield-check */}} - - - - - {{/* Heroicon: outline/moon */}} - - - -
\ No newline at end of file diff --git a/website/layouts/partials/download/version-selector.html b/website/layouts/partials/download/version-selector.html index daf46a674a538..30ff30aa429aa 100644 --- a/website/layouts/partials/download/version-selector.html +++ b/website/layouts/partials/download/version-selector.html @@ -3,10 +3,13 @@ {{ $latest := index $versions 0 }}
- {{ if $isLatest }} From cfd66ce9949b519e60e49d385459daccbce83caf Mon Sep 17 00:00:00 2001 From: Bruce Guenter Date: Mon, 18 Oct 2021 16:57:30 -0600 Subject: [PATCH 35/38] enhancement(file sink, observability): Add event processing metrics (#9589) Signed-off-by: Bruce Guenter --- src/internal_events/file.rs | 23 +++++++++++- src/sinks/file/mod.rs | 36 ++++++++++++++----- src/test_util/components.rs | 3 ++ .../cue/reference/components/sinks/file.cue | 7 ++-- 4 files changed, 58 insertions(+), 11 deletions(-) diff --git a/src/internal_events/file.rs b/src/internal_events/file.rs index 9668279f52c8f..13226cfb02725 100644 --- a/src/internal_events/file.rs +++ b/src/internal_events/file.rs @@ -1,4 +1,5 @@ -use metrics::gauge; +use metrics::{counter, gauge}; +use std::borrow::Cow; use vector_core::internal_event::InternalEvent; #[cfg(any(feature = "sources-file", feature = "sources-kubernetes_logs"))] @@ -15,6 +16,26 @@ impl InternalEvent for FileOpen { } } +#[derive(Debug)] +pub struct FileBytesSent<'a> { + pub byte_size: usize, + pub file: Cow<'a, str>, +} + +impl InternalEvent for FileBytesSent<'_> { + fn emit_logs(&self) { + trace!(message = "Bytes sent.", byte_size = %self.byte_size, protocol = "file", file = %self.file); + } + + fn emit_metrics(&self) { + counter!( + "component_sent_bytes_total", self.byte_size as u64, + "protocol" => "file", + "file" => self.file.clone().into_owned(), + ); + } +} + #[cfg(any(feature = "sources-file", feature = "sources-kubernetes_logs"))] mod source { use super::{FileOpen, InternalEvent}; diff --git a/src/sinks/file/mod.rs b/src/sinks/file/mod.rs index 238453179394e..255a644b028db 100644 --- a/src/sinks/file/mod.rs +++ b/src/sinks/file/mod.rs @@ -3,8 +3,7 @@ use crate::{ buffers::Acker, config::{log_schema, DataType, GenerateConfig, SinkConfig, SinkContext, SinkDescription}, event::Event, - internal_events::FileOpen, - internal_events::TemplateRenderingFailed, + internal_events::{EventsSent, FileBytesSent, FileOpen, TemplateRenderingFailed}, sinks::util::{ encoding::{EncodingConfig, EncodingConfiguration}, StreamSink, @@ -21,6 +20,7 @@ use futures::{ }; use serde::{Deserialize, Serialize}; use std::time::{Duration, Instant}; +use vector_core::ByteSizeOf; use tokio::{ fs::{self, File}, @@ -290,8 +290,19 @@ impl FileSink { }; trace!(message = "Writing an event to file.", path = ?path); - if let Err(error) = write_event_to_file(file, event, &self.encoding).await { - error!(message = "Failed to write file.", path = ?path, %error); + let event_size = event.size_of(); + match write_event_to_file(file, event, &self.encoding).await { + Ok(byte_size) => { + emit!(&EventsSent { + count: 1, + byte_size: event_size, + }); + emit!(&FileBytesSent { + byte_size, + file: String::from_utf8_lossy(&path), + }); + } + Err(error) => error!(message = "Failed to write file.", path = ?path, %error), } } } @@ -328,10 +339,10 @@ async fn write_event_to_file( file: &mut OutFile, event: Event, encoding: &EncodingConfig, -) -> Result<(), std::io::Error> { +) -> Result { let mut buf = encode_event(encoding, event); buf.push(b'\n'); - file.write_all(&buf[..]).await + file.write_all(&buf[..]).await.map(|()| buf.len()) } #[async_trait] @@ -348,10 +359,12 @@ impl StreamSink for FileSink { mod tests { use super::*; use crate::test_util::{ + components::{self, FILE_SINK_TAGS, SINK_TESTS}, lines_from_file, lines_from_gzip_file, random_events_with_stream, random_lines_with_stream, temp_dir, temp_file, trace_init, }; use futures::{stream, SinkExt}; + use pretty_assertions::assert_eq; use 
std::convert::TryInto; #[test] @@ -361,6 +374,7 @@ mod tests { #[tokio::test] async fn single_partition() { + components::init(); trace_init(); let template = temp_file(); @@ -377,6 +391,7 @@ mod tests { let events = Box::pin(stream::iter(input.clone().into_iter().map(Event::from))); sink.run(events).await.unwrap(); + SINK_TESTS.assert(&FILE_SINK_TAGS); let output = lines_from_file(template); for (input, output) in input.into_iter().zip(output) { @@ -386,6 +401,7 @@ mod tests { #[tokio::test] async fn single_partition_gzip() { + components::init(); trace_init(); let template = temp_file(); @@ -402,6 +418,7 @@ mod tests { let events = Box::pin(stream::iter(input.clone().into_iter().map(Event::from))); sink.run(events).await.unwrap(); + SINK_TESTS.assert(&FILE_SINK_TAGS); let output = lines_from_gzip_file(template); for (input, output) in input.into_iter().zip(output) { @@ -411,6 +428,7 @@ mod tests { #[tokio::test] async fn many_partitions() { + components::init(); trace_init(); let directory = temp_dir(); @@ -449,6 +467,7 @@ mod tests { let events = Box::pin(stream::iter(input.clone().into_iter())); sink.run(events).await.unwrap(); + SINK_TESTS.assert(&FILE_SINK_TAGS); let output = vec![ lines_from_file(&directory.join("warnings-2019-26-07.log")), @@ -495,8 +514,7 @@ mod tests { #[tokio::test] async fn reopening() { - use pretty_assertions::assert_eq; - + components::init(); trace_init(); let template = temp_file(); @@ -534,5 +552,7 @@ mod tests { // make sure we appended instead of overwriting let output = lines_from_file(template); assert_eq!(input, output); + + SINK_TESTS.assert(&FILE_SINK_TAGS); } } diff --git a/src/test_util/components.rs b/src/test_util/components.rs index e2d683f791b1b..8ec4f8ee4dede 100644 --- a/src/test_util/components.rs +++ b/src/test_util/components.rs @@ -28,6 +28,9 @@ pub const HTTP_PUSH_SOURCE_TAGS: [&str; 2] = ["http_path", "protocol"]; /// The standard set of tags for all `TcpSource`-based sources. pub const TCP_SOURCE_TAGS: [&str; 2] = ["peer_addr", "protocol"]; +/// The standard set of tags for all sinks that write a file. +pub const FILE_SINK_TAGS: [&str; 2] = ["file", "protocol"]; + /// The standard set of tags for all `HttpSink`-based sinks. 
pub const HTTP_SINK_TAGS: [&str; 2] = ["endpoint", "protocol"]; diff --git a/website/cue/reference/components/sinks/file.cue b/website/cue/reference/components/sinks/file.cue index fc26c4bb5d9c3..94859bb113dce 100644 --- a/website/cue/reference/components/sinks/file.cue +++ b/website/cue/reference/components/sinks/file.cue @@ -91,7 +91,10 @@ components: sinks: file: { } telemetry: metrics: { - events_discarded_total: components.sources.internal_metrics.output.metrics.events_discarded_total - processing_errors_total: components.sources.internal_metrics.output.metrics.processing_errors_total + component_sent_bytes_total: components.sources.internal_metrics.output.metrics.component_sent_bytes_total + component_sent_events_total: components.sources.internal_metrics.output.metrics.component_sent_events_total + component_sent_event_bytes_total: components.sources.internal_metrics.output.metrics.component_sent_event_bytes_total + events_discarded_total: components.sources.internal_metrics.output.metrics.events_discarded_total + processing_errors_total: components.sources.internal_metrics.output.metrics.processing_errors_total } } From 8c6d364d93b3e97a6ff5e54911809829a4737690 Mon Sep 17 00:00:00 2001 From: Jesse Szwedko Date: Mon, 18 Oct 2021 18:51:39 -0500 Subject: [PATCH 36/38] chore(deps): Ignore related RUSTSEC-2020-0159 for now (#9686) Signed-off-by: Jesse Szwedko --- deny.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/deny.toml b/deny.toml index ddf524d693a8a..677f8231d5504 100644 --- a/deny.toml +++ b/deny.toml @@ -46,4 +46,5 @@ ignore = [ # Potential segfault in the time crate # https://github.com/vectordotdev/vector/issues/9673 "RUSTSEC-2020-0071", + "RUSTSEC-2020-0159" ] From b100f786f6d0c625a3985ef70b649972c50eba04 Mon Sep 17 00:00:00 2001 From: Bruce Guenter Date: Mon, 18 Oct 2021 18:10:04 -0600 Subject: [PATCH 37/38] fix(file sink): Fix compilation of tests (#9684) Signed-off-by: Bruce Guenter --- src/sinks/file/mod.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/sinks/file/mod.rs b/src/sinks/file/mod.rs index 255a644b028db..dc6a2c9a6dee7 100644 --- a/src/sinks/file/mod.rs +++ b/src/sinks/file/mod.rs @@ -374,7 +374,7 @@ mod tests { #[tokio::test] async fn single_partition() { - components::init(); + components::init_test(); trace_init(); let template = temp_file(); @@ -401,7 +401,7 @@ mod tests { #[tokio::test] async fn single_partition_gzip() { - components::init(); + components::init_test(); trace_init(); let template = temp_file(); @@ -428,7 +428,7 @@ mod tests { #[tokio::test] async fn many_partitions() { - components::init(); + components::init_test(); trace_init(); let directory = temp_dir(); @@ -514,7 +514,7 @@ mod tests { #[tokio::test] async fn reopening() { - components::init(); + components::init_test(); trace_init(); let template = temp_file(); From fcb8c798af45fa3a28e10c68945fe44d6b5a989b Mon Sep 17 00:00:00 2001 From: Luc Perkins Date: Mon, 18 Oct 2021 17:15:44 -0700 Subject: [PATCH 38/38] Add missing URL and reduce line length (#9689) Signed-off-by: Luc Perkins --- .../content/en/docs/reference/vrl/errors.md | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/website/content/en/docs/reference/vrl/errors.md b/website/content/en/docs/reference/vrl/errors.md index 8eeb14284e5f4..b6974f7bb9413 100644 --- a/website/content/en/docs/reference/vrl/errors.md +++ b/website/content/en/docs/reference/vrl/errors.md @@ -4,7 +4,9 @@ short: Errors weight: 2 --- -VRL is a [fail-safe][fail_safety] language, which 
means that a VRL program doesn't compile unless every potential error is handled. Observability data is notoriously unpredictable and fail safety ensures that your VRL programs elegantly handle malformed data.
+VRL is a [fail-safe][fail_safety] language, which means that a VRL program doesn't compile unless
+every potential error is handled. Observability data is notoriously unpredictable and fail safety
+ensures that your VRL programs elegantly handle malformed data.
 
 ## Compile-time errors
 
@@ -12,7 +14,9 @@ VRL is a [fail-safe][fail_safety] language, which means that a VRL program doesn
 
 ## Runtime errors
 
-A runtime error occurs after compilation and during program runtime. Because VRL is fail safe, runtime error must be [handled](#handling). This forces you to address how VRL programs should respond to errors.
+A runtime error occurs after compilation and during program runtime. Because VRL is fail safe, all
+runtime errors must be [handled](#handling). This forces you to address how VRL programs should
+respond to errors.
 
 Runtime errors are strings that describe the error.
 
@@ -26,7 +30,8 @@ You have three options for handling errors in VRL:
 
 #### Assigning
 
-As documented in the [assignment expression reference], you can **assign** errors when invoking an expression that's fallible. When assigned, runtime errors are simple strings:
+As documented in the [assignment expression reference][assign], you can **assign** errors when
+invoking an expression that's fallible. When assigned, runtime errors are simple strings:
 
 ```coffee
 structured, err = parse_json("not json")
@@ -50,9 +55,12 @@ if err == null {
 }
 ```
 
-The above example compiles because `foo` will either be assigned the integer representation of `.foo` if it can be coerced to an integer, or it will be set to the "empty integer value" `0` if `.foo` can't be coerced into an integer.
+The above example compiles because `foo` will either be assigned the integer representation of
+`.foo` if it can be coerced to an integer, or it will be set to the "empty integer value" `0` if
+`.foo` can't be coerced into an integer.
 
-Because of this, it is important to always check if `err` is null before using the `ok` value of an infallible assignment.
+Because of this, it is important to always check whether `err` is null before using the `ok` value
+of an infallible assignment.
 
 ##### Empty values
 
@@ -70,7 +78,8 @@ Null | `null`
 
 #### Coalescing
 
-As documented in the [coalesce expression reference][coalesce], you can **coalesce** errors to efficiently step through multiple expressions:
+As documented in the [coalesce expression reference][coalesce], you can **coalesce** errors to
+efficiently step through multiple expressions:
 
 ```coffee
 structured = parse_json("not json") ?? parse_syslog("not syslog") ?? {}
@@ -79,7 +88,8 @@ structured = parse_json("not json") ?? parse_syslog("not syslog") ?? {}
 
 #### Raising
 
-As documented in the [function call reference][call], you can **raise** errors to immediately abort the program by adding a `!` to the end of the function name:
+As documented in the [function call reference][call], you can **raise** errors to immediately abort
+the program by adding a `!` to the end of the function name:
 
 ```coffee
 structured = parse_json!("not json")
 . = merge(., structured)
 ```
 
 {{< warning title="Raising errors should be used with caution" >}}
+While raising errors can simplify your program, you should think carefully before aborting your
+program. If this operation is critical to the structure of your data you should abort, otherwise
+consider handling the error and proceeding with the rest of your program.
 {{< /warning >}}
 
 [assign]: /docs/reference/vrl/expressions/#assignment