Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

enhancement(remap): Add join function #6313

Merged
merged 26 commits into from
Feb 5, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions lib/remap-functions/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ default = [
"ip_to_ipv6",
"ipv6_to_ipv4",
"is_nullish",
"join",
"length",
"log",
"match",
Expand Down Expand Up @@ -131,6 +132,7 @@ ip_subnet = ["lazy_static", "regex"]
ip_to_ipv6 = []
ipv6_to_ipv4 = []
is_nullish = []
join = []
length = []
log = ["tracing"]
match = ["regex"]
Expand Down
174 changes: 174 additions & 0 deletions lib/remap-functions/src/join.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
use remap::prelude::*;
use std::borrow::Cow;

/// The `join` function: takes an array and an optional string separator and
/// compiles to a [`JoinFn`] expression.
#[derive(Clone, Copy, Debug)]
pub struct Join;

impl Function for Join {
    /// Name under which the function is called.
    fn identifier(&self) -> &'static str {
        "join"
    }

    /// Accepted arguments:
    ///
    /// * `value` (required) must be an array.
    /// * `separator` (optional) must be a string (`Value::Bytes`).
    fn parameters(&self) -> &'static [Parameter] {
        &[
            Parameter {
                keyword: "value",
                accepts: |candidate| matches!(candidate, Value::Array(_)),
                required: true,
            },
            Parameter {
                keyword: "separator",
                accepts: |candidate| matches!(candidate, Value::Bytes(_)),
                required: false,
            },
        ]
    }

    /// Pulls both arguments out of `arguments` and wraps them in a boxed
    /// [`JoinFn`].
    ///
    /// # Errors
    ///
    /// Propagates the error from `arguments.required("value")` when the
    /// required argument cannot be obtained.
    fn compile(&self, mut arguments: ArgumentList) -> Result<Box<dyn Expression>> {
        let value = arguments.required("value")?.boxed();
        let separator = arguments.optional("separator").map(|expr| expr.boxed());

        let compiled = JoinFn { value, separator };
        Ok(Box::new(compiled))
    }
}

#[derive(Clone, Debug)]
struct JoinFn {
value: Box<dyn Expression>,
separator: Option<Box<dyn Expression>>,
}

impl Expression for JoinFn {
fn execute(&self, state: &mut state::Program, object: &mut dyn Object) -> Result<Value> {
let array = self.value.execute(state, object)?.try_array()?;

let string_vec = array
.iter()
.map(|s| s.try_bytes_utf8_lossy().map_err(Into::into))
.collect::<Result<Vec<Cow<'_, str>>>>()
.map_err(|_| "all array items must be strings")?;

let separator: String = self
.separator
.as_ref()
.map(|s| {
s.execute(state, object)
.and_then(|v| Value::try_bytes(v).map_err(Into::into))
lucperkins marked this conversation as resolved.
Show resolved Hide resolved
})
.transpose()?
.map(|s| String::from_utf8_lossy(&s).to_string())
.unwrap_or_else(|| "".into());
lucperkins marked this conversation as resolved.
Show resolved Hide resolved

let joined = string_vec.join(&separator);

Ok(Value::from(joined))
}

fn type_def(&self, state: &state::Compiler) -> TypeDef {
use value::Kind;

let separator_type = self
.separator
.as_ref()
.map(|separator| separator.type_def(state).fallible_unless(Kind::Bytes));

self.value
.type_def(state)
.fallible_unless(Kind::Array)
.merge_optional(separator_type)
.fallible_unless_array_has_inner_type(Kind::Bytes)
.with_constraint(Kind::Bytes)
}
}

// Unit tests for the `join` function: static type-definition cases followed by
// runtime cases.
#[cfg(test)]
mod test {
use super::*;
use value::Kind;

// Type-definition cases. Every case expects `Kind::Bytes`; only the expected
// fallibility differs.
// NOTE(review): `test_type_def!` is defined outside this file; `expr` looks like
// the expression under test and `def` the expected `TypeDef` -- confirm.
test_type_def![
// Array of string literals with a string separator.
value_string_array_infallible {
expr: |_| JoinFn {
value: array!["one", "two", "three"].boxed(),
separator: Some(lit!(", ").boxed()),
},
def: TypeDef {
fallible: false,
kind: Kind::Bytes,
..Default::default()
},
}

// Array mixing a string and an integer.
value_mixed_array_fallible {
expr: |_| JoinFn {
value: array!["one", 1].boxed(),
separator: Some(lit!(", ").boxed()),
},
def: TypeDef {
fallible: true,
kind: Kind::Bytes,
..Default::default()
},
}

// `value` is an integer literal rather than an array; no separator.
value_literal_fallible {
expr: |_| JoinFn {
value: lit!(427).boxed(),
separator: None,
},
def: TypeDef {
fallible: true,
kind: Kind::Bytes,
..Default::default()
},
}

// Valid string array, but the separator is an integer literal.
separator_integer_fallible {
expr: |_| JoinFn {
value: array!["one", "two", "three"].boxed(),
separator: Some(lit!(427).boxed()),
},
def: TypeDef {
fallible: true,
kind: Kind::Bytes,
..Default::default()
},
}

// Neither argument has the required type: Boolean value, integer separator.
both_types_wrong_fallible {
expr: |_| JoinFn {
value: lit!(true).boxed(),
separator: Some(lit!(427).boxed()),
},
def: TypeDef {
fallible: true,
kind: Kind::Bytes,
..Default::default()
},
}
];

// Runtime cases for the `join` function.
// NOTE(review): `test_function!` is defined outside this file; `args` looks like
// the call arguments and `want` the expected result -- confirm.
test_function![
join => Join;

// Separator ", " is placed between the items.
with_comma_separator {
args: func_args![value: array!["one", "two", "three"], separator: lit!(", ")],
want: Ok(value!("one, two, three")),
}

// Separator " " is placed between the items.
with_space_separator {
args: func_args![value: array!["one", "two", "three"], separator: lit!(" ")],
want: Ok(value!("one two three")),
}

// With no separator argument the items are concatenated directly.
without_separator {
args: func_args![value: array!["one", "two", "three"]],
want: Ok(value!("onetwothree")),
}

// An integer item in the array produces the fixed error message.
non_string_array_item_throws_error {
args: func_args![value: array!["one", "two", 3]],
want: Err("function call error: all array items must be strings"),
}
];
}
6 changes: 6 additions & 0 deletions lib/remap-functions/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ mod ip_to_ipv6;
mod ipv6_to_ipv4;
#[cfg(feature = "is_nullish")]
mod is_nullish;
#[cfg(feature = "join")]
mod join;
#[cfg(feature = "length")]
mod length;
#[cfg(feature = "log")]
Expand Down Expand Up @@ -189,6 +191,8 @@ pub use ip_to_ipv6::IpToIpv6;
pub use ipv6_to_ipv4::Ipv6ToIpV4;
#[cfg(feature = "is_nullish")]
pub use is_nullish::IsNullish;
#[cfg(feature = "join")]
pub use join::Join;
#[cfg(feature = "length")]
pub use length::Length;
#[cfg(feature = "log")]
Expand Down Expand Up @@ -326,6 +330,8 @@ pub fn all() -> Vec<Box<dyn remap::Function>> {
Box::new(Ipv6ToIpV4),
#[cfg(feature = "is_nullish")]
Box::new(IsNullish),
#[cfg(feature = "join")]
Box::new(Join),
#[cfg(feature = "length")]
Box::new(Length),
#[cfg(feature = "log")]
Expand Down
73 changes: 73 additions & 0 deletions lib/remap-lang/src/type_def.rs
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,20 @@ impl TypeDef {
self
}

/// Marks this type definition as fallible unless it carries an array inner type definition
/// whose item kind is equal to `kind`.
///
/// If you need all items in the array to be integers, for example, pass `Kind::Integer`; if
/// items can be either integers or Booleans, pass `Kind::Integer | Kind::Boolean`; and so on.
///
/// The definition also becomes fallible when it has no inner type definition, or one that is
/// not an array. This method only ever sets `fallible` to `true`; it never clears it.
///
/// NOTE(review): the comparison is strict equality, so an array known to hold only integers
/// is reported as fallible when checked against `Kind::Integer | Kind::Boolean` -- confirm
/// that this is the intended behaviour.
pub fn fallible_unless_array_has_inner_type(mut self, kind: impl Into<value::Kind>) -> Self {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍 to this constraint, but the naming / implementation feels a bit awkward to me. This might be due to the fact that inner_type is a bit awkward right now in-general, but, based on the current naming, I'm wondering if this function shouldn't be setting self.fallible = true if a non-array type is passed into it.

Another suggestion would be to call this fallible_unless_inner_type(...) and implement it such that it could accept any of our types that has an inner type.

In my dream world, I think maybe it'd make sense to have Kind::Array itself be an enum type so that you could do things like: Kind::Array(Kind::Integer | Kind::Boolean) to represent an array of integers/booleans, but that would be a more substantive refactoring.

Copy link
Contributor Author

@lucperkins lucperkins Feb 3, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jszwedko Good call on that. I've updated the behavior to set fallible to true for cases when there's no inner type (plus a test for that 😎). I do like your suggestion about the desired end state but I'll leave that to one of our Remap Hero(in)es.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function could be made much more generic. Some of the different rules we could want:

  • a map that has a certain field of a certain type
  • an array of maps that has a certain field of a certain type
  • either an array of strings or an array of maps that have a certain field of a certain type (say, either ["a", "b", "c"] or [{message: "a"}, {message: "b"}, {message: "c"}]

I think we should wait to see if we actually ever need that level of flexibility before tackling it because it would take a fair bit of thought to get right. So for now I'd say it's good enough to have a fairly specific function that does what we need, but no more. It does result in the awkward name, but I think that reflects the specific nature of where we are up to so far!

In my dream world.

I share this vision! The issue here is that Kind is a bit flag, so Kind::Array | Kind::String is still a Kind. Which is nice. If we make Kind:Array an an enum type, it could no longer be a bit flag. We would then need to distinguish between primitive Kinds and complex Kinds.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I share this vision! The issue here is that Kind is a bit flag, so Kind::Array | Kind::String is still a Kind. Which is nice. If we make Kind:Array an an enum type, it could no longer be a bit flag. We would then need to distinguish between primitive Kinds and complex Kinds.

Agreed, that seems to be the distinction here 👍

match &self.inner_type_def {
// Array inner type whose item kind equals the requested kind: fallibility is left as is.
Some(InnerTypeDef::Array(inner_kind)) if kind.into() == inner_kind.kind => (),
// No inner type definition, any other inner type definition, or a different item kind.
_ => {
self.fallible = true;
}
}

self
}

pub fn merge(self, other: Self) -> Self {
self | other
}
Expand Down Expand Up @@ -412,4 +426,63 @@ mod tests {

assert_eq!(expected, type_def_a | type_def_b);
}

#[test]
fn array_inner_type() {
    // Every item is a string and strings are required -> stays infallible.
    let strings_only = TypeDef {
        inner_type_def: Some(inner_type_def!([Kind::Bytes])),
        ..Default::default()
    };
    assert!(!strings_only
        .fallible_unless_array_has_inner_type(Kind::Bytes)
        .is_fallible());

    // Items may be strings or Booleans, but only strings are required -> fallible.
    let strings_or_booleans = TypeDef {
        inner_type_def: Some(inner_type_def!([Kind::Bytes | Kind::Boolean])),
        ..Default::default()
    };
    assert!(strings_or_booleans
        .fallible_unless_array_has_inner_type(Kind::Bytes)
        .is_fallible());

    // Items may be integers or floats and exactly that union is required -> infallible.
    let integers_or_floats = TypeDef {
        inner_type_def: Some(inner_type_def!([Kind::Integer | Kind::Float])),
        ..Default::default()
    };
    assert!(!integers_or_floats
        .fallible_unless_array_has_inner_type(Kind::Integer | Kind::Float)
        .is_fallible());

    // Items may be Booleans or maps, but floats are required -> fallible.
    let booleans_or_maps = TypeDef {
        inner_type_def: Some(inner_type_def!([Kind::Boolean | Kind::Map])),
        ..Default::default()
    };
    assert!(booleans_or_maps
        .fallible_unless_array_has_inner_type(Kind::Float)
        .is_fallible());

    // The outer kind is a map while the inner type definition describes an array of maps.
    // Strings are required, so the item kinds differ -> fallible.
    let map_with_map_items = TypeDef {
        kind: Kind::Map,
        inner_type_def: Some(inner_type_def!([Kind::Map])),
        ..Default::default()
    };
    assert!(map_with_map_items
        .fallible_unless_array_has_inner_type(Kind::Bytes)
        .is_fallible());

    // No inner type definition at all: applying an inner type constraint to a
    // non-array must be fallible.
    let no_inner_type = TypeDef {
        kind: Kind::Bytes | Kind::Float | Kind::Boolean,
        ..Default::default()
    };
    assert!(no_inner_type
        .fallible_unless_array_has_inner_type(Kind::Bytes)
        .is_fallible());
}
}
27 changes: 27 additions & 0 deletions tests/behavior/transforms/remap.toml
Original file line number Diff line number Diff line change
Expand Up @@ -1938,3 +1938,30 @@
source = '''
.a != ""
'''

[transforms.remap_function_join]
inputs = []
type = "remap"
source = """
items = ["foo", "bar", "baz"]
.comma = join(items, ", ")
.space = join(items, " ")
.none = join(items)
.from_split = join!(split("big bad booper", " "), "__")
"""
[[tests]]
name = "remap_function_join"
[tests.input]
insert_at = "remap_function_join"
type = "log"
[tests.input.log_fields]
[[tests.outputs]]
extract_from = "remap_function_join"
[[tests.outputs.conditions]]
type = "remap"
source = '''
.comma == "foo, bar, baz" && \
.space == "foo bar baz" && \
.none == "foobarbaz" && \
.from_split == "big__bad__booper"
'''