Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(stdlib): add optional psl argument to parse_etld #851

Merged
merged 4 commits into from
May 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 24 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ stdlib = [
"dep:prost",
"dep:prost-reflect",
"dep:psl",
"dep:psl-types",
"dep:publicsuffix",
"dep:quoted_printable",
"dep:rand",
"dep:roxmltree",
Expand Down Expand Up @@ -154,6 +156,8 @@ prettytable-rs = { version = "0.10", default-features = false, optional = true }
quickcheck = { version = "1", optional = true }
quoted_printable = {version = "0.5", optional = true }
psl = { version = "2", optional = true }
psl-types = { version = "2", optional = true }
publicsuffix = { version = "2", optional = true }
rand = { version = "0.8", optional = true }
regex = { version = "1", default-features = false, optional = true, features = ["std", "perf", "unicode"] }
roxmltree = { version = "0.19", optional = true }
Expand Down
1 change: 1 addition & 0 deletions LICENSE-3rdparty.csv
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ prost-reflect,https://github.com/andrewhickman/prost-reflect,MIT OR Apache-2.0,A
psl,https://github.com/addr-rs/psl,MIT OR Apache-2.0,rushmorem <rushmore@webenchanter.com>
psl-types,https://github.com/addr-rs/psl-types,MIT OR Apache-2.0,rushmorem <rushmore@webenchanter.com>
ptr_meta,https://github.com/djkoloski/ptr_meta,MIT,David Koloski <djkoloski@gmail.com>
publicsuffix,https://github.com/rushmorem/publicsuffix,MIT OR Apache-2.0,rushmorem <rushmore@webenchanter.com>
quanta,https://github.com/metrics-rs/quanta,MIT,Toby Lawrence <toby@nuclearfurnace.com>
quote,https://github.com/dtolnay/quote,MIT OR Apache-2.0,David Tolnay <dtolnay@gmail.com>
quoted_printable,https://github.com/staktrace/quoted-printable,0BSD,Kartikaya Gupta <kats@seldon.staktrace.com>
Expand Down
3 changes: 3 additions & 0 deletions changelog.d/851.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Added `psl` argument to the `parse_etld` function. It enables customizing the public suffix list that is used. If none is provided, the default (https://publicsuffix.org/list/public_suffix_list.dat) is used, which is what was used before this change.

authors: esensar
3 changes: 3 additions & 0 deletions lib/tests/tests/functions/custom_public_suffix_list.dat
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// ===BEGIN ICANN DOMAINS===

customdev
4 changes: 4 additions & 0 deletions lib/tests/tests/functions/parse_etld/custom_psl_file.vrl
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# object: { "host": "vector.customdev" }
# result: { "etld": "customdev", "etld_plus": "vector.customdev", "known_suffix": true }

parse_etld!(.host, plus_parts: 1, psl: "lib/tests/tests/functions/custom_public_suffix_list.dat")
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# object: { "host": "vector.customdev" }
# result:
#
# error[E610]: function compilation error: error[E403] invalid argument
# ┌─ :2:1
# │
# 2 │ parse_etld!(.host, plus_parts: 1, psl: "lib/tests/tests/functions/definitelydoesnot.exist")
# │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# │ │
# │ invalid argument "psl"
# │ error: Unable to read psl file
# │ received: "\"lib/tests/tests/functions/definitelydoesnot.exist\""
# │
# = learn more about error code 403 at https://errors.vrl.dev/403
# = see language documentation at https://vrl.dev
# = try your code in the VRL REPL, learn more at https://vrl.dev/examples

parse_etld!(.host, plus_parts: 1, psl: "lib/tests/tests/functions/definitelydoesnot.exist")
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# object: { "host": "vector.customdev" }
# result:
#
# error[E610]: function compilation error: error[E403] invalid argument
# ┌─ :2:1
# │
# 2 │ parse_etld!(.host, plus_parts: 1, psl: "lib/tests/tests/functions/parse_groks_alias_source.json")
# │ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# │ │
# │ invalid argument "psl"
# │ error: Unable to parse psl file
# │ received: "\"lib/tests/tests/functions/parse_groks_alias_source.json\""
# │
# = learn more about error code 403 at https://errors.vrl.dev/403
# = see language documentation at https://vrl.dev
# = try your code in the VRL REPL, learn more at https://vrl.dev/examples

parse_etld!(.host, plus_parts: 1, psl: "lib/tests/tests/functions/parse_groks_alias_source.json")
3 changes: 1 addition & 2 deletions lib/tests/tests/functions/parse_etld/etld_plus_one.vrl
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
# object: { "host": "vector.dev" }
# result: { "etld": "dev", "etld_plus": "vector.dev", "known_suffix": true }

etld_result = parse_etld!(.host, plus_parts: 1)
etld_result
parse_etld!(.host, plus_parts: 1)
3 changes: 1 addition & 2 deletions lib/tests/tests/functions/parse_etld/etld_plus_ten.vrl
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
# object: { "host": "vector.dev" }
# result: { "etld": "dev", "etld_plus": "vector.dev", "known_suffix": true }

etld_result = parse_etld!(.host, plus_parts: 10)
etld_result
parse_etld!(.host, plus_parts: 10)
69 changes: 64 additions & 5 deletions src/stdlib/parse_etld.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
use psl::Psl;
use publicsuffix::List;

use crate::compiler::prelude::*;
use std::collections::BTreeMap;
use std::{collections::BTreeMap, path::Path};

#[derive(Clone, Copy, Debug)]
pub struct ParseEtld;
Expand All @@ -21,6 +24,11 @@ impl Function for ParseEtld {
kind: kind::INTEGER,
required: false,
},
Parameter {
keyword: "psl",
kind: kind::BYTES,
required: false,
},
]
}

Expand Down Expand Up @@ -64,21 +72,62 @@ impl Function for ParseEtld {

/// Builds the `ParseEtldFn` expression from the call's arguments.
///
/// `value` is required; `plus_parts` is optional and defaults to `expr!(0)`.
/// When the optional `psl` argument is present it must resolve to a constant
/// during compilation; its value is used as a file path, and that file is read
/// and parsed into a `List` here, so an unreadable or unparsable file surfaces
/// as an `InvalidArgument` error from this function.
// NOTE(review): this listing looks like a rendered diff, so superseded lines
// (the `_state` parameter and the two-field `Ok(...)`) appear next to their
// replacements — confirm against the real file.
fn compile(
&self,
_state: &state::TypeState,
state: &state::TypeState,
_ctx: &mut FunctionCompileContext,
arguments: ArgumentList,
) -> Compiled {
let value = arguments.required("value");
let plus_parts = arguments.optional("plus_parts").unwrap_or_else(|| expr!(0));

Ok(ParseEtldFn { value, plus_parts }.as_expr())
// The custom public suffix list is optional; `psl` stays `None` when the
// argument is not supplied.
let psl_expr = arguments.optional_expr("psl");
let mut psl: Option<List> = None;
if let Some(psl_expr) = psl_expr {
let psl_location = psl_expr
.clone()
// The path must be a constant expression; otherwise report
// `ExpectedStaticExpression` for the `psl` keyword.
.resolve_constant(state)
.ok_or(function::Error::ExpectedStaticExpression {
keyword: "psl",
expr: psl_expr.clone(),
})?
// Any failure converting the constant to a string is reported as
// "psl should be a string".
.try_bytes_utf8_lossy()
.map_err(|_| function::Error::InvalidArgument {
keyword: "psl",
value: format!("{psl_expr:?}").into(),
error: "psl should be a string",
})?
.into_owned();

let path = Path::new(&psl_location);
psl = Some(
// Read the whole file, then parse its contents; the read and parse
// failures each get their own error message.
std::fs::read_to_string(path)
.map_err(|_| function::Error::InvalidArgument {
keyword: "psl",
value: format!("{path:?}").into(),
error: "Unable to read psl file",
})?
.parse()
.map_err(|_| function::Error::InvalidArgument {
keyword: "psl",
value: format!("{path:?}").into(),
error: "Unable to parse psl file",
})?,
);
}

Ok(ParseEtldFn {
value,
plus_parts,
psl,
}
.as_expr())
}
}

/// Compiled form of a `parse_etld` call, produced by `ParseEtld::compile`.
#[derive(Debug, Clone)]
struct ParseEtldFn {
// Expression for the required `value` argument.
value: Box<dyn Expression>,
// Expression for the `plus_parts` argument (`expr!(0)` when omitted).
plus_parts: Box<dyn Expression>,
// Custom public suffix list loaded during compilation from the `psl` path;
// `None` when no `psl` argument was given, in which case the built-in
// `psl::suffix` lookup is used instead.
psl: Option<List>,
}

impl FunctionExpression for ParseEtldFn {
Expand All @@ -91,8 +140,12 @@ impl FunctionExpression for ParseEtldFn {
x => x as usize,
};

let etld = psl::suffix(string.as_bytes())
.ok_or(format!("unable to determine eTLD for {string}"))?;
let suffix_result = if let Some(list) = &self.psl {
list.suffix(string.as_bytes())
} else {
psl::suffix(string.as_bytes())
};
let etld = suffix_result.ok_or(format!("unable to determine eTLD for {string}"))?;
let etld_string = core::str::from_utf8(etld.as_bytes())
.map_err(|err| format!("could not convert eTLD to UTF8 {err}"))?;

Expand Down Expand Up @@ -236,5 +289,11 @@ mod tests {
want: Err("unable to determine eTLD for "),
tdef: TypeDef::object(inner_kind()).fallible(),
}

bad_psl_file {
args: func_args![value: value!("vector.dev"), psl: value!("definitelynotafile")],
want: Err("invalid argument"),
tdef: TypeDef::object(inner_kind()).fallible(),
}
];
}
Loading