diff --git a/turbopack/crates/turbopack-core/src/resolve/pattern.rs b/turbopack/crates/turbopack-core/src/resolve/pattern.rs index bede4a1df8f7b..4e5a34d58915b 100644 --- a/turbopack/crates/turbopack-core/src/resolve/pattern.rs +++ b/turbopack/crates/turbopack-core/src/resolve/pattern.rs @@ -812,10 +812,13 @@ impl Pattern { pub fn is_match(&self, value: &str) -> bool { if let Pattern::Alternatives(list) = self { - list.iter() - .any(|alt| alt.match_internal(value, None, false).is_match()) + list.iter().any(|alt| { + alt.match_internal(value, None, InNodeModules::False, false) + .is_match() + }) } else { - self.match_internal(value, None, false).is_match() + self.match_internal(value, None, InNodeModules::False, false) + .is_match() } } @@ -823,19 +826,24 @@ impl Pattern { /// pattern matching pub fn is_match_ignore_dynamic(&self, value: &str) -> bool { if let Pattern::Alternatives(list) = self { - list.iter() - .any(|alt| alt.match_internal(value, None, true).is_match()) + list.iter().any(|alt| { + alt.match_internal(value, None, InNodeModules::False, true) + .is_match() + }) } else { - self.match_internal(value, None, true).is_match() + self.match_internal(value, None, InNodeModules::False, true) + .is_match() } } pub fn match_position(&self, value: &str) -> Option { if let Pattern::Alternatives(list) = self { - list.iter() - .position(|alt| alt.match_internal(value, None, false).is_match()) + list.iter().position(|alt| { + alt.match_internal(value, None, InNodeModules::False, false) + .is_match() + }) } else { - self.match_internal(value, None, false) + self.match_internal(value, None, InNodeModules::False, false) .is_match() .then_some(0) } @@ -843,39 +851,50 @@ impl Pattern { pub fn could_match_others(&self, value: &str) -> bool { if let Pattern::Alternatives(list) = self { - list.iter() - .any(|alt| alt.match_internal(value, None, false).could_match_others()) + list.iter().any(|alt| { + alt.match_internal(value, None, InNodeModules::False, false) + .could_match_others() + }) } else { - self.match_internal(value, None, false).could_match_others() + self.match_internal(value, None, InNodeModules::False, false) + .could_match_others() } } /// Returns true if all matches of the pattern start with `value`. pub fn must_match(&self, value: &str) -> bool { if let Pattern::Alternatives(list) = self { - list.iter() - .all(|alt| alt.match_internal(value, None, false).could_match()) + list.iter().all(|alt| { + alt.match_internal(value, None, InNodeModules::False, false) + .could_match() + }) } else { - self.match_internal(value, None, false).could_match() + self.match_internal(value, None, InNodeModules::False, false) + .could_match() } } /// Returns true the pattern could match something that starts with `value`. pub fn could_match(&self, value: &str) -> bool { if let Pattern::Alternatives(list) = self { - list.iter() - .any(|alt| alt.match_internal(value, None, false).could_match()) + list.iter().any(|alt| { + alt.match_internal(value, None, InNodeModules::False, false) + .could_match() + }) } else { - self.match_internal(value, None, false).could_match() + self.match_internal(value, None, InNodeModules::False, false) + .could_match() } } pub fn could_match_position(&self, value: &str) -> Option { if let Pattern::Alternatives(list) = self { - list.iter() - .position(|alt| alt.match_internal(value, None, false).could_match()) + list.iter().position(|alt| { + alt.match_internal(value, None, InNodeModules::False, false) + .could_match() + }) } else { - self.match_internal(value, None, false) + self.match_internal(value, None, InNodeModules::False, false) .could_match() .then_some(0) } @@ -884,6 +903,7 @@ impl Pattern { &self, mut value: &'a str, mut any_offset: Option, + mut in_node_modules: InNodeModules, ignore_dynamic: bool, ) -> MatchResult<'a> { match self { @@ -894,6 +914,7 @@ impl Pattern { MatchResult::Consumed { remaining: &value[index + c.len()..], any_offset: None, + in_node_modules: InNodeModules::check(c), } } else { MatchResult::None @@ -907,6 +928,7 @@ impl Pattern { MatchResult::Consumed { remaining: &value[c.len()..], any_offset: None, + in_node_modules: InNodeModules::check(c), } } else if c.starts_with(value) { MatchResult::Partial @@ -920,10 +942,15 @@ impl Pattern { }); static FORBIDDEN_MATCH: LazyLock = LazyLock::new(|| Regex::new(r"\.d\.ts$|\.map$").unwrap()); - if let Some(m) = FORBIDDEN.find(value) { + if in_node_modules == InNodeModules::FolderSlashMatched + || (in_node_modules == InNodeModules::FolderMatched && value.starts_with('/')) + { + MatchResult::None + } else if let Some(m) = FORBIDDEN.find(value) { MatchResult::Consumed { remaining: value, any_offset: Some(m.start()), + in_node_modules: InNodeModules::False, } } else if FORBIDDEN_MATCH.find(value).is_some() { MatchResult::Partial @@ -937,6 +964,7 @@ impl Pattern { MatchResult::Consumed { remaining: value, any_offset: Some(match_length), + in_node_modules: InNodeModules::False, } } } @@ -945,21 +973,24 @@ impl Pattern { } Pattern::Concatenation(list) => { for part in list { - match part.match_internal(value, any_offset, ignore_dynamic) { + match part.match_internal(value, any_offset, in_node_modules, ignore_dynamic) { MatchResult::None => return MatchResult::None, MatchResult::Partial => return MatchResult::Partial, MatchResult::Consumed { remaining: new_value, any_offset: new_any_offset, + in_node_modules: new_in_node_modules, } => { value = new_value; any_offset = new_any_offset; + in_node_modules = new_in_node_modules } } } MatchResult::Consumed { remaining: value, any_offset, + in_node_modules, } } } @@ -971,6 +1002,7 @@ impl Pattern { &self, mut value: &'a str, mut any_offset: Option, + mut in_node_modules: InNodeModules, dynamics: &mut VecDeque<&'a str>, ) -> MatchResult<'a> { match self { @@ -984,6 +1016,7 @@ impl Pattern { MatchResult::Consumed { remaining: &value[index + c.len()..], any_offset: None, + in_node_modules: InNodeModules::check(c), } } else { MatchResult::None @@ -997,6 +1030,7 @@ impl Pattern { MatchResult::Consumed { remaining: &value[c.len()..], any_offset: None, + in_node_modules: InNodeModules::check(c), } } else if c.starts_with(value) { MatchResult::Partial @@ -1010,10 +1044,15 @@ impl Pattern { }); static FORBIDDEN_MATCH: LazyLock = LazyLock::new(|| Regex::new(r"\.d\.ts$|\.map$").unwrap()); - if let Some(m) = FORBIDDEN.find(value) { + if in_node_modules == InNodeModules::FolderSlashMatched + || (in_node_modules == InNodeModules::FolderMatched && value.starts_with('/')) + { + MatchResult::None + } else if let Some(m) = FORBIDDEN.find(value) { MatchResult::Consumed { remaining: value, any_offset: Some(m.start()), + in_node_modules: InNodeModules::False, } } else if FORBIDDEN_MATCH.find(value).is_some() { MatchResult::Partial @@ -1025,6 +1064,7 @@ impl Pattern { MatchResult::Consumed { remaining: value, any_offset: Some(match_length), + in_node_modules: InNodeModules::False, } } } @@ -1033,15 +1073,18 @@ impl Pattern { } Pattern::Concatenation(list) => { for part in list { - match part.match_collect_internal(value, any_offset, dynamics) { + match part.match_collect_internal(value, any_offset, in_node_modules, dynamics) + { MatchResult::None => return MatchResult::None, MatchResult::Partial => return MatchResult::Partial, MatchResult::Consumed { remaining: new_value, any_offset: new_any_offset, + in_node_modules: new_in_node_modules, } => { value = new_value; any_offset = new_any_offset; + in_node_modules = new_in_node_modules } } } @@ -1053,6 +1096,7 @@ impl Pattern { MatchResult::Consumed { remaining: value, any_offset, + in_node_modules, } } } @@ -1249,7 +1293,7 @@ impl Pattern { let mut dynamics = VecDeque::new(); // This is definitely a match, because it matched above in `self.match_position(value)` - source.match_collect_internal(value, None, &mut dynamics); + source.match_collect_internal(value, None, InNodeModules::False, &mut dynamics); let mut result = "".to_string(); match target { @@ -1293,6 +1337,26 @@ impl Pattern { } } +#[derive(PartialEq, Debug)] +enum InNodeModules { + False, + // Inside of a match ending in `node_modules` + FolderMatched, + // Inside of a match ending in `node_modules/` + FolderSlashMatched, +} +impl InNodeModules { + fn check(value: &str) -> Self { + if value.ends_with("node_modules/") { + InNodeModules::FolderSlashMatched + } else if value.ends_with("node_modules") { + InNodeModules::FolderMatched + } else { + InNodeModules::False + } + } +} + #[derive(PartialEq, Debug)] enum MatchResult<'a> { /// No match @@ -1306,6 +1370,9 @@ enum MatchResult<'a> { /// Set when the pattern ends with a dynamic part. The dynamic part /// could match n bytes more of the string. any_offset: Option, + /// Set when the pattern ends with `node_modules` or `node_modules/` (and a following + /// Pattern::Dynamic would thus match all existing packages) + in_node_modules: InNodeModules, }, } @@ -1318,6 +1385,7 @@ impl MatchResult<'_> { MatchResult::Consumed { remaining: rem, any_offset, + in_node_modules: _, } => { if let Some(offset) = any_offset { *offset == rem.len() @@ -1337,6 +1405,7 @@ impl MatchResult<'_> { MatchResult::Consumed { remaining: rem, any_offset, + in_node_modules: _, } => { if let Some(offset) = any_offset { *offset == rem.len() @@ -1356,6 +1425,7 @@ impl MatchResult<'_> { MatchResult::Consumed { remaining: rem, any_offset, + in_node_modules: _, } => { if let Some(offset) = any_offset { *offset == rem.len() @@ -1833,10 +1903,16 @@ fn split_last_segment(path: &str) -> (&str, &str) { #[cfg(test)] mod tests { + use std::path::Path; + use rstest::*; use turbo_rcstr::{RcStr, rcstr}; + use turbo_tasks_backend::{BackendOptions, TurboTasksBackend, noop_backing_storage}; + use turbo_tasks_fs::{DiskFileSystem, FileSystem}; - use super::{Pattern, longest_common_prefix, longest_common_suffix, split_last_segment}; + use super::{ + Pattern, longest_common_prefix, longest_common_suffix, read_matches, split_last_segment, + }; #[test] fn longest_common_prefix_test() { @@ -2284,6 +2360,16 @@ mod tests { assert!(pat.could_match("dir/inner/file.d.ts.map")); } + #[rstest] + #[case::slash(Pattern::Concatenation(vec![Pattern::Constant(rcstr!("node_modules/")),Pattern::Dynamic]))] + #[case::nested(Pattern::Constant(rcstr!("node_modules")).or_any_nested_file())] + fn dynamic_match_node_modules(#[case] pat: Pattern) { + assert!(!pat.is_match("node_modules/package")); + assert!(!pat.could_match("node_modules/package")); + assert!(!pat.is_match("node_modules/package/index.js")); + assert!(!pat.could_match("node_modules/package/index.js")); + } + #[rstest] fn dynamic_match2() { let pat = Pattern::Concatenation(vec![ @@ -2567,4 +2653,79 @@ mod tests { assert_eq!(split_last_segment("../../a"), ("../..", "a")); assert_eq!(split_last_segment("../../a/"), ("../..", "a")); } + + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] + async fn test_read_matches() { + let tt = turbo_tasks::TurboTasks::new(TurboTasksBackend::new( + BackendOptions::default(), + noop_backing_storage(), + )); + tt.run_once(async { + crate::register(); + let root = DiskFileSystem::new( + rcstr!("test"), + Path::new(env!("CARGO_MANIFEST_DIR")) + .join("tests/pattern/read_matches") + .to_str() + .unwrap() + .into(), + ) + .root() + .owned() + .await?; + + // node_modules shouldn't be matched by Dynamic here + assert_eq!( + vec!["index.js", "sub", "sub/", "sub/foo-a.js", "sub/foo-b.js"], + read_matches( + root.clone(), + rcstr!(""), + false, + Pattern::new(Pattern::Dynamic), + ) + .await? + .into_iter() + .map(|m| m.name()) + .collect::>() + ); + + // basic dynamic file suffix + assert_eq!( + vec!["sub/foo-a.js", "sub/foo-b.js"], + read_matches( + root.clone(), + rcstr!(""), + false, + Pattern::new(Pattern::concat([ + Pattern::Constant(rcstr!("sub/foo")), + Pattern::Dynamic, + ])), + ) + .await? + .into_iter() + .map(|m| m.name()) + .collect::>() + ); + + // read_matches "node_modules/" should not return anything inside. We never + // want to enumerate the list of packages here. + assert_eq!( + vec!["node_modules"] as Vec<&str>, + read_matches( + root.clone(), + rcstr!(""), + false, + Pattern::new(Pattern::Constant(rcstr!("node_modules")).or_any_nested_file()), + ) + .await? + .into_iter() + .map(|m| m.name()) + .collect::>() + ); + + anyhow::Ok(()) + }) + .await + .unwrap(); + } } diff --git a/turbopack/crates/turbopack-core/tests/.gitignore b/turbopack/crates/turbopack-core/tests/.gitignore new file mode 100644 index 0000000000000..5f2fb0a1311f1 --- /dev/null +++ b/turbopack/crates/turbopack-core/tests/.gitignore @@ -0,0 +1 @@ +!** diff --git a/turbopack/crates/turbopack-core/tests/pattern/read_matches/index.js b/turbopack/crates/turbopack-core/tests/pattern/read_matches/index.js new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/turbopack/crates/turbopack-core/tests/pattern/read_matches/node_modules/pkg/LICENSE b/turbopack/crates/turbopack-core/tests/pattern/read_matches/node_modules/pkg/LICENSE new file mode 100644 index 0000000000000..5b1dd02ff27e8 --- /dev/null +++ b/turbopack/crates/turbopack-core/tests/pattern/read_matches/node_modules/pkg/LICENSE @@ -0,0 +1 @@ +Some Text diff --git a/turbopack/crates/turbopack-core/tests/pattern/read_matches/node_modules/pkg/README.md b/turbopack/crates/turbopack-core/tests/pattern/read_matches/node_modules/pkg/README.md new file mode 100644 index 0000000000000..5b1dd02ff27e8 --- /dev/null +++ b/turbopack/crates/turbopack-core/tests/pattern/read_matches/node_modules/pkg/README.md @@ -0,0 +1 @@ +Some Text diff --git a/turbopack/crates/turbopack-core/tests/pattern/read_matches/node_modules/pkg/index.js b/turbopack/crates/turbopack-core/tests/pattern/read_matches/node_modules/pkg/index.js new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/turbopack/crates/turbopack-core/tests/pattern/read_matches/node_modules/pkg/package.json b/turbopack/crates/turbopack-core/tests/pattern/read_matches/node_modules/pkg/package.json new file mode 100644 index 0000000000000..0967ef424bce6 --- /dev/null +++ b/turbopack/crates/turbopack-core/tests/pattern/read_matches/node_modules/pkg/package.json @@ -0,0 +1 @@ +{} diff --git a/turbopack/crates/turbopack-core/tests/pattern/read_matches/sub/foo-a.js b/turbopack/crates/turbopack-core/tests/pattern/read_matches/sub/foo-a.js new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/turbopack/crates/turbopack-core/tests/pattern/read_matches/sub/foo-a.js.map b/turbopack/crates/turbopack-core/tests/pattern/read_matches/sub/foo-a.js.map new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/turbopack/crates/turbopack-core/tests/pattern/read_matches/sub/foo-b.js b/turbopack/crates/turbopack-core/tests/pattern/read_matches/sub/foo-b.js new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/turbopack/crates/turbopack-ecmascript/src/references/node.rs b/turbopack/crates/turbopack-ecmascript/src/references/node.rs index 7421314d4aec3..fc7f14b03fe6f 100644 --- a/turbopack/crates/turbopack-ecmascript/src/references/node.rs +++ b/turbopack/crates/turbopack-ecmascript/src/references/node.rs @@ -1,5 +1,4 @@ use anyhow::Result; -use either::Either; use tracing::Instrument; use turbo_rcstr::{RcStr, rcstr}; use turbo_tasks::{ResolvedVc, ValueToString, Vc}; @@ -78,51 +77,42 @@ async fn resolve_reference_from_dir( ) -> Result> { let path_ref = path.await?; let (abs_path, rel_path) = path_ref.split_could_match("/ROOT/"); - let matches = match (abs_path, rel_path) { - (Some(abs_path), Some(rel_path)) => Either::Right( - read_matches( - parent_path.root().owned().await?, - rcstr!("/ROOT/"), - true, - Pattern::new(abs_path.or_any_nested_file()), - ) - .await? - .into_iter() - .chain( - read_matches( - parent_path, - rcstr!(""), - true, - Pattern::new(rel_path.or_any_nested_file()), - ) - .await? - .into_iter(), - ), - ), - (Some(abs_path), None) => Either::Left( - // absolute path only + if abs_path.is_none() && rel_path.is_none() { + return Ok(*ModuleResolveResult::unresolvable()); + } + + let abs_matches = if let Some(abs_path) = &abs_path { + Some( read_matches( parent_path.root().owned().await?, rcstr!("/ROOT/"), true, Pattern::new(abs_path.or_any_nested_file()), ) - .await? - .into_iter(), - ), - (None, Some(rel_path)) => Either::Left( - // relative path only + .await?, + ) + } else { + None + }; + let rel_matches = if let Some(rel_path) = &rel_path { + Some( read_matches( parent_path, rcstr!(""), true, Pattern::new(rel_path.or_any_nested_file()), ) - .await? - .into_iter(), - ), - (None, None) => return Ok(*ModuleResolveResult::unresolvable()), + .await?, + ) + } else { + None }; + + let matches = abs_matches + .into_iter() + .flatten() + .chain(rel_matches.into_iter().flatten()); + let mut affecting_sources = Vec::new(); let mut results = Vec::new(); for pat_match in matches { @@ -158,7 +148,7 @@ impl ModuleReference for DirAssetReference { async fn resolve_reference(&self) -> Result> { let parent_path = self.source.ident().path().await?.parent(); let span = tracing::info_span!( - "resolve DirAssetReference", + "trace directory", pattern = display(self.path.to_string().await?) ); async { diff --git a/turbopack/crates/turbopack-ecmascript/src/references/raw.rs b/turbopack/crates/turbopack-ecmascript/src/references/raw.rs index 2bb91a4e4ae62..4983221972b1e 100644 --- a/turbopack/crates/turbopack-ecmascript/src/references/raw.rs +++ b/turbopack/crates/turbopack-ecmascript/src/references/raw.rs @@ -1,4 +1,5 @@ use anyhow::Result; +use tracing::Instrument; use turbo_rcstr::RcStr; use turbo_tasks::{ResolvedVc, ValueToString, Vc}; use turbopack_core::{ @@ -28,7 +29,18 @@ impl ModuleReference for FileSourceReference { async fn resolve_reference(&self) -> Result> { let context_dir = self.source.ident().path().await?.parent(); - Ok(resolve_raw(context_dir, *self.path, false).as_raw_module_result()) + let span = tracing::info_span!( + "trace file", + pattern = display(self.path.to_string().await?) + ); + async { + resolve_raw(context_dir, *self.path, false) + .as_raw_module_result() + .resolve() + .await + } + .instrument(span) + .await } }