Skip to content

Commit

Permalink
fix(turborepo): Size comes from the thing being read. (#6092)
Browse files Browse the repository at this point in the history
Co-authored-by: Nathan Hammond <Nathan Hammond>
Co-authored-by: Greg Soltis <gsoltis@gmail.com>
Co-authored-by: Greg Soltis <greg.soltis@vercel.com>
Co-authored-by: Greg Soltis <Greg Soltis>
  • Loading branch information
3 people committed Oct 17, 2023
1 parent 6ffb2e9 commit 75a68ce
Show file tree
Hide file tree
Showing 2 changed files with 101 additions and 14 deletions.
1 change: 1 addition & 0 deletions crates/turborepo-scm/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#![feature(error_generic_member_access)]
#![feature(io_error_more)]
#![feature(assert_matches)]
#![deny(clippy::all)]

Expand Down
114 changes: 100 additions & 14 deletions crates/turborepo-scm/src/manual.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::{fs::Metadata, io::Read};
use std::io::{ErrorKind, Read};

use globwalk::fix_glob_pattern;
use hex::ToHex;
Expand All @@ -9,13 +9,19 @@ use wax::{any, Glob, Pattern};

use crate::{package_deps::GitHashes, Error};

fn git_like_hash_file(path: &AbsoluteSystemPath, metadata: &Metadata) -> Result<String, Error> {
fn git_like_hash_file(path: &AbsoluteSystemPath) -> Result<String, Error> {
let mut hasher = Sha1::new();
let mut f = path.open()?;
let mut buffer = Vec::new();
f.read_to_end(&mut buffer)?;
// Note that read_to_end reads the target if f is a symlink. Currently, this can
// happen when we are hashing a specific set of files, which in turn only
// happens for handling dotEnv files. It is likely that in the future we
// will want to ensure that the target is better accounted for in the set of
// inputs to the task. Manual hashing, as well as global deps and other
// places that support globs all ignore symlinks.
let size = f.read_to_end(&mut buffer)?;
hasher.update("blob ".as_bytes());
hasher.update(metadata.len().to_string().as_bytes());
hasher.update(size.to_string().as_bytes());
hasher.update([b'\0']);
hasher.update(buffer.as_slice());
let result = hasher.finalize();
Expand All @@ -30,17 +36,15 @@ pub(crate) fn hash_files(
let mut hashes = GitHashes::new();
for file in files.into_iter() {
let path = root_path.resolve(file.as_ref());
let metadata = match path.symlink_metadata() {
Ok(metadata) => metadata,
Err(e) => {
if allow_missing && e.is_io_error(std::io::ErrorKind::NotFound) {
continue;
}
return Err(e.into());
match git_like_hash_file(&path) {
Ok(hash) => hashes.insert(file.as_ref().to_unix(), hash),
Err(Error::Io(ref io_error, _))
if allow_missing && io_error.kind() == ErrorKind::NotFound =>
{
continue
}
Err(e) => return Err(e),
};
let hash = git_like_hash_file(&path, &metadata)?;
hashes.insert(file.as_ref().to_unix(), hash);
}
Ok(hashes)
}
Expand Down Expand Up @@ -109,14 +113,16 @@ pub(crate) fn get_package_file_hashes_from_processing_gitignore<S: AsRef<str>>(
if metadata.is_symlink() {
continue;
}
let hash = git_like_hash_file(path, &metadata)?;
let hash = git_like_hash_file(path)?;
hashes.insert(relative_path, hash);
}
Ok(hashes)
}

#[cfg(test)]
mod tests {
use std::assert_matches::assert_matches;

use test_case::test_case;
use turbopath::{
AbsoluteSystemPathBuf, AnchoredSystemPathBuf, RelativeUnixPath, RelativeUnixPathBuf,
Expand Down Expand Up @@ -164,6 +170,86 @@ mod tests {
}
}

#[test]
fn test_hash_symlink() {
    // Exercises `hash_files` against three kinds of symlink: one pointing at a
    // regular file, one pointing at a directory, and one whose target does not
    // exist. Each is checked with `allow_missing` both on and off where the
    // outcome differs.
    let (_tmp, turbo_root) = tmp_dir();

    // Real targets that the links point at.
    let file_target = turbo_root.join_component("the-file-target");
    file_target.create_with_contents("contents").unwrap();
    let dir_target = turbo_root.join_component("the-dir-target");
    dir_target.create_dir_all().unwrap();

    // The links themselves; the last one is intentionally dangling.
    let link_to_file = turbo_root.join_component("symlink-from-to-file");
    link_to_file
        .symlink_to_file(file_target.to_string())
        .unwrap();
    let link_to_dir = turbo_root.join_component("symlink-from-to-dir");
    link_to_dir.symlink_to_dir(dir_target.to_string()).unwrap();
    let dangling = turbo_root.join_component("symlink-broken");
    dangling.symlink_to_file("does-not-exist").unwrap();

    // Hashes exactly one path (relative to the temp root) with the requested
    // `allow_missing` setting and hands back the raw result.
    let hash_one = |name: &str, allow_missing: bool| {
        let files = [AnchoredSystemPathBuf::from_raw(name).unwrap()];
        hash_files(&turbo_root, files.iter(), allow_missing)
    };

    // Symlink to file: the entry is keyed by the link's own name and must
    // carry the pinned hash below.
    let hashes = hash_one("symlink-from-to-file", true).unwrap();
    let file_key = RelativeUnixPathBuf::new("symlink-from-to-file").unwrap();
    let from_to_file_hash = hashes.get(&file_key).unwrap();
    assert_eq!(
        from_to_file_hash,
        "0839b2e9412b314cb8bb9a20f587aa13752ae310"
    );

    // Symlink to dir, allow_missing = true. Only asserted off Windows, where
    // the failure is expected to surface as `IsADirectory`.
    #[cfg(not(windows))]
    {
        let Error::Io(io_error, _) = hash_one("symlink-from-to-dir", true).err().unwrap() else {
            panic!("wrong error")
        };
        assert_eq!(io_error.kind(), ErrorKind::IsADirectory);
    }

    // Symlink to dir, allow_missing = false. The expected error kind differs
    // per platform: `PermissionDenied` on Windows, `IsADirectory` elsewhere.
    let expected_err_kind = if cfg!(windows) {
        ErrorKind::PermissionDenied
    } else {
        ErrorKind::IsADirectory
    };
    assert_matches!(
        hash_one("symlink-from-to-dir", false).unwrap_err(),
        Error::Io(io_error, _) if io_error.kind() == expected_err_kind
    );

    // Broken symlink with allow_missing = true: the call succeeds and simply
    // has no entry for the link.
    let hashes = hash_one("symlink-broken", true).unwrap();
    let broken_key = RelativeUnixPathBuf::new("symlink-broken").unwrap();
    let broken_hash = hashes.get(&broken_key);
    assert_eq!(broken_hash, None);

    // Broken symlink with allow_missing = false: the missing target must be
    // reported as an I/O `NotFound` error.
    let Error::Io(io_error, _) = hash_one("symlink-broken", false).err().unwrap() else {
        panic!("wrong error")
    };
    assert_eq!(io_error.kind(), ErrorKind::NotFound);
}

#[test]
fn test_get_package_file_hashes_from_processing_gitignore() {
let root_ignore_contents = ["ignoreme", "ignorethisdir/"].join("\n");
Expand Down

0 comments on commit 75a68ce

Please sign in to comment.