Skip to content

Commit

Permalink
Fix dir::get_size's handling of symlinks
Browse files Browse the repository at this point in the history
This makes `dir::get_size` give a correct calculation of the size
of a path of directory, that now matches that returned by eg:
`du --bytes -s <path>`.

Before:
- Symlinks were followed, causing double-counting of symlinked
  files (see #59).
- Whilst the metadata size of subdirectory entries was counted
  correctly, the metadata size of the parent directory (the initial
  `path` passed into `dir::get_size`) was not counted. See:
   #25 (comment)

Now:
- Symlinks are no longer followed (fixing the regression from #34),
   so that only the size of the symlink itself is included (as expected),
   and not the size of the symlink's target.
- If the initial `path` passed into `dir::get_size` is a directory, the
   size of that directory entry itself is now correctly counted (like it
   was already being counted for subdirectories).

Fixes #25.
Fixes #59.
  • Loading branch information
edmorley committed Jul 1, 2022
1 parent 5fccd17 commit 1bebf1e
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 16 deletions.
37 changes: 27 additions & 10 deletions src/dir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -758,11 +758,13 @@ where
})
}

/// Returns the size of the file or directory in bytes
/// Returns the size of the file or directory in bytes.
///
/// If used on a directory, this function will recursively iterate over every file and every
/// directory inside the directory. This can be very time consuming if used on large directories.
///
/// Does not follow symlinks.
///
/// # Errors
///
/// This function will return an error in the following situations, but is not limited to just
Expand All @@ -784,20 +786,35 @@ pub fn get_size<P>(path: P) -> Result<u64>
where
P: AsRef<Path>,
{
let mut result = 0;
let mut size_in_bytes = 0;

if path.as_ref().is_dir() {
// Using `fs::symlink_metadata` since we don't want to follow symlinks,
// as we're calculating the exact size of the requested path itself.
let path_metadata = path.as_ref().symlink_metadata()?;

// For directories this is just the size of the directory entry itself, not its contents.
// Similarly for symlinks, this is the size of the symlink entry, not its target.
// In both cases, we still want to count these so that we get an accurate total size.
size_in_bytes += path_metadata.len();

if path_metadata.is_dir() {
for entry in read_dir(&path)? {
let _path = entry?.path();
result += _path.metadata()?.len();
if _path.is_dir() {
result += get_size(_path)?;
let entry = entry?;
// `DirEntry::metadata` does not follow symlinks (unlike `fs::metadata`), so in the
// case of symlinks, this is the size of the symlink itself, not its target.
let entry_metadata = entry.metadata()?;

if entry_metadata.is_dir() {
// The size of the directory entry itself will be counted inside the `get_size()` call,
// so we intentionally don't also add `entry_metadata.len()` to the total here.
size_in_bytes += get_size(entry.path())?;
} else {
size_in_bytes += entry_metadata.len();
}
}
} else {
result = path.as_ref().metadata()?.len();
}
Ok(result)

Ok(size_in_bytes)
}

/// Copies the directory contents from one place to another using recursive method,
Expand Down
55 changes: 49 additions & 6 deletions tests/dir.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use std::collections::HashSet;
use std::fs::read_dir;
use std::fs::{self, read_dir};
use std::path::{Path, PathBuf};
use std::sync::mpsc::{self, TryRecvError};
use std::thread;
Expand Down Expand Up @@ -70,6 +70,22 @@ fn get_dir_size() -> u64 {
.len()
}

#[cfg(unix)]
fn create_file_symlink<P: AsRef<Path>, Q: AsRef<Path>>(
original: P,
link: Q,
) -> std::io::Result<()> {
std::os::unix::fs::symlink(original.as_ref(), link.as_ref())
}

#[cfg(windows)]
fn create_file_symlink<P: AsRef<Path>, Q: AsRef<Path>>(
original: P,
link: Q,
) -> std::io::Result<()> {
std::os::windows::fs::symlink_file(original.as_ref(), link.as_ref())
}

const TEST_FOLDER: &'static str = "./tests/temp/dir";

#[test]
Expand Down Expand Up @@ -2821,22 +2837,49 @@ fn it_get_folder_size() {

let mut file1 = path.clone();
file1.push("test1.txt");
fs_extra::file::write_all(&file1, "content1").unwrap();
fs_extra::file::write_all(&file1, &"A".repeat(100)).unwrap();
assert!(file1.exists());

let mut sub_dir_path = path.clone();
sub_dir_path.push("sub");
create(&sub_dir_path, true).unwrap();

let mut file2 = sub_dir_path.clone();
file2.push("test2.txt");
fs_extra::file::write_all(&file2, "content2").unwrap();
fs_extra::file::write_all(&file2, &"B".repeat(300)).unwrap();
assert!(file2.exists());

let result = get_size(&path).unwrap();
let symlink_file = sub_dir_path.join("symlink_file.txt");

// Rust stdlib APIs for creating a symlinked file only exist for Unix and Windows.
#[cfg(any(unix, windows))]
{
// Only passing the filename since we want this to be a relative symlink.
create_file_symlink("test2.txt", &symlink_file).unwrap();
assert!(symlink_file.exists());
}

// Total size comprises of:
// - 100 bytes for the standard file "test1.txt"
// - 300 bytes for the standard file "test2.txt"
// - 2 x directories (one for the top level directory "dir", another for the subdirectory "sub"),
// whose size varies by filesystem, so is dynamically calculated. We cannot use `get_dir_size()`
// since even the directory metadata size varies from directory to directory, so we instead have
// to retrieve the size of both "dir" and "sub" directly.
// - (On supported platforms) 1 x symlink whose whose size varies by filesystem, so is dynamically calculated.
let mut expected_size = 100
+ 300
+ fs::symlink_metadata(&path).unwrap().len()
+ fs::symlink_metadata(&sub_dir_path).unwrap().len();

let test_size: u64 = 16 + get_dir_size();
if symlink_file.exists() {
// `fs::symlink_metadata` does not follow symlinks, so this is the size of the symlink itself, not its target.
expected_size += fs::symlink_metadata(&symlink_file).unwrap().len();
}

let result = get_size(&path).unwrap();

assert_eq!(test_size, result);
assert_eq!(expected_size, result);
}

#[test]
Expand Down

0 comments on commit 1bebf1e

Please sign in to comment.