feat(turborepo): FS Cache (#5473)
### Description

Implements the FS cache on top of CacheItem. ~~This is stacked on top of
#5065~~
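
For orientation, here is a condensed sketch of the new API, mirroring the round-trip test added in `fs.rs` below (the hash, duration, and output path are illustrative values, and `FSCache` plus most of its methods are still crate-private and not wired into any caller in this commit):

```rust
// Sketch only: condensed from the round-trip test in fs.rs below.
let repo_root = tempfile::tempdir()?;
let repo_root_path = AbsoluteSystemPath::from_std_path(repo_root.path())?;

// With no override dir, artifacts live under <repo>/node_modules/.cache/turbo.
let cache = FSCache::new(None, &repo_root_path)?;

// Nothing stored yet, so a lookup reports CacheError::CacheMiss.
cache.exists("some-hash").expect_err("expected a cache miss");

// `put` archives the listed files as <hash>.tar.zst and records the task
// duration in <hash>-meta.json next to it.
let files = vec![AnchoredSystemPathBuf::from_raw("dist/index.js").unwrap()]; // illustrative output
cache.put(repo_root_path, "some-hash", 58, files)?;

// `fetch` restores those files under the anchor and reads the duration back
// out of the metadata file.
let (response, restored_files) = cache.fetch(&repo_root_path, "some-hash")?;
assert_eq!(response.time_saved, 58);
```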

### Testing Instructions

Uses the same round-trip tests as the HTTP cache.

---------

Co-authored-by: --global <Nicholas Yang>
Co-authored-by: Chris Olszewski <chris.olszewski@vercel.com>
NicholasLYang and chris-olszewski committed Jul 17, 2023
1 parent 135c08f commit 46d0945
Showing 8 changed files with 331 additions and 44 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions crates/turborepo-cache/Cargo.toml
@@ -12,6 +12,7 @@ rustls-tls = ["turborepo-api-client/rustls-tls"]

[dev-dependencies]
anyhow = { workspace = true, features = ["backtrace"] }
futures = { workspace = true }
libc = "0.2.146"
port_scanner = { workspace = true }
tempfile = { workspace = true }
2 changes: 1 addition & 1 deletion crates/turborepo-cache/src/cache_archive/create.rs
@@ -46,7 +46,7 @@ impl<'a> CacheWriter<'a> {
    // Makes a new CacheArchive at the specified path
    // Wires up the chain of writers:
    // tar::Builder -> zstd::Encoder (optional) -> BufWriter -> File
    fn create(path: &AbsoluteSystemPath) -> Result<Self, CacheError> {
    pub fn create(path: &AbsoluteSystemPath) -> Result<Self, CacheError> {
        let mut options = OpenOptions::new();
        options.write(true).create(true).truncate(true);

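
The comment in this hunk describes the writer chain `tar::Builder -> zstd::Encoder (optional) -> BufWriter -> File`. For context, wiring such a chain with the `tar` and `zstd` crates looks roughly like the sketch below (an assumption-laden illustration, not the actual body of `create`, which also supports an uncompressed path):

```rust
use std::{fs::File, io::BufWriter, path::Path};

// Sketch: File -> BufWriter -> zstd::Encoder -> tar::Builder, innermost to
// outermost. Entries appended to the builder are compressed on their way to disk.
fn open_compressed_archive(
    path: &Path,
) -> std::io::Result<tar::Builder<zstd::Encoder<'static, BufWriter<File>>>> {
    let file = File::create(path)?;
    let encoder = zstd::Encoder::new(BufWriter::new(file), 0)?; // 0 selects zstd's default level
    Ok(tar::Builder::new(encoder))
}
```

When the archive is complete, both layers have to be finished (`Builder::into_inner`, then `Encoder::finish`) so the tar footer and the final zstd frame are flushed.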
217 changes: 217 additions & 0 deletions crates/turborepo-cache/src/fs.rs
@@ -0,0 +1,217 @@
use std::{backtrace::Backtrace, fs::OpenOptions};

use serde::{Deserialize, Serialize};
use turbopath::{AbsoluteSystemPath, AbsoluteSystemPathBuf, AnchoredSystemPathBuf};

use crate::{
    cache_archive::{CacheReader, CacheWriter},
    CacheError, CacheResponse, CacheSource,
};

struct FSCache {
    cache_directory: AbsoluteSystemPathBuf,
}

#[derive(Debug, Deserialize, Serialize)]
struct CacheMetadata {
    hash: String,
    duration: u32,
}

impl CacheMetadata {
    fn read(path: &AbsoluteSystemPath) -> Result<CacheMetadata, CacheError> {
        serde_json::from_str(&path.read_to_string()?)
            .map_err(|e| CacheError::InvalidMetadata(e, Backtrace::capture()))
    }
}

impl FSCache {
    fn resolve_cache_dir(
        repo_root: &AbsoluteSystemPath,
        override_dir: Option<&str>,
    ) -> AbsoluteSystemPathBuf {
        if let Some(override_dir) = override_dir {
            AbsoluteSystemPathBuf::from_unknown(repo_root, override_dir)
        } else {
            repo_root.join_components(&["node_modules", ".cache", "turbo"])
        }
    }

    pub fn new(
        override_dir: Option<&str>,
        repo_root: &AbsoluteSystemPath,
    ) -> Result<Self, CacheError> {
        let cache_directory = Self::resolve_cache_dir(repo_root, override_dir);
        cache_directory.create_dir_all()?;

        Ok(FSCache { cache_directory })
    }

    pub fn fetch(
        &self,
        anchor: &AbsoluteSystemPath,
        hash: &str,
    ) -> Result<(CacheResponse, Vec<AnchoredSystemPathBuf>), CacheError> {
        let uncompressed_cache_path = self
            .cache_directory
            .join_component(&format!("{}.tar", hash));
        let compressed_cache_path = self
            .cache_directory
            .join_component(&format!("{}.tar.zst", hash));

        let cache_path = if uncompressed_cache_path.exists() {
            uncompressed_cache_path
        } else if compressed_cache_path.exists() {
            compressed_cache_path
        } else {
            return Err(CacheError::CacheMiss);
        };

        let mut cache_reader = CacheReader::open(&cache_path)?;

        let restored_files = cache_reader.restore(anchor)?;

        let meta = CacheMetadata::read(
            &self
                .cache_directory
                .join_component(&format!("{}-meta.json", hash)),
        )?;

        Ok((
            CacheResponse {
                time_saved: meta.duration,
                source: CacheSource::Local,
            },
            restored_files,
        ))
    }

    fn exists(&self, hash: &str) -> Result<CacheResponse, CacheError> {
        let uncompressed_cache_path = self
            .cache_directory
            .join_component(&format!("{}.tar", hash));
        let compressed_cache_path = self
            .cache_directory
            .join_component(&format!("{}.tar.zst", hash));

        if !uncompressed_cache_path.exists() && !compressed_cache_path.exists() {
            return Err(CacheError::CacheMiss);
        }

        let duration = CacheMetadata::read(
            &self
                .cache_directory
                .join_component(&format!("{}-meta.json", hash)),
        )
        .map(|meta| meta.duration)
        .unwrap_or(0);

        Ok(CacheResponse {
            time_saved: duration,
            source: CacheSource::Local,
        })
    }

    fn put(
        &self,
        anchor: &AbsoluteSystemPath,
        hash: &str,
        duration: u32,
        files: Vec<AnchoredSystemPathBuf>,
    ) -> Result<(), CacheError> {
        let cache_path = self
            .cache_directory
            .join_component(&format!("{}.tar.zst", hash));

        let mut cache_item = CacheWriter::create(&cache_path)?;

        for file in files {
            cache_item.add_file(anchor, &file)?;
        }

        let metadata_path = self
            .cache_directory
            .join_component(&format!("{}-meta.json", hash));

        let meta = CacheMetadata {
            hash: hash.to_string(),
            duration,
        };

        let mut metadata_options = OpenOptions::new();
        metadata_options.create(true).write(true);

        let metadata_file = metadata_path.open_with_options(metadata_options)?;

        serde_json::to_writer(metadata_file, &meta)
            .map_err(|e| CacheError::InvalidMetadata(e, Backtrace::capture()))?;

        Ok(())
    }
}

#[cfg(test)]
mod test {
    use std::assert_matches::assert_matches;

    use anyhow::Result;
    use futures::future::try_join_all;
    use tempfile::tempdir;

    use super::*;
    use crate::test_cases::{get_test_cases, TestCase};

    #[tokio::test]
    async fn test_fs_cache() -> Result<()> {
        try_join_all(get_test_cases().into_iter().map(round_trip_test)).await?;

        Ok(())
    }

    async fn round_trip_test(test_case: TestCase) -> Result<()> {
        let repo_root = tempdir()?;
        let repo_root_path = AbsoluteSystemPath::from_std_path(repo_root.path())?;
        test_case.initialize(repo_root_path)?;

        let cache = FSCache::new(None, &repo_root_path)?;

        let expected_miss = cache
            .exists(&test_case.hash)
            .expect_err("Expected cache miss");
        assert_matches!(expected_miss, CacheError::CacheMiss);

        cache.put(
            repo_root_path,
            &test_case.hash,
            test_case.duration,
            test_case.files.iter().map(|f| f.path.clone()).collect(),
        )?;

        let expected_hit = cache.exists(&test_case.hash)?;
        assert_eq!(
            expected_hit,
            CacheResponse {
                time_saved: test_case.duration,
                source: CacheSource::Local
            }
        );

        let (status, files) = cache.fetch(&repo_root_path, &test_case.hash)?;
        assert_eq!(
            status,
            CacheResponse {
                time_saved: test_case.duration,
                source: CacheSource::Local
            }
        );

        assert_eq!(files.len(), test_case.files.len());
        for (expected, actual) in test_case.files.iter().zip(files.iter()) {
            assert_eq!(&expected.path, actual);
            let actual_file = repo_root_path.resolve(actual);
            assert_eq!(expected.contents, actual_file.read_to_string()?);
        }

        Ok(())
    }
}
62 changes: 19 additions & 43 deletions crates/turborepo-cache/src/http.rs
@@ -172,62 +172,38 @@ impl HttpCache {
#[cfg(test)]
mod test {
    use anyhow::Result;
    use futures::future::try_join_all;
    use tempfile::tempdir;
    use test_case::test_case;
    use turbopath::{AbsoluteSystemPathBuf, AnchoredSystemPathBuf};
    use turbopath::AbsoluteSystemPathBuf;
    use turborepo_api_client::APIClient;
    use vercel_api_mock::start_test_server;

    use crate::{http::HttpCache, CacheSource};
    use crate::{
        http::HttpCache,
        test_cases::{get_test_cases, TestCase},
        CacheSource,
    };

    struct TestFile {
        path: AnchoredSystemPathBuf,
        contents: &'static str,
    #[tokio::test]
    async fn test_http_cache() -> Result<()> {
        try_join_all(get_test_cases().into_iter().map(round_trip_test)).await?;

        Ok(())
    }

    #[test_case(vec![
        TestFile {
            path: AnchoredSystemPathBuf::from_raw("package.json").unwrap(),
            contents: "hello world"
        }
    ], 58, "Faces Places")]
    #[test_case(vec![
        TestFile {
            path: AnchoredSystemPathBuf::from_raw("package.json").unwrap(),
            contents: "Days of Heaven"
        },
        TestFile {
            path: AnchoredSystemPathBuf::from_raw("package-lock.json").unwrap(),
            contents: "Badlands"
        }
    ], 1284, "Cleo from 5 to 7")]
    #[test_case(vec![
        TestFile {
            path: AnchoredSystemPathBuf::from_raw("package.json").unwrap(),
            contents: "Days of Heaven"
        },
        TestFile {
            path: AnchoredSystemPathBuf::from_raw("package-lock.json").unwrap(),
            contents: "Badlands"
        },
        TestFile {
            path: AnchoredSystemPathBuf::from_raw("src/main.js").unwrap(),
            contents: "Tree of Life"
        }
    ], 12845, "The Gleaners and I")]
    #[tokio::test]
    async fn test_round_trip(files: Vec<TestFile>, duration: u32, hash: &str) -> Result<()> {
    async fn round_trip_test(test_case: TestCase) -> Result<()> {
        let port = port_scanner::request_open_port().unwrap();
        let handle = tokio::spawn(start_test_server(port));

        let repo_root = tempdir()?;
        let repo_root_path = AbsoluteSystemPathBuf::try_from(repo_root.path())?;
        test_case.initialize(&repo_root_path)?;

        for file in &files {
            let file_path = repo_root_path.resolve(&file.path);
            std::fs::create_dir_all(file_path.parent().unwrap())?;
            std::fs::write(file_path, file.contents)?;
        }
        let TestCase {
            hash,
            files,
            duration,
        } = test_case;

        let api_client = APIClient::new(&format!("http://localhost:{}", port), 200, "2.0.0", true)?;

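
Not shown in this view is the new shared `test_cases` module that both test suites now pull their fixtures from. Judging from how it is used above (and from the inline fixtures it replaces), its shape is roughly the following; this is a reconstruction for orientation, not the actual file:

```rust
// Hypothetical reconstruction of crates/turborepo-cache/src/test_cases.rs,
// inferred from its usage in the fs and http tests.
use anyhow::Result;
use turbopath::{AbsoluteSystemPath, AnchoredSystemPathBuf};

pub(crate) struct TestFile {
    pub path: AnchoredSystemPathBuf,
    pub contents: &'static str,
}

pub(crate) struct TestCase {
    pub hash: &'static str,
    pub files: Vec<TestFile>,
    pub duration: u32,
}

impl TestCase {
    // Materializes the case's files under the repo root so a subsequent `put`
    // has real contents to archive (the loop the old http test ran inline).
    pub fn initialize(&self, repo_root: &AbsoluteSystemPath) -> Result<()> {
        for file in &self.files {
            let file_path = repo_root.resolve(&file.path);
            std::fs::create_dir_all(file_path.parent().unwrap())?;
            std::fs::write(file_path, file.contents)?;
        }
        Ok(())
    }
}

pub(crate) fn get_test_cases() -> Vec<TestCase> {
    vec![
        TestCase {
            hash: "Faces Places",
            duration: 58,
            files: vec![TestFile {
                path: AnchoredSystemPathBuf::from_raw("package.json").unwrap(),
                contents: "hello world",
            }],
        },
        // ...plus the two larger multi-file cases from the old #[test_case] attributes
    ]
}
```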
10 changes: 10 additions & 0 deletions crates/turborepo-cache/src/lib.rs
@@ -1,10 +1,14 @@
#![feature(error_generic_member_access)]
#![feature(provide_any)]
#![feature(assert_matches)]
#![deny(clippy::all)]

pub mod cache_archive;
pub mod fs;
pub mod http;
pub mod signature_authentication;
#[cfg(test)]
mod test_cases;

use std::{backtrace, backtrace::Backtrace};

@@ -51,6 +55,12 @@ pub enum CacheError {
    WindowsUnsafeName(String, #[backtrace] Backtrace),
    #[error("tar attempts to write outside of directory: {0}")]
    LinkOutsideOfDirectory(String, #[backtrace] Backtrace),
    #[error("Invalid cache metadata file")]
    InvalidMetadata(serde_json::Error, #[backtrace] Backtrace),
    #[error("Failed to write cache metadata file")]
    MetadataWriteFailure(serde_json::Error, #[backtrace] Backtrace),
    #[error("Cache miss")]
    CacheMiss,
}

impl From<turborepo_api_client::Error> for CacheError {
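
The new `CacheMiss` variant makes a miss an expected, recoverable outcome: both `exists` and `fetch` in `fs.rs` return it, and the tests assert on it. A caller can therefore branch on it while still propagating real failures, roughly like this hypothetical sketch:

```rust
// Hypothetical caller; `cache` and `hash` are assumed to be in scope.
match cache.exists(hash) {
    Ok(response) => println!("cache hit from {:?}, time saved: {}", response.source, response.time_saved),
    Err(CacheError::CacheMiss) => println!("cache miss, running the task"),
    Err(other) => return Err(other),
}
```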