From d4299be2e0c23e7f358b44cd635f4b658d995c42 Mon Sep 17 00:00:00 2001 From: Jeff Valore Date: Thu, 5 Apr 2018 05:49:00 -0400 Subject: [PATCH] fix(linking): Dont integrity check build artifacts (#5470) * fix(linking): Dont integrity check build artifacts If an install script modified a file that existed in the package, then yarn would view the file as stale on every command, mark the package as "fresh", and re-run the install scripts. With this change, known build artifacts from the integrity file are checked, and if this package file is a build atrifact, it's size and timestamp are not checked. #932 * WIP log message for debugging on CI * WIP adding logging for debugging CI * WIP adding logging for debugging CI * remove debugging console statements. * fix file timestamps for node 8 copyFile. fixes issue on linux. * change file mode to what works on windows * add file timestamp comparison and skip if correction not needed * fix lint errors * add error handling to fixTimes for some edge cases * pass undefined instead of 0 as a file id * refactor fs code, move OS specific edge cases to fs-normalized.js * empty commit to rerun CI * fix logic error when closing file, and fix Flow error --- __tests__/commands/add.js | 34 ++++ .../a/foo.txt | 1 + .../a/install.js | 2 +- __tests__/util/{fs.js => fs-normalized.js} | 2 +- src/reporters/lang/en.js | 1 + src/util/fs-normalized.js | 170 ++++++++++++++++++ src/util/fs.js | 101 ++--------- 7 files changed, 226 insertions(+), 85 deletions(-) create mode 100644 __tests__/fixtures/add/retain-build-artifacts-after-add/a/foo.txt rename __tests__/util/{fs.js => fs-normalized.js} (96%) create mode 100644 src/util/fs-normalized.js diff --git a/__tests__/commands/add.js b/__tests__/commands/add.js index 55e625b4b3..fded53b61c 100644 --- a/__tests__/commands/add.js +++ b/__tests__/commands/add.js @@ -1063,6 +1063,40 @@ test.concurrent('should retain build artifacts after add', async () => { ); }); +// If an install script changed a file that was installed with the package, +// then all subsequent yarn commands (like add) would see the file as changed, marking +// the package as "fresh" and re-running install scripts. +// The fix was to not mark a package as "fresh" if a changed file was also a build artifact. +// This test writes the current timestamp to a file tht also existed in the package. +// The timestamp generated by package A shouldn't change when adding package B. +// See: https://github.com/yarnpkg/yarn/issues/932#issuecomment-370245462 +test('should not run scripts if build artifact changed', async () => { + await buildRun( + reporters.BufferReporter, + fixturesLoc, + async (args, flags, config, reporter, lockfile): Promise => { + const addA = new Add(args, flags, config, reporter, lockfile); + await addA.init(); + + const artifactFile = path.join(config.cwd, 'node_modules', 'a', 'foo.txt'); + + const beforeContents = await fs.readFileAny([artifactFile]); + + // re-read lockfile after the first `add` or else it will still be empty + lockfile = await Lockfile.fromDirectory(config.cwd); + + const addB = new Add(['file:b'], flags, config, reporter, lockfile); + await addB.init(); + + const afterContents = await fs.readFileAny([artifactFile]); + expect(afterContents).toEqual(beforeContents); + }, + ['file:a'], + {}, + 'retain-build-artifacts-after-add', + ); +}); + test.concurrent('installing with --pure-lockfile and then adding should keep build artifacts', async () => { const fixture = 'integrity-pure-lockfile'; diff --git a/__tests__/fixtures/add/retain-build-artifacts-after-add/a/foo.txt b/__tests__/fixtures/add/retain-build-artifacts-after-add/a/foo.txt new file mode 100644 index 0000000000..62d8fe9f6d --- /dev/null +++ b/__tests__/fixtures/add/retain-build-artifacts-after-add/a/foo.txt @@ -0,0 +1 @@ +X diff --git a/__tests__/fixtures/add/retain-build-artifacts-after-add/a/install.js b/__tests__/fixtures/add/retain-build-artifacts-after-add/a/install.js index 899265a0eb..9bc53b7341 100644 --- a/__tests__/fixtures/add/retain-build-artifacts-after-add/a/install.js +++ b/__tests__/fixtures/add/retain-build-artifacts-after-add/a/install.js @@ -1 +1 @@ -require('fs').writeFileSync('foo.txt', 'foobar'); +require('fs').writeFileSync('foo.txt', new Date().getTime()); diff --git a/__tests__/util/fs.js b/__tests__/util/fs-normalized.js similarity index 96% rename from __tests__/util/fs.js rename to __tests__/util/fs-normalized.js index 1017769cf2..e840991504 100644 --- a/__tests__/util/fs.js +++ b/__tests__/util/fs-normalized.js @@ -1,6 +1,6 @@ /* @flow */ -import {fileDatesEqual} from '../../src/util/fs.js'; +import {fileDatesEqual} from '../../src/util/fs-normalized.js'; describe('fileDatesEqual', () => { describe('!win32', () => { diff --git a/src/reporters/lang/en.js b/src/reporters/lang/en.js index e59d401b35..3ff9121cff 100644 --- a/src/reporters/lang/en.js +++ b/src/reporters/lang/en.js @@ -52,6 +52,7 @@ const messages = { verboseFileRemoveExtraneous: 'Removing extraneous file $0.', verboseFilePhantomExtraneous: "File $0 would be marked as extraneous but has been removed as it's listed as a phantom file.", + verboseFileSkipArtifact: 'Skipping copying of $0 as the file is marked as a built artifact and subject to change.', verboseFileFolder: 'Creating directory $0.', verboseRequestStart: 'Performing $0 request to $1.', diff --git a/src/util/fs-normalized.js b/src/util/fs-normalized.js new file mode 100644 index 0000000000..b9cf7e5d3c --- /dev/null +++ b/src/util/fs-normalized.js @@ -0,0 +1,170 @@ +/* @flow */ + +// This module serves as a wrapper for file operations that are inconsistant across node and OS versions. + +import fs from 'fs'; +import {promisify} from './promise.js'; + +export type CopyFileAction = { + src: string, + dest: string, + atime: Date, + mtime: Date, + mode: number, +}; + +let disableTimestampCorrection: ?boolean = undefined; // OS dependent. will be detected on first file copy. + +const readFileBuffer = promisify(fs.readFile); +const close: (fd: number) => Promise = promisify(fs.close); +const lstat: (path: string) => Promise = promisify(fs.lstat); +const open: (path: string, flags: string | number, mode: number) => Promise = promisify(fs.open); +const futimes: (fd: number, atime: number, mtime: number) => Promise = promisify(fs.futimes); + +const write: ( + fd: number, + buffer: Buffer, + offset: ?number, + length: ?number, + position: ?number, +) => Promise = promisify(fs.write); + +export const unlink: (path: string) => Promise = promisify(require('rimraf')); + +/** + * Unlinks the destination to force a recreation. This is needed on case-insensitive file systems + * to force the correct naming when the filename has changed only in character-casing. (Jest -> jest). + */ +export const copyFile = async function(data: CopyFileAction, cleanup: () => mixed): Promise { + try { + await unlink(data.dest); + await copyFilePoly(data.src, data.dest, 0, data); + } finally { + if (cleanup) { + cleanup(); + } + } +}; + +// Node 8.5.0 introduced `fs.copyFile` which is much faster, so use that when available. +// Otherwise we fall back to reading and writing files as buffers. +const copyFilePoly: (src: string, dest: string, flags: number, data: CopyFileAction) => Promise = ( + src, + dest, + flags, + data, +) => { + if (fs.copyFile) { + return new Promise((resolve, reject) => + fs.copyFile(src, dest, flags, err => { + if (err) { + reject(err); + } else { + fixTimes(undefined, dest, data).then(() => resolve()).catch(ex => reject(ex)); + } + }), + ); + } else { + return copyWithBuffer(src, dest, flags, data); + } +}; + +const copyWithBuffer: (src: string, dest: string, flags: number, data: CopyFileAction) => Promise = async ( + src, + dest, + flags, + data, +) => { + // Use open -> write -> futimes -> close sequence to avoid opening the file twice: + // one with writeFile and one with utimes + const fd = await open(dest, 'w', data.mode); + try { + const buffer = await readFileBuffer(src); + await write(fd, buffer, 0, buffer.length); + await fixTimes(fd, dest, data); + } finally { + await close(fd); + } +}; + +// We want to preserve file timestamps when copying a file, since yarn uses them to decide if a file has +// changed compared to the cache. +// There are some OS specific cases here: +// * On linux, fs.copyFile does not preserve timestamps, but does on OSX and Win. +// * On windows, you must open a file with write permissions to call `fs.futimes`. +// * On OSX you can open with read permissions and still call `fs.futimes`. +async function fixTimes(fd: ?number, dest: string, data: CopyFileAction): Promise { + const doOpen = fd === undefined; + let openfd: number = fd ? fd : -1; + + if (disableTimestampCorrection === undefined) { + // if timestamps match already, no correction is needed. + // the need to correct timestamps varies based on OS and node versions. + const destStat = await lstat(dest); + disableTimestampCorrection = fileDatesEqual(destStat.mtime, data.mtime); + } + + if (disableTimestampCorrection) { + return; + } + + if (doOpen) { + try { + openfd = await open(dest, 'a', data.mode); + } catch (er) { + // file is likely read-only + try { + openfd = await open(dest, 'r', data.mode); + } catch (err) { + // We can't even open this file for reading. + return; + } + } + } + + try { + if (openfd) { + await futimes(openfd, data.atime, data.mtime); + } + } catch (er) { + // If `futimes` throws an exception, we probably have a case of a read-only file on Windows. + // In this case we can just return. The incorrect timestamp will just cause that file to be recopied + // on subsequent installs, which will effect yarn performance but not break anything. + } finally { + if (doOpen && openfd) { + await close(openfd); + } + } +} + +// Compare file timestamps. +// Some versions of Node on windows zero the milliseconds when utime is used. +export const fileDatesEqual = (a: Date, b: Date) => { + const aTime = a.getTime(); + const bTime = b.getTime(); + + if (process.platform !== 'win32') { + return aTime === bTime; + } + + // See https://github.com/nodejs/node/pull/12607 + // Submillisecond times from stat and utimes are truncated on Windows, + // causing a file with mtime 8.0079998 and 8.0081144 to become 8.007 and 8.008 + // and making it impossible to update these files to their correct timestamps. + if (Math.abs(aTime - bTime) <= 1) { + return true; + } + + const aTimeSec = Math.floor(aTime / 1000); + const bTimeSec = Math.floor(bTime / 1000); + + // See https://github.com/nodejs/node/issues/2069 + // Some versions of Node on windows zero the milliseconds when utime is used + // So if any of the time has a milliseconds part of zero we suspect that the + // bug is present and compare only seconds. + if (aTime - aTimeSec * 1000 === 0 || bTime - bTimeSec * 1000 === 0) { + return aTimeSec === bTimeSec; + } + + return aTime === bTime; +}; diff --git a/src/util/fs.js b/src/util/fs.js index a1fb8b117b..e836e244da 100644 --- a/src/util/fs.js +++ b/src/util/fs.js @@ -2,6 +2,7 @@ import type {ReadStream} from 'fs'; import type Reporter from '../reporters/base-reporter.js'; +import type {CopyFileAction} from './fs-normalized.js'; import fs from 'fs'; import globModule from 'glob'; @@ -12,6 +13,7 @@ import BlockingQueue from './blocking-queue.js'; import * as promise from './promise.js'; import {promisify} from './promise.js'; import map from './map.js'; +import {copyFile, fileDatesEqual, unlink} from './fs-normalized.js'; export const constants = typeof fs.constants !== 'undefined' @@ -32,45 +34,19 @@ export const readdir: (path: string, opts: void) => Promise> = pro export const rename: (oldPath: string, newPath: string) => Promise = promisify(fs.rename); export const access: (path: string, mode?: number) => Promise = promisify(fs.access); export const stat: (path: string) => Promise = promisify(fs.stat); -export const unlink: (path: string) => Promise = promisify(require('rimraf')); export const mkdirp: (path: string) => Promise = promisify(require('mkdirp')); export const exists: (path: string) => Promise = promisify(fs.exists, true); export const lstat: (path: string) => Promise = promisify(fs.lstat); export const chmod: (path: string, mode: number | string) => Promise = promisify(fs.chmod); export const link: (src: string, dst: string) => Promise = promisify(fs.link); export const glob: (path: string, options?: Object) => Promise> = promisify(globModule); +export {unlink}; // fs.copyFile uses the native file copying instructions on the system, performing much better // than any JS-based solution and consumes fewer resources. Repeated testing to fine tune the // concurrency level revealed 128 as the sweet spot on a quad-core, 16 CPU Intel system with SSD. const CONCURRENT_QUEUE_ITEMS = fs.copyFile ? 128 : 4; -const open: (path: string, flags: string | number, mode: number) => Promise = promisify(fs.open); -const close: (fd: number) => Promise = promisify(fs.close); -const write: ( - fd: number, - buffer: Buffer, - offset: ?number, - length: ?number, - position: ?number, -) => Promise = promisify(fs.write); -const futimes: (fd: number, atime: number, mtime: number) => Promise = promisify(fs.futimes); -const copyFile: (src: string, dest: string, flags: number, data: CopyFileAction) => Promise = fs.copyFile - ? // Don't use `promisify` to avoid passing the last, argument `data`, to the native method - (src, dest, flags, data) => - new Promise((resolve, reject) => fs.copyFile(src, dest, flags, err => (err ? reject(err) : resolve(err)))) - : async (src, dest, flags, data) => { - // Use open -> write -> futimes -> close sequence to avoid opening the file twice: - // one with writeFile and one with utimes - const fd = await open(dest, 'w', data.mode); - try { - const buffer = await readFileBuffer(src); - await write(fd, buffer, 0, buffer.length); - await futimes(fd, data.atime, data.mtime); - } finally { - await close(fd); - } - }; const fsSymlink: (target: string, path: string, type?: 'dir' | 'file' | 'junction') => Promise = promisify( fs.symlink, ); @@ -89,14 +65,6 @@ export type CopyQueueItem = { type CopyQueue = Array; -type CopyFileAction = { - src: string, - dest: string, - atime: number, - mtime: number, - mode: number, -}; - type LinkFileAction = { src: string, dest: string, @@ -132,36 +100,6 @@ type FolderQueryResult = { folder: ?string, }; -export const fileDatesEqual = (a: Date, b: Date) => { - const aTime = a.getTime(); - const bTime = b.getTime(); - - if (process.platform !== 'win32') { - return aTime === bTime; - } - - // See https://github.com/nodejs/node/pull/12607 - // Submillisecond times from stat and utimes are truncated on Windows, - // causing a file with mtime 8.0079998 and 8.0081144 to become 8.007 and 8.008 - // and making it impossible to update these files to their correct timestamps. - if (Math.abs(aTime - bTime) <= 1) { - return true; - } - - const aTimeSec = Math.floor(aTime / 1000); - const bTimeSec = Math.floor(bTime / 1000); - - // See https://github.com/nodejs/node/issues/2069 - // Some versions of Node on windows zero the milliseconds when utime is used - // So if any of the time has a milliseconds part of zero we suspect that the - // bug is present and compare only seconds. - if (aTime - aTimeSec * 1000 === 0 || bTime - bTimeSec * 1000 === 0) { - return aTimeSec === bTimeSec; - } - - return aTime === bTime; -}; - async function buildActionsForCopy( queue: CopyQueue, events: CopyOptions, @@ -276,6 +214,13 @@ async function buildActionsForCopy( } catch (err) {} } */ + if (bothFiles && artifactFiles.has(dest)) { + // this file gets changed during build, likely by a custom install script. Don't bother checking it. + onDone(); + reporter.verbose(reporter.lang('verboseFileSkipArtifact', src)); + return; + } + if (bothFiles && srcStat.size === destStat.size && fileDatesEqual(srcStat.mtime, destStat.mtime)) { // we can safely assume this is the same file onDone(); @@ -467,6 +412,13 @@ async function buildActionsForHardlink( } } + if (bothFiles && artifactFiles.has(dest)) { + // this file gets changed during build, likely by a custom install script. Don't bother checking it. + onDone(); + reporter.verbose(reporter.lang('verboseFileSkipArtifact', src)); + return; + } + // correct hardlink if (bothFiles && srcStat.ino !== null && srcStat.ino === destStat.ino) { onDone(); @@ -558,23 +510,6 @@ export function copy(src: string, dest: string, reporter: Reporter): Promise jest). - * It also calls a cleanup function once it is done. - * - * `data` contains target file attributes like mode, atime and mtime. Built-in copyFile copies these - * automatically but our polyfill needs the do this manually, thus needs the info. - */ -const safeCopyFile = async function(data: CopyFileAction, cleanup: () => mixed): Promise { - try { - await unlink(data.dest); - await copyFile(data.src, data.dest, 0, data); - } finally { - cleanup(); - } -}; - export async function copyBulk( queue: CopyQueue, reporter: Reporter, @@ -610,7 +545,7 @@ export async function copyBulk( } reporter.verbose(reporter.lang('verboseFileCopy', data.src, data.dest)); - const copier = safeCopyFile(data, () => currentlyWriting.delete(data.dest)); + const copier = copyFile(data, () => currentlyWriting.delete(data.dest)); currentlyWriting.set(data.dest, copier); events.onProgress(data.dest); return copier;