Skip to content

Commit

Permalink
Enable pinning with stack_snapshot
Browse files Browse the repository at this point in the history
Adds an optional attribute `stack_snapshot_json` to `stack_snapshot`
which pins the result of `stack ls dependencies json` and additional
reproducibility information. If specified, then `stack_snapshot` will
not need to invoke `stack` to resolve package versions and dependencies
or to fetch Hackage or archive dependencies.

- This makes re-fetching `stack_snapshot` repositories faster as Hackage
  and archive dependencies can be cached in Bazel's repository cache,
  and `stack` does not need to update its database to determine package
  versions and dependencies.
- Additionally, this avoids issues due to concurrent invocations of
  `stack` which can fail due to a race condition on the Hackage security
  lock.

The lock file can be generated or updated by invoking the script
`@<workspace>-unpinned//:pin`.

If the `stack_snapshot_json` attribute is not specified, then
`stack_snapshot` will behave as before and resolve and fetch external
packages using `stack` on every Bazel fetch.
  • Loading branch information
aherrmann committed Jun 29, 2020
1 parent 6983902 commit c1e46ac
Show file tree
Hide file tree
Showing 4 changed files with 369 additions and 5 deletions.
1 change: 1 addition & 0 deletions WORKSPACE
Expand Up @@ -85,6 +85,7 @@ stack_snapshot(
],
setup_deps = {"polysemy": ["cabal-doctest"]},
snapshot = test_stack_snapshot,
stack_snapshot_json = "//:stackage_snapshot.json",
tools = [
# This is not required, as `stack_snapshot` would build alex
# automatically, however it is used as a test for user provided
Expand Down
238 changes: 233 additions & 5 deletions haskell/cabal.bzl
Expand Up @@ -1049,7 +1049,114 @@ library
# Sort the items to make sure that generated outputs are deterministic.
return {name: resolved[name] for name in sorted(resolved.keys())}

def _pin_packages_inplace(repository_ctx, resolved):
    """Attach pinning information to every resolved package spec.

    Mutates the specs in `resolved`: Hackage packages receive a `pinned` entry
    holding the download locations, the sha256 and the Cabal file content;
    archive packages receive a `pinned` entry holding the sha256. Specs of any
    other location type are left untouched.

    Args:
      repository_ctx: The repository context used to download and read files.
      resolved: Dict whose values are package specs with at least the keys
        `name`, `version` and `location`.
    """
    format_error = "Unexpected format in all-cabal-hashes: %s"

    for package in resolved.values():
        location_type = package["location"]["type"]

        if location_type == "archive":
            # stack does not yet expose sha-256, see https://github.com/commercialhaskell/stack/pull/5280
            # instead we fetch the archive and let Bazel calculate the sha256.
            fetched = repository_ctx.download(
                package["location"]["url"],
                output = "{name}-{version}.tar.gz".format(**package),
                executable = False,
            )
            package["pinned"] = {"sha256": fetched.sha256}
            continue

        if location_type != "hackage":
            # Bazel cannot cache git (or hg) repositories in the repository
            # cache as of now. Therefore, such packages are not pinned here
            # and are fetched using stack rather than Bazel.
            # See https://github.com/bazelbuild/bazel/issues/5086
            continue

        hashes_file = "{name}-{version}.json".format(**package)
        cabal_file = "{name}-{version}.cabal".format(**package)

        # stack does not expose sha256, see https://github.com/commercialhaskell/stack/issues/5274
        # instead we fetch the sha256 from the all-cabal-hashes repository.
        repository_ctx.download(
            [
                "https://raw.githubusercontent.com/commercialhaskell/all-cabal-hashes/hackage/{name}/{version}/{name}.json".format(**package),
            ],
            output = hashes_file,
            executable = False,
        )

        # Cabal file downloads are not reproducible due to Cabal revisions.
        # We pin the Cabal file by including it verbatim in the lock file.
        repository_ctx.download(
            [
                "https://raw.githubusercontent.com/commercialhaskell/all-cabal-hashes/hackage/{name}/{version}/{name}.cabal".format(**package),
                "https://hackage.haskell.org/package/{name}-{version}/{name}.cabal".format(**package),
            ],
            output = cabal_file,
            executable = False,
        )

        metadata = json_parse(repository_ctx.read(hashes_file))
        package_hashes = _parse_json_field(metadata, "package-hashes", "dict", format_error)
        locations = _parse_json_field(metadata, "package-locations", "list", format_error)
        sha256 = _parse_json_field(package_hashes, "SHA256", "string", format_error)
        package["pinned"] = {
            "url": locations,
            "sha256": sha256,
            "cabal-file": repository_ctx.read(cabal_file),
        }

def _unescape_pinned_cabal_file(escaped):
    """Undo the escaping that `struct.to_json` applied to a pinned Cabal file.

    Escaped backslashes are split off first so that a sequence such as an
    escaped backslash followed by `n` decodes to a literal backslash and `n`
    rather than to a backslash and a newline, which is what chaining the
    replacements over the whole string would produce.

    Args:
      escaped: The Cabal file content as stored in the lock file.

    Returns:
      The content with the newline, carriage return, double quote, tab and
      backslash escapes decoded. Other escapes are left as they are.
    """
    return "\\".join([
        chunk.replace("\\n", "\n").replace("\\r", "\r").replace("\\\"", "\"").replace("\\t", "\t")
        for chunk in escaped.split("\\\\")
    ])

def _download_packages(repository_ctx, snapshot, resolved):
    """Download all remote packages.

    Downloads hackage and archive packages using Bazel, eligible to repository
    cache. Downloads git and hg packages using `stack unpack`, not eligible to
    repository cache.

    Args:
      repository_ctx: The repository context used to download and write files.
      snapshot: Forwarded unchanged to `_download_packages_unpinned` for git
        and hg packages.
      resolved: Dict whose values are package specs. Hackage specs must carry
        `pinned.url`, `pinned.sha256` and `pinned.cabal-file`; archive specs
        must carry `location.url` and `pinned.sha256`.
    """
    stack_unpack = {}

    # Unpack hackage and archive packages.
    for package in resolved.values():
        location_type = package["location"]["type"]
        if location_type == "hackage":
            repository_ctx.download_and_extract(
                package["pinned"]["url"],
                output = "{name}-{version}".format(**package),
                sha256 = package["pinned"]["sha256"],
                stripPrefix = "{name}-{version}".format(**package),
            )

            # Overwrite the Cabal file with the pinned revision.
            repository_ctx.file(
                "{name}-{version}/{name}.cabal".format(**package),
                # struct.to_json escapes special characters but json_parse does
                # not undo this escaping. So, we need to do this manually.
                # XXX: The JSON encoding breaks unicode characters in Cabal files.
                content = _unescape_pinned_cabal_file(package["pinned"]["cabal-file"]),
                executable = False,
            )
        elif location_type == "archive":
            # Only the sha256 is pinned for archives; the URL is part of the
            # resolved location, not of the `pinned` entry.
            repository_ctx.download_and_extract(
                package["location"]["url"],
                output = "{name}-{version}".format(**package),
                sha256 = package["pinned"]["sha256"],
            )
        elif location_type in ["git", "hg"]:
            # Unpack remote packages.
            #
            # Bazel cannot cache git (or hg) repositories in the repository
            # cache as of now. Therefore, we fall back to fetching them using
            # stack rather than Bazel.
            # See https://github.com/bazelbuild/bazel/issues/5086
            #
            # XXX: Implement this using Bazel to avoid `stack update`.
            stack_unpack[package["name"]] = package

    if stack_unpack:
        # Enforce dependency on stack_update
        repository_ctx.read(repository_ctx.attr.stack_update)
        _download_packages_unpinned(repository_ctx, snapshot, stack_unpack)

def _download_packages_unpinned(repository_ctx, snapshot, resolved):
"""Download remote packages using `stack unpack`."""
remote_packages = [
package["name"]
Expand Down Expand Up @@ -1147,21 +1254,22 @@ def _parse_packages_list(packages, vendored_packages):
unversioned = unversioned_packages,
)

def _stack_snapshot_impl(repository_ctx):
def _stack_snapshot_unpinned_impl(repository_ctx):
snapshot = _parse_stack_snapshot(
repository_ctx,
repository_ctx.attr.snapshot,
repository_ctx.attr.local_snapshot,
)

# Enforce dependency on stack_update
repository_ctx.read(repository_ctx.attr.stack_update)

vendored_packages = _invert(repository_ctx.attr.vendored_packages)
packages = _parse_packages_list(
repository_ctx.attr.packages,
vendored_packages,
)

# Enforce dependency on stack_update
repository_ctx.read(repository_ctx.attr.stack_update)

resolved = _resolve_packages(
repository_ctx,
snapshot,
Expand All @@ -1170,7 +1278,84 @@ def _stack_snapshot_impl(repository_ctx):
packages.unversioned,
vendored_packages,
)
_download_packages(repository_ctx, snapshot, resolved)

_pin_packages_inplace(repository_ctx, resolved)

repository_ctx.file(
"snapshot.json",
executable = False,
# Write one package per line sorted by name to be reproducible and diff friendly.
content = "{%s}" % ",\n".join([
'"%s": ' % name + struct(**spec).to_json()
for (name, spec) in resolved.items()
]),
)

repository_name = repository_ctx.name[:-len("-unpinned")]

if repository_ctx.attr.stack_snapshot_json:
stack_snapshot_location = paths.join(
repository_ctx.attr.stack_snapshot_json.package,
repository_ctx.attr.stack_snapshot_json.name,
)
else:
stack_snapshot_location = "%s_snapshot.json" % repository_name

repository_ctx.template(
"pin.sh",
repository_ctx.path(Label("@rules_haskell//haskell:private/stack_snapshot_pin.sh.tpl")),
executable = True,
substitutions = {
"{repository_name}": repository_name,
"{stack_snapshot_source}": "snapshot.json",
"{stack_snapshot_location}": stack_snapshot_location,
"{predefined_stack_snapshot}": str(repository_ctx.attr.stack_snapshot_json != None),
},
)

repository_ctx.file(
"BUILD.bazel",
executable = False,
content = """\
sh_binary(
name = "pin",
data = ["snapshot.json"],
deps = ["@bazel_tools//tools/bash/runfiles"],
srcs = ["pin.sh"],
)
""",
)

def _stack_snapshot_impl(repository_ctx):
snapshot = _parse_stack_snapshot(
repository_ctx,
repository_ctx.attr.snapshot,
repository_ctx.attr.local_snapshot,
)

vendored_packages = _invert(repository_ctx.attr.vendored_packages)
packages = _parse_packages_list(
repository_ctx.attr.packages,
vendored_packages,
)

# Resolve and fetch packages
if repository_ctx.attr.stack_snapshot_json == None:
# Enforce dependency on stack_update
repository_ctx.read(repository_ctx.attr.stack_update)
resolved = _resolve_packages(
repository_ctx,
snapshot,
packages.core,
packages.versioned,
packages.unversioned,
vendored_packages,
)
_download_packages_unpinned(repository_ctx, snapshot, resolved)
else:
# XXX: Factor out and validate lock file parsing.
resolved = json_parse(repository_ctx.read(repository_ctx.attr.stack_snapshot_json))
_download_packages(repository_ctx, snapshot, resolved)

user_components = {
name: _parse_components(name, components)
Expand Down Expand Up @@ -1341,9 +1526,24 @@ haskell_cabal_binary(
build_file_content = "\n".join(build_file_builder)
repository_ctx.file("BUILD.bazel", build_file_content, executable = False)

# Repository rule implemented by `_stack_snapshot_unpinned_impl`. The
# `stack_snapshot` macro instantiates it as `<name>-unpinned`, forwarding the
# attributes listed here.
_stack_snapshot_unpinned = repository_rule(
_stack_snapshot_unpinned_impl,
attrs = {
# Optional label of the lock file.
"stack_snapshot_json": attr.label(allow_single_file = True),
"snapshot": attr.string(),
"local_snapshot": attr.label(allow_single_file = True),
"packages": attr.string_list(),
"vendored_packages": attr.label_keyed_string_dict(),
"flags": attr.string_list_dict(),
"stack": attr.label(),
# Read by the implementation to enforce a dependency on the stack update step.
"stack_update": attr.label(),
},
)

_stack_snapshot = repository_rule(
_stack_snapshot_impl,
attrs = {
"stack_snapshot_json": attr.label(allow_single_file = True),
"snapshot": attr.string(),
"local_snapshot": attr.label(allow_single_file = True),
"packages": attr.string_list(),
Expand Down Expand Up @@ -1490,6 +1690,7 @@ def stack_snapshot(
vendored_packages = {},
snapshot = "",
local_snapshot = None,
stack_snapshot_json = None,
packages = [],
flags = {},
haddock = True,
Expand All @@ -1516,6 +1717,17 @@ def stack_snapshot(
`<package>-<version>` in the `packages` attribute. Note that you cannot
override the version of any [packages built into GHC][ghc-builtins].
This rule invokes the `stack` tool for version and dependency resolution
based on the specified snapshot. You can generate a `stack_snapshot.json`
file to avoid invoking `stack` on every fetch and instead pin the outcome
in a file that can be checked into revision control. Specify the
`stack_snapshot_json` attribute and execute the following command to
generate this file:
```
bazel run @stackage-unpinned//:pin
```
By default `stack_snapshot` defines a library target for each package. If a
package does not contain a library component or contains executable
components, then you need to declare so yourself using the `components`
Expand Down Expand Up @@ -1599,6 +1811,8 @@ def stack_snapshot(
snapshot: The name of a Stackage snapshot. Incompatible with local_snapshot.
local_snapshot: A custom Stack snapshot file, as per the Stack documentation.
Incompatible with snapshot.
stack_snapshot_json: A label to a `stack_snapshot.json` file, e.g. `//:stack_snapshot.json`.
Specify this to use pinned artifacts for generating build targets.
packages: A set of package identifiers. For packages in the snapshot,
version numbers can be omitted.
vendored_packages: Add or override a package to the snapshot with a custom
Expand Down Expand Up @@ -1648,10 +1862,23 @@ def stack_snapshot(
name = "rules_haskell_stack_update",
stack = stack,
)
_stack_snapshot_unpinned(
name = name + "-unpinned",
stack = stack,
# Dependency for ordered execution, stack update before stack unpack.
stack_update = "@rules_haskell_stack_update//:stack_update",
vendored_packages = _invert(vendored_packages),
snapshot = snapshot,
local_snapshot = local_snapshot,
stack_snapshot_json = stack_snapshot_json,
packages = packages,
flags = flags,
)
_stack_snapshot(
name = name,
stack = stack,
# Dependency for ordered execution, stack update before stack unpack.
# TODO Make this optional when fetching of git and hg dependencies is implemented.
stack_update = "@rules_haskell_stack_update//:stack_update",
# TODO Remove _from_string_keyed_label_list_dict once following issue
# is resolved: https://github.com/bazelbuild/bazel/issues/7989.
Expand All @@ -1661,6 +1888,7 @@ def stack_snapshot(
vendored_packages = _invert(vendored_packages),
snapshot = snapshot,
local_snapshot = local_snapshot,
stack_snapshot_json = stack_snapshot_json,
packages = packages,
flags = flags,
haddock = haddock,
Expand Down
57 changes: 57 additions & 0 deletions haskell/private/stack_snapshot_pin.sh.tpl
@@ -0,0 +1,57 @@
#!/usr/bin/env bash
# Pin script template: copies the generated snapshot file from the runfiles of
# the unpinned repository into the workspace. Placeholders in curly braces are
# substituted by the repository rule that instantiates this template.
set -euo pipefail

# `bazel run` exports BUILD_WORKSPACE_DIRECTORY. Expand it with an empty
# default so that `set -u` does not abort with "unbound variable" before the
# usage hint below can be printed when the script is invoked directly.
if [ -z "${BUILD_WORKSPACE_DIRECTORY:-}" ]; then
  cat <<EOF >&2
It looks like you are trying to invoke the pin script incorrectly.
We only support calling the pin script with
bazel run @STACKAGE-unpinned//:pin
EOF
  exit 1
fi

# The destination path below is relative to the workspace root.
cd "$BUILD_WORKSPACE_DIRECTORY"

# Copy-pasted from the Bazel Bash runfiles library v2.
set -uo pipefail; f=bazel_tools/tools/bash/runfiles/runfiles.bash
source "${RUNFILES_DIR:-/dev/null}/$f" 2>/dev/null || \
source "$(grep -sm1 "^$f " "${RUNFILES_MANIFEST_FILE:-/dev/null}" | cut -f2- -d' ')" 2>/dev/null || \
source "$0.runfiles/$f" 2>/dev/null || \
source "$(grep -sm1 "^$f " "$0.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \
source "$(grep -sm1 "^$f " "$0.exe.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null || \
{ echo>&2 "ERROR: cannot find $f"; exit 1; }; f=; set -e
# --- end runfiles.bash initialization v2 ---

# The runfiles snippet above changes the shell options; restore strict mode.
set -euo pipefail

# Copy the freshly resolved snapshot out of the runfiles into the workspace.
cp "$(rlocation "{repository_name}-unpinned/{stack_snapshot_source}")" "{stack_snapshot_location}"

# The message depends on whether the stack_snapshot_json attribute was already
# set when the unpinned repository was generated.
if [ "{predefined_stack_snapshot}" = "True" ]; then
  cat <<EOF >&2
Successfully pinned resolved artifacts for @{repository_name}, {stack_snapshot_location} is now up-to-date.
EOF
else
  cat <<EOF >&2
Successfully pinned resolved artifacts for @{repository_name} in {stack_snapshot_location}.
This file should be checked in your version control system.
Next, please update your WORKSPACE file by adding the stack_snapshot_json attribute.
For example:
=============================================================
stack_snapshot(
packages = ...,
snapshot = ...,
stack_snapshot_json = "//:{repository_name}_snapshot.json",
)
=============================================================
To update {repository_name}_snapshot.json, run this command to re-pin the unpinned repository:
bazel run @{repository_name}-unpinned//:pin
EOF
fi

0 comments on commit c1e46ac

Please sign in to comment.