Skip to content

Commit

Permalink
Merge pull request #143 from tweag/fd/bazel
Browse files Browse the repository at this point in the history
Package sparkle apps with clodl
  • Loading branch information
facundominguez committed Jul 4, 2018
2 parents 6086b54 + f645032 commit 670a9e5
Show file tree
Hide file tree
Showing 12 changed files with 377 additions and 17 deletions.
28 changes: 25 additions & 3 deletions .circleci/config.yml
Expand Up @@ -22,7 +22,7 @@ jobs:
docker tag tweag/sparkle tweag/sparkle:$CIRCLE_TAG
docker push tweag/sparkle:$CIRCLE_TAG
fi
build:
build-stack:
docker:
- image: nixos/nix
working_directory: ~/sparkle
Expand All @@ -41,7 +41,7 @@ jobs:
keys:
- sparkle-stack-dependencies-{{ arch }}-{{ checksum "/tmp/stack-deps" }}
- run:
name: Build dependencies
name: Build dependencies with stack
command: |
# -j2: Limit parallelism to avoid memory exhaustion
stack -j2 --no-terminal --nix build --only-snapshot --prefetch --no-haddock --test --bench
Expand All @@ -59,12 +59,34 @@ jobs:
# XXX --packages flag should not be necessary. Workaround
# regression in Spark 2.2 vs 2.1 in Nixpkgs.
stack --no-terminal --nix exec -- spark-submit --master 'local[1]' --packages com.amazonaws:aws-java-sdk:1.7.4,org.apache.hadoop:hadoop-aws:2.7.2,com.google.guava:guava:12.0 sparkle-example-hello.jar
build-bazel:
docker:
- image: nixos/nix
working_directory: ~/sparkle
environment:
- NIXRUN: nix-shell -I nixpkgs=./nixpkgs.nix -p gcc bazel --run
steps:
- checkout
- run:
name: Install system dependencies
command: |
apk update --no-progress && apk --no-progress add ca-certificates bash binutils zip
$NIXRUN "echo nix dependencies installed"
- run:
name: Build project
command: $NIXRUN "bazel build //..."
- run:
name: Smoke test using sparkle-example-hello
command: |
$NIXRUN "bazel build //apps/hello:sparkle-example-hello_deploy.jar"
$NIXRUN "bazel run spark-submit -- --master 'local[1]' --packages com.amazonaws:aws-java-sdk:1.11.253,org.apache.hadoop:hadoop-aws:2.7.2,com.google.guava:guava:23.0 $(pwd)/bazel-bin/apps/hello/sparkle-example-hello_deploy.jar"
workflows:
version: 2
build:
jobs:
- build
- build-stack
- build-bazel
publish:
jobs:
- publish:
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Expand Up @@ -17,3 +17,4 @@ cabal.sandbox.config
.stack-work
.gradle
build
bazel-*
70 changes: 70 additions & 0 deletions BUILD
@@ -0,0 +1,70 @@
package(default_visibility = ["//visibility:public"])

# Only the symbols actually used in this file are loaded
# (haskell_binary and haskell_cc_import were previously loaded unused).
load(
    "@io_tweag_rules_haskell//haskell:haskell.bzl",
    "haskell_library",
    "haskell_toolchain",
)

# Java dependencies needed by both the Java glue code and the Haskell
# library below.
_sparkle_java_deps = [
    "@org_apache_spark_spark_core//jar",
    "@com_esotericsoftware_kryo_shaded//jar",
]

# The Java half of sparkle.
java_library(
    name = "sparkle-jar",
    srcs = glob(["src/main/java/io/tweag/sparkle/**/*.java"]),
    deps = _sparkle_java_deps,
)

# C bootstrap code bridging the JVM and Haskell; compiles against both
# the JDK headers and the GHC headers.
cc_library(
    name = "sparkle-bootstrap-cc",
    srcs = [
        "cbits/bootstrap.c",
        "cbits/io_tweag_sparkle_Sparkle.h",
    ],
    deps = [
        "@openjdk//:include",
        "@sparkle-toolchain//:include",
    ],
    copts = ["-std=c99"],
)

# The Haskell half of sparkle.
haskell_library(
    name = "sparkle-lib",
    src_strip_prefix = "src",
    srcs = glob(["src/**/*.hs"]),
    deps = [
        "@io_tweag_inline_java//jni",
        "@io_tweag_inline_java//jvm",
        "@io_tweag_inline_java//jvm-streaming",
        "@io_tweag_inline_java//:inline-java",
        "@org_apache_spark_spark_catalyst//jar",
        "@org_apache_spark_spark_sql//jar",
        "@org_scala_lang_scala_library//jar",
        "@org_scala_lang_scala_reflect//jar",
        ":sparkle-jar",
        ":sparkle-bootstrap-cc",
    ] + _sparkle_java_deps,
    prebuilt_dependencies = [
        "base",
        "binary",
        "bytestring",
        "choice",
        "constraints",
        "distributed-closure",
        "singletons",
        "streaming",
        "text",
        "vector",
    ],
)

# GHC toolchain for the Haskell rules. `extra_binaries` exposes the JDK
# tools to build actions — presumably so javac is available when
# compiling inline-java quasiquotes; TODO confirm.
haskell_toolchain(
    name = "sparkle-toolchain",
    version = "8.2.2",
    tools = "@sparkle-toolchain//:bin",
    extra_binaries = ["@openjdk//:bin"],
)

# Provided for convenience to run sparkle applications, e.g.
# `bazel run spark-submit -- ... app_deploy.jar`.
sh_binary(
    name = "spark-submit",
    srcs = ["@spark//:spark-submit"],
)
12 changes: 12 additions & 0 deletions README.md
Expand Up @@ -22,6 +22,18 @@ $ stack exec -- sparkle package sparkle-example-hello
$ stack exec -- spark-submit --master 'local[1]' --packages com.amazonaws:aws-java-sdk:1.11.253,org.apache.hadoop:hadoop-aws:2.7.2,com.google.guava:guava:23.0 sparkle-example-hello.jar
```

### Using Bazel

There is experimental support for building sparkle applications with
[Bazel]. Unlike the Stack workflow above, this doesn't require running
`sparkle package`.

```
$ bazel build //apps/hello:sparkle-example-hello_deploy.jar
$ bazel run spark-submit -- --packages com.amazonaws:aws-java-sdk:1.11.253,org.apache.hadoop:hadoop-aws:2.7.2,com.google.guava:guava:23.0 $(pwd)/bazel-bin/apps/hello/sparkle-example-hello_deploy.jar
```

[bazel]: https://bazel.build

## How to use

To run a Spark application the process is as follows:
Expand Down
186 changes: 186 additions & 0 deletions WORKSPACE
@@ -0,0 +1,186 @@
workspace(name = "io_tweag_sparkle")

# All external rule sets are pinned to exact commits/tags for
# reproducible builds.

# Haskell build rules for Bazel.
http_archive(
    name = "io_tweag_rules_haskell",
    strip_prefix = "rules_haskell-730d42c225f008a13e48bf5e9c13010174324b8c",
    urls = ["https://github.com/tweag/rules_haskell/archive/730d42c225f008a13e48bf5e9c13010174324b8c.tar.gz"]
)

# clodl — used for packaging sparkle apps (see the commit title:
# "Package sparkle apps with clodl").
http_archive(
    name = "io_tweag_clodl",
    strip_prefix = "clodl-6cc7349a792ea39f2059ab3f0c4ed53839dbffba",
    urls = ["https://github.com/tweag/clodl/archive/6cc7349a792ea39f2059ab3f0c4ed53839dbffba.tar.gz"]
)

# Rules for importing packages from nixpkgs.
http_archive(
    name = "io_tweag_rules_nixpkgs",
    strip_prefix = "rules_nixpkgs-d9df5c834f07c72be1b9e320eb742796557612f8",
    urls = ["https://github.com/tweag/rules_nixpkgs/archive/d9df5c834f07c72be1b9e320eb742796557612f8.tar.gz"],
)

# Required due to rules_haskell use of skylib.
http_archive(
    name = "bazel_skylib",
    strip_prefix = "bazel-skylib-0.2.0",
    urls = ["https://github.com/bazelbuild/bazel-skylib/archive/0.2.0.tar.gz"]
)

# inline-java — provides the jni, jvm, jvm-streaming and inline-java
# Haskell packages that sparkle-lib depends on.
http_archive(
    name = "io_tweag_inline_java",
    strip_prefix = "inline-java-3a68626c27ed9c3315dc44ff500a1bf3568c982d",
    urls = ["https://github.com/tweag/inline-java/archive/3a68626c27ed9c3315dc44ff500a1bf3568c982d.tar.gz"],
)

load("@io_tweag_rules_nixpkgs//nixpkgs:nixpkgs.bzl",
    "nixpkgs_git_repository",
    "nixpkgs_package",
)

# Pinned nixpkgs revision that all nixpkgs_package targets below draw
# from.
nixpkgs_git_repository(
    name = "nixpkgs",
    # Keep consistent with ./nixpkgs.nix.
    revision = "1fa2503f9dba814eb23726a25642d2180ce791c3",
)

# Maven dependencies from 'gradle dependencies' + grep on compile
# errors to see what needs to be filled in.
maven_jar(
    name = "org_apache_spark_spark_core",
    artifact = "org.apache.spark:spark-core_2.11:2.2.0",
)

maven_jar(
    name = "org_apache_spark_spark_sql",
    artifact = "org.apache.spark:spark-sql_2.11:2.2.0",
)

maven_jar(
    name = "org_apache_spark_spark_catalyst",
    artifact = "org.apache.spark:spark-catalyst_2.11:2.2.0",
)

# NOTE(review): the target name says "kryo_shaded" but the artifact is
# the unshaded "kryo" jar — confirm which artifact is intended.
maven_jar(
    name = "com_esotericsoftware_kryo_shaded",
    artifact = "com.esotericsoftware:kryo:3.0.3",
)

# Scala version (2.11) must match the _2.11 suffix of the Spark
# artifacts above.
maven_jar(
    name = "org_scala_lang_scala_library",
    artifact = "org.scala-lang:scala-library:2.11.8",
)

maven_jar(
    name = "org_scala_lang_scala_reflect",
    artifact = "org.scala-lang:scala-reflect:2.11.8",
)


# GHC (with the Haskell package set sparkle needs) plus a JDK, built as
# a single nix environment and exposed to Bazel as @sparkle-toolchain.
nixpkgs_package(
    name = "sparkle-toolchain",
    repository = "@nixpkgs",
    # This is a hack abusing the fact that CLASSPATH can point at things
    # that don't exist. We pass these jars to the Haskell part of sparkle
    # as extra dependencies and they are available just in time for that
    # rule. This lets javac be called with the CLASSPATH set. It's not
    # very nice for obvious reasons of hard-coding things.
    nix_file_content = """
let pkgs = import <nixpkgs> {};
in pkgs.buildEnv {
name = "sparkle-toolchain";
paths = with pkgs; [
(haskell.packages.ghc822.ghcWithPackages (p: with p; [
Cabal
base
binary
bytestring
choice
constraints
containers
deepseq
directory
distributed-closure
exceptions
filemanip
filepath
ghc
hspec
inline-c
language-java
mtl
process
regex-tdfa
singletons
streaming
template-haskell
temporary
text
vector
zip-archive
]))
openjdk
];
}
""",
    # Exposes the toolchain binaries (:bin) and the GHC C headers
    # (:include) to the BUILD file rules.
    build_file_content = """
package(default_visibility = [ "//visibility:public" ])
filegroup(
name = "bin",
srcs = glob(["bin/*"]),
)
cc_library(
name = "include",
hdrs = glob(["lib/ghc-*/include/**/*.h"]),
strip_include_prefix = glob(["lib/ghc-*/include"], exclude_directories=0)[0],
)
""",
)

# OpenJDK from nixpkgs: exposes libjvm, javac, the JNI headers, and a
# cc_library of all JDK headers for the C bootstrap code.
nixpkgs_package(
    name = "openjdk",
    repository = "@nixpkgs",
    build_file_content = """
package(default_visibility = [ "//visibility:public" ])
filegroup (
name = "lib",
srcs = ["lib/openjdk/jre/lib/amd64/server/libjvm.so"],
visibility = ["//visibility:public"],
)
filegroup (
name = "bin",
srcs = ["bin/javac"],
visibility = ["//visibility:public"],
)
filegroup (
name = "jni_header",
srcs = ["include/jni.h"],
visibility = ["//visibility:public"],
)
filegroup (
name = "jni_md_header",
srcs = ["include/jni_md.h"],
visibility = ["//visibility:public"],
)
cc_library(
name = "include",
hdrs = glob(["include/*.h"]),
strip_include_prefix = "include",
)
"""
)

# Spark distribution from nixpkgs; exposes bin/spark-submit so that
# applications can be launched with `bazel run spark-submit -- ...`.
nixpkgs_package(
    name = "spark",
    repository = "@nixpkgs",
    build_file_content = """
package(default_visibility = [ "//visibility:public" ])
filegroup (
name = "spark-submit",
srcs = ["bin/spark-submit"],
visibility = ["//visibility:public"],
)
"""
)

# Make the GHC toolchain defined in //:BUILD available to the Haskell
# rules.
register_toolchains("//:sparkle-toolchain")
28 changes: 28 additions & 0 deletions apps/hello/BUILD.bazel
@@ -0,0 +1,28 @@
package(default_visibility = ["//visibility:public"])

load(
    "@io_tweag_rules_haskell//haskell:haskell.bzl",
    "haskell_binary",
)

load("@//:sparkle.bzl", "sparkle_package")

# Example Spark application written in Haskell.
# NOTE(review): -threaded, -dynamic and -pie look necessary so the
# binary can be packaged by sparkle_package and loaded from the JVM —
# confirm before changing.
haskell_binary(
    name = "hello-hs",
    srcs = ["HelloSpark.hs"],
    main_file = "HelloSpark.hs",
    deps = [
        "//:sparkle-lib",
    ],
    prebuilt_dependencies = [
        "base",
        "distributed-closure",
        "text",
    ],
    compiler_flags = ["-threaded", "-dynamic", "-pie"],
)

# Packages the binary for spark-submit; produces
# //apps/hello:sparkle-example-hello_deploy.jar.
sparkle_package(
    name = "sparkle-example-hello",
    src = ":hello-hs",
)
2 changes: 1 addition & 1 deletion nixpkgs.nix
@@ -1 +1 @@
import (fetchTarball "https://github.com/nixos/nixpkgs/archive/1354099daf98b7a1f79e6c41ce6bfda5c40177ae.tar.gz")
import (fetchTarball "https://github.com/nixos/nixpkgs/archive/1fa2503f9dba814eb23726a25642d2180ce791c3.tar.gz")

0 comments on commit 670a9e5

Please sign in to comment.