Commit

Merge 4086cee into 063e9cf
dianaclarke committed Jun 23, 2021
2 parents 063e9cf + 4086cee commit 8b49599
Showing 5 changed files with 416 additions and 48 deletions.
155 changes: 152 additions & 3 deletions README.md
@@ -52,6 +52,10 @@ repository, and the results are hosted on the

* [Contributing](https://github.com/ursacomputing/conbench#contributing)
* [Authoring benchmarks](https://github.com/ursacomputing/conbench#authoring-benchmarks)
  * [Simple benchmarks](https://github.com/ursacomputing/conbench#example-simple-benchmarks)
  * [External benchmarks](https://github.com/ursacomputing/conbench#example-external-benchmarks)
  * [Case benchmarks](https://github.com/ursacomputing/conbench#example-case-benchmarks)
  * [R benchmarks](https://github.com/ursacomputing/conbench#example-r-benchmarks)


## Contributing
@@ -311,7 +315,7 @@ class ExternalBenchmark(conbench.runner.Benchmark):

    def run(self, **kwargs):
        # external results from an API call, command line execution, etc
-        data = {
+        result = {
            "data": [100, 200, 300],
            "unit": "i/s",
            "times": [0.100, 0.200, 0.300],
@@ -320,7 +324,7 @@ class ExternalBenchmark(conbench.runner.Benchmark):

        context = {"benchmark_language": "C++"}
        return self.conbench.external(
-            data, self.name, context=context, options=kwargs, output=data
+            result, self.name, context=context, options=kwargs, output=result
        )
```

@@ -342,9 +346,13 @@ Options:
```


Note that using `--iterations=3` results in 3 runs of the benchmark, with the
`mean`, `stdev`, etc. calculated across those runs.

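For a rough idea of what that summary involves, the statistics could be
computed from the per-iteration timings like this (an illustrative sketch,
not Conbench's actual implementation; the timing values are made up):

```python
import statistics

times = [0.100, 0.036, 0.031]  # hypothetical seconds, one per iteration

summary = {
    "mean": statistics.mean(times),
    "median": statistics.median(times),
    "stdev": statistics.stdev(times),
    "min": min(times),
    "max": max(times),
}
```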

```
$ cd ~/workspace/conbench/conbench/tests/benchmark/
-$ conbench external
+$ conbench external --iterations=3
Benchmark result:
{
@@ -451,6 +459,7 @@ class CasesBenchmark(conbench.runner.Benchmark):
```
$ cd ~/workspace/conbench/conbench/tests/benchmark/
$ conbench matrix --help
Usage: conbench matrix [OPTIONS]

  Run matrix benchmark(s).
@@ -482,6 +491,10 @@ Options:
```


Note that using `--all=true` results in 3 benchmark results, one for each
case (`10 x 10`, `2 x 10`, and `10 x 2`).

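For reference, a case matrix like that is typically declared on the benchmark
class via `valid_cases`, where the first tuple names the case parameters. A
sketch (the `rows`/`columns` names are assumed from the case labels, not taken
from this diff):

```python
valid_cases = [
    ("rows", "columns"),  # header: the case parameter names
    ("10", "10"),
    ("2", "10"),
    ("10", "2"),
]
```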

```
$ cd ~/workspace/conbench/conbench/tests/benchmark/
$ conbench matrix --all=true
@@ -645,3 +658,139 @@ Benchmark result:
}
}
```

### Example R benchmarks

A few examples illustrating how to integrate R benchmarks with Conbench.

The first one just times `1 + 1` in R, and the second one executes an R
benchmark from a library of R benchmarks (in this case
[arrowbench](https://github.com/ursacomputing/arrowbench)).

If you find yourself wrapping many R benchmarks in Python to integrate them
with Conbench (to get uniform JSON benchmark results which you can persist and
publish on a Conbench server), you'll probably want to extract much of the
boilerplate into a base class (a sketch follows the second example below).


```python
import conbench.runner


@conbench.runner.register_benchmark
class ExternalBenchmarkR(conbench.runner.Benchmark):
    """Example benchmark that records an R benchmark result."""

    external = True
    name = "external-r"

    def run(self, **kwargs):
        result, output = self._run_r_command()
        return self.conbench.external(
            {"data": [result], "unit": "s"},
            self.name,
            context=self.conbench.r_info,
            options=kwargs,
            output=output,
        )

    def _run_r_command(self):
        output = self.conbench.execute_r_command(self._get_r_command())
        # The last line of the R output looks like "[1] 2.861023e-06".
        result = float(output.split("\n")[-1].split("[1] ")[1])
        return result, output

    def _get_r_command(self):
        # Plain strings, not f-strings -- an f-string prefix would make
        # Python interpolate the "{ 1 + 1 }" braces and mangle the R code.
        return (
            "addition <- function() { 1 + 1 }; "
            "start_time <- Sys.time(); "
            "addition(); "
            "end_time <- Sys.time(); "
            "result <- end_time - start_time; "
            "as.numeric(result); "
        )
```


```
$ cd ~/workspace/conbench/conbench/tests/benchmark/
$ conbench external-r --help
Usage: conbench external-r [OPTIONS]

  Run external-r benchmark.

Options:
  --show-result BOOLEAN  [default: True]
  --show-output BOOLEAN  [default: False]
  --run-id TEXT          Group executions together with a run id.
  --run-name TEXT        Name of run (commit, pull request, etc).
  --help                 Show this message and exit.
```


```python
import json

import conbench.runner


@conbench.runner.register_benchmark
class ExternalBenchmarkOptionsR(conbench.runner.Benchmark):
    """Example benchmark that records an R benchmark result (with options)."""

    external = True
    name = "external-r-options"
    options = {
        "iterations": {"default": 1, "type": int},
        "drop_caches": {"type": bool, "default": "false"},
    }

    def run(self, **kwargs):
        data, iterations = [], kwargs.get("iterations", 1)

        for _ in range(iterations):
            if kwargs.get("drop_caches", False):
                self.conbench.sync_and_drop_caches()
            result, output = self._run_r_command()
            data.append(result["result"][0]["real"])

        return self.conbench.external(
            {"data": data, "unit": "s"},
            self.name,
            context=self.conbench.r_info,
            options=kwargs,
            output=output,
        )

    def _run_r_command(self):
        r_command = self._get_r_command()
        self.conbench.execute_r_command(r_command)
        # run_one() writes its result to placebo.json (see the R command below).
        with open("placebo.json") as json_file:
            data = json.load(json_file)
        return data, json.dumps(data, indent=2)

    def _get_r_command(self):
        return (
            "library(arrowbench); "
            "out <- run_one(arrowbench:::placebo); "
            "cat(jsonlite::toJSON(out), file='placebo.json'); "
        )
```

```
$ cd ~/workspace/conbench/conbench/tests/benchmark/
$ conbench external-r-options --help
Usage: conbench external-r-options [OPTIONS]

  Run external-r-options benchmark.

Options:
  --iterations INTEGER   [default: 1]
  --drop-caches BOOLEAN  [default: False]
  --show-result BOOLEAN  [default: True]
  --show-output BOOLEAN  [default: False]
  --run-id TEXT          Group executions together with a run id.
  --run-name TEXT        Name of run (commit, pull request, etc).
  --help                 Show this message and exit.
```
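
As suggested above, the boilerplate shared by these two examples could be
pulled into a base class. A minimal sketch, with hypothetical names
(`_RBenchmarkBase`, `simple-timing-r`) that are not part of Conbench or this
commit:

```python
import conbench.runner


class _RBenchmarkBase(conbench.runner.Benchmark):
    """Shared scaffolding for benchmarks that shell out to R."""

    external = True

    def run(self, **kwargs):
        result, output = self._run_r_command()
        return self.conbench.external(
            {"data": [result], "unit": "s"},
            self.name,
            context=self.conbench.r_info,
            options=kwargs,
            output=output,
        )

    def _run_r_command(self):
        output = self.conbench.execute_r_command(self._get_r_command())
        # Parse the "[1] <value>" line that R prints last.
        result = float(output.split("\n")[-1].split("[1] ")[1])
        return result, output

    def _get_r_command(self):
        # Subclasses return the R code to time.
        raise NotImplementedError()


@conbench.runner.register_benchmark
class SimpleTimingR(_RBenchmarkBase):
    """Times `1 + 1` in R, reusing the base class plumbing."""

    name = "simple-timing-r"

    def _get_r_command(self):
        return (
            "start_time <- Sys.time(); "
            "1 + 1; "
            "end_time <- Sys.time(); "
            "as.numeric(end_time - start_time); "
        )
```

With that in place, each additional R benchmark only declares its `name` and
the R command to run.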
20 changes: 19 additions & 1 deletion conbench/machine_info.py
@@ -54,14 +54,32 @@ def _sysctl(stat):
}


-def language():
+def python_info():
    version = _exec_command(["python", "--version"])
    return {
        "benchmark_language": "Python",
        "benchmark_language_version": version,
    }


def r_info():
    r = "cat(version[['version.string']], '\n')"
    version = _exec_command(["R", "-s", "-q", "-e", r])
    return {
        "benchmark_language": "R",
        "benchmark_language_version": version,
    }


def github_info():
    commit = _exec_command(["git", "rev-parse", "HEAD"])
    repository = _exec_command(["git", "remote", "get-url", "origin"])
    return {
        "commit": commit,
        "repository": repository.rsplit(".git")[0],
    }


def machine_info(host_name):
    os_name, os_version = platform.platform(terse=1).split("-", maxsplit=1)

57 changes: 43 additions & 14 deletions conbench/runner.py
Expand Up @@ -9,7 +9,7 @@

import numpy as np

-from .machine_info import language, machine_info
+from .machine_info import github_info, machine_info, python_info, r_info
from .util import Connection


@@ -115,12 +115,38 @@ def list(self, classes):
class Conbench(Connection):
    def __init__(self):
        super().__init__()
-        self.machine_info = machine_info(self.config.host_name)
-        self.language = language()
        self.batch_id = uuid.uuid4().hex
+        self._machine_info = None
+        self._python_info = None
+        self._r_info = None
+        self._github_info = None
        self._drop_caches_failed = False
        self._purge_failed = False

    @property
    def python_info(self):
        if not self._python_info:
            self._python_info = python_info()
        return self._python_info

    @property
    def r_info(self):
        if not self._r_info:
            self._r_info = r_info()
        return self._r_info

    @property
    def github_info(self):
        if not self._github_info:
            self._github_info = github_info()
        return self._github_info

    @property
    def machine_info(self):
        if not self._machine_info:
            self._machine_info = machine_info(self.config.host_name)
        return self._machine_info

    def run(self, f, name, **kwargs):
        """Benchmark a function and publish the result."""
        tags, context, github, options, _ = self._init(kwargs)
@@ -160,7 +186,7 @@ def benchmark(self, f, name, **kwargs):
            raise ValueError(f"Invalid iterations: {iterations}")

        data, output = self._get_timing(f, iterations, timing_options)
-        context.update(self.language)
+        context.update(self.python_info)
        benchmark, _ = self.record(
            {"data": data, "unit": "s"},
            name,
@@ -198,15 +224,6 @@ def record(self, result, name, **kwargs):
        }
        return benchmark, output

-    def get_github_info(self):
-        command = ["git", "rev-parse", "HEAD"]
-        result = subprocess.run(command, capture_output=True, check=True)
-        commit = result.stdout.decode("utf-8").strip()
-        command = ["git", "remote", "get-url", "origin"]
-        result = subprocess.run(command, capture_output=True, check=True)
-        repository = result.stdout.decode("utf-8").strip().rsplit(".git")[0]
-        return {"repository": repository, "commit": commit}

    def mark_new_batch(self):
        self.batch_id = uuid.uuid4().hex

@@ -234,7 +251,7 @@ def _init(self, kwargs):
        context = kwargs.get("context", {})
        github = kwargs.get("github", {})
        options = kwargs.get("options", {})
-        github = github if github else self.get_github_info()
+        github = github if github else self.github_info
        return tags, context, github, options, kwargs.get("output")

    def _get_timing(self, f, iterations, options):
@@ -308,3 +325,15 @@ def _format(f, data, min_length=0):
        result["run_name"] = run_name

        return result

    def execute_r_command(self, r_command, quiet=True):
        if quiet:
            command = ["R", "-s", "-q", "-e", r_command]
        else:
            command = ["R", "-e", r_command]
        result = subprocess.run(command, capture_output=True)
        output = result.stdout.decode("utf-8").strip()
        error = result.stderr.decode("utf-8").strip()
        if result.returncode != 0:
            raise Exception(error)
        return output
