updating caliper analysis to use more recent version
Signed-off-by: vsoch <vsochat@stanford.edu>
vsoch committed Jan 3, 2021
1 parent 25f2d1e commit 46b6ed3
Showing 52 changed files with 1,768 additions and 1,522 deletions.
52 changes: 3 additions & 49 deletions README.md
@@ -19,7 +19,7 @@ For each of the above, to ensure some reproducibility and headless-ness across r

- set a seed for each of numpy and tensorflow
- remove matplotlib plots and print out model information instead
- for some long running models, decreasing training epochs or similar
- for some long running models, decreasing training epochs, increasing batch size, or similar
- removed any GPU usage, replacing with CPU (I don't have GPU)

## Usage
@@ -45,51 +45,5 @@ python run_analysis.py --config caliper.yaml
This is going to save output to a hidden `.caliper` directory (also in the present
working directory) that will have a json dump of tensorflow versions matched to dockerfiles
that can build them, and then test results (output, error, and return code) for each.
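
As a rough sketch (assuming the result layout from the previous version of the analysis script,
with a `build_retval` and per-test `retval` for each Python wheel tag, carries over), one of these
result files could be inspected like this:

```python
import json

# Hypothetical file name: results are written as <dependency>-<version>.json
with open(".caliper/tensorflow-0.11.0.json") as fd:
    result = json.load(fd)

# Each key is a Python wheel tag (e.g. cp27, cp35) mapped to build and test results
for python_version, res in result.items():
    print(python_version, "build return code:", res.get("build_retval"))
    for name, test in res.get("tests", {}).items():
        print("  %s -> return code %s" % (name, test.get("retval")))
```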

### caliper.yml

The caliper.yml file is a small configuration file to run caliper. Currently it's fairly simple:
we need to define the dependency to run tests over (e.g., tensorflow), the Dockerfile template,
a name, and then a list of test scripts to run:

```yaml
analysis:
  name: Testing tensorflow
  dockerfile: Dockerfile
  dependency: tensorflow
  tests:
    - tensorflow_v0.11/5_MultiGPU/multigpu_basics.py
    - tensorflow_v0.11/1_Introduction/basic_operations.py
    - tensorflow_v0.11/1_Introduction/helloworld.py
    - tensorflow_v0.11/4_Utils/tensorboard_advanced.py
```

An additional test to just import the library is added by default, so if you don't define
any tests, this will be the only one run. If you want to add custom arguments for your template (beyond the base image
that is derived for your Python software, and the dependency name used to derive wheels to install),
you can do this with args:

```yaml
analysis:
  name: Testing tensorflow
  dockerfile: Dockerfile
  args:
    additionaldeps:
      - scikit-learn
```

The functionality of your arguments is up to you. In the example above, `additionaldeps`
would be a list, so likely you would loop over it in your Dockerfile template (which uses jinja2).
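
For example, a minimal sketch of how such a loop could look when rendered with jinja2 (the
template fragment and package names here are made up for illustration):

```python
from jinja2 import Template

# A hypothetical Dockerfile fragment that loops over additionaldeps
fragment = Template(
    """{% for dep in additionaldeps %}
RUN pip install {{ dep }}
{% endfor %}"""
)

# Renders one "RUN pip install <dep>" line per extra dependency
print(fragment.render(additionaldeps=["scikit-learn", "pandas"]))
```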


### Dockerfile

The [Dockerfile](Dockerfile) template (specified in the caliper.yaml) should expect
the following arguments from the caliper analysis script:

- **base**: the base Python image, derived from the wheel we need to install
- **filename**: the url filename of the wheel to download with wget
- **basename**: the basename of the wheel to install with pip

Additional arguments under args will be handed to the template, and are up to you
to define and render appropriately.
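
As a loose sketch of how these come together (the template string, wheel URL, and versions below
are hypothetical, just to show the shape of the render call):

```python
from jinja2 import Template

# Hypothetical stand-in for the real Dockerfile template
template = Template(
    """FROM {{ base }}
RUN wget {{ filename }} && pip install {{ basename }}"""
)

# base is derived from the wheel's Python version; filename and basename
# come from the pypi release entry for the dependency
print(
    template.render(
        base="python:3.5",
        filename="https://files.pythonhosted.org/.../tensorflow-0.11.0-cp35-cp35m-manylinux1_x86_64.whl",
        basename="tensorflow-0.11.0-cp35-cp35m-manylinux1_x86_64.whl",
    )
)
```
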
To learn more about the format of the `caliper.yaml`, see the [caliper](https://github.com/vsoch/caliper)
repository.
3 changes: 3 additions & 0 deletions caliper.yaml
@@ -1,7 +1,10 @@
analysis:
  name: Testing tensorflow
  packagemanager: pypi
  dockerfile: Dockerfile
  dependency: tensorflow
  python_versions:
    - cp27
  tests:
    - tensorflow_v0.11/5_MultiGPU/multigpu_basics.py
    - tensorflow_v0.11/1_Introduction/basic_operations.py
193 changes: 4 additions & 189 deletions run_analysis.py
@@ -5,84 +5,9 @@
__license__ = "MPL 2.0"

import argparse
from jinja2 import Template
from caliper.managers import PypiManager
import subprocess
import threading
import yaml
from caliper.analysis import CaliperAnalyzer
import sys
import os
import json

here = os.path.abspath(os.path.dirname(__file__))

# helper functions


def read_file(filename, readlines=True):
    with open(filename, "r") as filey:
        content = filey.read()
    return content


def write_file(filename, content):
    with open(filename, "w") as filey:
        filey.write(content)


def read_yaml(filename):
    stream = read_file(filename, readlines=False)
    return yaml.load(stream, Loader=yaml.FullLoader)


class CommandRunner(object):
    def __init__(self):
        self.reset()

    def reset(self):
        self.error = []
        self.output = []
        self.retval = None

    def reader(self, stream, context):
        """Get output and error lines and save to command runner."""
        # Make sure we save to the correct field
        lines = self.error
        if context == "stdout":
            lines = self.output

        while True:
            s = stream.readline()
            if not s:
                break
            lines.append(s.decode("utf-8"))
        stream.close()

    def run_command(self, cmd, env=None, **kwargs):
        self.reset()

        # If we need to update the environment
        # **IMPORTANT: this will include envars from host. Absolutely cannot
        # be any secrets (they should be defined in the app settings file)
        envars = os.environ.copy()
        if env:
            envars.update(env)

        p = subprocess.Popen(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=envars, **kwargs
        )

        # Create threads for error and output
        t1 = threading.Thread(target=self.reader, args=(p.stdout, "stdout"))
        t1.start()
        t2 = threading.Thread(target=self.reader, args=(p.stderr, "stderr"))
        t2.start()

        p.wait()
        t1.join()
        t2.join()
        self.retval = p.returncode
        return self.output


def get_parser():
@@ -106,119 +31,9 @@ def main():
    if not args.config or not os.path.exists(args.config):
        sys.exit("A --config yaml file that exists on the filesystem is required.")

    config = read_yaml(args.config).get("analysis")

    # Get the Dockerfile, ensure it exists
    dockerfile = config.get("dockerfile", "Dockerfile")
    if not os.path.exists(dockerfile):
        sys.exit("The Dockerfile does not exist.")

    # Read in the template, populate with each deps version
    template = Template(read_file(dockerfile))

    # Currently only supports testing one dependency, and any added args
    dependency = config.get("dependency")
    args = config.get("args", {})
    manager = PypiManager(dependency)

    # Get all versions of Python supported for linux (manylinux x86_64)
    all_releases = manager.filter_releases(".*manylinux.*x86_64.*")
    python_versions = manager.get_python_versions()

    # prepare a command runner, check that docker is installed
    runner = CommandRunner()
    runner.run_command(["which", "docker"])
    if runner.retval != 0:
        sys.exit("Docker must be installed to build containers.")

    # make caliper output directory
    outdir = os.path.join(here, ".caliper")
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # Build a container for each version (todo, can we scale this requiring docker?)
    for version, releases in all_releases.items():

        # Don't redo if we already have an output file
        outfile = os.path.join(outdir, "%s-%s.json" % (dependency, version))
        if os.path.exists(outfile):
            continue

        # Create a lookup based on Python version
        lookup = {x["python_version"]: x for x in releases}

        # Store results based on Python versions
        single_result = {x: {} for x in python_versions}
        for python_version in python_versions:
            print("Testing %s" % python_version)

            # We can't do tests if the Python version isn't supported, considered a fail (return value 1)
            if python_version not in lookup:
                single_result[python_version]["build_retval"] = 1
                continue

            # Build a container with the correct Python version and filename
            spec = lookup[python_version]
            container_base = "python:%s" % ".".join(
                [x for x in python_version.lstrip("cp")]
            )
            result = template.render(
                base=container_base,
                filename=spec["url"],
                basename=spec["filename"],
                **args
            )

            # Write a temporary Dockerfile, and build the container
            write_file("Dockerfile.caliper", result)
            container_name = "%s-container" % dependency
            runner.run_command(
                [
                    "docker",
                    "build",
                    "-f",
                    "Dockerfile.caliper",
                    "-t",
                    container_name,
                    ".",
                ],
                cwd=here,
            )

            # Keep a result for each script
            single_result[python_version]["build_retval"] = runner.retval
            single_result[python_version]["dockerfile"] = result
            if runner.retval != 0:
                continue

            # Get packages installed for each container
            runner.run_command(["docker", "run", container_name, "pip", "freeze"])
            single_result[python_version]["requirements.txt"] = runner.output

            # Test basic import of library
            tests = {}
            runner.run_command(
                ["docker", "run", container_name, "python", "-c", "'import tensorflow'"]
            )
            tests["import tensorflow"] = {
                "error": runner.error,
                "output": runner.output,
                "retval": runner.retval,
            }

            # Run each test
            for script in config.get("tests", []):
                runner.run_command(["docker", "run", container_name, "python", script])
                tests[script] = {
                    "error": runner.error,
                    "output": runner.output,
                    "retval": runner.retval,
                }

            single_result[python_version]["tests"] = tests

        # Save the single result to file
        write_file(outfile, json.dumps(single_result))
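
    # The container builds and tests are now handled by caliper itself,
    # driven by the same caliper.yaml configuration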
    client = CaliperAnalyzer(args.config)
    analyzer = client.get_analyzer()
    analyzer.run_analysis()


if __name__ == "__main__":
12 changes: 6 additions & 6 deletions tensorflow_v0.11/1_Introduction/basic_operations.py
@@ -1,9 +1,9 @@
'''
"""
Basic Operations example using TensorFlow library.
Author: Aymeric Damien
Project: https://github.com/aymericdamien/TensorFlow-Examples/
'''
"""

from __future__ import print_function

@@ -24,8 +24,8 @@
# Launch the default graph.
with tf.Session() as sess:
print("a=2, b=3")
print("Addition with constants: %i" % sess.run(a+b))
print("Multiplication with constants: %i" % sess.run(a*b))
print("Addition with constants: %i" % sess.run(a + b))
print("Multiplication with constants: %i" % sess.run(a * b))

# Basic Operations with variable as graph input
# The value returned by the constructor represents the output
@@ -54,10 +54,10 @@
#
# The value returned by the constructor represents the output
# of the Constant op.
matrix1 = tf.constant([[3., 3.]])
matrix1 = tf.constant([[3.0, 3.0]])

# Create another Constant that produces a 2x1 matrix.
matrix2 = tf.constant([[2.],[2.]])
matrix2 = tf.constant([[2.0], [2.0]])

# Create a Matmul op that takes 'matrix1' and 'matrix2' as inputs.
# The returned value, 'product', represents the result of the matrix
6 changes: 3 additions & 3 deletions tensorflow_v0.11/1_Introduction/helloworld.py
@@ -1,9 +1,9 @@
'''
"""
HelloWorld example using TensorFlow library.
Author: Aymeric Damien
Project: https://github.com/aymericdamien/TensorFlow-Examples/
'''
"""

from __future__ import print_function

@@ -22,7 +22,7 @@
#
# The value returned by the constructor represents the output
# of the Constant op.
hello = tf.constant('Hello, TensorFlow!')
hello = tf.constant("Hello, TensorFlow!")

# Start tf session
sess = tf.Session()
