Revise part 06 of the programmers' "workflows" tutorial.

Provide solutions to the exercises thereof.
gc3pie · Jan 24, 2017 · e37e150 · e37e150
1 parent a2a4d3c
commit e37e150
Show file tree

Hide file tree

Showing 9 changed files with 462 additions and 51 deletions.
diff --git a/docs/programmers/tutorials/workflows/downloads/simAsset.R b/docs/programmers/tutorials/workflows/downloads/simAsset.R
@@ -112,5 +112,5 @@ paste("DEBUG: nsims=", nsims);
 S <- assetPaths(S0, mu, sig, dt, etime, nsims);
 
 ## write out results
-write.csv(S, "results.csv", row.names=F);
+write.table(t(S), "results.csv", row.names=FALSE, col.names=FALSE, sep=",");
 plotPaths(S, "results.pdf");
diff --git a/docs/programmers/tutorials/workflows/part06.pdf b/docs/programmers/tutorials/workflows/part06.pdf
diff --git a/docs/programmers/tutorials/workflows/part06.tex b/docs/programmers/tutorials/workflows/part06.tex
@@ -17,7 +17,7 @@
   \\[1ex]
   University of Zurich
 }
-\date{November~14--17, 2016}
+\date{January~23--27, 2017}
 
 \begin{document}
 
@@ -224,7 +224,10 @@ \part{Post-processing}
 
   \+
   The path to the output directory is available as
-  \lstinline|self.output_dir|; if \texttt{stdout} and \texttt{stderr}
+  \lstinline|self.output_dir|.
+
+  \+
+  If \texttt{stdout} and \texttt{stderr}
   have been captured, the \textbf{relative} paths to the capture files
   are available as \lstinline|self.stdout| and
   \lstinline|self.stderr|.
@@ -249,6 +252,32 @@ \part{Post-processing}
 \end{frame}
 
 
+\begin{frame}[fragile]
+  \frametitle{Useful in post-processing}\small
+
+  These attributes are available in the \texttt{terminated()} method:
+
+  \+
+  \begin{describe}{\lstinline|self.inputs|}
+    Python dictionary, mapping local (absolute) paths to remote paths (relative
+    to the execution directory).
+  \end{describe}
+
+  \+
+  \begin{describe}{\lstinline|self.outputs|}
+    Python dictionary, mapping remote paths (relative to execution directory) to
+    \emph{URLs} where they have been copied. In particular,
+    \lstinline|self.outputs.keys()| is the list of output file names.
+  \end{describe}
+
+  \+
+  \begin{describe}{\lstinline|self.output_dir|}
+    Path to the local directory where output files have been downloaded.
+  \end{describe}
+
+\end{frame}
+
+
 \begin{frame}
   \begin{exercise*}[6.A]
 
@@ -308,7 +337,7 @@ \part{Termination status}
       7 signal}} for a list of OS signals and their numeric values.
 
   \+
-  {\bfseries Note that GC3Pie overloads some signal codes (unused
+  {\bfseries Note that GC3Pie uses some signal codes (not used
     by the OS) to represent its own specific errors.}
 
   \+
@@ -402,7 +431,7 @@ \part{Termination status}
 
 
 \begin{frame}[fragile]
-  \begin{exercise*}[6.C] \emph{(Difficult)} \small
+  \begin{exercise*}[6.C] \emph{(Difficult)} \footnotesize
 
     MATLAB has the annoying habit of exiting with code 0 even when some error occurred.
 
@@ -413,13 +442,14 @@ \part{Termination status}
       script file, like this: \texttt{app = MatlabApp("\href{https://github.com/uzh/gc3pie/blob/master/docs/programmers/tutorials/workflows/downloads/ra.m}{ra.m}")};
     \item Runs the following command:
 \begin{semiverbatim}
-matlab -nodesktop -nojvm \emph{file.m}
+matlab -nodesktop -nojvm -r \emph{file}
 \end{semiverbatim}
       where \emph{file.m} is the file given to the
       \texttt{MatlabApp()} constructor (MATLAB's \texttt{-r} option takes
       the script name \emph{without} the \texttt{.m} extension).
-    \item captures the standard error output (\texttt{stderr}) of the
-      MATLAB script and, if the string ``\texttt{Out of memory.}''
-      occurs in it, sets the application exitcode to 11.
+    \item captures the standard error output (\texttt{stderr}) of the MATLAB
+      script and, if one of the strings ``\texttt{Out of memory.}'' or
+      ``\texttt{exceeds maximum array size}'' occurs in it, sets the application
+      exitcode to 11.
     \end{itemize}
 
     Verify that it works by running MATLAB script
@@ -430,59 +460,76 @@ \part{Termination status}
 \end{frame}
 
 
-% \begin{frame}[fragile]
-%   \frametitle{Global post-processing}
-%   To add some code which will be executed \emph{just before the script
-%     exits,} add a \lstinline|after_main_loop| method:
-
-%   \begin{python}
-% def after_main_loop(self):
-%   model_names = {}
-%   for app in ~\HL{self.session.tasks.values()}~:
-%     if app.execution.state != Run.State.TERMINATED:
-%       return
-%     if app.model_name in model_names:
-%         model_names[app.model_name] += 1
-%     else:
-%         model_names[app.model_name] = 1
-%   \end{python}
-
-%   \begin{itemize}
-%   \item \lstinline|self.session.tasks| is a map
-%     \lstinline|JobID|~$\Rightarrow$~\lstinline|Application| object
-%   \item \lstinline|self.session.tasks.values()| thus contains a list
-%     of all the \textbf{Application}s created by the \lstinline|new_tasks|
-%   \end{itemize}
-% \end{frame}
-
-
 \begin{frame}
-  \frametitle{Global post-processing}
+  \frametitle{Global post-processing, I}
 Further options for customizing a session-based script:
-\begin{description}
-\item [\texttt{before\_main\_loop(self)}] to execute some code
-  \emph{before} the submission of the jobs.
-\item [\texttt{after\_main\_loop(self)}] to execute some code
-  \emph{after} the main loop. A list of all Application objects is
-  available in the \lstinline|self.session.tasks.values()| list.
-\end{description}
+
+\+
+\begin{describe}{\lstinline|before_main_loop(self)|}
+  to execute some code \emph{before} the main loop starts.
+\end{describe}
+
+\+
+\begin{describe}{\lstinline|after_main_loop(self)|}
+  to execute some code \emph{after} the main loop, i.e., just before the
+  script quits. The list of all \texttt{Application} objects is available
+  as \lstinline|self.session.tasks.values()|.
+\end{describe}
+\end{frame}
+
+
+\begin{frame}[fragile]
+  \frametitle{Global post-processing, II}
+  Example: compute statistical distribution of termination statuses:
+
+  \begin{python}
+def after_main_loop(self):
+  # check that all tasks are terminated
+  can_postprocess = True
+  for task in self.session.tasks.values():
+    if task.execution.state != 'TERMINATED':
+      can_postprocess = False
+      break
+  if can_postprocess:
+    # do stuff... (see next slide)
+  \end{python}
+\end{frame}
+
+
+\begin{frame}[fragile]
+  \frametitle{Global post-processing, III}
+  Example: compute statistical distribution of termination statuses (cont'd):
+
+  \begin{python}
+def after_main_loop(self):
+  # ... (see prev slide)
+  if can_postprocess:
+    status_counts = defaultdict(int)
+    for app in self.session.tasks.values():
+      termstatus = app.execution.returncode
+      status_counts[termstatus] += 1
+  \end{python}
+
+  \+\small Variable \lstinline|self.session.tasks| holds a mapping
+  \lstinline|JobID|~$\Rightarrow$~\lstinline|Application|; thus
+  \lstinline|self.session.tasks.values()| is a list of all the
+  \texttt{Application} instances returned by \lstinline|new_tasks|.
 \end{frame}
 
 
 \begin{frame}
-  \frametitle{Detour: Asian ``put'' option pricing, I}
-  \small
-  The script \texttt{simAsset.R} simulates pricing
-  \href{https://en.wikipedia.org/wiki/Asian_option}{Asian ``put''
-    options} over a certain amount of time.  Different pricing paths
-  are generated, all starting from the same initial price.
+  \frametitle{Detour: asset pricing, I}
+  \small The script \texttt{simAsset.R} simulates asset pricing over a certain
+  amount of time. Different pricing paths are generated using a
+  \href{https://en.wikipedia.org/wiki/Wiener_process}{1D Brownian motion},
+  all starting from the same initial price.
   \begin{center}
     \includegraphics[width=0.75\linewidth]{fig/simAsset.pdf}
   \end{center}
 \end{frame}
 
 \begin{frame}[fragile]
-  \frametitle{Detour: Asian ``put'' option pricing, II}
+  \frametitle{Detour: asset pricing, II}
   \small
   You can run the \texttt{simAsset.R} script with these positional parameters:
   \begin{description}
@@ -516,7 +563,7 @@ \part{Termination status}
     \item takes the same command-line positional arguments as \texttt{simAsset.R}, \emph{plus} an additional integer trailing parameter $P$;
     \item runs \texttt{simAsset.R} (in parallel) $P$ times with the given arguments (so, effectively simulates $N \cdot P$ price paths);
     \item reads all the generated \texttt{results.csv} files, and
-    \item computes and prints the average value of the option at the end of the simulated time, across all $N \cdot P$ price paths.
+    \item computes and prints the average value of the asset at the end of the simulated time, across all $N \cdot P$ price paths.
     \end{itemize}
 
     \+ {\footnotesize (To read CSV files more easily, you can use the standard

diff --git a/docs/programmers/tutorials/workflows/solutions/ex6a.py b/docs/programmers/tutorials/workflows/solutions/ex6a.py
@@ -0,0 +1,79 @@
+#! /usr/bin/env python
+
+"""
+Exercise 6.A: In the ``colorize.py`` script from Exercise 4.A,
+modify the ColorizeApp application to move the output picture file
+into directory ``/home/ubuntu/pictures``.  You might need to store the
+output file name to have it available when the application has
+terminated running.
+"""
+
+import os
+from os.path import abspath, basename, exists, join
+import sys
+
+from gc3libs import Application, log
+from gc3libs.cmdline import SessionBasedScript
+
+
+# Script entry point.
+# NOTE(review): the script imports its own class through module name `ex6a`
+# (before the class definition below has been executed in `__main__`) --
+# presumably so that the objects it creates refer to the importable module
+# `ex6a` rather than to `__main__`; confirm against the GC3Pie documentation.
+if __name__ == '__main__':
+    from ex6a import ColorizeScript
+    ColorizeScript().run()
+
+
+class ColorizeScript(SessionBasedScript):
+    """
+    Colorize multiple images and collect results
+    into directory ``./pictures``
+    """
+    def __init__(self):
+        super(ColorizeScript, self).__init__(version='1.0')
+    def setup_args(self):
+        # positional command-line arguments: three colors, then the images
+        self.add_param('colors', nargs=3,   help="Three colors")
+        self.add_param('images', nargs='+', help="Images to colorize")
+    def new_tasks(self, extra):
+        # build one `ColorizeApp` per image; all share the same three colors
+        first, second, third = self.params.colors
+        return [
+            ColorizeApp(abspath(image), first, second, third)
+            for image in self.params.images
+        ]
+
+
+from shutil import move
+
+from gc3libs.quantity import GB
+
+class ColorizeApp(Application):
+    """
+    Add colors to a grayscale image.
+
+    Runs the ``convert`` command on file `img`, passing colors `col1`,
+    `col2` and `col3` as the ``-clut`` look-up image; the result is written
+    to a file named like the input one, prefixed with ``color-``.
+    """
+    def __init__(self, img, col1, col2, col3):
+        """
+        Set up the ``convert`` invocation for image file `img`.
+
+        :param img: path to the input image; only its base name is used
+          on the command line.
+        :param col1: first color, passed to ``convert`` as ``xc:<col1>``.
+        :param col2: second color, passed likewise.
+        :param col3: third color, passed likewise.
+        """
+        inp = basename(img)
+        # need to save this for later reference in ``terminated()``
+        self.output_file_name = "color-" + inp
+        Application.__init__(
+            self,
+            arguments=[
+                "convert", inp,
+                "(", "xc:"+col1,  "xc:"+col2, "xc:"+col3, "+append", ")", "-clut",
+                self.output_file_name],
+            inputs=[img],
+            outputs=[self.output_file_name],
+            output_dir="colorized-" + inp + ".d",
+            stdout="stdout.txt",
+            stderr="stderr.txt",
+            # required for running on the cloud, see GC3Pie issue #559
+            requested_memory=1*GB)
+    def terminated(self):
+        """
+        Move the output picture into directory ``pictures``.
+
+        The directory path is relative to the current working directory,
+        and the directory is created if it does not exist.  If the expected
+        output file is not found in `self.output_dir`, an error is logged
+        and nothing is moved.
+        """
+        # full path to output file on local filesystem
+        output_file = join(self.output_dir, self.output_file_name)
+        # if the output file is not there, log an error and exit
+        if not exists(output_file):
+            log.error("Expected output file `%s` from %s does not exist!",
+                      output_file, self)
+            return
+        # ensure destination directory exists
+        if not exists('pictures'):
+            os.mkdir('pictures')
+        # the trailing slash ensures `shutil.move` raises an error if
+        # the destination exists but is not a directory
+        move(output_file, 'pictures/')
diff --git a/docs/programmers/tutorials/workflows/solutions/ex6b.py b/docs/programmers/tutorials/workflows/solutions/ex6b.py
@@ -0,0 +1,57 @@
+#! /usr/bin/env python
+
+"""
+Exercise 6.B: Modify the grayscaling script ex2c (or the code it
+depends upon) so that, when a ``GrayscaleApp`` task has terminated
+execution, it prints:
+
+* whether the program has been killed by a signal, and the signal number;
+* whether the program has terminated by exiting, and the exit code.
+"""
+
+import os
+from os.path import abspath, basename
+import sys
+
+from gc3libs import Application, log
+from gc3libs.cmdline import SessionBasedScript
+from gc3libs.quantity import GB
+
+
+# Script entry point.
+# NOTE(review): the script imports its own class through module name `ex6b`
+# (before the class definition below has been executed in `__main__`) --
+# presumably so that the objects it creates refer to the importable module
+# `ex6b` rather than to `__main__`; confirm against the GC3Pie documentation.
+if __name__ == '__main__':
+    from ex6b import GrayscaleScript
+    GrayscaleScript().run()
+
+
+class GrayscaleScript(SessionBasedScript):
+    """
+    Convert images to grayscale.
+    """
+    def __init__(self):
+        super(GrayscaleScript, self).__init__(version='1.0')
+    def new_tasks(self, extra):
+        # `self.params.args` already lists the input file names: map each
+        # one (made absolute) to the app that converts it
+        return [
+            VerboseGrayscaleApp(abspath(image))
+            for image in self.params.args
+        ]
+
+
+# alternatively, one could just copy the code from `grayscale_app.py` here,
+# and append the `terminated()` method to the class definition
+
+from grayscale_app import GrayscaleApp
+
+class VerboseGrayscaleApp(GrayscaleApp):
+    """Grayscale one image file, reporting in the log how the task ended."""
+    def terminated(self):
+        signum = self.execution.signal
+        if signum != 0:
+            log.info("Task %s killed by signal %d", self, signum)
+            return
+        # no signal recorded, hence the program ended by exiting
+        status = self.execution.exitcode
+        if status == 0:
+            log.info("Task %s exited successfully!", self)
+        else:
+            log.info("Task %s exited with error code %d", self, status)