Skip to content

Commit

Permalink
Revise part 06 of the programmers' "workflows" tutorial.
Browse files Browse the repository at this point in the history
Provide solutions to the exercises thereof.
  • Loading branch information
riccardomurri committed Jan 24, 2017
1 parent a2a4d3c commit e37e150
Show file tree
Hide file tree
Showing 9 changed files with 462 additions and 51 deletions.
2 changes: 1 addition & 1 deletion docs/programmers/tutorials/workflows/downloads/simAsset.R
Expand Up @@ -112,5 +112,5 @@ paste("DEBUG: nsims=", nsims);
S <- assetPaths(S0, mu, sig, dt, etime, nsims);

## write out results
write.csv(S, "results.csv", row.names=F);
write.table(t(S), "results.csv", row.names=FALSE, col.names=FALSE, sep=",");
plotPaths(S, "results.pdf");
Binary file modified docs/programmers/tutorials/workflows/part06.pdf
Binary file not shown.
147 changes: 97 additions & 50 deletions docs/programmers/tutorials/workflows/part06.tex
Expand Up @@ -17,7 +17,7 @@
\\[1ex]
University of Zurich
}
\date{November~14--17, 2016}
\date{January~23--27, 2017}

\begin{document}

Expand Down Expand Up @@ -224,7 +224,10 @@ \part{Post-processing}

\+
The path to the output directory is available as
\lstinline|self.output_dir|; if \texttt{stdout} and \texttt{stderr}
\lstinline|self.output_dir|.

\+
If \texttt{stdout} and \texttt{stderr}
have been captured, the \textbf{relative} paths to the capture files
are available as \lstinline|self.stdout| and
\lstinline|self.stderr|.
Expand All @@ -249,6 +252,32 @@ \part{Post-processing}
\end{frame}


\begin{frame}[fragile]
\frametitle{Useful in post-processing}\small

These attributes are available in the \texttt{terminated()} method:

\+
\begin{describe}{\lstinline|self.inputs|}
Python dictionary, mapping local (absolute) paths to remote paths (relative
to execution directory)
\end{describe}

\+
\begin{describe}{\lstinline|self.outputs|}
Python dictionary, mapping remote paths (relative to execution directory) to
\emph{URLs} where they have been copied. In particular,
\lstinline|self.outputs.keys()| is the list of output file names.
\end{describe}

\+
\begin{describe}{\lstinline|self.output_dir|}
Path to the local directory where output files have been downloaded.
\end{describe}

\end{frame}


\begin{frame}
\begin{exercise*}[6.A]

Expand Down Expand Up @@ -308,7 +337,7 @@ \part{Termination status}
7 signal}} for a list of OS signals and their numeric values.

\+
{\bfseries Note that GC3Pie overloads some signal codes (unused
{\bfseries Note that GC3Pie uses some signal codes (not used
by the OS) to represent its own specific errors.}

\+
Expand Down Expand Up @@ -402,7 +431,7 @@ \part{Termination status}


\begin{frame}[fragile]
\begin{exercise*}[6.C] \emph{(Difficult)} \small
\begin{exercise*}[6.C] \emph{(Difficult)} \footnotesize

MATLAB has the annoying habit of exiting with code 0 even when some error occurred.

Expand All @@ -413,13 +442,14 @@ \part{Termination status}
script file, like this: \texttt{app = MatlabApp("\href{https://github.com/uzh/gc3pie/blob/master/docs/programmers/tutorials/workflows/downloads/ra.m}{ra.m}")};
\item Runs the following command:
\begin{semiverbatim}
matlab -nodesktop -nojvm \emph{file.m}
matlab -nodesktop -nojvm -r \emph{file}
\end{semiverbatim}
where \emph{file.m} is the file given to the
\texttt{MatlabApp()} constructor (and \emph{file} is its name
without the \texttt{.m} extension).
\item captures the standard error output (\texttt{stderr}) of the
MATLAB script and, if the string ``\texttt{Out of memory.}''
occurs in it, sets the application exitcode to 11.
\item captures the standard error output (\texttt{stderr}) of the MATLAB
script and, if one of the strings ``\texttt{Out of memory.}'' or
``\texttt{exceeds maximum array size}'' occurs in it, sets the application
exitcode to 11.
\end{itemize}

Verify that it works by running MATLAB script
Expand All @@ -430,59 +460,76 @@ \part{Termination status}
\end{frame}


% \begin{frame}[fragile]
% \frametitle{Global post-processing}
% To add some code which will be executed \emph{just before the script
% exits,} add a \lstinline|after_main_loop| method:

% \begin{python}
% def after_main_loop(self):
% model_names = {}
% for app in ~\HL{self.session.tasks.values()}~:
% if app.execution.state != Run.State.TERMINATED:
% return
% if app.model_name in model_names:
% model_names[app.model_name] += 1
% else:
% model_names[app.model_name] = 1
% \end{python}

% \begin{itemize}
% \item \lstinline|self.session.tasks| is a map
% \lstinline|JobID|~$\Rightarrow$~\lstinline|Application| object
% \item \lstinline|self.session.tasks.values()| thus contains a list
% of all the \textbf{Application}s created by the \lstinline|new_tasks|
% \end{itemize}
% \end{frame}


\begin{frame}
\frametitle{Global post-processing}
\frametitle{Global post-processing, I}
Further options for customizing a session-based script:
\begin{description}
\item [\texttt{before\_main\_loop(self)}] to execute some code
\emph{before} the submission of the jobs.
\item [\texttt{after\_main\_loop(self)}] to execute some code
\emph{after} the main loop. A list of all Application objects is
available in the \lstinline|self.session.tasks.values()| list.
\end{description}

\+
\begin{describe}{\lstinline|before_main_loop(self)|}
to execute some code \emph{before} the main loop starts.
\end{describe}

\+
\begin{describe}{\lstinline|after_main_loop(self)|}
to execute some code \emph{after} the main loop, i.e., before the script
quits. A list of all Application objects is available in the
\lstinline|self.session.tasks.values()| list.
\end{describe}
\end{frame}


\begin{frame}[fragile]
\frametitle{Global post-processing, II}
Example: compute statistical distribution of termination statuses:

\begin{python}
def after_main_loop(self):
# check that all tasks are terminated
can_postprocess = True
for task in self.session.tasks.values():
if task.execution.state != 'TERMINATED':
can_postprocess = False
break
if can_postprocess:
# do stuff... (see next slide)
\end{python}
\end{frame}


\begin{frame}[fragile]
\frametitle{Global post-processing, III}
Example: compute statistical distribution of termination statuses (cont'd):

\begin{python}
def after_main_loop(self):
# ... (see prev slide)
if can_postprocess:
status_counts = defaultdict(int)
for app in self.session.tasks.values():
termstatus = app.execution.returncode
status_counts[termstatus] += 1
\end{python}

\+\small Variable \lstinline|self.session.tasks| holds a mapping
\lstinline|JobID|~$\Rightarrow$~\lstinline|Application|; thus
\lstinline|self.session.tasks.values()| is a list of all the
\texttt{Application} instances returned by \lstinline|new_tasks|.
\end{frame}


\begin{frame}
\frametitle{Detour: Asian ``put'' option pricing, I}
\small
The script \texttt{simAsset.R} simulates pricing
\href{https://en.wikipedia.org/wiki/Asian_option}{Asian ``put''
options} over a certain amount of time. Different pricing paths
are generated, all starting from the same initial price.
\frametitle{Detour: asset pricing, I}
\small The script \texttt{simAsset.R} simulates asset pricing over a certain
amount of time. Different pricing paths are generated using a
\href{https://en.wikipedia.org/wiki/Wiener_process}{1D Brownian motion},
all starting from the same initial price.
\begin{center}
\includegraphics[width=0.75\linewidth]{fig/simAsset.pdf}
\end{center}
\end{frame}

\begin{frame}[fragile]
\frametitle{Detour: Asian ``put'' option pricing, II}
\frametitle{Detour: asset pricing, II}
\small
You can run the \texttt{simAsset.R} script with these positional parameters:
\begin{description}
Expand Down Expand Up @@ -516,7 +563,7 @@ \part{Termination status}
\item takes the same command-line positional arguments as \texttt{simAsset.R}, \emph{plus} an additional integer trailing parameter $P$;
\item runs \texttt{simAsset.R} (in parallel) $P$ times with the given arguments (so, effectively simulates $N \cdot P$ price paths);
\item reads all the generated \texttt{results.csv} files, and
\item computes and prints the average value of the option at the end of the simulated time, across all $N \cdot P$ price paths.
\item computes and prints the average value of the asset at the end of the simulated time, across all $N \cdot P$ price paths.
\end{itemize}

\+ {\footnotesize (To read CSV files more easily, you can use the standard
Expand Down
79 changes: 79 additions & 0 deletions docs/programmers/tutorials/workflows/solutions/ex6a.py
@@ -0,0 +1,79 @@
#! /usr/bin/env python

"""
Exercise 6.A: In the ``colorize.py`` script from Exercise 4.A,
modify the ColorizeApp application to move the output picture file
into directory ``/home/ubuntu/pictures``. You might need to store the
output file name to have it available when the application has
terminated running.
"""

import os
from os.path import abspath, basename, exists, join
import sys

from gc3libs import Application, log
from gc3libs.cmdline import SessionBasedScript


# Entry point: the statements below run only when this file is executed
# as a program, not when it is imported as a module.
if __name__ == '__main__':
    # Load the script class from the importable module `ex6a` and run it.
    # NOTE(review): presumably done so that the class belongs to module
    # `ex6a` rather than to `__main__` -- confirm against the GC3Pie docs.
    from ex6a import ColorizeScript
    ColorizeScript().run()


class ColorizeScript(SessionBasedScript):
    """
    Colorize multiple images and collect results
    into directory ``./pictures``
    """

    def __init__(self):
        super(ColorizeScript, self).__init__(version='1.0')

    def setup_args(self):
        # positional arguments: exactly three colors come first,
        # followed by one or more image files
        self.add_param('colors', nargs=3, help="Three colors")
        self.add_param('images', nargs='+', help="Images to colorize")

    def new_tasks(self, extra):
        # build one `ColorizeApp` per image; every task gets the same
        # three colors and the absolute path of its input image
        first, second, third = self.params.colors
        return [
            ColorizeApp(abspath(image), first, second, third)
            for image in self.params.images
        ]


from shutil import move

from gc3libs.quantity import GB

class ColorizeApp(Application):
    """
    Add colors to a grayscale image.

    The task runs the ``convert`` command on one input image; once the
    task has terminated, the resulting picture is moved into directory
    ``pictures`` (a path relative to the current working directory).
    """

    def __init__(self, img, col1, col2, col3):
        """
        Set up the colorizing task.

        :param img: path to the input image file
        :param col1: first color, passed to ``convert`` as ``xc:<col1>``
        :param col2: second color, passed to ``convert`` as ``xc:<col2>``
        :param col3: third color, passed to ``convert`` as ``xc:<col3>``
        """
        inp = basename(img)
        # need to save this for later reference in ``terminated()``
        self.output_file_name = "color-" + inp
        Application.__init__(
            self,
            arguments=[
                "convert", inp,
                "(", "xc:"+col1, "xc:"+col2, "xc:"+col3, "+append", ")", "-clut",
                self.output_file_name],
            inputs=[img],
            outputs=[self.output_file_name],
            output_dir="colorized-" + inp + ".d",
            stdout="stdout.txt",
            stderr="stderr.txt",
            # required for running on the cloud, see GC3Pie issue #559
            requested_memory=1*GB)

    def terminated(self):
        """
        Move the colorized picture into directory ``pictures``.

        If the expected output file is not found in the task's output
        directory, an error is logged and nothing else is done.
        """
        # full path to output file on local filesystem
        output_file = join(self.output_dir, self.output_file_name)
        # if the output file is not there, log an error and exit
        if not exists(output_file):
            log.error("Expected output file `%s` from %s does not exist!",
                      output_file, self)
            return
        # ensure destination directory exists
        if not exists('pictures'):
            os.mkdir('pictures')
        # the trailing slash ensures `shutil.move` raises an error if
        # the destination exists but is not a directory
        move(output_file, 'pictures/')
57 changes: 57 additions & 0 deletions docs/programmers/tutorials/workflows/solutions/ex6b.py
@@ -0,0 +1,57 @@
#! /usr/bin/env python

"""
Exercise 6.B: Modify the grayscaling script ex2c (or the code it
depends upon) so that, when a ``GrayscaleApp`` task has terminated
execution, it prints:
* whether the program has been killed by a signal, and the signal number;
* whether the program has terminated by exiting, and the exit code.
"""

import os
from os.path import abspath, basename
import sys

from gc3libs import Application, log
from gc3libs.cmdline import SessionBasedScript
from gc3libs.quantity import GB


# Entry point: the statements below run only when this file is executed
# as a program, not when it is imported as a module.
if __name__ == '__main__':
    # Load the script class from the importable module `ex6b` and run it.
    # NOTE(review): presumably done so that the class belongs to module
    # `ex6b` rather than to `__main__` -- confirm against the GC3Pie docs.
    from ex6b import GrayscaleScript
    GrayscaleScript().run()


class GrayscaleScript(SessionBasedScript):
    """
    Convert images to grayscale.
    """

    def __init__(self):
        super(GrayscaleScript, self).__init__(version='1.0')

    def new_tasks(self, extra):
        # `self.params.args` is already a list of file names: turn each
        # one into a task, handing over the absolute path of the file
        return [
            VerboseGrayscaleApp(abspath(path))
            for path in self.params.args
        ]


# alternately, one could just copy code from `grayscale_app.py` here,
# and append the `terminated()` method to the definition

from grayscale_app import GrayscaleApp

class VerboseGrayscaleApp(GrayscaleApp):
    """Convert a single image file to grayscale and log termination status."""

    def terminated(self):
        """Log whether the task was killed by a signal or exited, and how."""
        signal = self.execution.signal
        if signal != 0:
            log.info("Task %s killed by signal %d", self, signal)
            return
        # signal is 0 here, hence normal termination: report the exit code
        exitcode = self.execution.exitcode
        if exitcode == 0:
            log.info("Task %s exited successfully!", self)
        else:
            log.info("Task %s exited with error code %d", self, exitcode)

0 comments on commit e37e150

Please sign in to comment.