Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/.buildinfo
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config:
tags:
config: 874109c9e8f56215fdcb46cac4aab9f9
tags: 645f666f9bcd5a90fca523b33c5a78b7
10 changes: 5 additions & 5 deletions docs/_modules/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>Overview: module code &#8212; TensorFlowOnSpark 1.1.0 documentation</title>
<title>Overview: module code &#8212; TensorFlowOnSpark 1.2.0 documentation</title>
<link rel="stylesheet" href="../_static/classic.css" type="text/css" />
<link rel="stylesheet" href="../_static/pygments.css" type="text/css" />
<script type="text/javascript">
var DOCUMENTATION_OPTIONS = {
URL_ROOT: '../',
VERSION: '1.1.0',
VERSION: '1.2.0',
COLLAPSE_INDEX: false,
FILE_SUFFIX: '.html',
HAS_SOURCE: true,
Expand All @@ -34,7 +34,7 @@ <h3>Navigation</h3>
<li class="right" >
<a href="../py-modindex.html" title="Python Module Index"
>modules</a> |</li>
<li class="nav-item nav-item-0"><a href="../index.html">TensorFlowOnSpark 1.1.0 documentation</a> &#187;</li>
<li class="nav-item nav-item-0"><a href="../index.html">TensorFlowOnSpark 1.2.0 documentation</a> &#187;</li>
</ul>
</div>

Expand Down Expand Up @@ -85,12 +85,12 @@ <h3>Navigation</h3>
<li class="right" >
<a href="../py-modindex.html" title="Python Module Index"
>modules</a> |</li>
<li class="nav-item nav-item-0"><a href="../index.html">TensorFlowOnSpark 1.1.0 documentation</a> &#187;</li>
<li class="nav-item nav-item-0"><a href="../index.html">TensorFlowOnSpark 1.2.0 documentation</a> &#187;</li>
</ul>
</div>
<div class="footer" role="contentinfo">
&#169; Copyright 2017, Yahoo Inc.
Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.6.5.
Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.6.7.
</div>
</body>
</html>
40 changes: 32 additions & 8 deletions docs/_modules/tensorflowonspark/TFCluster.html
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>tensorflowonspark.TFCluster &#8212; TensorFlowOnSpark 1.1.0 documentation</title>
<title>tensorflowonspark.TFCluster &#8212; TensorFlowOnSpark 1.2.0 documentation</title>
<link rel="stylesheet" href="../../_static/classic.css" type="text/css" />
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
<script type="text/javascript">
var DOCUMENTATION_OPTIONS = {
URL_ROOT: '../../',
VERSION: '1.1.0',
VERSION: '1.2.0',
COLLAPSE_INDEX: false,
FILE_SUFFIX: '.html',
HAS_SOURCE: true,
Expand All @@ -34,7 +34,7 @@ <h3>Navigation</h3>
<li class="right" >
<a href="../../py-modindex.html" title="Python Module Index"
>modules</a> |</li>
<li class="nav-item nav-item-0"><a href="../../index.html">TensorFlowOnSpark 1.1.0 documentation</a> &#187;</li>
<li class="nav-item nav-item-0"><a href="../../index.html">TensorFlowOnSpark 1.2.0 documentation</a> &#187;</li>
<li class="nav-item nav-item-1"><a href="../index.html" accesskey="U">Module code</a> &#187;</li>
</ul>
</div>
Expand Down Expand Up @@ -233,7 +233,8 @@ <h1>Source code for tensorflowonspark.TFCluster</h1><div class="highlight"><pre>
<span class="n">tb_url</span> <span class="o">=</span> <span class="s2">&quot;http://</span><span class="si">{0}</span><span class="s2">:</span><span class="si">{1}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="n">node</span><span class="p">[</span><span class="s1">&#39;host&#39;</span><span class="p">],</span> <span class="n">node</span><span class="p">[</span><span class="s1">&#39;tb_port&#39;</span><span class="p">])</span>
<span class="k">return</span> <span class="n">tb_url</span></div></div>

<div class="viewcode-block" id="run"><a class="viewcode-back" href="../../tensorflowonspark.TFCluster.html#tensorflowonspark.TFCluster.run">[docs]</a><span class="k">def</span> <span class="nf">run</span><span class="p">(</span><span class="n">sc</span><span class="p">,</span> <span class="n">map_fun</span><span class="p">,</span> <span class="n">tf_args</span><span class="p">,</span> <span class="n">num_executors</span><span class="p">,</span> <span class="n">num_ps</span><span class="p">,</span> <span class="n">tensorboard</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">input_mode</span><span class="o">=</span><span class="n">InputMode</span><span class="o">.</span><span class="n">TENSORFLOW</span><span class="p">,</span> <span class="n">log_dir</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">queues</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;input&#39;</span><span class="p">,</span> <span class="s1">&#39;output&#39;</span><span class="p">]):</span>
<div class="viewcode-block" id="run"><a class="viewcode-back" href="../../tensorflowonspark.TFCluster.html#tensorflowonspark.TFCluster.run">[docs]</a><span class="k">def</span> <span class="nf">run</span><span class="p">(</span><span class="n">sc</span><span class="p">,</span> <span class="n">map_fun</span><span class="p">,</span> <span class="n">tf_args</span><span class="p">,</span> <span class="n">num_executors</span><span class="p">,</span> <span class="n">num_ps</span><span class="p">,</span> <span class="n">tensorboard</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">input_mode</span><span class="o">=</span><span class="n">InputMode</span><span class="o">.</span><span class="n">TENSORFLOW</span><span class="p">,</span>
<span class="n">log_dir</span><span class="o">=</span><span class="kc">None</span><span class="p">,</span> <span class="n">driver_ps_nodes</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> <span class="n">queues</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;input&#39;</span><span class="p">,</span> <span class="s1">&#39;output&#39;</span><span class="p">]):</span>
<span class="sd">&quot;&quot;&quot;Starts the TensorFlowOnSpark cluster and Runs the TensorFlow &quot;main&quot; function on the Spark executors</span>

<span class="sd"> Args:</span>
Expand All @@ -245,6 +246,7 @@ <h1>Source code for tensorflowonspark.TFCluster</h1><div class="highlight"><pre>
<span class="sd"> :tensorboard: boolean indicating if the chief worker should spawn a Tensorboard server.</span>
<span class="sd"> :input_mode: TFCluster.InputMode</span>
<span class="sd"> :log_dir: directory to save tensorboard event logs. If None, defaults to a fixed path on local filesystem.</span>
<span class="sd"> :driver_ps_nodes: run the PS nodes locally on the driver instead of on the Spark executors; this helps maximize computing resources (esp. GPU). You will need to set cluster_size = num_executors + num_ps</span>
<span class="sd"> :queues: *INTERNAL_USE*</span>

<span class="sd"> Returns:</span>
Expand All @@ -253,10 +255,14 @@ <h1>Source code for tensorflowonspark.TFCluster</h1><div class="highlight"><pre>
<span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;Reserving TFSparkNodes </span><span class="si">{0}</span><span class="s2">&quot;</span><span class="o">.</span><span class="n">format</span><span class="p">(</span><span class="s2">&quot;w/ TensorBoard&quot;</span> <span class="k">if</span> <span class="n">tensorboard</span> <span class="k">else</span> <span class="s2">&quot;&quot;</span><span class="p">))</span>
<span class="k">assert</span> <span class="n">num_ps</span> <span class="o">&lt;</span> <span class="n">num_executors</span>

<span class="k">if</span> <span class="n">driver_ps_nodes</span> <span class="ow">and</span> <span class="n">input_mode</span> <span class="o">!=</span> <span class="n">InputMode</span><span class="o">.</span><span class="n">TENSORFLOW</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">Exception</span><span class="p">(</span><span class="s1">&#39;running PS nodes on driver locally is only supported in InputMode.TENSORFLOW&#39;</span><span class="p">)</span>

<span class="c1"># build a cluster_spec template using worker_nums</span>
<span class="n">cluster_template</span> <span class="o">=</span> <span class="p">{}</span>
<span class="n">cluster_template</span><span class="p">[</span><span class="s1">&#39;ps&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="nb">range</span><span class="p">(</span><span class="n">num_ps</span><span class="p">)</span>
<span class="n">cluster_template</span><span class="p">[</span><span class="s1">&#39;worker&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="nb">range</span><span class="p">(</span><span class="n">num_ps</span><span class="p">,</span> <span class="n">num_executors</span><span class="p">)</span>
<span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;worker node range </span><span class="si">%s</span><span class="s2">, ps node range </span><span class="si">%s</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="p">(</span><span class="n">cluster_template</span><span class="p">[</span><span class="s1">&#39;worker&#39;</span><span class="p">],</span> <span class="n">cluster_template</span><span class="p">[</span><span class="s1">&#39;ps&#39;</span><span class="p">]))</span>

<span class="c1"># get default filesystem from spark</span>
<span class="n">defaultFS</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">_jsc</span><span class="o">.</span><span class="n">hadoopConfiguration</span><span class="p">()</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;fs.defaultFS&quot;</span><span class="p">)</span>
Expand All @@ -281,7 +287,25 @@ <h1>Source code for tensorflowonspark.TFCluster</h1><div class="highlight"><pre>
<span class="s1">&#39;working_dir&#39;</span><span class="p">:</span> <span class="n">working_dir</span><span class="p">,</span>
<span class="s1">&#39;server_addr&#39;</span><span class="p">:</span> <span class="n">server_addr</span>
<span class="p">}</span>
<span class="n">nodeRDD</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">parallelize</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="n">num_executors</span><span class="p">),</span> <span class="n">num_executors</span><span class="p">)</span>
<span class="k">if</span> <span class="n">driver_ps_nodes</span><span class="p">:</span>
<span class="n">nodeRDD</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">parallelize</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="n">num_ps</span><span class="p">,</span> <span class="n">num_executors</span><span class="p">),</span> <span class="n">num_executors</span> <span class="o">-</span> <span class="n">num_ps</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">nodeRDD</span> <span class="o">=</span> <span class="n">sc</span><span class="o">.</span><span class="n">parallelize</span><span class="p">(</span><span class="nb">range</span><span class="p">(</span><span class="n">num_executors</span><span class="p">),</span> <span class="n">num_executors</span><span class="p">)</span>

<span class="k">if</span> <span class="n">driver_ps_nodes</span><span class="p">:</span>
<span class="k">def</span> <span class="nf">_start_ps</span><span class="p">(</span><span class="n">node_index</span><span class="p">):</span>
<span class="n">logging</span><span class="o">.</span><span class="n">info</span><span class="p">(</span><span class="s2">&quot;starting ps node locally </span><span class="si">%d</span><span class="s2">&quot;</span> <span class="o">%</span> <span class="n">node_index</span><span class="p">)</span>
<span class="n">TFSparkNode</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">map_fun</span><span class="p">,</span>
<span class="n">tf_args</span><span class="p">,</span>
<span class="n">cluster_meta</span><span class="p">,</span>
<span class="n">tensorboard</span><span class="p">,</span>
<span class="n">log_dir</span><span class="p">,</span>
<span class="n">queues</span><span class="p">,</span>
<span class="n">background</span><span class="o">=</span><span class="p">(</span><span class="n">input_mode</span> <span class="o">==</span> <span class="n">InputMode</span><span class="o">.</span><span class="n">SPARK</span><span class="p">))([</span><span class="n">node_index</span><span class="p">])</span>
<span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="n">cluster_template</span><span class="p">[</span><span class="s1">&#39;ps&#39;</span><span class="p">]:</span>
<span class="n">ps_thread</span> <span class="o">=</span> <span class="n">threading</span><span class="o">.</span><span class="n">Thread</span><span class="p">(</span><span class="n">target</span><span class="o">=</span><span class="k">lambda</span><span class="p">:</span> <span class="n">_start_ps</span><span class="p">(</span><span class="n">i</span><span class="p">))</span>
<span class="n">ps_thread</span><span class="o">.</span><span class="n">daemon</span> <span class="o">=</span> <span class="kc">True</span>
<span class="n">ps_thread</span><span class="o">.</span><span class="n">start</span><span class="p">()</span>

<span class="c1"># start TF on a background thread (on Spark driver) to allow for feeding job</span>
<span class="k">def</span> <span class="nf">_start</span><span class="p">():</span>
Expand All @@ -291,7 +315,7 @@ <h1>Source code for tensorflowonspark.TFCluster</h1><div class="highlight"><pre>
<span class="n">tensorboard</span><span class="p">,</span>
<span class="n">log_dir</span><span class="p">,</span>
<span class="n">queues</span><span class="p">,</span>
<span class="p">(</span><span class="n">input_mode</span> <span class="o">==</span> <span class="n">InputMode</span><span class="o">.</span><span class="n">SPARK</span><span class="p">)))</span>
<span class="n">background</span><span class="o">=</span><span class="p">(</span><span class="n">input_mode</span> <span class="o">==</span> <span class="n">InputMode</span><span class="o">.</span><span class="n">SPARK</span><span class="p">)))</span>
<span class="n">t</span> <span class="o">=</span> <span class="n">threading</span><span class="o">.</span><span class="n">Thread</span><span class="p">(</span><span class="n">target</span><span class="o">=</span><span class="n">_start</span><span class="p">)</span>
<span class="n">t</span><span class="o">.</span><span class="n">start</span><span class="p">()</span>

Expand Down Expand Up @@ -366,13 +390,13 @@ <h3>Navigation</h3>
<li class="right" >
<a href="../../py-modindex.html" title="Python Module Index"
>modules</a> |</li>
<li class="nav-item nav-item-0"><a href="../../index.html">TensorFlowOnSpark 1.1.0 documentation</a> &#187;</li>
<li class="nav-item nav-item-0"><a href="../../index.html">TensorFlowOnSpark 1.2.0 documentation</a> &#187;</li>
<li class="nav-item nav-item-1"><a href="../index.html" >Module code</a> &#187;</li>
</ul>
</div>
<div class="footer" role="contentinfo">
&#169; Copyright 2017, Yahoo Inc.
Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.6.5.
Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.6.7.
</div>
</body>
</html>
10 changes: 5 additions & 5 deletions docs/_modules/tensorflowonspark/TFManager.html
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>tensorflowonspark.TFManager &#8212; TensorFlowOnSpark 1.1.0 documentation</title>
<title>tensorflowonspark.TFManager &#8212; TensorFlowOnSpark 1.2.0 documentation</title>
<link rel="stylesheet" href="../../_static/classic.css" type="text/css" />
<link rel="stylesheet" href="../../_static/pygments.css" type="text/css" />
<script type="text/javascript">
var DOCUMENTATION_OPTIONS = {
URL_ROOT: '../../',
VERSION: '1.1.0',
VERSION: '1.2.0',
COLLAPSE_INDEX: false,
FILE_SUFFIX: '.html',
HAS_SOURCE: true,
Expand All @@ -34,7 +34,7 @@ <h3>Navigation</h3>
<li class="right" >
<a href="../../py-modindex.html" title="Python Module Index"
>modules</a> |</li>
<li class="nav-item nav-item-0"><a href="../../index.html">TensorFlowOnSpark 1.1.0 documentation</a> &#187;</li>
<li class="nav-item nav-item-0"><a href="../../index.html">TensorFlowOnSpark 1.2.0 documentation</a> &#187;</li>
<li class="nav-item nav-item-1"><a href="../index.html" accesskey="U">Module code</a> &#187;</li>
</ul>
</div>
Expand Down Expand Up @@ -146,13 +146,13 @@ <h3>Navigation</h3>
<li class="right" >
<a href="../../py-modindex.html" title="Python Module Index"
>modules</a> |</li>
<li class="nav-item nav-item-0"><a href="../../index.html">TensorFlowOnSpark 1.1.0 documentation</a> &#187;</li>
<li class="nav-item nav-item-0"><a href="../../index.html">TensorFlowOnSpark 1.2.0 documentation</a> &#187;</li>
<li class="nav-item nav-item-1"><a href="../index.html" >Module code</a> &#187;</li>
</ul>
</div>
<div class="footer" role="contentinfo">
&#169; Copyright 2017, Yahoo Inc.
Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.6.5.
Created using <a href="http://sphinx-doc.org/">Sphinx</a> 1.6.7.
</div>
</body>
</html>
Loading