Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -324,9 +324,13 @@ <h1>Source code for gen3.tools.indexing.download_manifest</h1><div class="highli
<span class="k">for</span> <span class="n">record</span> <span class="ow">in</span> <span class="nb">list</span><span class="p">(</span><span class="n">records</span><span class="p">):</span>
<span class="n">manifest_row</span> <span class="o">=</span> <span class="p">[</span>
<span class="n">record</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;did&quot;</span><span class="p">),</span>
<span class="s2">&quot; &quot;</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">record</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;urls&quot;</span><span class="p">)),</span>
<span class="s2">&quot; &quot;</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">record</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;authz&quot;</span><span class="p">)),</span>
<span class="s2">&quot; &quot;</span><span class="o">.</span><span class="n">join</span><span class="p">(</span><span class="n">record</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;acl&quot;</span><span class="p">)),</span>
<span class="s2">&quot; &quot;</span><span class="o">.</span><span class="n">join</span><span class="p">(</span>
<span class="p">[</span><span class="n">url</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot; &quot;</span><span class="p">,</span> <span class="s2">&quot;%20&quot;</span><span class="p">)</span> <span class="k">for</span> <span class="n">url</span> <span class="ow">in</span> <span class="n">record</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;urls&quot;</span><span class="p">)]</span>
<span class="p">),</span>
<span class="s2">&quot; &quot;</span><span class="o">.</span><span class="n">join</span><span class="p">(</span>
<span class="p">[</span><span class="n">auth</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot; &quot;</span><span class="p">,</span> <span class="s2">&quot;%20&quot;</span><span class="p">)</span> <span class="k">for</span> <span class="n">auth</span> <span class="ow">in</span> <span class="n">record</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;authz&quot;</span><span class="p">)]</span>
<span class="p">),</span>
<span class="s2">&quot; &quot;</span><span class="o">.</span><span class="n">join</span><span class="p">([</span><span class="n">a</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot; &quot;</span><span class="p">,</span> <span class="s2">&quot;%20&quot;</span><span class="p">)</span> <span class="k">for</span> <span class="n">a</span> <span class="ow">in</span> <span class="n">record</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;acl&quot;</span><span class="p">)]),</span>
<span class="n">record</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;hashes&quot;</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;md5&quot;</span><span class="p">),</span>
<span class="n">record</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;size&quot;</span><span class="p">),</span>
<span class="n">record</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s2">&quot;file_name&quot;</span><span class="p">),</span>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -267,15 +267,15 @@ <h1>Source code for gen3.tools.indexing.index_manifest</h1><div class="highlight
<span class="k">try</span><span class="p">:</span>
<span class="n">urls</span> <span class="o">=</span> <span class="p">(</span>
<span class="p">[</span>
<span class="n">element</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;&#39;&quot;</span><span class="p">,</span> <span class="s2">&quot;&quot;</span><span class="p">)</span>
<span class="n">element</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;&#39;&quot;</span><span class="p">,</span> <span class="s2">&quot;&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;%20&quot;</span><span class="p">,</span> <span class="s2">&quot; &quot;</span><span class="p">)</span>
<span class="k">for</span> <span class="n">element</span> <span class="ow">in</span> <span class="n">_standardize_str</span><span class="p">(</span><span class="n">fi</span><span class="p">[</span><span class="s2">&quot;url&quot;</span><span class="p">])</span><span class="o">.</span><span class="n">strip</span><span class="p">()[</span><span class="mi">1</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">&quot; &quot;</span><span class="p">)</span>
<span class="p">]</span>
<span class="k">if</span> <span class="s2">&quot;url&quot;</span> <span class="ow">in</span> <span class="n">fi</span> <span class="ow">and</span> <span class="n">fi</span><span class="p">[</span><span class="s2">&quot;url&quot;</span><span class="p">]</span> <span class="o">!=</span> <span class="s2">&quot;[]&quot;</span>
<span class="k">else</span> <span class="p">[]</span>
<span class="p">)</span>
<span class="n">authz</span> <span class="o">=</span> <span class="p">(</span>
<span class="p">[</span>
<span class="n">element</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;&#39;&quot;</span><span class="p">,</span> <span class="s2">&quot;&quot;</span><span class="p">)</span>
<span class="n">element</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;&#39;&quot;</span><span class="p">,</span> <span class="s2">&quot;&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;%20&quot;</span><span class="p">,</span> <span class="s2">&quot; &quot;</span><span class="p">)</span>
<span class="k">for</span> <span class="n">element</span> <span class="ow">in</span> <span class="n">_standardize_str</span><span class="p">(</span><span class="n">fi</span><span class="p">[</span><span class="s2">&quot;authz&quot;</span><span class="p">])</span><span class="o">.</span><span class="n">strip</span><span class="p">()[</span><span class="mi">1</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">&quot; &quot;</span><span class="p">)</span>
<span class="p">]</span>
<span class="k">if</span> <span class="s2">&quot;authz&quot;</span> <span class="ow">in</span> <span class="n">fi</span> <span class="ow">and</span> <span class="n">fi</span><span class="p">[</span><span class="s2">&quot;authz&quot;</span><span class="p">]</span> <span class="o">!=</span> <span class="s2">&quot;[]&quot;</span>
Expand All @@ -288,7 +288,7 @@ <h1>Source code for gen3.tools.indexing.index_manifest</h1><div class="highlight
<span class="k">else</span><span class="p">:</span>
<span class="n">acl</span> <span class="o">=</span> <span class="p">(</span>
<span class="p">[</span>
<span class="n">element</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;&#39;&quot;</span><span class="p">,</span> <span class="s2">&quot;&quot;</span><span class="p">)</span>
<span class="n">element</span><span class="o">.</span><span class="n">strip</span><span class="p">()</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;&#39;&quot;</span><span class="p">,</span> <span class="s2">&quot;&quot;</span><span class="p">)</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;%20&quot;</span><span class="p">,</span> <span class="s2">&quot; &quot;</span><span class="p">)</span>
<span class="k">for</span> <span class="n">element</span> <span class="ow">in</span> <span class="n">_standardize_str</span><span class="p">(</span><span class="n">fi</span><span class="p">[</span><span class="s2">&quot;acl&quot;</span><span class="p">])</span>
<span class="o">.</span><span class="n">strip</span><span class="p">()[</span><span class="mi">1</span><span class="p">:</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span>
<span class="o">.</span><span class="n">split</span><span class="p">(</span><span class="s2">&quot; &quot;</span><span class="p">)</span>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,7 @@ <h1>Source code for gen3.tools.indexing.verify_manifest</h1><div class="highligh
<span class="n">output</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="n">guid</span><span class="si">}</span><span class="s2">|md5|expected </span><span class="si">{</span><span class="n">md5</span><span class="si">}</span><span class="s2">|actual </span><span class="si">{</span><span class="n">actual_record</span><span class="p">[</span><span class="s1">&#39;hashes&#39;</span><span class="p">]</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="s1">&#39;md5&#39;</span><span class="p">)</span><span class="si">}</span><span class="se">\n</span><span class="s2">&quot;</span>
<span class="k">await</span> <span class="n">output_queue</span><span class="o">.</span><span class="n">put</span><span class="p">(</span><span class="n">output</span><span class="p">)</span>
<span class="n">logging</span><span class="o">.</span><span class="n">error</span><span class="p">(</span><span class="n">output</span><span class="p">)</span>

<span class="n">urls</span> <span class="o">=</span> <span class="p">[</span><span class="n">url</span><span class="o">.</span><span class="n">replace</span><span class="p">(</span><span class="s2">&quot;%20&quot;</span><span class="p">,</span> <span class="s2">&quot; &quot;</span><span class="p">)</span> <span class="k">for</span> <span class="n">url</span> <span class="ow">in</span> <span class="n">urls</span><span class="p">]</span>
<span class="k">if</span> <span class="nb">sorted</span><span class="p">(</span><span class="n">urls</span><span class="p">)</span> <span class="o">!=</span> <span class="nb">sorted</span><span class="p">(</span><span class="n">actual_record</span><span class="p">[</span><span class="s2">&quot;urls&quot;</span><span class="p">]):</span>
<span class="n">output</span> <span class="o">=</span> <span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="n">guid</span><span class="si">}</span><span class="s2">|urls|expected </span><span class="si">{</span><span class="n">urls</span><span class="si">}</span><span class="s2">|actual </span><span class="si">{</span><span class="n">actual_record</span><span class="p">[</span><span class="s1">&#39;urls&#39;</span><span class="p">]</span><span class="si">}</span><span class="se">\n</span><span class="s2">&quot;</span>
<span class="k">await</span> <span class="n">output_queue</span><span class="o">.</span><span class="n">put</span><span class="p">(</span><span class="n">output</span><span class="p">)</span>
Expand Down
2 changes: 1 addition & 1 deletion docs/_build/html/tools/indexing.html
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ <h1>Indexing Tools<a class="headerlink" href="#indexing-tools" title="Permalink

<dl class="py function">
<dt id="gen3.tools.indexing.verify_manifest.async_verify_object_manifest">
<em class="property">async </em><code class="sig-prename descclassname">gen3.tools.indexing.verify_manifest.</code><code class="sig-name descname">async_verify_object_manifest</code><span class="sig-paren">(</span><em class="sig-param">commons_url</em>, <em class="sig-param">manifest_file</em>, <em class="sig-param">max_concurrent_requests=24</em>, <em class="sig-param">manifest_row_parsers={'acl': &lt;function _get_acl_from_row&gt;</em>, <em class="sig-param">'authz': &lt;function _get_authz_from_row&gt;</em>, <em class="sig-param">'file_name': &lt;function _get_file_name_from_row&gt;</em>, <em class="sig-param">'file_size': &lt;function _get_file_size_from_row&gt;</em>, <em class="sig-param">'guid': &lt;function _get_guid_from_row&gt;</em>, <em class="sig-param">'md5': &lt;function _get_md5_from_row&gt;</em>, <em class="sig-param">'urls': &lt;function _get_urls_from_row&gt;}</em>, <em class="sig-param">manifest_file_delimiter=None</em>, <em class="sig-param">output_filename='verify-manifest-errors-1588359228.0173433.log'</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/gen3/tools/indexing/verify_manifest.html#async_verify_object_manifest"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#gen3.tools.indexing.verify_manifest.async_verify_object_manifest" title="Permalink to this definition">¶</a></dt>
<em class="property">async </em><code class="sig-prename descclassname">gen3.tools.indexing.verify_manifest.</code><code class="sig-name descname">async_verify_object_manifest</code><span class="sig-paren">(</span><em class="sig-param">commons_url</em>, <em class="sig-param">manifest_file</em>, <em class="sig-param">max_concurrent_requests=24</em>, <em class="sig-param">manifest_row_parsers={'acl': &lt;function _get_acl_from_row&gt;</em>, <em class="sig-param">'authz': &lt;function _get_authz_from_row&gt;</em>, <em class="sig-param">'file_name': &lt;function _get_file_name_from_row&gt;</em>, <em class="sig-param">'file_size': &lt;function _get_file_size_from_row&gt;</em>, <em class="sig-param">'guid': &lt;function _get_guid_from_row&gt;</em>, <em class="sig-param">'md5': &lt;function _get_md5_from_row&gt;</em>, <em class="sig-param">'urls': &lt;function _get_urls_from_row&gt;}</em>, <em class="sig-param">manifest_file_delimiter=None</em>, <em class="sig-param">output_filename='verify-manifest-errors-1589556920.2149627.log'</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/gen3/tools/indexing/verify_manifest.html#async_verify_object_manifest"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#gen3.tools.indexing.verify_manifest.async_verify_object_manifest" title="Permalink to this definition">¶</a></dt>
<dd><p>Verify all file object records into a manifest csv</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
Expand Down
2 changes: 1 addition & 1 deletion docs/_build/html/tools/metadata.html
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ <h1>Metadata Tools<a class="headerlink" href="#metadata-tools" title="Permalink

<dl class="py function">
<dt id="gen3.tools.metadata.ingest_manifest.async_ingest_metadata_manifest">
<em class="property">async </em><code class="sig-prename descclassname">gen3.tools.metadata.ingest_manifest.</code><code class="sig-name descname">async_ingest_metadata_manifest</code><span class="sig-paren">(</span><em class="sig-param">commons_url</em>, <em class="sig-param">manifest_file</em>, <em class="sig-param">metadata_source</em>, <em class="sig-param">auth=None</em>, <em class="sig-param">max_concurrent_requests=24</em>, <em class="sig-param">manifest_row_parsers={'guid_for_row': &lt;function _get_guid_for_row&gt;</em>, <em class="sig-param">'indexed_file_object_guid': &lt;function _query_for_associated_indexd_record_guid&gt;}</em>, <em class="sig-param">manifest_file_delimiter=None</em>, <em class="sig-param">output_filename='ingest-metadata-manifest-errors-1588359228.3701751.log'</em>, <em class="sig-param">get_guid_from_file=True</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/gen3/tools/metadata/ingest_manifest.html#async_ingest_metadata_manifest"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#gen3.tools.metadata.ingest_manifest.async_ingest_metadata_manifest" title="Permalink to this definition">¶</a></dt>
<em class="property">async </em><code class="sig-prename descclassname">gen3.tools.metadata.ingest_manifest.</code><code class="sig-name descname">async_ingest_metadata_manifest</code><span class="sig-paren">(</span><em class="sig-param">commons_url</em>, <em class="sig-param">manifest_file</em>, <em class="sig-param">metadata_source</em>, <em class="sig-param">auth=None</em>, <em class="sig-param">max_concurrent_requests=24</em>, <em class="sig-param">manifest_row_parsers={'guid_for_row': &lt;function _get_guid_for_row&gt;</em>, <em class="sig-param">'indexed_file_object_guid': &lt;function _query_for_associated_indexd_record_guid&gt;}</em>, <em class="sig-param">manifest_file_delimiter=None</em>, <em class="sig-param">output_filename='ingest-metadata-manifest-errors-1589556920.5896738.log'</em>, <em class="sig-param">get_guid_from_file=True</em><span class="sig-paren">)</span><a class="reference internal" href="../_modules/gen3/tools/metadata/ingest_manifest.html#async_ingest_metadata_manifest"><span class="viewcode-link">[source]</span></a><a class="headerlink" href="#gen3.tools.metadata.ingest_manifest.async_ingest_metadata_manifest" title="Permalink to this definition">¶</a></dt>
<dd><p>Ingest all metadata records into a manifest csv</p>
<dl class="field-list simple">
<dt class="field-odd">Parameters</dt>
Expand Down
10 changes: 7 additions & 3 deletions gen3/tools/indexing/download_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,9 +291,13 @@ async def _parse_from_queue(queue):
for record in list(records):
manifest_row = [
record.get("did"),
" ".join(record.get("urls")),
" ".join(record.get("authz")),
" ".join(record.get("acl")),
" ".join(
[url.replace(" ", "%20") for url in record.get("urls")]
),
" ".join(
[auth.replace(" ", "%20") for auth in record.get("authz")]
),
" ".join([a.replace(" ", "%20") for a in record.get("acl")]),
record.get("hashes", {}).get("md5"),
record.get("size"),
record.get("file_name"),
Expand Down
Loading