Skip to content

Commit

Permalink
initial work on #228
Browse files Browse the repository at this point in the history
  • Loading branch information
Maarten-vd-Sande committed Dec 17, 2019
1 parent bb41a05 commit 3758408
Showing 1 changed file with 22 additions and 0 deletions.
22 changes: 22 additions & 0 deletions rules/configuration.smk
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,28 @@ except FileNotFoundError:
layout_cache = {}


for sample in [sample for sample in samples.index if sample not in layout_cache]:
    # For every sample without a cached layout: query the NCBI SRA search page
    # for that sample, follow each link containing 'SRR', and print the digit
    # found in the "This run has N read" sentence of the linked page.
    #
    # The imports stay inside the loop so the scraping dependencies are only
    # required when at least one sample is missing from the cache.
    import urllib.request
    import requests
    from bs4 import BeautifulSoup

    url = f"https://www.ncbi.nlm.nih.gov/sra/?term={sample}"

    # Close the HTTP response as soon as the body has been read.
    with urllib.request.urlopen(url) as conn:
        html = conn.read()

    soup = BeautifulSoup(html, features="html5lib")
    links = soup.find_all('a')

    for tag in links:
        link = tag.get('href', None)
        if link is not None and 'SRR' in link:
            # NOTE(review): prefixing "https:" presumes the href is
            # protocol-relative ("//host/path") -- confirm against the page.
            with urllib.request.urlopen("https:" + link) as trace_conn:
                trace_html = trace_conn.read()

            # Raw string so that \d is a regex digit class rather than an
            # invalid string escape. str() on the bytes body yields its repr,
            # which is sufficient for matching this ASCII-only pattern.
            x = re.search(r"This run has (\d) read", str(trace_html))
            if x is None:
                # The page lacks the expected sentence; skip this link
                # instead of failing on None.group().
                continue
            print(x.group(1))


# NOTE(review): unconditional failure -- this raises AssertionError every time
# (unless Python runs with -O, which strips asserts), so the statements below
# are never reached. Looks like a temporary work-in-progress stop; confirm and
# remove before relying on the code that follows.
assert False
# Thread pool whose argument is half of the 'ncbi_requests' config value
# (default 3), rounded down by integer division.
# NOTE(review): presumably the argument is the worker count; a configured
# value of 1 would make it 0 -- verify that ThreadPool accepts that.
tp = ThreadPool(config.get('ncbi_requests', 3) // 2)
# Reset the 'layout' entry of the config to an empty mapping.
config['layout'] = {}

Expand Down

0 comments on commit 3758408

Please sign in to comment.