In [2]:
#!/usr/bin/env python
import os
from bs4 import BeautifulSoup
from urllib import request
import sys
import json
from threading import Thread
import queue
from warnings import warn

# Per-language settings; each entry unpacks into (file extension, comment
# prefix) further down.
with open('languages.json') as languages_file:
    LANGUAGES = json.load(languages_file)

# Template for a problem page; the placeholder takes the problem number.
URL_ROOT = 'http://projecteuler.net/problem={}'
# Highest problem number that gets queued for download.
LAST_PROB = 640
# Number of worker threads started by the driver.
THREADS = 5
# Only these characters survive when a problem title becomes a file name.
LEGAL_CHARS = 'abcdefghijklmnopqrstuvwxyz01234567890_-'

# Command-line form, currently disabled in favour of the fixed values below:
# out_path, language = sys.argv[1:]
out_path = 'tst'
language = 'javascript'

# File suffix and line-comment prefix for the selected language.
ext, comment = LANGUAGES[language]

# Create the output directory up front; an existing one is not an error.
print('Ensuring path exists: {}'.format(out_path))
os.makedirs(out_path, exist_ok=True)

def fetch_problem(num):
    """Download one problem page and save it as a commented source stub.

    The page at ``URL_ROOT.format(num)`` is fetched and parsed. The first
    ``<h2>`` supplies the title and the first ``div.problem_content`` supplies
    the statement. The upper-cased title, the URL and every non-empty line of
    the statement are each prefixed with ``comment`` and written to
    ``<out_path>/<NNN>-<sanitised_title><ext>``.

    Args:
        num: Problem number; substituted into ``URL_ROOT`` and zero-padded to
            three digits in the file name.

    Returns:
        None. If the target file already exists a warning is issued and the
        existing file is left untouched.

    Raises:
        urllib.error.URLError: If the page cannot be retrieved.
        IndexError: If the page has no ``<h2>`` or no ``problem_content`` div.
    """
    print('Fetching problem {} of {}...'.format(num, LAST_PROB))
    url = URL_ROOT.format(num)

    # Close the HTTP response deterministically instead of leaking the socket.
    with request.urlopen(url) as response:
        html = response.read().decode()

    # Name the parser explicitly so results do not depend on which optional
    # parsers happen to be installed (and to avoid bs4's "no parser" warning).
    soup = BeautifulSoup(html, 'html.parser')

    title = soup.find_all('h2')[0].text
    # Whitespace runs become single underscores, then anything outside
    # LEGAL_CHARS is dropped so the title is safe to use in a file name.
    slug = '_'.join(title.split()).lower()
    file_title = ''.join(char for char in slug if char in LEGAL_CHARS)

    filepath = os.path.join(out_path, '{:03}-{}{}'.format(num, file_title, ext))

    content = soup.find_all('div', {'class': 'problem_content'})[0]

    lines = [line for line in content.text.split('\n') if line]

    comment_text = ['{} {}'.format(comment, line) for line in [title.upper(), url] + lines]

    # print('\tSaving problem at {}'.format(filepath))

    try:
        # 'x' creates the file atomically and fails if it already exists, so
        # there is no window between an existence check and the write.
        # UTF-8 is forced because problem text may contain non-ASCII symbols
        # that a locale-default encoding could reject.
        with open(filepath, 'x', encoding='utf-8') as f:
            f.write('\n'.join(comment_text))
    except FileExistsError:
        warn('File {} already exists: skipping.'.format(filepath))


def worker():
    """Process problem numbers from the shared queue ``q`` until told to stop.

    Each item taken from ``q`` is passed to ``fetch_problem``. A ``None`` item
    is the stop signal: the loop exits without marking it done.

    A failure in ``fetch_problem`` is reported on stderr and the worker moves
    on to the next item. The item is always marked done, because a skipped
    ``task_done()`` would leave ``q.join()`` blocked forever.
    """
    while True:
        item = q.get()
        if item is None:
            break
        try:
            fetch_problem(item)
        except Exception as exc:
            # Thread boundary: report and keep the worker alive so the
            # remaining queued problems are still processed.
            print('Problem {} failed: {!r}'.format(item, exc), file=sys.stderr)
        finally:
            q.task_done()

# Shared work queue; `worker` reads it through this module-level name.
q = queue.Queue()

# Build the pool of THREADS workers, then set them all running.
threads = [Thread(target=worker) for _ in range(THREADS)]
for pool_thread in threads:
    pool_thread.start()

# Queue every problem number from 1 through LAST_PROB inclusive.
for problem_number in range(1, LAST_PROB + 1):
    q.put(problem_number)

# Wait here until each queued number has been marked done.
q.join()

# Hand every worker one None so its loop exits, then wait for the threads.
for _ in threads:
    q.put(None)
for pool_thread in threads:
    pool_thread.join()


Ensuring path exists: tst
Fetching problem 1 of 640...
Fetching problem 2 of 640...
Fetching problem 3 of 640...
Fetching problem 4 of 640...
Fetching problem 5 of 640...
Fetching problem 6 of 640...
Fetching problem 7 of 640...
Fetching problem 8 of 640...
Fetching problem 9 of 640...
Fetching problem 10 of 640...
Fetching problem 11 of 640...Fetching problem 12 of 640...

Fetching problem 13 of 640...
Fetching problem 14 of 640...
Fetching problem 15 of 640...
Fetching problem 16 of 640...
Fetching problem 17 of 640...
Fetching problem 18 of 640...
Fetching problem 19 of 640...
Fetching problem 20 of 640...
Fetching problem 21 of 640...
Fetching problem 22 of 640...
Fetching problem 23 of 640...
Fetching problem 24 of 640...
Fetching problem 25 of 640...
Fetching problem 26 of 640...
Fetching problem 27 of 640...
Fetching problem 28 of 640...
Fetching problem 29 of 640...
Fetching problem 30 of 640...
Fetching problem 31 of 640...
Fetching problem 32 of 640...
Fetching problem 33 of 

Fetching problem 268 of 640...
Fetching problem 269 of 640...
Fetching problem 270 of 640...
Fetching problem 271 of 640...
Fetching problem 272 of 640...
Fetching problem 273 of 640...
Fetching problem 274 of 640...
Fetching problem 275 of 640...
Fetching problem 276 of 640...
Fetching problem 277 of 640...
Fetching problem 278 of 640...
Fetching problem 279 of 640...
Fetching problem 280 of 640...
Fetching problem 281 of 640...
Fetching problem 282 of 640...
Fetching problem 283 of 640...
Fetching problem 284 of 640...
Fetching problem 285 of 640...
Fetching problem 286 of 640...
Fetching problem 287 of 640...
Fetching problem 288 of 640...
Fetching problem 289 of 640...
Fetching problem 290 of 640...Fetching problem 291 of 640...

Fetching problem 292 of 640...
Fetching problem 293 of 640...
Fetching problem 294 of 640...
Fetching problem 295 of 640...
Fetching problem 296 of 640...
Fetching problem 297 of 640...
Fetching problem 298 of 640...
Fetching problem 299 of 640...
Fetching

Fetching problem 534 of 640...
Fetching problem 535 of 640...
Fetching problem 536 of 640...
Fetching problem 537 of 640...
Fetching problem 538 of 640...
Fetching problem 539 of 640...
Fetching problem 540 of 640...
Fetching problem 541 of 640...
Fetching problem 542 of 640...
Fetching problem 543 of 640...
Fetching problem 544 of 640...
Fetching problem 545 of 640...
Fetching problem 546 of 640...
Fetching problem 547 of 640...
Fetching problem 548 of 640...
Fetching problem 549 of 640...
Fetching problem 550 of 640...
Fetching problem 551 of 640...
Fetching problem 552 of 640...
Fetching problem 553 of 640...
Fetching problem 554 of 640...
Fetching problem 555 of 640...
Fetching problem 556 of 640...
Fetching problem 557 of 640...
Fetching problem 558 of 640...
Fetching problem 559 of 640...
Fetching problem 560 of 640...
Fetching problem 561 of 640...Fetching problem 562 of 640...

Fetching problem 563 of 640...
Fetching problem 564 of 640...
Fetching problem 565 of 640...
Fetching