In [99]:
import requests

In [100]:
# Site root; prefixed to each scraped link's href when building problem URLs.
base_url = 'https://codingbat.com'

In [101]:
# Listing page of the Java "Recursion-1" problem set, built from the shared site root.
url = f'{base_url}/java/Recursion-1'
# A timeout keeps the cell from hanging forever on an unresponsive server.
page = requests.get(url, timeout=30)
# Stop here on an HTTP error instead of parsing an error page in a later cell.
page.raise_for_status()

In [102]:
from bs4 import BeautifulSoup

In [103]:
# Parse the fetched page bytes into a BeautifulSoup tree using the 'html.parser' backend.
soup = BeautifulSoup(page.content, 'html.parser')

In [104]:
# Collect every <a> inside the first table of the 'tabin' div.
# find_all is the current bs4 spelling; findAll is only kept as a legacy alias.
nodes = soup.find('div', class_='tabin').find('table').find_all('a')
# (link text, absolute URL) pairs: each href is appended to the site root.
links = [(node.text, f"{base_url}{node['href']}") for node in nodes]
links

[('factorial', 'https://codingbat.com/prob/p154669'),
 ('bunnyEars', 'https://codingbat.com/prob/p183649'),
 ('fibonacci', 'https://codingbat.com/prob/p120015'),
 ('bunnyEars2', 'https://codingbat.com/prob/p107330'),
 ('triangle', 'https://codingbat.com/prob/p194781'),
 ('sumDigits', 'https://codingbat.com/prob/p163932'),
 ('count7', 'https://codingbat.com/prob/p101409'),
 ('count8', 'https://codingbat.com/prob/p192383'),
 ('powerN', 'https://codingbat.com/prob/p158888'),
 ('countX', 'https://codingbat.com/prob/p170371'),
 ('countHi', 'https://codingbat.com/prob/p184029'),
 ('changeXY', 'https://codingbat.com/prob/p101372'),
 ('changePi', 'https://codingbat.com/prob/p170924'),
 ('noX', 'https://codingbat.com/prob/p118230'),
 ('array6', 'https://codingbat.com/prob/p108997'),
 ('array11', 'https://codingbat.com/prob/p135988'),
 ('array220', 'https://codingbat.com/prob/p173469'),
 ('allStar', 'https://codingbat.com/prob/p183394'),
 ('pairStar', 'https://codingbat.com/prob/p158175'),
 ('en

In [183]:
from bs4.element import NavigableString

class Problem:
    """Plain record describing one scraped problem.

    The constructor stores its five arguments unchanged as the attributes
    ``url``, ``name``, ``statement``, ``code`` and ``tests``.
    """

    def __init__(self, url, name, statement, code, tests):
        self.url, self.name = url, name
        self.statement, self.code, self.tests = statement, code, tests
        

def parse_problem(name, url, timeout=30):
    """Download one problem page and extract its pieces into a ``Problem``.

    Args:
        name: Problem name; stored unchanged on the result.
        url: Address of the problem page to download.
        timeout: Seconds ``requests`` waits for the server before giving up.

    Returns:
        A ``Problem`` where ``statement`` is the text of the first
        ``<p class="max2">`` element, ``tests`` are the text nodes met while
        walking forward in parse order (starting two elements after that
        paragraph's opening tag) until the next ``<p>`` tag or the end of the
        document, and ``code`` is the text of the form named ``codeform``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the statement paragraph or the code form is missing.
    """
    page = requests.get(url, timeout=timeout)
    # Surface HTTP failures directly rather than as a confusing parse error below.
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')

    # Look the paragraph up once; it is both the statement and the start of the walk.
    statement_node = soup.find('p', class_='max2')
    if statement_node is None:
        raise ValueError(f'No problem statement found at {url}')
    statement = statement_node.get_text('\n', strip=True)

    # Skip two elements ahead in parse order, as the original `.next.next` did.
    node = statement_node.next_element
    if node is not None:
        node = node.next_element

    tests = []
    # `node is not None` guards against running off the end of the document
    # when no further <p> tag exists.
    while node is not None and node.name != 'p':
        if isinstance(node, NavigableString):
            tests.append(str(node))
        node = node.next_element

    code_form = soup.find('form', {'name': 'codeform'})
    if code_form is None:
        raise ValueError(f'No code form found at {url}')
    code = code_form.get_text('\n', strip=True)
    return Problem(url, name, statement, code, tests)
    

In [184]:
# Download and parse every entry of `links`; each entry is unpacked into parse_problem's arguments.
problems = [parse_problem(*link) for link in links]

In [185]:
# Sanity check: number of problems that were parsed.
len(problems)

30

In [186]:
import re
import keyword

# Python reserved words; fix_name appends an underscore to identifiers found here.
KEYWORDS = {*keyword.kwlist}

# Explicit identifier renames applied by fix_name.
NAME_LOOKUP = {
    'str': 's',
}

# Java type name -> Python annotation text, used by fix_type.
TYPE_LOOKUP = {
    'String': 'str',
    'boolean': 'bool',
    'int[]': 'List[int]',
}


def fix_name(name):
    """Return a Python-safe spelling of an identifier.

    A name found in ``KEYWORDS`` gets a trailing underscore. Any other name
    is replaced by its ``NAME_LOOKUP`` entry when one exists and is returned
    unchanged otherwise.
    """
    if name not in KEYWORDS:
        return NAME_LOOKUP.get(name, name)

    return f'{name}_'


def fix_type(type_name):
    """Translate a type name through ``TYPE_LOOKUP``; unknown names pass through unchanged."""
    if type_name in TYPE_LOOKUP:
        return TYPE_LOOKUP[type_name]
    return type_name


def parse_args(args):
    """Convert a Java parameter list into Python ``name: type`` strings.

    Args:
        args: The text between the parentheses of a Java method signature,
            e.g. ``'String str, int n'``. May be empty or blank for a method
            that takes no parameters.

    Returns:
        A list with one ``'<name>: <type>'`` string per parameter, with the
        name passed through ``fix_name`` and the type through ``fix_type``.
        Empty when ``args`` is blank.

    Raises:
        ValueError: If a comma-separated entry is not exactly two
            whitespace-separated words (type, then name).
    """
    # ''.split(',') yields [''], which cannot be unpacked into (type, name);
    # treat a blank parameter list as "no parameters" instead of crashing.
    if not args.strip():
        return []

    res = []
    for token in args.split(','):
        type_, name = token.split()
        res.append(f'{fix_name(name)}: {fix_type(type_)}')

    return res
    

def java_to_python(code):
    """Translate the leading Java method signature in ``code`` to a Python ``def`` line.

    Args:
        code: Text that starts with ``public <return type> <name>(<params>)``.

    Returns:
        A string of the form ``'def <name>(<args>) -> <return type>:'`` with
        names passed through ``fix_name``, types through ``fix_type`` and the
        parameter list through ``parse_args``.

    Raises:
        ValueError: If ``code`` does not start with a recognisable signature.
    """
    # Raw string: a plain (f-)string treats \w, \d and \( as invalid escape
    # sequences. The return-type class also accepts '[' and ']' so array
    # return types such as 'int[]' can match.
    match = re.match(r'public ([\w\d\[\]]+) ([\w\d_]+)\((.*?)\)', code)
    if match is None:
        # Explicit error instead of "'NoneType' object is not subscriptable".
        raise ValueError(f'No Java method signature found at start of: {code[:60]!r}')

    return_type, name, args = fix_type(match[1]), fix_name(match[2]), match[3]
    args = parse_args(args)

    return f'def {name}({", ".join(args)}) -> {return_type}:'

In [191]:
# One indentation level (four spaces) used when emitting the generated Python files.
TAB = '    '

def break_lines(lines, max_len):
    """Greedily re-wrap each input line to at most ``max_len`` characters.

    Each line is split on whitespace and its words are packed onto output
    lines joined by single spaces; a new output line starts whenever adding
    the next word would exceed ``max_len``. Input lines are wrapped
    independently of each other.

    Args:
        lines: Iterable of strings to wrap.
        max_len: Maximum length of an output line. A single word longer than
            this is emitted unbroken on a line of its own.

    Returns:
        A list of wrapped lines. Blank or whitespace-only input lines
        contribute nothing to the result.
    """
    res = []
    for line in lines:
        words = []
        line_len = 0
        for word in line.split():
            # Length of the current line if `word` were appended; a separating
            # space is needed only when the line already holds a word.
            new_len = line_len + len(word) + (1 if words else 0)

            # `words` must be non-empty before flushing: an over-long first
            # word would otherwise emit a spurious empty line.
            if words and new_len > max_len:
                res.append(' '.join(words))
                words = [word]
                line_len = len(word)
            else:
                words.append(word)
                line_len = new_len

        if words:
            res.append(' '.join(words))

    return res

def format_statement(statement, max_len=116):
    """Wrap ``statement`` with ``break_lines`` and indent every resulting line by ``TAB``.

    The text is split on newlines first, so each source line is wrapped
    separately; the wrapped lines are returned joined by newlines.
    """
    wrapped = break_lines(statement.split('\n'), max_len)
    indented = [f'{TAB}{text}' for text in wrapped]
    return '\n'.join(indented)
    

In [192]:
def export_problem(problem, filename):
    """Write a Python stub file for ``problem`` to ``filename``.

    The file contains the needed imports, a ``def`` line translated from the
    problem's Java signature, a docstring built from the problem statement
    and its tests, a ``pass`` body, and an empty ``Test(TestCase)`` class.

    Args:
        problem: Object providing ``code``, ``statement`` and ``tests``.
        filename: Path of the file to create; it is overwritten if it exists.
    """
    declaration = java_to_python(problem.code)
    docstring = format_statement(
        '"""\n' + problem.statement + '\n\n' + '\n'.join(problem.tests) + '\n"""'
    )

    lines = ['from unittest import TestCase']
    # typing.List is imported only when the translated signature mentions List[...].
    if re.search(r'List\[.*?\]', declaration):
        lines.append('from typing import List')
    lines += [
        '',
        '',
        declaration,
        docstring,
        TAB + 'pass',
        '',
        '',
        'class Test(TestCase):',
        TAB + 'pass',
        '',
    ]

    # Explicit UTF-8: the scraped text may contain non-ASCII characters, and
    # the platform's default encoding is not guaranteed to represent them.
    with open(filename, 'w', encoding='utf-8') as f:
        f.write('\n'.join(lines))


# Export every problem to '<zero-padded two-digit index>.<problem name>.py',
# printing each filename before it is written. The names are relative paths.
for idx, problem in enumerate(problems):
    filename = f'{idx:02d}.{problem.name}.py'
    print(f'Exporting {filename}...')
    export_problem(problem, filename)


Exporting 00.factorial.py...
Exporting 01.bunnyEars.py...
Exporting 02.fibonacci.py...
Exporting 03.bunnyEars2.py...
Exporting 04.triangle.py...
Exporting 05.sumDigits.py...
Exporting 06.count7.py...
Exporting 07.count8.py...
Exporting 08.powerN.py...
Exporting 09.countX.py...
Exporting 10.countHi.py...
Exporting 11.changeXY.py...
Exporting 12.changePi.py...
Exporting 13.noX.py...
Exporting 14.array6.py...
Exporting 15.array11.py...
Exporting 16.array220.py...
Exporting 17.allStar.py...
Exporting 18.pairStar.py...
Exporting 19.endX.py...
Exporting 20.countPairs.py...
Exporting 21.countAbc.py...
Exporting 22.count11.py...
Exporting 23.stringClean.py...
Exporting 24.countHi2.py...
Exporting 25.parenBit.py...
Exporting 26.nestParen.py...
Exporting 27.strCount.py...
Exporting 28.strCopies.py...
Exporting 29.strDist.py...
