In [None]:
import json
import re
import execjs 

def extract_js_code_sections(text):
    """Return the first two JavaScript fenced code blocks found in ``text``.

    The first block is treated as the function under test and the second as
    its tests. Only fences tagged exactly ``js`` or ``javascript`` are
    considered; the word boundary after the tag keeps fences such as
    ``json`` or ``jsx`` from being mistaken for JavaScript.

    Args:
        text: Text that may contain Markdown fenced code blocks.

    Returns:
        A ``(function_code, test_code)`` tuple of stripped strings, or
        ``(None, None)`` when fewer than two JavaScript blocks are present.
    """
    # Fenced blocks delimit the sections. Without the trailing \b the pattern
    # would also match a "json" fence and capture "on\n..." as code.
    code_blocks = re.findall(r"```(?:js|javascript)\b(.*?)```", text, re.DOTALL)
    if len(code_blocks) < 2:
        return None, None
    return code_blocks[0].strip(), code_blocks[1].strip()  # function, tests

def run_js_with_asserts(js_func: str, js_tests: str) -> bool:
    """Execute a JavaScript function together with its tests.

    The two snippets are concatenated (function first, then tests) and run
    through ``execjs``. Any exception raised while running the program is
    reported on stdout and treated as a failed validation.

    Args:
        js_func: JavaScript source defining the function under test.
        js_tests: JavaScript source exercising that function.

    Returns:
        True if the combined program ran without raising, False otherwise.
    """
    full_code = js_func + "\n" + js_tests
    try:
        # execjs.compile() only builds a context and never evaluates the
        # source, so it cannot surface failing assertions or runtime errors.
        # exec_() actually runs the program.
        # NOTE(review): which globals (console, require, ...) are visible to
        # the snippet depends on the installed JS runtime -- confirm.
        execjs.exec_(full_code)
        return True
    except Exception as e:
        print("❌ JavaScript execution error:", e)
        return False

# valid: records whose JavaScript ran cleanly.
# total: records that contained both a function block and a test block.
valid = 0
total = 0

# Read as UTF-8 explicitly so parsing does not depend on the platform default.
with open("./data/i_to_r.jsonl", encoding="utf-8") as f:
    for line in f:
        # Ignore blank lines (e.g. a trailing newline) instead of crashing
        # inside json.loads.
        if not line.strip():
            continue
        obj = json.loads(line)
        text = obj.get("response", "")
        func_code, test_code = extract_js_code_sections(text)
        # Records without both blocks are left out of the denominator.
        if func_code and test_code:
            total += 1
            if run_js_with_asserts(func_code, test_code):
                valid += 1

print(f"✅ Validated: {valid}/{total}")

✅ Validated: 752/752


In [None]:
import json
import re
import execjs

def extract_js_code_sections(text):
    """Extract the first two JavaScript fenced blocks: function and tests.

    Only fences tagged exactly ``js`` or ``javascript`` count; the word
    boundary after the tag prevents ``json``/``jsx`` fences from matching.

    Args:
        text: Text that may contain Markdown fenced code blocks.

    Returns:
        ``(function_code, test_code)`` as stripped strings, or
        ``(None, None)`` if fewer than two JavaScript blocks are found.
    """
    # \b stops the tag from matching as a mere prefix, e.g. the "js" in "json".
    code_blocks = re.findall(r"```(?:js|javascript)\b(.*?)```", text, re.DOTALL)
    if len(code_blocks) < 2:
        return None, None
    return code_blocks[0].strip(), code_blocks[1].strip()

def run_js_with_asserts(js_func: str, js_tests: str) -> bool:
    """Run JS code using execjs and return True/False.

    The function source and the test source are joined with a newline and
    executed as one program. An exception raised during execution is printed
    and reported as a failure.

    Args:
        js_func: JavaScript source defining the function under test.
        js_tests: JavaScript source exercising that function.

    Returns:
        True if the combined program ran without raising, False otherwise.
    """
    full_code = js_func + "\n" + js_tests
    try:
        # execjs.compile() merely creates a context without evaluating the
        # source, so every snippet would "pass"; exec_() really runs it.
        # NOTE(review): globals available to the snippet (console, require,
        # ...) depend on the installed JS runtime -- confirm.
        execjs.exec_(full_code)
        return True
    except Exception as e:
        print("❌ JavaScript execution error:", e)
        return False

# Counters and collectors
valid = 0
total = 0    # records that contained both a function block and a test block
skipped = 0  # records with a missing block plus records whose JS failed

valid_cases = []
skipped_cases = []

# Read as UTF-8 explicitly so parsing does not depend on the platform default.
with open("./data/i_to_r.jsonl", encoding="utf-8") as f:
    for line in f:
        # Ignore blank lines (e.g. a trailing newline) instead of crashing
        # inside json.loads.
        if not line.strip():
            continue

        obj = json.loads(line)
        text = obj.get("response", "")
        func_code, test_code = extract_js_code_sections(text)

        if func_code and test_code:
            total += 1
            if run_js_with_asserts(func_code, test_code):
                valid += 1
                valid_cases.append(obj)
                continue
            reason = "JavaScript execution failed"
        elif not func_code and not test_code:
            reason = "Missing both function and test"
        elif not func_code:
            reason = "Missing function block"
        else:
            # Only the test block is missing or empty at this point.
            reason = "Missing test block"

        skipped += 1
        skipped_cases.append({
            "reason": reason,
            "instruction": obj.get("instruction", ""),
            "response": text
        })

# Output report
print(f"\n✅ Validated: {valid} / {total}")
print(f"⚠️ Skipped: {skipped}")

# Save passed and leftover examples
with open("./data/validated_passed.jsonl", "w", encoding="utf-8") as out_passed:
    for obj in valid_cases:
        out_passed.write(json.dumps(obj) + "\n")

# ensure_ascii=False emits raw non-ASCII characters, so the file must be
# opened as UTF-8 to avoid UnicodeEncodeError on non-UTF-8 locales.
with open("./data/leftover_skipped.jsonl", "w", encoding="utf-8") as out_skipped:
    for obj in skipped_cases:
        out_skipped.write(json.dumps(obj, ensure_ascii=False) + "\n")

print("\n📁 Output saved to:")
print(" - validated_passed.jsonl")
print(" - leftover_skipped.jsonl")


✅ Validated: 752 / 752
⚠️ Skipped: 86

📁 Output saved to:
 - validated_passed.jsonl
 - leftover_skipped.jsonl
