In [1]:
import re

In [2]:
def validate_tags(response):
    """Return True when ``response`` uses <fact>/<commentary> tags correctly.

    Rules enforced:
      * at least one opening ``<fact>`` or ``<commentary>`` tag is present;
      * every opening tag is closed by the closing tag of the same kind;
      * a tag may not be opened while another tag (of either kind) is still
        open, so nesting and overlapping are both rejected.

    Text outside of the tags is ignored. Returns False on any violation.
    """
    # Cheap pre-check: without an opening tag there is nothing to validate.
    if not ('<fact>' in response or '<commentary>' in response):
        return False

    # Every tag token, in the order it occurs in the text.
    tokens = re.findall(r'<fact>|</fact>|<commentary>|</commentary>', response)
    if not tokens:
        # Defensive only: for a str input an opening tag was just seen above.
        return False

    # Closing tag -> the only opening tag it is allowed to close.
    opener_for = {'</fact>': '<fact>', '</commentary>': '<commentary>'}

    # Because nesting is forbidden, at most one tag can be open at a time,
    # so a single slot is enough to track the current state.
    open_tag = None
    for token in tokens:
        if token in opener_for:
            # A closing tag must match the tag that is currently open.
            if open_tag != opener_for[token]:
                return False
            open_tag = None
        else:
            # An opening tag is only legal when nothing else is open.
            if open_tag is not None:
                return False
            open_tag = token

    # Anything still open at the end was never closed.
    return open_tag is None

In [3]:
def test_validate_tags():
    """Run ``validate_tags`` over a table of inputs and print PASS/FAIL per case.

    Each entry pairs an input string with the expected boolean result.
    Mismatches are only reported via ``print``; nothing is raised or returned.
    """
    # Inputs that should be accepted
    well_formed = [
        ("<fact>This is a fact.</fact>", True),
        ("<commentary>This is a commentary.</commentary>", True),
        ("<fact>Fact 1</fact><commentary>Commentary 1</commentary>", True),
        ("<commentary>Commentary 1</commentary><fact>Fact 1</fact>", True),
        # TODO: Check if this is something we want to allow (it was part of the original format)
        ("<fact>Fact 1</fact>Some text<commentary>Commentary 1</commentary>", True),
        ("<fact>Fact 1</fact><fact>Fact 2</fact>", True),
        ("<commentary>Commentary 1</commentary><commentary>Commentary 2</commentary>", True),
    ]

    # Rejected: no tags present
    untagged = [
        ("No tags at all", False),
    ]

    # Rejected: an opening or closing tag without its partner
    unbalanced = [
        ("<fact>Unclosed fact", False),
        ("<fact>Fact 1</fact><fact>Unclosed fact", False),
        ("</fact>Closing without opening", False),
        ("<commentary>Unclosed commentary", False),
        ("</commentary>Closing without opening", False),
    ]

    # Rejected: tags that overlap each other
    overlapping = [
        ("<fact><commentary>Overlapping</fact></commentary>", False),
        ("<commentary><fact>Overlapping</commentary></fact>", False),
    ]

    # Rejected: one tag nested inside another
    nested = [
        ("<fact>Outer <fact>Inner</fact> fact</fact>", False),
        ("<commentary>Outer <commentary>Inner</commentary> commentary</commentary>", False),
        ("<fact>Fact with <commentary>nested commentary</commentary></fact>", False),
        ("<commentary>Commentary with <fact>nested fact</fact></commentary>", False),
    ]

    # Longer sequences mixing tags and free text
    mixed = [
        ("<fact>Fact 1</fact><commentary>Commentary 1</commentary><fact>Fact 2</fact>", True),
        ("<fact>F1</fact>Text<commentary>C1</commentary>Text<fact>F2</fact>Text<commentary>C2</commentary>", True),
        ("Text<fact>F1</fact>Text<commentary>C1</commentary>Text", True),
        ("<fact>F1</fact></commentary>Unbalanced closing tag", False),
        ("<fact>F1</commentary>Wrong closing tag", False),
    ]

    # The concatenation order determines the test numbers printed below.
    test_cases = well_formed + untagged + unbalanced + overlapping + nested + mixed

    for number, (text, want) in enumerate(test_cases, start=1):
        got = validate_tags(text)
        if got != want:
            print(f"Test {number}: FAIL - Expected {want}, got {got}")
            print(f"  Input: {text}")
            continue
        print(f"Test {number}: PASS")

    print("All tests completed.")

# Run the test function; it prints one PASS/FAIL line per case and a final summary line.
test_validate_tags()

Test 1: PASS
Test 2: PASS
Test 3: PASS
Test 4: PASS
Test 5: PASS
Test 6: PASS
Test 7: PASS
Test 8: PASS
Test 9: PASS
Test 10: PASS
Test 11: PASS
Test 12: PASS
Test 13: PASS
Test 14: PASS
Test 15: PASS
Test 16: PASS
Test 17: PASS
Test 18: PASS
Test 19: PASS
Test 20: PASS
Test 21: PASS
Test 22: PASS
Test 23: PASS
Test 24: PASS
All tests completed.


In [1]:
# Log file that a later cell opens and searches for the server port.
# NOTE(review): hard-coded absolute path to one specific log — consider deriving it from a configurable directory.
log_file_path = "/data/synthetic_data/exp/logs/orch_v2_lmunit_serve_1410138.log"


In [4]:
from loguru import logger
import re
# Load the whole log as text. Only the text is needed for the searches below,
# so the file is closed before any matching happens.
with open(log_file_path, "r", encoding="utf-8") as f:
    log_content = f.read()

# NOTE(review): the version is hard-coded. The earlier guard
# `"0.8.3" in "0.8.3"` compared a literal with itself and was always true, so
# it has been removed; add a check against the real vLLM version here if the
# parsing is meant to differ between versions.
logger.info("vLLM version is 0.8.3")

# `x` ends up as the captured port digits (a string), or None when neither of
# the two known startup markers appears in the log.
if "Application startup complete" in log_content:
    match = re.search(r"Starting server on port:+\s*(\d+)", log_content)
    # The startup marker is present, so the port line is required as well.
    assert match is not None, "No match found for vLLM version 0.8.3"
    x = match.group(1)
elif "Uvicorn running on" in log_content:
    match = re.search(r"Uvicorn running on http://0\.0\.0\.0:(\d+)", log_content)
    assert match is not None, "No match found for vLLM version 0.8.3"
    x = match.group(1)
else:
    x = None

[32m2025-05-07 22:43:59.213[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m6[0m - [1mvLLM version is 0.8.3[0m


In [3]:
# Display the raw log text for manual inspection.
# NOTE(review): this shows the entire string; presumably large for a real log — consider a slice such as the last few thousand characters.
log_content

