In [None]:
import re

# Explanation of the core snippet, repeated for each tag (shown here for <rubric>):
# <rubric>\s*((?!</rubric>)\S(?:(?!</rubric>).)*?)\s*</rubric>
#
# - \s* allows leading whitespace inside the tag before the real content
# - (?!</rubric>)\S ensures there's at least one non-whitespace character and that
#   this character is not the '<' of the closing tag itself ('<' is non-whitespace,
#   so a bare \S could start the "content" on </rubric> and run on to a later,
#   stray </rubric>, letting a whitespace-only block pass).
# - (?:(?!</rubric>).)*? lazily takes the rest of the content one character at a
#   time but never steps over </rubric>, so a block always ends at its first
#   closing tag.
# - \s* then allows trailing whitespace (e.g. <rubric>   content   </rubric>)
#
# We wrap these in  .*?  (non-greedy, DOTALL) to allow anything around/between the tags.

# One tagged block; group = the content with surrounding whitespace trimmed.
_BLOCK_TEMPLATE = r'<{tag}>\s*((?!</{tag}>)\S(?:(?!</{tag}>).)*?)\s*</{tag}>'

pattern = re.compile(
    r'^'
    # Groups 1, 2, 3 are the trimmed rubric, eval and answer contents, in that order.
    + ''.join(r'.*?' + _BLOCK_TEMPLATE.format(tag=tag) for tag in ('rubric', 'eval', 'answer'))
    + r'.*?$',
    re.DOTALL
)

def validate_string_format(s: str) -> bool:
    """
    Check 's' against the module-level 'pattern'.

    The intended format is three blocks, in this order, each holding at least
    one non-whitespace character:
      1. <rubric> ... </rubric>
      2. <eval> ... </eval>
      3. <answer> ... </answer>
    Arbitrary text may appear before, between and after the blocks.

    Returns True when 'pattern' matches 's', False otherwise.
    """
    found = pattern.match(s)
    return found is not None


# ------------------------------------------------------------------------
# Example usage:

# Valid: all three blocks carry real content; extra text around them is allowed.
example_1 = """Some leading text
<rubric>   This is my rubric   </rubric>
random stuff ...
<eval> More content! </eval>   plus more text ...
<answer>  final answer here </answer>
and trailing text
"""

# Invalid: the rubric block holds nothing but spaces.
example_2 = """<rubric>   </rubric>  # no real content
<eval>some eval</eval>
<answer>some answer</answer>
"""

# EXPECT: Example 1 -> True (valid)
#         Example 2 -> False (rubric block has only whitespace)
for label, sample in (("Example 1:", example_1), ("Example 2:", example_2)):
    print(label, validate_string_format(sample))


In [None]:
import re

# One strictly-positioned block:
#   \s*                      only whitespace may precede the opening tag
#   <tag>\s*                 leading whitespace inside the tag is ignored
#   ((?!</tag>)\S ...)       content must start with a non-whitespace character that
#                            is not the '<' of the block's own closing tag
#   (?:(?!</tag>).)*?        the rest of the content, never stepping over </tag>;
#                            without this guard the content could swallow
#                            "</eval> stray text <eval>..." and hide text that
#                            really sits between two blocks
#   \s*</tag>                trailing whitespace inside the tag is ignored
_STRICT_BLOCK = r'\s*<{tag}>\s*((?!</{tag}>)\S(?:(?!</{tag}>).)*?)\s*</{tag}>'

pattern = re.compile(
    r'^'
    # <rubric>, <eval>, <answer> in this order, separated by whitespace only;
    # groups 1, 2, 3 are their trimmed contents.
    + ''.join(_STRICT_BLOCK.format(tag=tag) for tag in ('rubric', 'eval', 'answer'))
    # only whitespace allowed after </answer>
    + r'\s*'
    r'$',
    re.DOTALL
)

def validate_string_format(s: str) -> bool:
    """
    Check 's' against the strict module-level 'pattern'.

    Intended layout:
      [optional whitespace]
      <rubric> content </rubric>
      [whitespace only]
      <eval>   content </eval>
      [whitespace only]
      <answer> content </answer>
      [optional whitespace]

    'content' must include at least one character that is not a space, tab or
    newline. Returns True when 'pattern' matches 's', False otherwise.
    """
    found = pattern.match(s)
    return found is not None

# ----------------------------------------------------------------
# EXAMPLES:

example_ok = """<rubric>  Some rubric content  </rubric>
<eval>   Multiple lines inside,
but not empty  </eval>
<answer>  final answer here </answer>"""

# Note: separating two blocks with a tab (e.g. "</rubric>\t<eval>") is still
# whitespace-only and therefore accepted, so it cannot serve as a failing example.
example_bad_1 = """Some leading text
<rubric>Has content</rubric>
<eval>Has content</eval>
<answer>Has content</answer>"""
# ❌ Disallowed because "Some leading text" is non-whitespace
#    outside the tags. (The blocks themselves are well-formed, so this is the
#    only rule the example breaks.)

example_bad_2 = """<rubric>   </rubric>
<eval>Something</eval>
<answer>Something</answer>"""
# ❌ Disallowed because the <rubric> tag is empty (only spaces).

example_bad_3 = """<rubric>Rubric</rubric>
<eval>Eval</eval>  Some extra text
<answer>Answer</answer>"""
# ❌ Disallowed because "Some extra text" is non-whitespace
#    between </eval> and <answer>.

print("OK  ->", validate_string_format(example_ok))     # True
print("BAD1->", validate_string_format(example_bad_1))  # False
print("BAD2->", validate_string_format(example_bad_2))  # False
print("BAD3->", validate_string_format(example_bad_3))  # False


OK  -> True
BAD1-> False
BAD2-> False
BAD3-> False


In [1]:
import re

# 1. Define the Regex Pattern
# Lazy wildcard that refuses to step over </eval>. The lookaheads below use it so
# that a quote/summary tag only counts when it really sits inside the <eval>
# block; a plain .*? (DOTALL) would also find tags located after </eval>,
# e.g. inside <answer> or in trailing text.
_WITHIN_EVAL = r'(?:(?!</eval>).)*?'

pattern = re.compile(
    r'^'
    # Look for <rubric>...</rubric> with some content inside (group 1)
    r'.*?<rubric>\s*(\S.*?\S|\S)\s*</rubric>'
    # Look for <eval>...</eval> containing:
    #   - either <quote_A>...</quote_A> or <summary_A>...</summary_A>
    #   - AND either <quote_B>...</quote_B> or <summary_B>...</summary_B>
    r'.*?<eval>\s*'
        rf'(?={_WITHIN_EVAL}(?:<quote_A>{_WITHIN_EVAL}</quote_A>|<summary_A>{_WITHIN_EVAL}</summary_A>))'
        rf'(?={_WITHIN_EVAL}(?:<quote_B>{_WITHIN_EVAL}</quote_B>|<summary_B>{_WITHIN_EVAL}</summary_B>))'
        rf'({_WITHIN_EVAL})'  # Capture the <eval> content (group 2), up to the first </eval>
    r'\s*</eval>'
    # Finally look for <answer>...</answer> with some content (group 3)
    r'.*?<answer>\s*(\S.*?\S|\S)\s*</answer>'
    r'.*?$',
    re.DOTALL
)

# 2. Define Test Cases (each case has: input_text, expected_validity)
# Each entry is (input_text, expected_validity). The loop below strips the
# surrounding newlines of input_text before matching it against `pattern`.
test_cases = [
    # Valid - quote tags for both A and B:
    ("""
<rubric>Rubric content</rubric>
<eval>
Some evaluation mentioning <quote_A>Verbatim A</quote_A> and <quote_B>Verbatim B</quote_B>.
</eval>
<answer>[[A]]</answer>
""", True),

    # Valid - summary tag for A, quote tag for B:
    ("""
<rubric>Some rubric info</rubric>
<eval>
I found that <summary_A>A paraphrased snippet</summary_A> was correct,
but <quote_B>Explicit text from B</quote_B> had issues.
</eval>
<answer>[[B]]</answer>
""", True),

    # Valid - multi-line rubric, quote tag for A, summary tag for B:
    ("""
<rubric>
1. Accuracy
2. Completeness
</rubric>
<eval>
Here is my evaluation:
- <quote_A>Direct quote from A</quote_A>
- <summary_B>B was paraphrased well</summary_B>
</eval>
<answer>[[A]]</answer>
""", True),

    # Invalid - No A reference:
    ("""
<rubric>Rubric is here</rubric>
<eval>
I only mention <quote_B>Stuff from B</quote_B> and no A references.
</eval>
<answer>[[B]]</answer>
""", False),

    # Invalid - No B reference:
    ("""
<rubric>Rubric is here</rubric>
<eval>
I only mention <summary_A>Paraphrase of A</summary_A> and no B references.
</eval>
<answer>[[A]]</answer>
""", False),

    # Invalid - Missing <rubric>:
    ("""
<eval>
<quote_A>Something A</quote_A>
<quote_B>Something B</quote_B>
</eval>
<answer>[[A]]</answer>
""", False),

    # Invalid - Missing <eval>:
    ("""
<rubric>Rubric is here</rubric>
<answer>[[B]]</answer>
""", False),

    # Invalid - Missing <answer>:
    ("""
<rubric>Rubric text</rubric>
<eval>
<quote_A>A text</quote_A>
<quote_B>B text</quote_B>
</eval>
""", False),

    # Valid - Multiple references:
    ("""
<rubric>Some rubric details</rubric>
<eval>
Points:
1) <quote_A>A snippet #1</quote_A>
2) <summary_A>A snippet #2 (summary)</summary_A>
3) <quote_B>B snippet #1</quote_B>
4) <summary_B>B snippet #2 (summary)</summary_B>
</eval>
<answer>[[B]]</answer>
""", True),

    # Invalid - No references inside <eval> at all:
    ("""
<rubric>Rubric stuff</rubric>
<eval>
No mention of A or B here at all.
</eval>
<answer>[[A]]</answer>
""", False),
]

# 3. Test Each Case
# Run every case through the pattern (after trimming the surrounding newlines)
# and report whether the verdict agrees with the expectation.
for i, case in enumerate(test_cases, start=1):
    test_input, expected = case
    match = pattern.match(test_input.strip()) is not None
    result = "FAIL"
    if match == expected:
        result = "PASS"
    print(f"Test {i}: Expected={expected}, Got={match} -> {result}")


Test 1: Expected=True, Got=True -> PASS
Test 2: Expected=True, Got=True -> PASS
Test 3: Expected=True, Got=True -> PASS
Test 4: Expected=False, Got=False -> PASS
Test 5: Expected=False, Got=False -> PASS
Test 6: Expected=False, Got=False -> PASS
Test 7: Expected=False, Got=False -> PASS
Test 8: Expected=False, Got=False -> PASS
Test 9: Expected=True, Got=True -> PASS
Test 10: Expected=False, Got=False -> PASS


In [3]:
import re

def get_pattern():
    """
    Returns a compiled regex pattern (re.DOTALL) that enforces, in this order:
    1) <rubric> must contain a non-empty <justify>...</justify>
    2) <eval> must contain at least one non-empty reference to A
       (<quote_A> or <summary_A>) and one to B (<quote_B> or <summary_B>)
    3) <answer> must contain non-empty content

    "Non-empty" means at least one non-whitespace character; a single character
    is enough. Nested tags only count when they lie inside their enclosing
    block, i.e. before that block's first closing tag.

    Capture groups: 1 = raw <rubric> body, 2 = raw <eval> body,
    3 = <answer> content with surrounding whitespace trimmed.
    Arbitrary text may surround the three blocks.
    """
    def guarded_char(*closing_tags):
        # One character (any, under DOTALL) that does not begin one of the given
        # closing tags; quantifying it yields a wildcard that cannot leave a block.
        stops = '|'.join(f'</{tag}>' for tag in closing_tags)
        return rf'(?:(?!{stops}).)'

    def non_empty(tag, block):
        # <tag>...</tag> whose content starts (after optional whitespace) with a
        # non-whitespace character and ends at the first </tag>, without crossing
        # </block>. (?=\S) plus "+?" also admits one-character content, which
        # the former \S.*?\S (two characters minimum) rejected.
        return rf'<{tag}>\s*(?=\S){guarded_char(tag, block)}+?</{tag}>'

    def must_contain(block, *tags):
        # Lookahead placed right after <block>: one of `tags` must occur,
        # non-empty, before </block>. An unbounded .*? here would also accept
        # tags that appear after the block has been closed.
        choices = '|'.join(non_empty(tag, block) for tag in tags)
        return rf'(?={guarded_char(block)}*?(?:{choices}))'

    pattern = re.compile(
        r'^'
        # 1) <rubric> ... </rubric>, containing <justify> with non-empty text
        r'.*?<rubric>'
        + must_contain('rubric', 'justify')
        + rf'({guarded_char("rubric")}*)'
        r'</rubric>'

        # 2) <eval> ... </eval>, must contain references to A and B with non-empty text
        r'.*?<eval>'
        # Chatbot A reference: either <quote_A>...</quote_A> or <summary_A>...</summary_A>
        + must_contain('eval', 'quote_A', 'summary_A')
        # Chatbot B reference: either <quote_B>...</quote_B> or <summary_B>...</summary_B>
        + must_contain('eval', 'quote_B', 'summary_B')
        + rf'({guarded_char("eval")}*)'
        r'</eval>'

        # 3) <answer> ... </answer> with non-empty text (must not start on its own closing tag)
        r'.*?<answer>\s*((?!</answer>)\S' + guarded_char('answer') + r'*?)\s*</answer>'
        r'.*$',
        re.DOTALL
    )
    return pattern

def test_pattern():
    """
    Runs the regex from get_pattern() over a fixed list of valid and invalid
    samples and prints one PASS/FAIL line per sample.

    Each sample is stripped of surrounding whitespace before matching. Nothing
    is returned and nothing is raised on a mismatch; the printed lines are the
    only result.
    """

    pattern = get_pattern()

    # Every case: (description, expected verdict, raw text)
    test_cases = [
        # 1. VALID: smallest complete structure
        (
            "Valid minimal structure",
            True,
            """
<rubric>
  Rubric item 1
  <justify>This rubric item is justified because ...</justify>
</rubric>
<eval>
  Here is my analysis.
  <quote_A>Sample text from A</quote_A>
  <quote_B>Sample text from B</quote_B>
</eval>
<answer>[[A]]</answer>
""",
        ),

        # 2. VALID: summary tags instead of quote tags, multi-line justification
        (
            "Valid example using summary tags",
            True,
            """
<rubric>
  1. Accuracy
  2. Relevance
  <justify>
    We need these criteria to ensure the answer is factually correct and addresses the question.
  </justify>
</rubric>
<eval>
  Based on the content:
  <summary_A>A was concise but accurate.</summary_A>
  <summary_B>B provided more detail, which was relevant.</summary_B>
</eval>
<answer>[[B]]</answer>
""",
        ),

        # 3. INVALID: rubric has no <justify> at all
        (
            "No <justify> inside <rubric>",
            False,
            """
<rubric>
  Just some rubric content, but no justify section
</rubric>
<eval>
  <quote_A>A text</quote_A>
  <quote_B>B text</quote_B>
</eval>
<answer>[[A]]</answer>
""",
        ),

        # 4. INVALID: <justify> holds only whitespace
        (
            "Empty justification",
            False,
            """
<rubric>
  My rubric item
  <justify>    </justify>
</rubric>
<eval>
  <quote_A>Not empty</quote_A>
  <quote_B>Not empty</quote_B>
</eval>
<answer>[[B]]</answer>
""",
        ),

        # 5. INVALID: <eval> never references Chatbot A
        (
            "No Chatbot A reference",
            False,
            """
<rubric>
  Some rubric item
  <justify>This is a justification</justify>
</rubric>
<eval>
  <quote_B>Reference to B</quote_B>
</eval>
<answer>[[A]]</answer>
""",
        ),

        # 6. INVALID: <eval> never references Chatbot B
        (
            "No Chatbot B reference",
            False,
            """
<rubric>
  Some rubric item
  <justify>Justification is here</justify>
</rubric>
<eval>
  <summary_A>A summary text</summary_A>
</eval>
<answer>[[A]]</answer>
""",
        ),

        # 7. INVALID: the only A reference is whitespace
        (
            "Empty <quote_A>",
            False,
            """
<rubric>
  My rubric
  <justify>Justified rubric</justify>
</rubric>
<eval>
  <quote_A>    </quote_A>   <!-- This is effectively empty, just whitespace -->
  <quote_B>Some B content</quote_B>
</eval>
<answer>[[B]]</answer>
""",
        ),

        # 8. INVALID: no <answer> block
        (
            "No <answer> section",
            False,
            """
<rubric>
  Some rubric
  <justify>Some justification</justify>
</rubric>
<eval>
  <quote_A>Some A text</quote_A>
  <quote_B>Some B text</quote_B>
</eval>
""",
        ),

        # 9. INVALID: <answer> block has no content
        (
            "Empty <answer>",
            False,
            """
<rubric>
  Another rubric
  <justify>We justify the rubric here</justify>
</rubric>
<eval>
  <quote_A>Verbatim A text</quote_A>
  <quote_B>Verbatim B text</quote_B>
</eval>
<answer></answer>
""",
        ),

        # 10. VALID: mixed reference kinds, multi-line contents, trailing prose in <eval>
        (
            "Valid example with multiple references",
            True,
            """
<rubric>
  1. Clarity
  2. Accuracy
  3. Relevance
  <justify>
    These criteria are critical: we want correct, clear, and on-topic answers.
    We also want them to be well-presented.
  </justify>
</rubric>
<eval>
  <quote_A>
    A's response line 1
    A's response line 2
  </quote_A>
  <summary_B>B gave a more thorough background, but was slightly off-topic.</summary_B>
  Conclusion: A might be better.
</eval>
<answer>[[A]]</answer>
""",
        ),
    ]

    # Evaluate the cases in order and report each verdict
    for i, (description, expected, text) in enumerate(test_cases, start=1):
        match_found = pattern.match(text.strip()) is not None
        if match_found == expected:
            result = "PASS"
        else:
            result = "FAIL"
        print(f"Test {i} - {description}: Expected={expected}, Got={match_found} -> {result}")

# Run the self-test as soon as this cell executes (comment the call out to skip it):
test_pattern()


Test 1 - Valid minimal structure: Expected=True, Got=True -> PASS
Test 2 - Valid example using summary tags: Expected=True, Got=True -> PASS
Test 3 - No <justify> inside <rubric>: Expected=False, Got=False -> PASS
Test 4 - Empty justification: Expected=False, Got=False -> PASS
Test 5 - No Chatbot A reference: Expected=False, Got=False -> PASS
Test 6 - No Chatbot B reference: Expected=False, Got=False -> PASS
Test 7 - Empty <quote_A>: Expected=False, Got=False -> PASS
Test 8 - No <answer> section: Expected=False, Got=False -> PASS
Test 9 - Empty <answer>: Expected=False, Got=False -> PASS
Test 10 - Valid example with multiple references: Expected=True, Got=True -> PASS
