In [1]:
from rage.models import RageClassifier, ClassifierOutput, RageExtractor
from rage.case import RageExample, RageCase
from rage.template import SimpleCaseTemplate
from rage.metrics import GenerateBasedContextPrecision
from pydantic import BaseModel, RootModel




In [4]:
# Build one evaluation case: a question plus the two retrieved context
# passages that the metric will judge.
case = RageCase(
    question="What is the capital of France?",
    retrieved_contexts=[
        "Paris is the capital of France and also the largest city in the country.",
        "Lyon is a major city in France.",
    ],
)
# Construct the metric for the given model id and score the case.
# NOTE(review): presumably this issues a call to the named model — confirm
# before using "Run All" on a machine without credentials.
metric = GenerateBasedContextPrecision.from_parameters(model_id="openai/gpt-4-turbo-preview")
result = metric.calculate(case)

In [3]:
# Display the value returned by metric.calculate(case).
result

PresicionResult(extra={'verifications': [ClassifierOutput(reason=None, label='Yes'), ClassifierOutput(reason=None, label='No')]}, precision=0.5, average_precision=0.75, precision_at_k=[1.0, 0.5])

In [3]:
# Display `result` again. This cell duplicates the previous one and carries
# the same execution count.
# NOTE(review): in the saved output of this cell both verification labels hold
# the text "annotation=Literal['Yes', 'No'] required=True" instead of
# 'Yes'/'No', and every precision value is 0.0 — looks like the label was not
# parsed on that run; confirm which run is current.
result

PresicionResult(extra={'verifications': [ClassifierOutput(reason=None, label="annotation=Literal['Yes', 'No'] required=True"), ClassifierOutput(reason=None, label="annotation=Literal['Yes', 'No'] required=True")]}, precision=0.0, average_precision=0.0, precision_at_k=[0.0, 0.0])

In [None]:
# `Field` is used by the `weight` constraint below but was never imported in
# this notebook, so the cell failed with NameError on a fresh kernel.
from pydantic import Field


# One statement extracted from an answer.
# Documented with `#` comments rather than a docstring on purpose: pydantic
# copies a model's docstring into its JSON schema as "description", which
# would change the schema produced by model_json_schema().
# NOTE(review): a later cell redefines `Statement` / `Statements` with a
# different shape (`content: list[str]`, no `weight`); whichever cell ran
# last decides what `Statements` means.
class Statement(BaseModel):
    # Text of the statement.
    content: str
    # Required (no default); constrained to the closed interval [0.0, 1.0].
    weight: float = Field(..., ge=0.0, le=1.0)
    # Optional; defaults to None.
    reason: str | None = None

# Root model whose payload is a bare list of Statement objects.
Statements = RootModel[list[Statement]]

In [None]:
# Configure an extractor whose output is validated against `Statements`.
# The instruction is Chinese; roughly: "Extract the statements in the answer,
# assign each a weight according to its importance and helpfulness to the
# question, and give a reason."
extractor = RageExtractor(
    instruction='提取答案中的声明，根据声明对问题的重要度和帮助性设置权重，并给出理由。',
    output_type=Statements,
)
# Run the extractor on a single question/answer pair; as the cell's last
# expression, the return value is what the notebook displays.
extractor.inference(
    RageCase(
        question='What is the largest planet in our Solar System?',
        answer='Jupiter is the largest planet in our Solar System and has a giant storm known as the Great Red Spot. Jupiter is closer to the Sun than Earth.'
    )
)

In [None]:
# Two worked examples passed to the classifier as `examples=`: each pairs a
# case (question + one retrieved context) with the expected ClassifierOutput.
# The first is labelled "No", the second "Yes".
DEFAULT_EXAMPLES = [
    RageExample[ClassifierOutput](
        rage_case=RageCase(
            question="What are the health benefits of green tea?",
            retrieved_contexts=[
                "This article explores the rich history of tea cultivation in China, tracing its roots back to the ancient dynasties. It discusses how different regions have developed their unique tea varieties and brewing techniques. The article also delves into the cultural significance of tea in Chinese society and how it has become a symbol of hospitality and relaxation.",
            ],
        ),
        output=ClassifierOutput(
            reason="The context, while informative about the history and cultural significance of tea in China, does not provide specific information about the health benefits of green tea. Thus, it is not useful for answering the question about health benefits.",
            label="No",
        ),
    ),
    RageExample[ClassifierOutput](
        rage_case=RageCase(
            question="How does photosynthesis work in plants?",
            retrieved_contexts=[
                "Photosynthesis in plants is a complex process involving multiple steps. This paper details how chlorophyll within the chloroplasts absorbs sunlight, which then drives the chemical reaction converting carbon dioxide and water into glucose and oxygen. It explains the role of light and dark reactions and how ATP and NADPH are produced during these processes.",
            ],
        ),
        output=ClassifierOutput(
            reason="This context is extremely relevant and useful for answering the question. It directly addresses the mechanisms of photosynthesis, explaining the key components and processes involved.",
            label="Yes",
        ),
    ),
]

In [None]:
# Build a Yes/No classifier from an instruction, the worked examples above,
# a case template and the allowed label set.
# NOTE(review): `CONTEXT_PRECISION` is not defined or imported in any visible
# cell of this notebook, so this cell raises NameError on a fresh kernel
# unless it is defined elsewhere — confirm where it should come from.
classifier = RageClassifier(
    model_id='openai/gpt-4-turbo-preview',
    instruction=CONTEXT_PRECISION,
    examples=DEFAULT_EXAMPLES,
    case_template=SimpleCaseTemplate(),
    label_set={'Yes', 'No'},
)

In [None]:
# Rebinds the notebook-global `case` (it was also assigned in an earlier
# cell). Only the Lyon passage is active here; the Paris passage is left
# commented out.
# NOTE(review): presumably this is deliberate, to exercise the classifier on
# a context that does not answer the question — confirm.
case = RageCase(
    question="What is the capital of France?",
    retrieved_contexts=[
        # "Paris is the capital of France and also the largest city in the country.",
        "Lyon is a major city in France.",
    ],
)

In [None]:
from pydantic import BaseModel, RootModel

In [None]:
# Second definition of `Statement` in this notebook: it replaces the earlier
# one (which had `content: str` and a `weight` field) once this cell runs.
# Documented with `#` comments rather than a docstring because pydantic adds
# a model's docstring to its JSON schema as "description".
class Statement(BaseModel):
    # One or more text fragments.
    content: list[str]
    # Optional; defaults to None.
    reason: str | None = None

# Root model whose payload is a bare list of Statement objects; rebinds the
# notebook-global `Statements`.
Statements = RootModel[list[Statement]]

In [None]:
# Show the JSON schema generated for a single Statement.
Statement.model_json_schema()

In [None]:
from rich import print


# Pretty-print a hard-coded copy of a list-of-Statement schema (its title is
# 'RootModel[list[Statement]]'). `print` here is the one imported from `rich`
# at the top of this cell, and it stays bound for every cell executed later.
print({'$defs': {'Statement': {'properties': {'content': {'items': {'type': 'string'},
     'title': 'Content',
     'type': 'array'},
    'reason': {'anyOf': [{'type': 'string'}, {'type': 'null'}],
     'default': None,
     'title': 'Reason'}},
   'required': ['content'],
   'title': 'Statement',
   'type': 'object'}},
 'items': {'$ref': '#/$defs/Statement'},
 'title': 'RootModel[list[Statement]]',
 'type': 'array'})

In [None]:
# Sample reply text: a junk line, then a JSON array wrapped in an untagged
# triple-backtick fence. The payload itself matches the list-of-Statement
# shape; the wrapper around it is not JSON.
a = '''
dfdsfs
```
[
  {
    "content": [
      "Jupiter is the largest planet in our Solar System and has a giant storm known as the Great Red Spot."
    ],
    "reason": null
  },
  {
    "content": [
      "Jupiter is closer to the Sun than Earth."
    ],
    "reason": null
  }
]```'''
# Feed the raw text straight to the validator and show whatever it raises
# instead of letting the cell fail; the error is the point of this cell.
try:
    Statements.model_validate_json(a)
except Exception as validation_error:
    print(validation_error)

In [None]:
# Show the JSON schema generated for the list-of-Statement root model.
Statements.model_json_schema()

In [None]:
from inspect import get_annotations


# Inspect the attribute dict of the annotation stored on the root model's
# `root` field. NOTE(review): `get_annotations`, imported at the top of this
# cell, is not used by this expression.
Statements.model_fields['root'].annotation.__dict__

In [None]:
# Run the classifier on the current notebook-global `case` and display the
# returned value.
classifier.inference(case)

In [None]:
# Print the content of the first entry in the classifier's prompt.
print(classifier.prompt[0].content)

In [None]:
import re
import json

def is_valid_json(json_str: str) -> bool:
    """Report whether ``json_str`` parses as a JSON document.

    Parsing is done with ``strict=False``, so raw control characters
    (for example a literal tab) inside JSON strings are accepted.

    Args:
        json_str: Text to check.

    Returns:
        True when ``json.loads`` accepts the text, False when it raises
        ``json.JSONDecodeError``.
    """
    try:
        json.loads(json_str, strict=False)
    except json.JSONDecodeError:
        parses = False
    else:
        parses = True
    return parses


def extract_json(json_str: str) -> str:
    """Pull the first valid JSON payload out of markdown-style code spans.

    Delimiter kinds are tried from most to least specific:

    1. fenced blocks tagged ``json`` (the tag is matched case-insensitively,
       and trailing spaces/tabs or a CRLF line ending after it are allowed),
    2. any triple-backtick fenced block,
    3. inline single-backtick spans.

    Every occurrence of a kind is examined before moving on to the next
    kind, so a valid block is still found when an earlier block of the same
    kind holds something that is not JSON.

    Args:
        json_str: Free-form text that may embed JSON in code spans.

    Returns:
        The text captured between the delimiters, exactly as it appears
        (surrounding whitespace is not stripped).

    Raises:
        ValueError: If no candidate span parses as JSON.
    """
    candidate_patterns = (
        r"```json[ \t]*\r?\n(.*?)```",  # fenced block tagged as json
        r"```(.*?)```",                 # any fenced block
        r"`(.*?)`",                     # inline code span
    )
    # DOTALL lets a span cover several lines; IGNORECASE only affects the
    # "json" tag, the other patterns contain no letters.
    search_flags = re.DOTALL | re.IGNORECASE

    for pattern in candidate_patterns:
        for match in re.finditer(pattern, json_str, search_flags):
            candidate = match.group(1)
            if is_valid_json(candidate):
                return candidate

    raise ValueError(f"Invalid JSON: {json_str}")


def ensure_valid_json(json_str: str) -> str:
    """Return JSON text, extracting it from code spans when necessary.

    Args:
        json_str: Either a JSON document or free-form text embedding one.

    Returns:
        ``json_str`` itself when it already parses as JSON; otherwise the
        result of ``extract_json(json_str)``.

    Raises:
        ValueError: Propagated from ``extract_json`` when nothing usable is
            found.
    """
    if not is_valid_json(json_str):
        return extract_json(json_str)
    return json_str

In [None]:
# Sample inputs for ensure_valid_json, all carrying the same two-element JSON
# array:
#   1. Chinese prose around an untagged triple-backtick fence,
#   2. the same prose around a fence tagged `json`,
#   3. the bare JSON array with no prose and no fence.
test_cases = [
    # 1. untagged fence inside prose
    '''抱歉，我之前的回答有误。根据报错信息，输入数据应该是一个有效的数组，而不是一个对象。

根据 JSON Schema 的描述，正确的对象应如下所示：

```
[
  {
    "content": [
      "Jupiter is the largest planet in our Solar System and has a giant storm known as the Great Red Spot."
    ],
    "reason": null
  },
  {
    "content": [
      "Jupiter is closer to the Sun than Earth."
    ],
    "reason": null
  }
]
```
这是一个数组，包含两个符合 "Statement" 定义的对象。每个对象都有 "content" 属性表示内容，以及 "reason" 属性表示原因。
''',
    # 2. `json`-tagged fence inside prose
    '''抱歉，我之前的回答有误。根据报错信息，输入数据应该是一个有效的数组，而不是一个对象。

根据 JSON Schema 的描述，正确的对象应如下所示：

```json
[
  {
    "content": [
      "Jupiter is the largest planet in our Solar System and has a giant storm known as the Great Red Spot."
    ],
    "reason": null
  },
  {
    "content": [
      "Jupiter is closer to the Sun than Earth."
    ],
    "reason": null
  }
]
```
这是一个数组，包含两个符合 "Statement" 定义的对象。每个对象都有 "content" 属性表示内容，以及 "reason" 属性表示原因。
''',
# 3. bare JSON, no fence
'''
[
  {
    "content": [
      "Jupiter is the largest planet in our Solar System and has a giant storm known as the Great Red Spot."
    ],
    "reason": null
  },
  {
    "content": [
      "Jupiter is closer to the Sun than Earth."
    ],
    "reason": null
  }
]
''',
]

In [None]:
# Run every sample through ensure_valid_json and print the returned text,
# followed by a '---' separator line. A ValueError from any sample stops the
# loop.
# NOTE(review): if the earlier `from rich import print` cell has run, `print`
# here is rich's version rather than the builtin.
for test_case in test_cases:
    print(ensure_valid_json(test_case))
    print('---')

In [1]:
from rage.metrics.model_based.faithfulness import LLMBasedFaithfulness

In [None]:
# NOTE(review): `ex` is not defined in any visible cell — this call looks
# unfinished and raises NameError as written; confirm the intended argument.
LLMBasedFaithfulness.from_parameters(ex)

# NOTE(review): `solve` is not defined or imported in any visible cell, so
# this loop raises NameError on its first iteration; confirm which function
# was meant to be applied to each test case.
for test_case in test_cases:
    print(f"{test_case} -> {solve(test_case)}")