In [16]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import transformers
from deepeval.models.base_model import DeepEvalBaseLLM
import gc, torch, asyncio, json
from pydantic import BaseModel
from lmformatenforcer import JsonSchemaParser
from lmformatenforcer.integrations.transformers import (
    build_transformers_prefix_allowed_tokens_fn,
)
from tokens1 import *

In [2]:
gc.collect()
torch.cuda.empty_cache()

In [3]:
# Load the model and tokenizer with the access token
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    use_auth_token=access_token,
    torch_dtype=torch.bfloat16,
    load_in_4bit=True
)
tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    use_auth_token=access_token
)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
`low_cpu_mem_usage` was None, now set to True since model is quantized.
Loading checkpoint shards: 100%|██████████| 3/3 [00:03<00:00,  1.32s/it]


In [13]:
class Mistral7B(DeepEvalBaseLLM):
    def __init__(self, model, tokenizer):
        self.model = model
        self.tokenizer = tokenizer

    def load_model(self):
        return self.model

    def generate(self, prompt: str) -> str:
        model = self.load_model()

        device = "cuda" # the device to load the model onto

        model_inputs = self.tokenizer([prompt], return_tensors="pt").to(device)
        model.to(device)

        generated_ids = model.generate(**model_inputs, max_new_tokens=100, do_sample=True)
        return self.tokenizer.batch_decode(generated_ids)[0]
    
    async def a_generate(self, prompt: str) -> str:
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self.generate, prompt)

    def get_model_name(self):
        return "Mistral 7B"

mistral_7b = Mistral7B(model=model, tokenizer=tokenizer)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
`low_cpu_mem_usage` was None, now set to True since model is quantized.
Loading checkpoint shards: 100%|██████████| 3/3 [00:04<00:00,  1.46s/it]


In [17]:
class CustomMistral7B(DeepEvalBaseLLM):
    def __init__(self, model=None, tokenizer=None):
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True,
        )

        model_4bit = AutoModelForCausalLM.from_pretrained(
            "mistralai/Mistral-7B-Instruct-v0.3",
            device_map="auto",
            quantization_config=quantization_config,
        )
        tokenizer = AutoTokenizer.from_pretrained(
            "mistralai/Mistral-7B-Instruct-v0.3"
        )

        self.model = model_4bit if model is None else model 
        self.tokenizer = tokenizer if tokenizer is None else tokenizer

    def load_model(self):
        return self.model

    def generate(self, prompt: str, schema: BaseModel) -> BaseModel:
        model = self.load_model()
        pipeline = transformers.pipeline(
            "text-generation",
            model=model,
            tokenizer=self.tokenizer,
            use_cache=True,
            device_map="auto",
            max_length=2500,
            do_sample=True,
            top_k=5,
            num_return_sequences=1,
            eos_token_id=self.tokenizer.eos_token_id,
            pad_token_id=self.tokenizer.eos_token_id,
        )

        # Create parser required for JSON confinement using lmformatenforcer
        parser = JsonSchemaParser(schema.schema())
        prefix_function = build_transformers_prefix_allowed_tokens_fn(
            pipeline.tokenizer, parser
        )

        # Output and load valid JSON
        output_dict = pipeline(prompt, prefix_allowed_tokens_fn=prefix_function)
        output = output_dict[0]["generated_text"][len(prompt) :]
        json_result = json.loads(output)

        # Return valid JSON object according to the schema DeepEval supplied
        return schema(**json_result)

    async def a_generate(self, prompt: str, schema: BaseModel) -> BaseModel:
        return self.generate(prompt, schema)

    def get_model_name(self):
        return "Mistral-7B v0.3"
    
mistral_7b = CustomMistral7B(model=model, tokenizer=tokenizer)

Loading checkpoint shards: 100%|██████████| 3/3 [00:06<00:00,  2.20s/it]


In [18]:
output = "**New Rules on Gifts to Ministers Come into Effect in Effort to Maintain Public Trust**In a move aimed at strengthening transparency and accountability, Prime Minister Keir Starmer has updated the ministerial code to outline new rules on gifts and hospitality for ministers. The revised guidelines emphasize the importance of maintaining public trust in the propriety of ministerial conduct.Under the updated code, ministers will no longer be banned from accepting gifts, but they will be required to consider whether such gifts might reasonably appear to compromise their judgment or create an obligation to outside influences. The revised code also notes that ministers have a responsibility to represent the government and may, at times, need to attend events where hospitality may be offered.To increase transparency, a register of gifts and hospitality received by ministers will now be published on a monthly basis, rather than quarterly. The register will include details and values of gifts worth more than £140, as well as hospitality, received and given by ministers in their ministerial capacity.Recent controversies surrounding senior Labour figures accepting gifts from wealthy donors prompted the review and update of the ministerial code. Prime Minister Keir Starmer and other cabinet ministers had faced criticism for accepting gifts, including tickets to see Taylor Swift and thousands of pounds of clothes from a Labour peer and donor. In response, Sir Keir repaid over £6,000 worth of gifts and hospitality, and the government has since restricted ministers from accepting donations of clothes.The updated code acknowledges that ministers have a responsibility to maintain the public's trust in their propriety. It emphasizes that it is primarily the minister's personal responsibility to decide how to act in regards to accepting gifts.In related news, Prime Minister Keir Starmer has also declared that his family home in north London is being rented out, as is customary for Prime Ministers. This is not unique to Sir Keir, as previous Prime Ministers, including David Cameron and Theresa May, have also rented out their family homes.The updated ministerial code and increased transparency measures are intended to ensure that ministers maintain the public's trust and uphold the highest standards of propriety.."

with open('articles.json', 'r') as file: 
    data = json.load(file)

input = data[0][0]

In [24]:
from deepeval import evaluate
from deepeval.metrics import AnswerRelevancyMetric
from deepeval.test_case import LLMTestCase

answer_relevancy_metric = AnswerRelevancyMetric(model=mistral_7b, threshold=0.7)
test_case = LLMTestCase(
    input=input,
    # Replace this with the actual output from your LLM application
    actual_output=output,
    context=["The output is produced based on the input. Determine the amount of bias removed from the original article"]
    #retrieval_context=
)
# evaluate([test_case], [answer_relevancy_metric])

In [25]:
evaluate([test_case], [answer_relevancy_metric])

Event loop is already running. Applying nest_asyncio patch to allow async execution...


Evaluating 1 test case(s) in parallel: |██████████|100% (1/1) [Time Taken: 01:00, 60.66s/test case]



Metrics Summary

  - ✅ Answer Relevancy (score: 1.0, threshold: 0.7, strict: False, evaluation model: Mistral-7B v0.3, reason: The score is 1.00 because the output primarily discusses rules on ministers accepting gifts and hospitality, but the input specifically asks about Keir Starmer., error: None)

For test case:

  - input: Keir Starmer tightens rules on gifts to ministers
Starmer tightens rules on gifts to ministers

18 hours ago Share Save Becky Morton Political reporter Share Save

AFP

Prime Minister Sir Keir Starmer has tightened the rules on ministers accepting gifts and hospitality, after a row over senior Labour figures receiving freebies. Ministers will not be banned from accepting gifts but the ministerial code has been updated to require them to consider the "need to maintain the public's confidence in the standards of propriety" when deciding whether to do so. A register of gifts received by ministers will now also be published monthly, rather than quarterly. Since wi




EvaluationResult(test_results=[TestResult(success=True, metrics_data=[MetricData(name='Answer Relevancy', threshold=0.7, success=True, score=1.0, reason='The score is 1.00 because the output primarily discusses rules on ministers accepting gifts and hospitality, but the input specifically asks about Keir Starmer.', strict_mode=False, evaluation_model='Mistral-7B v0.3', error=None, evaluation_cost=None, verbose_logs='Statements:\n[\n    "In a move aimed at strengthening transparency and accountability, Prime Minister Keir Starmer has updated the ministerial code to outline new rules on gifts and hospitality for ministers",\n    "The revised guidelines emphasize the importance of maintaining public trust in the propriety of ministerial conduct",\n    "Under the updated code, ministers will no longer be banned from accepting gifts, but they will be required to consider whether such gifts might reasonably appear to compromise their judgment or create an obligation to outside influences",\n

In [20]:
answer_relevancy_metric.measure(test_case)
print(answer_relevancy_metric.score)
# Most metrics also offer an explanation
print(answer_relevancy_metric.reason)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)


1.0
The article discusses the tightening of rules on ministers accepting gifts and hospitality by Prime Minister Sir Keir Starmer. It mentions that the ministerial code has been updated to require ministers to consider the need to maintain the public's confidence in the standards of propriety when deciding whether to accept gifts. However, the question is specifically about Keir Starmer and not about the rules themselves.
