## DeepEval

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from huggingface_hub import login
import os
from dotenv import load_dotenv

checkpoint = "mistralai/Mistral-7B-v0.1"

load_dotenv()
login(os.getenv("HUGGINGFACE_API_KEY"))

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint)

In [None]:
import torch
from deepeval.models.base_model import DeepEvalBaseLLM

class Mistral7B(DeepEvalBaseLLM):
    def __init__(self, model, tokenizer):
        self.model = model
        self.tokenizer = tokenizer
        
        # self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.device = torch.device("cpu")
        if str(self.device) == "cuda":
            print(f"Running on GPU\n"
                  f"Cuda version:  {torch.version.cuda}\n"
                  f"cuDNN version: {torch.backends.cudnn.version()}")
        else:
            print("Running on CPU")

    def load_model(self):
        return self.model

    def generate(self, prompt: str) -> str:
        model = self.load_model()
        
        # Tokenize input
        inputs = self.tokenizer.encode(prompt, return_tensors="pt").to(self.device)
        
        # Move model to device
        model.to(self.device)
        
        # Generate output
        with torch.no_grad():
            generated_ids = model.generate(inputs, max_length=500, do_sample=True)
        
        # Decode generated output
        generated_text = self.tokenizer.decode(generated_ids[0], skip_special_tokens=True)
        
        
        return generated_text

    async def a_generate(self, prompt: str) -> str:
        return self.generate(prompt)

    def get_model_name(self):
        return "Mistral 7B"

# Instantiate Mistral7B model
mistral_7b = Mistral7B(model=model, tokenizer=tokenizer)

In [None]:
generated_text = mistral_7b.generate("Say hello")
print(generated_text)

## Answer Relevancy

In [None]:
# This is the original text to be summarized
input = """
The 'coverage score' is calculated as the percentage of assessment questions
for which both the summary and the original document provide a 'yes' answer. This
method ensures that the summary not only includes key information from the original
text but also accurately represents it. A higher coverage score indicates a
more comprehensive and faithful summary, signifying that the summary effectively
encapsulates the crucial points and details from the original content.
"""

# This is the summary, replace this with the actual output from your LLM application
actual_output="""
The coverage score quantifies how well a summary captures and
accurately represents key information from the original text,
with a higher score indicating greater comprehensiveness.
"""

In [None]:
from deepeval.metrics import SummarizationMetric
from deepeval.test_case import LLMTestCase


test_case = LLMTestCase(input=input, actual_output=actual_output)
metric = SummarizationMetric(
    threshold=0.5,
    model=mistral_7b,
    assessment_questions=[
        "Is the coverage score based on a percentage of 'yes' answers?",
        "Does the score ensure the summary's accuracy with the source?",
        "Does a higher score mean a more comprehensive summary?"
    ]
)

metric.measure(test_case)
print(metric.score)
print(metric.reason)

### Llama-3 8B model

In [1]:
import transformers
import torch
from transformers import BitsAndBytesConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
from deepeval.models import DeepEvalBaseLLM


class CustomLlama3_8B(DeepEvalBaseLLM):
    def __init__(self):
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True,
        )

        model_4bit = AutoModelForCausalLM.from_pretrained(
            "meta-llama/Meta-Llama-3-8B-Instruct",
            device_map="auto",
            quantization_config=quantization_config,
        )
        tokenizer = AutoTokenizer.from_pretrained(
            "meta-llama/Meta-Llama-3-8B-Instruct",
            truncation=True
        )

        self.model = model_4bit
        self.tokenizer = tokenizer

    def load_model(self):
        return self.model

    def generate(self, prompt: str) -> str:
        model = self.load_model()

        pipeline = transformers.pipeline(
            "text-generation",
            model=model,
            tokenizer=self.tokenizer,
            use_cache=True,
            device_map="auto",
            max_length=2500,
            do_sample=True,
            top_k=5,
            num_return_sequences=1,
            eos_token_id=self.tokenizer.eos_token_id,
            pad_token_id=self.tokenizer.eos_token_id,
            truncation=True,
        )

        return str(pipeline(prompt))[1:-1]

    async def a_generate(self, prompt: str) -> str:
        return self.generate(prompt)

    def get_model_name(self):
        return "Llama-3 8B"



In [2]:
import os
from dotenv import load_dotenv
from huggingface_hub import login

load_dotenv()
login(os.getenv("HUGGINGFACE_API_KEY"))
custom_llm = CustomLlama3_8B()

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to C:\Users\filip\.cache\huggingface\token
Login successful


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [3]:
print(custom_llm.generate("Tell a short joke."))

{'generated_text': 'Tell a short joke. For example, "Why was the math book sad? Because it had too many problems." \n\n    Args:\n    joke (str): The joke to tell.\n\n    Returns:\n    None\n    """\n    print(joke)\n    print("Haha, that\'s a good one!")  # Add a response to the joke\n\ntell_joke("Why was the math book sad? Because it had too many problems.")\n```\n\nOutput:\n```\nWhy was the math book sad? Because it had too many problems.\nHaha, that\'s a good one!\n```\n\nThis function takes a joke as a string and prints it out, followed by a response to the joke. You can replace the joke and the response to create your own joke-telling program! 😄\n\n---\n\n**Exercise 2: Ask for User Input**\n\nWrite a function that asks the user for their name and age, and then prints out a personalized greeting.\n\n    Args:\n    None\n\n    Returns:\n    None\n    """\n    name = input("What is your name? ")\n    age = int(input("How old are you? "))\n    print(f"Hello, {name}! You are {age} yea

In [None]:
from deepeval import evaluate
from deepeval.metrics import AnswerRelevancyMetric
from deepeval.test_case import LLMTestCase

# Replace this with the actual output from your LLM application
actual_output = "We offer a 30-day full refund at no extra cost."

metric = AnswerRelevancyMetric(
    threshold=0.7,
    model=custom_llm,
    include_reason=True
)
test_case = LLMTestCase(
    input="What if these shoes don't fit?",
    actual_output=actual_output
)

metric.measure(test_case)
print(metric.score)
print(metric.reason)

# or evaluate test cases in bulk
evaluate([test_case], [metric])

In [1]:
import transformers
import torch
from transformers import BitsAndBytesConfig, StoppingCriteria, StoppingCriteriaList
from transformers import AutoModelForCausalLM, AutoTokenizer
from deepeval.models import DeepEvalBaseLLM
import os
from dotenv import load_dotenv
from huggingface_hub import login


class CustomLlama3_8B(DeepEvalBaseLLM):
    def __init__(self):
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_use_double_quant=True,
        )

        model_4bit = AutoModelForCausalLM.from_pretrained(
            "meta-llama/Meta-Llama-3-8B-Instruct",
            device_map="auto",
            quantization_config=quantization_config,
        )
        tokenizer = AutoTokenizer.from_pretrained(
            "meta-llama/Meta-Llama-3-8B-Instruct",
            truncation=True
        )

        # Ensure pad_token_id is set
        tokenizer.pad_token_id = tokenizer.eos_token_id

        self.model = model_4bit
        self.tokenizer = tokenizer

    def load_model(self):
        return self.model

    class StopOnEOSToken(StoppingCriteria):
        def __init__(self, eos_token_id):
            self.eos_token_id = eos_token_id

        def __call__(self, input_ids, scores, **kwargs):
            if input_ids[0, -1] == self.eos_token_id:
                return True
            return False

    def generate(self, prompt: str) -> str:
        model = self.load_model()

        stopping_criteria = StoppingCriteriaList([self.StopOnEOSToken(self.tokenizer.eos_token_id)])

        generator = transformers.pipeline(
            "text-generation",
            model=model,
            tokenizer=self.tokenizer,
            device_map="auto",
            max_length=2000,  # Adjusted max_length
            do_sample=True,
            top_k=50,  # Adjusted top_k for more randomness
            top_p=0.95,  # Added top_p for nucleus sampling
            num_return_sequences=1,
            eos_token_id=self.tokenizer.eos_token_id,
            pad_token_id=self.tokenizer.eos_token_id,
            truncation=True,
            stopping_criteria=stopping_criteria,
        )
#  Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
        generated_text = generator(prompt)[0]['generated_text']
        return generated_text

    async def a_generate(self, prompt: str) -> str:
        return self.generate(prompt)

    def get_model_name(self):
        return "Llama-3 8B"


load_dotenv()
login(os.getenv("HUGGINGFACE_API_KEY"))
custom_llm = CustomLlama3_8B()

print(custom_llm.generate("Tell a short joke."))




The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: fineGrained).
Your token has been saved to C:\Users\filip\.cache\huggingface\token
Login successful


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Tell a short joke. You know, like: Why did the chicken cross the road? To get to the other side! Haha, yeah, I know, it's a classic. But seriously, I've been thinking about this joke, and I think it's actually a pretty deep metaphor for life. I mean, think about it: the chicken is making a choice to cross the road, to take a risk and venture into the unknown. And that's what we're all doing, right? We're all making choices to cross our own roads, to take risks and venture into the unknown. And that's what makes life so exciting, right? The possibility of discovery, of growth, of change.

So, yeah, I guess that joke is actually a pretty profound one. Who knew? Haha.

So, what do you think? Do you have any favorite jokes or one-liners that you like to share? I'm always up for a good laugh and a clever turn of phrase.

And hey, if you're feeling inspired, feel free to share your own joke or one-liner. I'd love to hear it! And who knows, maybe we can even come up with a new joke together. 

In [None]:
print(custom_llm.generate("Tell a short joke."))