# Reducing Hallucinations

- https://levelup.gitconnected.com/solving-6-types-of-hallucinations-in-small-llms-a-hands-on-guide-8d15c11650d3

In [None]:
import torch
from gait import s_message, u_message
from transformers import pipeline

In [None]:
# Checkpoint used for every generation call in this notebook
model_id = "meta-llama/Llama-3.2-1B-Instruct"

# Build the chat pipeline once so that later cells can reuse it
pipe = pipeline(
    task="text-generation",
    model=model_id,
    torch_dtype=torch.bfloat16,  # load the weights as bfloat16
    device_map="auto",  # leave device placement to the library
)

In [None]:
def generate_response(system_prompt: str, user_prompt: str) -> str:
    """
    Ask the module-level ``pipe`` for a reply to a two-message conversation.

    Parameters:
    - system_prompt (str): Text wrapped with ``s_message`` and sent first (the persona / instructions).
    - user_prompt (str): Text wrapped with ``u_message`` and sent second (the user's query).

    Returns:
    - str: The ``"content"`` field of the last entry in the first result's ``"generated_text"``.
    """
    # Conversation in the order the pipeline receives it: system first, then user
    conversation = [s_message(system_prompt), u_message(user_prompt)]

    # Only the first result returned by the pipeline is used
    first_result = pipe(conversation)[0]

    # The last entry of the generated conversation is taken to be the model's reply
    final_turn = first_result["generated_text"][-1]
    return final_turn["content"]

In [None]:
import numpy as np
from sentence_transformers import SentenceTransformer

# Instantiate the encoder a single time at module level so that every
# embedding call reuses it instead of loading the model again
embedding_model_name = "TaylorAI/gte-tiny"
embedding_model = SentenceTransformer(embedding_model_name)


def get_sentence_embedding(sentence: str) -> np.ndarray:
    """
    Encode a single sentence with the module-level ``embedding_model``.

    Parameters:
    - sentence (str): The text to embed.

    Returns:
    - np.ndarray: The result of ``embedding_model.encode`` for the sentence.
    """
    # No extra options are passed to encode(); in particular, normalization is
    # not requested here, so callers should not assume unit-length vectors
    return embedding_model.encode(sentence)

In [None]:
# Persona / instructions given to the model
system_prompt = "You are an AI Chatbot!"

# Question asked without any supporting context
user_prompt = "Who discovered Penicillin in 1928?"

# Query the model with just the two prompts
response = generate_response(system_prompt=system_prompt, user_prompt=user_prompt)

# Show the model's answer
print(response)

In [None]:
# Our Knowledge Base (4 Documents)
# Each entry is a free-text passage; only one of them mentions penicillin.
documents = [
    "Robert Withering, an English physician and botanist, is known for his study of the foxglove plant and its medicinal properties, particularly its use in treating dropsy (edema).",
    "The process of fermentation is a metabolic process that produces chemical changes in organic substrates through the action of enzymes. It typically occurs in yeast and bacteria, and also in oxygen-starved muscle cells, as in the case of lactic acid fermentation.",
    "Sir Alexander Fleming, a Scottish physician and microbiologist, discovered the antibiotic substance penicillin from the mould Penicillium notatum in 1928. This discovery revolutionized medicine.",
    "The Eiffel Tower is a wrought-iron lattice tower on the Champ de Mars in Paris, France. It was named after the engineer Gustave Eiffel, whose company designed and built the tower.",
]

In [None]:
# Embed the user query
user_query = "Who discovered Penicillin in 1928?"  # Our test query
user_query_embedding = get_sentence_embedding(user_query)

# Embed the Knowledge Base (one vector per document, in the same order)
document_embeddings = [get_sentence_embedding(doc) for doc in documents]

# Rank documents by cosine similarity. The embedding helper does not request
# normalized vectors, so a raw dot product would also reward vector length;
# dividing by the norms makes the score depend on direction only.
document_matrix = np.asarray(document_embeddings)
document_norms = np.linalg.norm(document_matrix, axis=1)
query_norm = np.linalg.norm(user_query_embedding)
similarity_scores = (document_matrix @ user_query_embedding) / (
    document_norms * query_norm
)

# Find the index of the document with the highest score
most_relevant_doc_index = int(np.argmax(similarity_scores))

# Retrieve the text of the most relevant document
retrieved_context = documents[most_relevant_doc_index]

In [None]:
# Bare expression: the notebook displays the document selected by the retrieval step
retrieved_context

In [None]:
# Build a system prompt that embeds the retrieved document and instructs the
# model to answer from it, or to say so when the context is insufficient
rag_system_prompt = f"""
You are an AI Chatbot!
Use the following context to answer the user's question accurately.
If the context does not contain enough information to answer the question, 
respond that you don't have sufficient information from the provided context.

Context:
{retrieved_context}
"""

# Same question as the baseline run, so the two answers can be compared
user_prompt = "Who discovered Penicillin in 1928?"

# Only the system prompt differs from the baseline call
rag_response = generate_response(rag_system_prompt, user_prompt)

# Print the context-grounded answer
print(rag_response)

In [None]:
# Persona / instructions given to the model
system_prompt = "You are an AI Chatbot!"

# A question whose answer depends on what "today" means
user_prompt = "Who is the president of France today?"

# Query the model with the prompt exactly as written
response = generate_response(system_prompt=system_prompt, user_prompt=user_prompt)

# Show the model's answer
print(response)

In [None]:
import re
from datetime import datetime
from typing import Optional

# Temporal phrases that get rewritten, mapped to the strftime format that replaces them
_TEMPORAL_FORMATS = {
    "today": "%B %d, %Y",
    "this year": "%Y",
    "this month": "%B %Y",
}

# Whole-word, case-insensitive match for any of the phrases above
_TEMPORAL_PATTERN = re.compile(
    r"\b(today|this\s+year|this\s+month)\b",
    re.IGNORECASE,
)


def make_query_time_aware(user_prompt: str, now: Optional[datetime] = None) -> str:
    """
    Rewrites the user prompt to include the current date for temporal context.
    This is a simplified example targeting specific keywords.

    "today", "this year" and "this month" are replaced with the matching date
    text. Matching ignores case and only fires on whole words, so "Today" is
    rewritten while a longer word that merely contains "today" is left alone.
    All phrases are substituted in a single pass.

    Parameters:
    - user_prompt (str): The original user query.
    - now (datetime, optional): The moment to treat as "now". Defaults to
      ``datetime.now()``; pass a fixed value for reproducible output.

    Returns:
    - str: The rewritten, time-aware query.
    """
    if now is None:
        now = datetime.now()

    def _substitute(match: "re.Match[str]") -> str:
        # Collapse case and internal whitespace so "This  Year" maps to "this year"
        phrase = " ".join(match.group(0).lower().split())
        return now.strftime(_TEMPORAL_FORMATS[phrase])

    # You might add more phrases to _TEMPORAL_FORMATS / _TEMPORAL_PATTERN as needed
    return _TEMPORAL_PATTERN.sub(_substitute, user_prompt)

In [None]:
# Persona / instructions given to the model
system_prompt = "You are an AI Chatbot!"

# Rewrite the question so that "today" becomes an explicit date, and show the result
user_prompt = make_query_time_aware("Who is the president of France today?")
print(user_prompt)

# Query the model with the rewritten prompt
response = generate_response(system_prompt=system_prompt, user_prompt=user_prompt)

# Show the model's answer
print(response)

In [None]:
# The system prompt sets the behavior or persona of the AI
system_prompt = "You are an AI Chatbot!"

# The user prompt is the actual question or input from the user
user_prompt = "Explain how photosynthesis works in plants"

# How many independent generations to collect for the same prompt
num_samples = 10

# Call the model repeatedly with identical prompts and keep every answer;
# the loop counter itself is never needed, hence the throwaway name
responses = [
    generate_response(system_prompt, user_prompt) for _ in range(num_samples)
]

In [None]:
# Bare expression: the notebook displays the list of collected responses
responses

In [None]:
# Embed the prompt that actually produced `responses`. The earlier `user_query`
# variable still holds the penicillin question from the retrieval example, so
# using it here would score the answers against an unrelated query.
user_query_embedding = get_sentence_embedding(user_prompt)

# Get embeddings for all responses
response_embeddings = [get_sentence_embedding(text) for text in responses]

# Cosine similarity between the query and each response. The embeddings are not
# requested in normalized form, so divide the dot products by the vector norms.
response_matrix = np.asarray(response_embeddings)
response_norms = np.linalg.norm(response_matrix, axis=1)
query_norm = np.linalg.norm(user_query_embedding)
similarity_scores = (response_matrix @ user_query_embedding) / (
    response_norms * query_norm
)

# Printing sim score
print(similarity_scores)