In [None]:
!pip install transformers accelerate datasets ragas

In [None]:
!huggingface-cli login --token ''

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import pandas as pd
from datasets import load_dataset

from datasets import DatasetDict, Dataset

# Set random seed for reproducibility
torch.random.manual_seed(0)

# Change the model here and just run the code.
MODEL_ID = "wasifis/cited_phi3_mini_instruct"
# NOTE(review): the tokenizer is loaded from the base Phi-3 repo, not from
# MODEL_ID -- presumably the fine-tune shares the base vocabulary; confirm
# before pointing MODEL_ID at a model from a different family.
TOKENIZER_ID = "microsoft/Phi-3-mini-128k-instruct"
DATASET_ID = "wasifis/rag-test-gt"
# NOTE(review): "default" is used as a split name here -- verify it matches the
# splits the dataset actually exposes.
DATASET_SPLIT = "default"

# Use the GPU when one is visible; otherwise fall back to (slow) CPU inference
# instead of failing while loading the model.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Load the model and tokenizer
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map=DEVICE,
    torch_dtype="auto",
    trust_remote_code=True,  # allows custom code from the model repo to run
)
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_ID)

# Initialize the pipeline
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

# Load the dataset
dataset_dict = load_dataset(DATASET_ID)
if DATASET_SPLIT not in dataset_dict:
    # Same exception type as a plain lookup, but say what is available.
    raise KeyError(
        f"Split {DATASET_SPLIT!r} not found in {DATASET_ID!r}; "
        f"available splits: {list(dataset_dict)}"
    )

# Convert to pandas DataFrame
df = pd.DataFrame(dataset_dict[DATASET_SPLIT])

# Prepare the base context (system prompt prepended to every conversation)
base_context = [
    {"role": "system", "content": "You are a helpful AI assistant."}
]

# Function to generate response for each question with context
def generate_response(question, context):
    """Generate the model's answer to ``question`` given retrieved ``context``.

    Args:
        question: The question text, sent as the final user message.
        context: The retrieved context, sent as a user message before the
            question. May be a single string or a list/tuple of passages;
            passages are joined with blank lines into one string.

    Returns:
        The ``generated_text`` of the first pipeline result (the pipeline is
        called with ``return_full_text=False``).
    """
    # A list-valued dataset column would otherwise be passed as message
    # content; the chat template is assumed to expect a plain string.
    if isinstance(context, (list, tuple)):
        context = "\n\n".join(str(passage) for passage in context)

    messages = base_context + [
        {"role": "user", "content": context},
        {"role": "user", "content": question},
    ]
    # Greedy decoding: with do_sample=False the temperature is not used, so it
    # is not passed (transformers warns about an unused temperature).
    generation_args = {
        "max_new_tokens": 500,
        "return_full_text": False,
        "do_sample": False,
    }
    output = pipe(messages, **generation_args)
    return output[0]['generated_text']

# Answer every row by passing its question and contexts to the model
def _answer_row(row):
    """Return the generated answer for one DataFrame row."""
    return generate_response(row['question'], row['contexts'])


df['answer'] = df.apply(_answer_row, axis=1)

# Write the DataFrame, now including the answers, to a CSV file (no index column)
output_csv_path = 'generated_testset_with_answers.csv'
df.to_csv(output_csv_path, index=False)

In [None]:
import ast
import os
from getpass import getpass

import pandas as pd
from datasets import Dataset
from ragas import evaluate
from ragas.metrics import faithfulness, answer_correctness

# Set the OpenAI API key without hard-coding it: keep a key that is already
# present in the environment (the old assignment overwrote it with an empty
# string), otherwise prompt for one (the input is not echoed).
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

# Load the CSV file into a DataFrame
csv_file_path = 'generated_testset_with_answers.csv'
df = pd.read_csv(csv_file_path)

# Inspect the DataFrame to understand its structure
print(df.dtypes)


def _parse_list_cell(value):
    """Convert one CSV cell into a list.

    A string that looks like a Python list literal ("[...]") is parsed with
    ``ast.literal_eval``, which only accepts literals and therefore cannot
    execute code embedded in the file (unlike ``eval``). Anything else --
    including bracketed text that is not a valid literal -- is wrapped in a
    one-element list.
    """
    if isinstance(value, str) and value.startswith('[') and value.endswith(']'):
        try:
            return ast.literal_eval(value)
        except (ValueError, SyntaxError):
            # e.g. free text that merely starts and ends with brackets
            return [value]
    return [value]


# Ensure that the list-valued columns are lists again after the CSV round trip.
# NOTE(review): the expected column names ('ground_truths' vs 'ground_truth')
# depend on the installed ragas version -- confirm against that version.
for column in ('contexts', 'ground_truths'):
    if column in df.columns:
        df[column] = df[column].apply(_parse_list_cell)

# Convert the DataFrame to a dictionary
data_samples = df.to_dict(orient='list')

# Create a Dataset from the dictionary
dataset = Dataset.from_dict(data_samples)

# Define the metrics
metrics = [faithfulness, answer_correctness]

# Evaluate the dataset using the specified metrics
score = evaluate(dataset, metrics=metrics)

# Convert the score to a pandas DataFrame and print it
print(score.to_pandas())
