In [11]:
import prompt_templates
import csv
import aisuite as ai
import os 
from getpass import getpass 

# Set API keys
os.environ['GROQ_API_KEY'] = "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"
os.environ['OPENAI_API_KEY'] = getpass('Enter your OPENAI API key: ')

# Function to generate prompts from CSV file and send to a single model
def llm_llm_eval(csv_file_path: str, model: str, num_runs: int = 1, temperature: float = 0.7, max_tokens: int = 50):
    """Reads a CSV file, generates judge prompts, sends them to a single model, and saves responses to a new CSV file."""
    
    # Read CSV and generate prompts
    with open(csv_file_path, mode='r', encoding='utf-8') as csvfile:
        reader = csv.DictReader(csvfile)
        input_rows = list(reader)
        fieldnames = reader.fieldnames + ['AI Preference']  # Add new column

    prompts = []
    for row in input_rows:
        original_prompt = row.get('Prompt', '')
        response_1 = row.get('Model Response 1', '')
        response_2 = row.get('Model Response 2', '')

        prompt = prompt_templates.create_judge_prompt(original_prompt, response_1, response_2)
        prompts.append((row, prompt))  # Keep the original row paired with the prompt

    # Initialize aisuite client
    client = ai.Client()

    # Prepare to store responses
    updated_rows = []

    # Iterate over the prompts and get responses from the model
    for row, prompt in prompts:
        ai_preference = None

        # Run the prompt multiple times
        for _ in range(num_runs):
            try:
                # Prepare the message for the model
                messages = [{"role": "system", "content": prompt}]
                
                # Send the prompt to the model via aisuite
                response = client.chat.completions.create(
                    model=model,
                    messages=messages,
                    temperature=temperature,
                    max_tokens=max_tokens
                )

                # Extract the response and use it as the AI preference
                ai_preference = response.choices[0].message.content.strip()
                print(f"AI Preference: {ai_preference}")
                break  # Exit loop on successful response
            except Exception as e:
                print(f"Error with model {model}: {e}")
                ai_preference = f"ERROR: {str(e)}"

        # Update the row with the AI Preference
        row['AI Preference'] = ai_preference
        updated_rows.append(row)

    # Save the updated rows to a new CSV file
    output_file_path = os.path.splitext(csv_file_path)[0] + '_with_ai_preference_4o_mini.csv'
    with open(output_file_path, mode='w', encoding='utf-8', newline='') as outputfile:
        writer = csv.DictWriter(outputfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(updated_rows)

    print(f"Processed file saved to {output_file_path}")

# Example usage
csv_file_path = 'LLMAnnotation_GroqLLAMA_Shashank.csv'
csv_file_paths = ['LLMAnnotation-gpt40-shashank.csv','LLMAnnotation_GroqLLAMA_Shashank.csv','LLM_AnnotationgrokLLAMA3_2-Shreyash.csv', 'LLMAnnotation_GroqGemma_Shashank.csv']

model = 'openai:gpt-4o-mini'
for path in csv_file_paths: 
    llm_llm_eval(path, model)


AI Preference: 1
AI Preference: 1
AI Preference: 2
AI Preference: 2
AI Preference: 2
AI Preference: 2
AI Preference: 2
AI Preference: 2
AI Preference: 2
AI Preference: 1
AI Preference: 1
AI Preference: 1
AI Preference: 2
AI Preference: 2
AI Preference: 2
AI Preference: 2
AI Preference: 2
AI Preference: 1
AI Preference: 2
AI Preference: 2
AI Preference: 2
AI Preference: 1
AI Preference: 2
AI Preference: 2
Processed file saved to LLMAnnotation-gpt40-shashank_with_ai_preference_4o_mini.csv
AI Preference: 2
AI Preference: 2
AI Preference: 2
AI Preference: 1
AI Preference: 2
AI Preference: 2
AI Preference: 2
AI Preference: 2
AI Preference: 2
AI Preference: 2
AI Preference: 1
AI Preference: 1
AI Preference: 1
AI Preference: 2
AI Preference: 1
AI Preference: 2
AI Preference: 2
AI Preference: 2
AI Preference: 1
AI Preference: 2
AI Preference: 2
AI Preference: 2
AI Preference: 1
AI Preference: 1
Processed file saved to LLMAnnotation_GroqLLAMA_Shashank_with_ai_preference_4o_mini.csv
AI Preferenc

### Score LM Evaluations to human evaluations 

In [17]:
import csv

def evaluate_ai_preference_matches(file_paths: list[str]):
    """Evaluates the match percentage between 'AI Preference' and 'Preferred Choice' for multiple files."""
    for file_path in file_paths:
        total_rows = 0
        matching_rows = 0

        try:
            # Read the CSV file
            with open(file_path, mode='r', encoding='utf-8') as csvfile:
                reader = csv.DictReader(csvfile)
                if 'AI Preference' not in reader.fieldnames or 'Preferred_Response' not in reader.fieldnames:
                    print(f"Skipping {file_path}: Missing required columns.")
                    continue

                for row in reader:
                    total_rows += 1
                    if row['AI Preference'] == row['Preferred_Response']:
                        matching_rows += 1

            # Calculate and print the match percentage
            match_percentage = (matching_rows / total_rows) * 100 if total_rows > 0 else 0
            print(f"{file_path}: Match Percentage: {match_percentage:.2f}% ({matching_rows}/{total_rows} matches)")
        
        except Exception as e:
            print(f"Error processing {file_path}: {e}")

# Example usage
output_csv_paths = [
    'LLM_AnnotationgrokLLAMA3_2-Shreyash_with_ai_preference_4o.csv',
    'LLMAnnotation_GroqLLAMA_Shashank_with_ai_preference_4o.csv',
    'LLM_AnnotationGROKGEMMA-Shreyash_with_ai_preference_4o.csv',
    'LLMAnnotation-gpt40-shashank_with_ai_preference_with_ai_preference_4o.csv'
]
evaluate_ai_preference_matches(output_csv_paths)


Error processing LLM_AnnotationgrokLLAMA3_2-Shreyash_with_ai_preference_4o.csv: [Errno 2] No such file or directory: 'LLM_AnnotationgrokLLAMA3_2-Shreyash_with_ai_preference_4o.csv'
Error processing LLMAnnotation_GroqLLAMA_Shashank_with_ai_preference_4o.csv: [Errno 2] No such file or directory: 'LLMAnnotation_GroqLLAMA_Shashank_with_ai_preference_4o.csv'
Error processing LLM_AnnotationGROKGEMMA-Shreyash_with_ai_preference_4o.csv: [Errno 2] No such file or directory: 'LLM_AnnotationGROKGEMMA-Shreyash_with_ai_preference_4o.csv'
Error processing LLMAnnotation-gpt40-shashank_with_ai_preference_4o.csv: [Errno 2] No such file or directory: 'LLMAnnotation-gpt40-shashank_with_ai_preference_4o.csv'
