In [None]:
import pandas as pd

def calculate_overall_accuracy(
    base_path="/Users/tomsheffer/Documents/advanced_llm/",
    models=None,
):
    """Print detection and selection accuracy for each model and prompt setup.

    For every ``(model_name, fewshot_file, zeroshot_file)`` entry, both result
    CSVs are read from ``base_path``. Each CSV must provide a
    ``filtered_resps`` column (the string form of a list of responses) and a
    ``target`` column. Only the first response of each row is scored, and the
    comparison with the target is case-insensitive.

    Rows are split into two tasks by their target value:

    * detection: target is ``'llm'`` or ``'human'``
    * selection: target is ``'1'`` or ``'2'``

    After the per-file report, one LaTeX table row per successfully processed
    file is printed. Percent signs in those rows are escaped, because a bare
    percent sign starts a comment in LaTeX and would drop the rest of the row.

    Args:
        base_path: Directory containing the result CSV files.
        models: Iterable of ``(model_name, fewshot_file, zeroshot_file)``
            tuples. Defaults to the six built-in model configurations.

    Returns:
        None. All results are reported through ``print``.
    """
    # Function-scope imports keep this block self-contained.
    import ast
    import os

    if models is None:
        models = [
            ("gpt4", "results_gpt4_fewshot.csv", "results_gpt4_zeroshot.csv"),
            ("gpt35", "results_gpt35_fewshot.csv", "results_gpt35_zeroshot.csv"),
            ("llama3", "results_llama3_fewshot.csv", "results_llama3_zeroshot.csv"),
            ("opus", "results_opus_fewshot.csv", "results_opus_zeroshot.csv"),
            ("sonnet", "results_sonnet_fewshot.csv", "results_sonnet_zeroshot.csv"),
            ("mistral", "results_mistral_fewshot.csv", "results_mistral_zeroshot.csv"),
        ]

    def first_response(raw):
        """Return the lower-cased first response stored in a cell."""
        if not isinstance(raw, str):
            # A missing cell can never match a target; score it as wrong
            # instead of aborting the whole file.
            return "" if pd.isna(raw) else str(raw).lower()
        try:
            parsed = ast.literal_eval(raw)
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            # Not a valid Python literal: fall back to the historical slicing
            # that strips the surrounding "['" and "']".
            return raw[2:-2].split("', '")[0].lower()
        # Unwrap (possibly nested) lists down to their first element.
        while isinstance(parsed, (list, tuple)):
            if not parsed:
                return ""
            parsed = parsed[0]
        return str(parsed).lower()

    def accuracy(frame):
        """Return the share of matching rows, or None when there are no rows."""
        if frame.empty:
            return None
        return frame['first_resp'].eq(frame['target']).mean()

    def percent(value):
        """Format an accuracy for display; None becomes 'N/A'."""
        return "N/A" if value is None else f"{value:.2%}"

    overall_results = []

    for model_name, fewshot_file, zeroshot_file in models:
        for file_type, file_name in [("Few-shot", fewshot_file), ("Zero-shot", zeroshot_file)]:
            file_path = os.path.join(base_path, file_name)
            try:
                # Read targets as text so a file holding only numeric targets
                # ("1"/"2") is not parsed as integers, which would break the
                # string comparison below.
                df = pd.read_csv(file_path, dtype={'target': str})
                df['first_resp'] = df['filtered_resps'].apply(first_response)
                df['target'] = df['target'].str.lower()

                # Detection task: targets are 'llm' or 'human'
                detection_accuracy = accuracy(df[df['target'].isin(['llm', 'human'])])

                # Selection task: targets are '1' or '2'
                selection_accuracy = accuracy(df[df['target'].isin(['1', '2'])])

                print(f"{model_name} {file_type} - Detection Accuracy: {percent(detection_accuracy)}")
                print(f"{model_name} {file_type} - Selection Accuracy: {percent(selection_accuracy)}")

                # Keep the numbers for the LaTeX table printed at the end.
                overall_results.append((model_name, file_type, detection_accuracy, selection_accuracy))

            except Exception as e:
                # Best effort: a missing or malformed file must not stop the
                # report for the remaining files.
                print(f"Failed to calculate accuracy for {file_type} {model_name}: {str(e)}")

    # Generate Overleaf code
    print("\nOverleaf Code for Overall Accuracy:")
    for model_name, file_type, detection_accuracy, selection_accuracy in overall_results:
        # '%' starts a comment in LaTeX, so it has to be written as '\%'.
        detection_tex = percent(detection_accuracy).replace("%", "\\%")
        selection_tex = percent(selection_accuracy).replace("%", "\\%")
        print(f"\\textbf{{{model_name} {file_type}}} & Detection: {detection_tex} & Selection: {selection_tex} \\\\")

# Run the overall report: prints detection and selection accuracy for each
# model / prompt setup, followed by the LaTeX (Overleaf) table rows.
calculate_overall_accuracy()


In [None]:
def calculate_accuracy_by_creation_model(
    base_path="/Users/tomsheffer/Documents/advanced_llm/",
    models=None,
    model_keywords=None,
):
    """Print selection accuracy per model, split by the text's creation model.

    For every ``(model_name, fewshot_file, zeroshot_file)`` entry, both result
    CSVs are read from ``base_path``. Each CSV must provide ``filtered_resps``
    (the string form of a list of responses), ``target`` and ``doc`` columns.
    Each row is assigned to the first keyword found in its lower-cased ``doc``
    text, or to ``'Unknown'`` when no keyword matches. Only selection rows
    (target ``'1'`` or ``'2'``) are scored, using the first response and a
    case-insensitive comparison.

    After the per-file report, one LaTeX row per non-empty group is printed.
    Percent signs in those rows are escaped, because a bare percent sign
    starts a comment in LaTeX and would drop the rest of the row.

    Args:
        base_path: Directory containing the result CSV files.
        models: Iterable of ``(model_name, fewshot_file, zeroshot_file)``
            tuples. Defaults to the six built-in model configurations.
        model_keywords: Lower-case substrings identifying the creation model,
            checked in order. Defaults to the four built-in keywords.

    Returns:
        None. All results are reported through ``print``.
    """
    # Function-scope imports keep this block self-contained.
    import ast
    import os

    if models is None:
        models = [
            ("gpt4", "results_gpt4_fewshot.csv", "results_gpt4_zeroshot.csv"),
            ("gpt35", "results_gpt35_fewshot.csv", "results_gpt35_zeroshot.csv"),
            ("llama3", "results_llama3_fewshot.csv", "results_llama3_zeroshot.csv"),
            ("opus", "results_opus_fewshot.csv", "results_opus_zeroshot.csv"),
            ("sonnet", "results_sonnet_fewshot.csv", "results_sonnet_zeroshot.csv"),
            ("mistral", "results_mistral_fewshot.csv", "results_mistral_zeroshot.csv"),
        ]
    if model_keywords is None:
        model_keywords = ["gpt4", "claude-3-haiku", "llama-3", "mistral"]
    creation_groups = list(model_keywords) + ['Unknown']

    def first_response(raw):
        """Return the lower-cased first response stored in a cell."""
        if not isinstance(raw, str):
            # A missing cell can never match a target; score it as wrong
            # instead of aborting the whole file.
            return "" if pd.isna(raw) else str(raw).lower()
        try:
            parsed = ast.literal_eval(raw)
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            # Not a valid Python literal: fall back to the historical slicing
            # that strips the surrounding "['" and "']".
            return raw[2:-2].split("', '")[0].lower()
        # Unwrap (possibly nested) lists down to their first element.
        while isinstance(parsed, (list, tuple)):
            if not parsed:
                return ""
            parsed = parsed[0]
        return str(parsed).lower()

    def classify_model(doc):
        """Return the first keyword contained in doc, else 'Unknown'."""
        if not isinstance(doc, str):
            # Missing doc text cannot be attributed to any creation model.
            return 'Unknown'
        for keyword in model_keywords:
            if keyword in doc:
                return keyword
        return 'Unknown'

    # Kept from the original: this changes a global pandas display option and
    # later notebook cells may rely on it.
    pd.set_option('display.max_colwidth', None)

    model_results = []

    for model_name, fewshot_file, zeroshot_file in models:
        for file_type, file_name in [("Few-shot", fewshot_file), ("Zero-shot", zeroshot_file)]:
            file_path = os.path.join(base_path, file_name)
            try:
                # Read targets and docs as text so numeric-looking columns do
                # not break the string operations below.
                df = pd.read_csv(file_path, dtype={'target': str, 'doc': str})
                df['first_resp'] = df['filtered_resps'].apply(first_response)
                df['target'] = df['target'].str.lower()
                df['doc'] = df['doc'].str.lower()
                df['creation_model'] = df['doc'].apply(classify_model)

                is_selection = df['target'].isin(['1', '2'])

                # Show only selection accuracies split by creation model
                print(f"\n{model_name} {file_type} - Selection Accuracies by Creation Model:")
                for creation_model in creation_groups:
                    model_df = df[is_selection & (df['creation_model'] == creation_model)]
                    if not model_df.empty:
                        model_accuracy = model_df['first_resp'].eq(model_df['target']).mean()
                        print(f"  {creation_model}: {model_accuracy:.2%}")
                        model_results.append((model_name, file_type, creation_model, model_accuracy))
                    else:
                        print(f"  {creation_model}: No data available")

            except Exception as e:
                # Best effort: a missing or malformed file must not stop the
                # report for the remaining files.
                print(f"Failed to calculate accuracy for {file_type} {model_name}: {str(e)}")

    # Generate Overleaf code
    print("\nOverleaf Code for Selection Accuracy by Creation Model:")
    for model_name, file_type, creation_model, model_accuracy in model_results:
        # '%' starts a comment in LaTeX, so it has to be written as '\%'.
        accuracy_tex = f"{model_accuracy:.2%}".replace("%", "\\%")
        print(f"\\textbf{{{model_name} {file_type}}} - {creation_model} & Selection Accuracy: {accuracy_tex} \\\\")

# Run the per-creation-model report: prints selection accuracy for each model /
# prompt setup split by creation model, followed by the LaTeX (Overleaf) rows.
calculate_accuracy_by_creation_model()
