In [49]:
import pandas as pd
         
CSV_FILE = 'test_results_with_questions.csv'

fieldnames = [
    'section', 'dataset', 'embedder', 'chunking_detail', 'timestamp', 'extra_info', 'device', 'evalmodel', 'Prompt', 'ModelName',
    'Temperature', 'TopK', 'SimilarityThresholdDocuments',
    'SimilarityThresholdQuestions', 'runId', 'category',
    'TFIDFScore', 'ResponseTime', 'answer_correctness',
    'faithfulness', 'answer_similarity', 'answer_relevancy', 'context_precision',
    'context_relevancy', 'context_recall', 'response_json', 'similarityLevel']
# Read the CSV file into a DataFrame
df = pd.read_csv(CSV_FILE, usecols=fieldnames)

# Ensure numeric columns are indeed numeric
numeric_cols = [
    'TFIDFScore', 'ResponseTime', 'answer_similarity', 'answer_correctness'
]

# Convert the specified columns to numeric, coercing errors to NaN
df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce')

# # Drop rows with NaN values in numeric columns to avoid issues during aggregation
# df = df.dropna(subset=numeric_cols)

comparison_df = df.groupby(['extra_info'])[numeric_cols].mean().reset_index()

print(comparison_df)
print(df.shape[0])


      extra_info  TFIDFScore  ResponseTime  answer_similarity  \
0    no_question    0.296113   4547.795094           0.664729   
1  with_question    0.437722   4409.581180           0.736551   

   answer_correctness  
0            0.463972  
1            0.640519  
1479


In [48]:
import pandas as pd
import json

# Read the CSV file into a DataFrame
CSV_FILE = 'test_results_with_questions.csv'

fieldnames = [
    'section', 'dataset', 'embedder', 'chunking_detail', 'timestamp', 'extra_info', 'device', 'evalmodel', 'Prompt', 'ModelName',
    'Temperature', 'TopK', 'SimilarityThresholdDocuments',
    'SimilarityThresholdQuestions', 'runId', 'category',
    'TFIDFScore', 'ResponseTime', 'answer_correctness',
    'faithfulness', 'answer_similarity', 'answer_relevancy', 'context_precision',
    'context_relevancy', 'context_recall', 'response_json', 'similarityLevel'
]

# Read the CSV file into a DataFrame
df = pd.read_csv(CSV_FILE, usecols=fieldnames)

# Ensure numeric columns are indeed numeric
numeric_cols = [
    'TFIDFScore', 'ResponseTime', 'answer_similarity', 'answer_correctness', 'similarityLevel',
]

# Convert the specified columns to numeric, coercing errors to NaN
df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce')

# Filter the DataFrame by 'extra_info' and similarityLevel
df = df[df['similarityLevel'] < 0.5]
no_question_df = df[df['extra_info'] == 'no_question']
with_question_df = df[df['extra_info'] == 'with_question']

# Function to extract question from response_json
def extract_question(json_str):
    try:
        data = json.loads(json_str)
        return data.get('question', None)
    except json.JSONDecodeError:
        return None

# Extract questions for merging
no_question_df['question'] = no_question_df['response_json'].apply(extract_question)
with_question_df['question'] = with_question_df['response_json'].apply(extract_question)

# Merge the two DataFrames on question
merged_df = no_question_df.merge(
    with_question_df,
    on='question',
    suffixes=('_no', '_with')
)

# Calculate the difference in TFIDFScore
merged_df['TFIDF_diff'] = abs(merged_df['TFIDFScore_with'] - merged_df['TFIDFScore_no'])

# Rank by the difference in TFIDFScore
top_5_diff = merged_df.nlargest(10, 'TFIDF_diff')

# Log the top 5 results with TFIDF difference and response_json for both sections
top_5_results = top_5_diff[['question', 'TFIDF_diff', 'response_json_no', 'response_json_with']]

# Print the top 5 results
print(top_5_results.to_string(index=False))

# Print detailed information
for index, row in top_5_results.iterrows():
    print(f"Question: {row['question']}")
    print(f"TFIDF Difference: {row['TFIDF_diff']}")
    print(f"No Question Response JSON: {row['response_json_no']}")
    print(f"With Question Response JSON: {row['response_json_with']}")
    print("-" * 80)


                                                                     question  TFIDF_diff                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  no_question_df['question'] = no_question_df['response_json'].apply(extract_question)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  with_question_df['question'] = with_question_df['response_json'].apply(extract_question)


In [6]:
import pandas as pd

CSV_FILE = 'test_results_no_prompt.csv'

fieldnames = [
    'section', 'dataset', 'embedder', 'chunking_detail', 'timestamp', 'extra_info', 'device', 'evalmodel', 'Prompt', 'ModelName',
    'Temperature', 'TopK', 'SimilarityThresholdDocuments',
    'SimilarityThresholdQuestions', 'runId', 'category',
    'TFIDFScore', 'ResponseTime', 'answer_correctness',
    'faithfulness', 'answer_similarity', 'answer_relevancy', 'context_precision',
    'context_relevancy', 'context_recall', 'response_json', 'similarityLevel'
]

# Read the CSV file into a DataFrame
df = pd.read_csv(CSV_FILE, usecols=fieldnames)

# Ensure numeric columns are indeed numeric
numeric_cols = [
    'TFIDFScore', 'ResponseTime', 'answer_similarity', 'answer_correctness'
]

# Convert the specified columns to numeric, coercing errors to NaN
df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce')

# Drop rows with NaN values in numeric columns to avoid issues during aggregation
df = df.dropna(subset=numeric_cols)

# Save the comparison DataFrame to a CSV file for further analysis
comparison_df.to_csv('comparison_results.csv', index=False)

dataset_comparison_df = df.groupby(['chunking_details'])[numeric_cols].mean().reset_index()

print(dataset_comparison_df)


  chunking_detail  TFIDFScore  ResponseTime  answer_similarity  \
0    no_questions    0.311621   6137.299079           0.677357   
1  with_questions    0.439530   5128.496131           0.737213   

   answer_correctness  
0            0.500571  
1            0.655102  


In [18]:
import pandas as pd
import json
CSV_FILE = 'test_results_no_prompt.csv'

fieldnames = [
    'section', 'dataset', 'embedder', 'chunking_detail', 'timestamp', 'extra_info', 'device', 'evalmodel', 'Prompt', 'ModelName',
    'Temperature', 'TopK', 'SimilarityThresholdDocuments',
    'SimilarityThresholdQuestions', 'runId', 'category',
    'TFIDFScore', 'ResponseTime', 'answer_correctness',
    'faithfulness', 'answer_similarity', 'answer_relevancy', 'context_precision',
    'context_relevancy', 'context_recall', 'response_json', 'similarityLevel'
]

# Read the CSV file into a DataFrame
df = pd.read_csv(CSV_FILE, usecols=fieldnames)

# Ensure numeric columns are indeed numeric
numeric_cols = [
    'TFIDFScore', 'ResponseTime', 'answer_similarity', 'answer_correctness'
]
# Parse the 'response_json' to extract the 'answer' field
def extract_answer(json_str):
    try:
        response = json.loads(json_str)
        return response.get('answer', '')
    except json.JSONDecodeError:
        return ''

# Create a new column with the extracted answers
df['extracted_answer'] = df['response_json'].apply(extract_answer)

# Set scores to 0 if the answer contains "I don't know"
condition = df['extracted_answer'].str.contains("I don't know", case=False)
df.loc[condition, numeric_cols] = 0

# Convert the specified columns to numeric, coercing errors to NaN
df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce')

# Drop rows with NaN values in numeric columns to avoid issues during aggregation
df = df.dropna(subset=numeric_cols)

dataset_comparison_df = df.groupby(['ModelName'])[numeric_cols].mean().reset_index()

print(dataset_comparison_df)


    ModelName  TFIDFScore  ResponseTime  answer_similarity  answer_correctness
0  llama3:70b    0.357617   5774.539585           0.698881            0.556139
