In [21]:
import os
import pandas as pd

In [22]:
# Function to parse the QREL file
def parse_qrels(qrels_file):
    """Load relevance judgments from a qrels file into a nested dictionary.

    Every non-blank line must hold exactly four whitespace-separated fields:
    ``query_id  <ignored>  doc_id  relevance``. The second field is read but
    discarded.

    Args:
        qrels_file: Path of the qrels file to read.

    Returns:
        dict: ``{query_id: {doc_id: relevance}}``. Both ids are kept as the
        strings found in the file; ``relevance`` is converted to ``int``.
        If a (query_id, doc_id) pair occurs more than once, the last
        occurrence wins.

    Raises:
        ValueError: If a non-blank line does not have exactly four fields,
            or its relevance field is not an integer literal.
    """
    qrels = {}
    with open(qrels_file, 'r') as file:
        for line_number, line in enumerate(file, start=1):
            fields = line.split()
            if not fields:
                # Blank / whitespace-only lines (typically a trailing newline)
                # carry no judgment; skipping them avoids an unpacking error.
                continue
            if len(fields) != 4:
                raise ValueError(
                    f"{qrels_file}, line {line_number}: expected 4 fields, got {len(fields)}"
                )
            query_id, _, doc_id, relevance = fields
            qrels.setdefault(query_id, {})[doc_id] = int(relevance)
    return qrels

In [23]:
# Function to calculate precision at k
def precision_at_k(relevant, retrieved, k):
    """Compute precision over the top ``k`` retrieved documents of one query.

    Args:
        relevant: Mapping ``doc_id -> relevance``; a document counts as
            relevant only when its relevance is greater than 0.
        retrieved: Sequence of document ids in rank order (best first).
        k: Rank cutoff. If fewer than ``k`` documents were retrieved, the
            cutoff is reduced to ``len(retrieved)``, so the divisor is the
            number of documents actually inspected.

    Returns:
        float: Fraction of inspected documents that are relevant, or ``0.0``
        when nothing can be inspected (empty ``retrieved`` or ``k <= 0``).
    """
    cutoff = min(k, len(retrieved))
    if cutoff <= 0:
        # An empty ranking (or non-positive k) would otherwise divide by zero.
        return 0.0
    true_positives = sum(
        1 for doc_id in retrieved[:cutoff] if doc_id in relevant and relevant[doc_id] > 0
    )
    return true_positives / cutoff

# Function to calculate Average Precision at k for a single query (averaging it over queries gives MAP)
def mean_average_precision_at_k(relevant, retrieved, k):
    """Average precision over the top ``k`` results of a single query.

    Precision is sampled at every rank (within the top ``k``) that holds a
    document whose relevance is greater than 0, and those samples are
    averaged. Note the divisor: it is the number of relevant documents found
    in the top ``k``, not the total number of documents judged relevant.

    Args:
        relevant: Mapping ``doc_id -> relevance``.
        retrieved: Sequence of document ids in rank order (best first).
        k: Rank cutoff; only ``retrieved[:k]`` is inspected.

    Returns:
        float: The averaged precision samples, or ``0.0`` when no relevant
        document appears in the top ``k``.
    """
    # 1-based ranks at which a relevant document was retrieved.
    hit_ranks = [
        rank
        for rank, doc_id in enumerate(retrieved[:k], start=1)
        if doc_id in relevant and relevant[doc_id] > 0
    ]
    if len(hit_ranks) == 0:
        return 0.0
    # The n-th hit (n counted from 1) at rank r contributes precision n / r.
    precisions = [hits_so_far / rank for hits_so_far, rank in enumerate(hit_ranks, start=1)]
    return sum(precisions) / len(precisions)

# Function to calculate P and MAP
def calculate_metrics(df, qrel_dict):
    """Score every topic of one run against the relevance judgments.

    Args:
        df: DataFrame with at least the columns ``topicId``, ``docId`` and
            ``ranking``; rows are grouped by ``topicId`` and ordered by
            ascending ``ranking`` within each group.
        qrel_dict: Mapping ``topic -> {doc_id: relevance}`` keyed by the
            string form of the topic id. Topics absent from it are scored
            against an empty judgment set.

    Returns:
        list[dict]: One record per topic with the keys ``topicId``,
        ``P@10``, ``P@50``, ``MAP@100`` and ``MAP@20``.
    """
    def _score_topic(topic, rows):
        # qrel keys are strings, while the grouped topic id may be numeric.
        judged = qrel_dict.get(str(topic), {})
        ranked_ids = rows.sort_values('ranking')['docId'].tolist()
        return {
            'topicId': topic,
            'P@10': precision_at_k(judged, ranked_ids, 10),
            'P@50': precision_at_k(judged, ranked_ids, 50),
            'MAP@100': mean_average_precision_at_k(judged, ranked_ids, 100),
            'MAP@20': mean_average_precision_at_k(judged, ranked_ids, 20),
        }

    return [_score_topic(topic, rows) for topic, rows in df.groupby('topicId')]

In [24]:
# Define paths.
# Each location defaults to the original local layout but can be overridden
# with an environment variable, so the notebook can run on another machine
# without editing this cell.
qrel_file_path = os.environ.get(
    'IR_QREL_FILE', 'D:\\VSCODE PROJECT\\IR\\dataset\\qrels.trec8.csv'
)  # relevance judgments file
input_dir = os.environ.get(
    'IR_INPUT_DIR', 'D:\\VSCODE PROJECT\\IR\\cleaned'
)  # directory containing the input run files (*.csv)
output_dir = os.environ.get(
    'IR_OUTPUT_DIR', 'D:\\VSCODE PROJECT\\IR\\scores'
)  # directory that receives the combined scores
output_file_name = 'final_results.csv'
output_path = os.path.join(output_dir, output_file_name)


In [25]:
# Ensure the output directory exists
os.makedirs(output_dir, exist_ok=True)

# Parse the QREL file
qrel_dict = parse_qrels(qrel_file_path)

# Get the list of input files. os.listdir returns names in arbitrary order,
# so sort them to keep the row order of the combined results reproducible.
input_files = sorted(
    os.path.join(input_dir, file) for file in os.listdir(input_dir) if file.endswith('.csv')
)

# Collect one results DataFrame per input file (concatenated in a later cell)
all_results = []

# Process each input file
for file in input_files:
    try:
        # Read the input dataset; column names are supplied here, so the
        # first line of each file is treated as data, not as a header.
        input_df = pd.read_csv(file, delimiter=',', names=['topicId', 'identifier', 'docId', 'ranking', 'similarityScore', 'systemName'])

        # Calculate metrics
        results = calculate_metrics(input_df, qrel_dict)

        # Convert to DataFrame
        results_df = pd.DataFrame(results)
        # Label rows with the file name minus its final extension. splitext
        # keeps dots inside the name, so "run.v1.csv" and "run.v2.csv" get
        # distinct labels instead of both collapsing to "run".
        results_df['system'] = os.path.splitext(os.path.basename(file))[0]

        # Append to all results
        all_results.append(results_df)
    except Exception as e:
        # Best effort: report the failing file and continue with the rest.
        print(f"Error processing file {file}: {e}")

In [26]:
# Concatenate all results into a single DataFrame
if all_results:
    final_results = pd.concat(all_results, ignore_index=True)
    # Save the final results to a file
    final_results.to_csv(output_path, index=False)

    # Build one topic-by-system table per metric: P@10, P@50, MAP@100 and MAP@20
    table_format_p10 = final_results.pivot(index='topicId', columns='system', values='P@10')
    table_format_p50 = final_results.pivot(index='topicId', columns='system', values='P@50')
    table_format_map100 = final_results.pivot(index='topicId', columns='system', values='MAP@100')
    table_format_map20 = final_results.pivot(index='topicId', columns='system', values='MAP@20')

else:
    print("No valid data found to concatenate.")
    # Bind the table names to empty frames so the cells that print them
    # report "Empty DataFrame" instead of failing with a NameError.
    table_format_p10 = pd.DataFrame()
    table_format_p50 = pd.DataFrame()
    table_format_map100 = pd.DataFrame()
    table_format_map20 = pd.DataFrame()


In [27]:
# Heading and the topic-by-system P@10 table, emitted with a single print call
print("\nTable for P@10:", table_format_p10, sep="\n")


Table for P@10:
system   Dm8Nbn  Flab8at  Flab8ax  GE8MTD2  MITSLStd  Mer8Adtd2  UB99T  \
topicId                                                                  
401         0.0      0.2      0.3      1.0       0.3        0.2    0.0   
402         0.6      0.6      0.9      0.6       0.7        0.6    0.0   
403         0.6      0.9      1.0      1.0       0.9        0.7    0.5   
404         0.1      0.2      0.3      0.4       0.4        0.4    0.1   
405         0.2      0.5      0.4      0.6       0.2        0.3    0.1   
406         0.2      0.5      0.5      0.4       0.4        0.4    0.0   
407         0.3      0.9      1.0      0.6       0.7        0.8    0.1   
408         0.1      0.3      0.4      0.7       0.4        0.4    0.0   
409         0.2      0.2      0.2      0.4       0.4        0.3    0.1   
410         0.5      1.0      1.0      1.0       1.0        1.0    0.0   
411         0.1      0.5      0.7      0.8       0.5        0.5    0.1   
412         0.0      

In [28]:
# Heading and the topic-by-system P@50 table, emitted with a single print call
print("\nTable for P@50:", table_format_p50, sep="\n")


Table for P@50:
system   Dm8Nbn  Flab8at  Flab8ax  GE8MTD2  MITSLStd  Mer8Adtd2  UB99T  \
topicId                                                                  
401        0.04     0.12     0.10     0.92      0.24       0.10   0.00   
402        0.24     0.30     0.52     0.30      0.36       0.28   0.06   
403        0.32     0.40     0.40     0.38      0.40       0.36   0.16   
404        0.06     0.20     0.26     0.28      0.36       0.38   0.18   
405        0.08     0.22     0.30     0.34      0.20       0.22   0.10   
406        0.04     0.18     0.20     0.18      0.20       0.14   0.00   
407        0.10     0.54     0.52     0.38      0.48       0.36   0.06   
408        0.20     0.36     0.44     0.36      0.50       0.42   0.06   
409        0.08     0.10     0.10     0.14      0.16       0.18   0.08   
410        0.22     0.90     0.88     0.74      0.84       0.82   0.02   
411        0.04     0.22     0.24     0.20      0.22       0.16   0.04   
412        0.00     0

In [29]:
# Heading and the topic-by-system MAP@100 table, emitted with a single print call
print("\nTable for MAP@100:", table_format_map100, sep="\n")


Table for MAP@100:
system     Dm8Nbn   Flab8at   Flab8ax   GE8MTD2  MITSLStd  Mer8Adtd2  \
topicId                                                                
401      0.038379  0.198119  0.303756  0.923190  0.295493   0.226557   
402      0.337771  0.450753  0.661927  0.505900  0.575764   0.467920   
403      0.498739  0.845268  0.908171  0.905682  0.833250   0.601410   
404      0.091123  0.208642  0.319368  0.380707  0.439540   0.405344   
405      0.319825  0.411459  0.429631  0.581723  0.305515   0.265819   
406      0.833333  0.454965  0.438530  0.506038  0.518334   0.447888   
407      0.175937  0.699331  0.664585  0.588581  0.639461   0.547678   
408      0.233619  0.419348  0.466353  0.534153  0.490964   0.420106   
409      0.476974  0.238386  0.259828  0.450925  0.349684   0.377169   
410      0.369070  0.928704  0.931946  0.878483  0.895715   0.917806   
411      0.150000  0.673094  0.632199  0.763364  0.558358   0.423198   
412      0.022988  0.740493  0.236281  0.763

In [30]:
# Heading and the topic-by-system MAP@20 table, emitted with a single print call
print("\nTable for MAP@20:", table_format_map20, sep="\n")


Table for MAP@20:
system     Dm8Nbn   Flab8at   Flab8ax   GE8MTD2  MITSLStd  Mer8Adtd2  \
topicId                                                                
401      0.000000  0.462500  0.594298  0.994598  0.473640   0.503268   
402      0.496145  0.806443  0.895048  0.837675  0.784287   0.731411   
403      0.628732  0.926051  1.000000  0.996324  0.899611   0.671099   
404      0.200000  0.174325  0.469147  0.542783  0.619844   0.467390   
405      0.383333  0.527655  0.608913  0.730911  0.353580   0.336508   
406      0.833333  0.622076  0.547445  0.587975  0.722673   0.540142   
407      0.340741  0.925462  0.933333  0.776573  0.735739   0.798311   
408      0.222517  0.591409  0.612554  0.818875  0.710197   0.573148   
409      0.608187  0.433824  0.460852  0.852632  0.459365   0.535965   
410      0.502400  1.000000  0.997368  1.000000  0.969586   1.000000   
411      0.150000  0.702473  0.815846  0.939043  0.687149   0.632275   
412      0.000000  0.914174  0.128788  0.9511