In [7]:
import os
import pandas as pd

# Function to parse the QREL file (standard TREC adhoc format)
def parse_qrels(qrels_file):
    qrels = {}
    with open(qrels_file, 'r') as file:
        for line in file:
            topic, _, doc_id, relevance = line.split()
            relevance = int(relevance)
            if topic not in qrels:
                qrels[topic] = {}
            qrels[topic][doc_id] = relevance
    return qrels

# Function to calculate precision at k
def precision_at_k(relevant, retrieved, k):
    retrieved_at_k = retrieved[:k]
    true_positives = sum([1 for doc_id in retrieved_at_k if doc_id in relevant])
    return true_positives / k

# Function to calculate average precision at k
def average_precision(relevant, retrieved, k):
    precisions = [precision_at_k(relevant, retrieved, i + 1) for i in range(k) if retrieved[i] in relevant]
    if not precisions:
        return 0.0
    return sum(precisions) / len(precisions)

# Function to calculate P@10 and AP@10
def calculate_metrics(df, qrels, k=10):
    p_at_10 = []
    ap_at_10 = []
    grouped = df.groupby('topicId')
    
    for topic, group in grouped:
        relevant_docs = qrels.get(topic, {})
        retrieved_docs = group.sort_values('ranking')['docId'].tolist()
        p_at_10.append((topic, precision_at_k(relevant_docs, retrieved_docs, k)))
        ap_at_10.append((topic, average_precision(relevant_docs, retrieved_docs, k)))
    
    return p_at_10, ap_at_10

# Define paths
qrel_file_path = 'D:\VSCODE PROJECT\IR\dataset\qrels.trec8.adhoc'
input_dir = 'D:\VSCODE PROJECT\IR\cleaned'
output_dir = 'D:\VSCODE PROJECT\IR\scores'

# Parse the QREL file
qrels = parse_qrels(qrel_file_path)

# Get the list of input files
input_files = [os.path.join(input_dir, file) for file in os.listdir(input_dir) if file.endswith('.txt')]

# Initialize a DataFrame to store all results
all_results = pd.DataFrame()

# Process each input file
for file in input_files:
    # Read the input dataset
    input_df = pd.read_csv(file, delimiter='\t', names=['topicId', 'Q0', 'docId', 'ranking', 'similarityScore', 'system'])
    
    # Calculate P@10 and AP@10
    p_at_10, ap_at_10 = calculate_metrics(input_df, qrels)
    
    # Convert to DataFrame
    p_at_10_df = pd.DataFrame(p_at_10, columns=['topicId', 'P@10'])
    ap_at_10_df = pd.DataFrame(ap_at_10, columns=['topicId', 'AP@10'])
    
    # Merge results
    results_df = pd.merge(p_at_10_df, ap_at_10_df, on='topicId')
    results_df['system'] = os.path.basename(file).split('.')[0]
    
    # Append to all results
    all_results = pd.concat([all_results, results_df])

# Save the final results to a file
output_path = os.path.join(output_dir, 'final_results.csv')
all_results.to_csv(output_path, index=False)

# Display the final results
print(all_results)


    topicId  P@10  AP@10         system
0       401   0.0    0.0  input_ric8dpn
1       402   0.0    0.0  input_ric8dpn
2       403   0.0    0.0  input_ric8dpn
3       404   0.0    0.0  input_ric8dpn
4       405   0.0    0.0  input_ric8dpn
..      ...   ...    ...            ...
45      446   0.0    0.0        plt8ah2
46      447   0.0    0.0        plt8ah2
47      448   0.0    0.0        plt8ah2
48      449   0.0    0.0        plt8ah2
49      450   0.0    0.0        plt8ah2

[250 rows x 4 columns]


In [29]:
# Function to parse the QREL file (standard TREC adhoc format)
def parse_qrels(qrels_file):
    qrels = {}
    with open(qrels_file, 'r') as file:
        for line in file:
            topic, _, doc_id, relevance = line.split()
            relevance = int(relevance)
            if topic not in qrels:
                qrels[topic] = {}
            qrels[topic][doc_id] = relevance
    return qrels

In [31]:
# Define paths
qrel_file_path = 'D:\VSCODE PROJECT\IR\dataset\qrels.trec8.csv'
input_dir = 'D:\VSCODE PROJECT\IR\cleaned'
output_dir = 'D:\VSCODE PROJECT\IR\scores'

# Parse the QREL file
qrels = parse_qrels(qrel_file_path)

print("QREL Dictionary:")
for topic, docs in qrels.items():
    print(f"Topic: {topic}, Docs: {list(docs.items())[:5]}")  # Print first 5 docs for each topic for brevity


QREL Dictionary:
Topic: 401, Docs: [('FBIS3-10009', 0), ('FBIS3-10059', 0), ('FBIS3-10142', 0), ('FBIS3-1026', 0), ('FBIS3-10502', 0)]
Topic: 402, Docs: [('FBIS3-10134', 0), ('FBIS3-10279', 0), ('FBIS3-10291', 0), ('FBIS3-10855', 0), ('FBIS3-10954', 0)]
Topic: 403, Docs: [('FBIS3-10291', 0), ('FBIS3-10690', 0), ('FBIS3-11193', 0), ('FBIS3-1164', 0), ('FBIS3-11691', 0)]
Topic: 404, Docs: [('FBIS3-10109', 0), ('FBIS3-10402', 0), ('FBIS3-1048', 0), ('FBIS3-1067', 0), ('FBIS3-11594', 0)]
Topic: 405, Docs: [('FBIS3-1045', 0), ('FBIS3-1067', 0), ('FBIS3-11379', 0), ('FBIS3-11396', 0), ('FBIS3-1166', 0)]
Topic: 406, Docs: [('FBIS3-10133', 0), ('FBIS3-1411', 0), ('FBIS3-1674', 0), ('FBIS3-1790', 0), ('FBIS3-18526', 0)]
Topic: 407, Docs: [('FBIS3-10216', 0), ('FBIS3-14832', 0), ('FBIS3-16551', 0), ('FBIS3-16631', 0), ('FBIS3-16684', 0)]
Topic: 408, Docs: [('FBIS3-10291', 0), ('FBIS3-12406', 0), ('FBIS3-13345', 0), ('FBIS3-13375', 0), ('FBIS3-13962', 0)]
Topic: 409, Docs: [('FBIS3-10392', 0), ('