In [2]:
# ==============================================
# Information Retrieval Evaluation Metrics
# ==============================================

# ====== Install Required Libraries ======
!pip install pandas openpyxl 


# ====== Import Libraries ======
import pandas as pd
import math

# ====== Step 1: Read Dataset ======
# The workbook "cranfield_q1_dataset.xlsx" is read from the current working directory.
df = pd.read_excel("cranfield_q1_dataset.xlsx")

# ====== Step 2: Filter Query q1 ======
# Keep only the rows of query "q1", ordered by the "Rank" column.
df_q1 = df.loc[df["Query ID"] == "q1"].sort_values("Rank")

# Document IDs in rank order (a pandas Series).
retrieved = df_q1["Document ID"]
# IDs of the q1 rows whose "Is_Relevant" value is "Yes".
# NOTE(review): `relevant` is drawn from the same q1 rows as `retrieved`, so every
# relevant ID is also in the retrieved list - confirm the sheet holds the full set
# of relevance judgments for q1.
relevant = set(df_q1.loc[df_q1["Is_Relevant"] == "Yes", "Document ID"])

# ====== Step 3: Define Metric Functions ======
def precision(retrieved, relevant):
    """Return the share of retrieved documents that are relevant.

    Args:
        retrieved: Sized iterable of retrieved document IDs.
        relevant: Container of relevant document IDs (membership is tested with `in`).

    Returns:
        Number of retrieved entries found in `relevant` divided by
        `len(retrieved)`, or 0 when `retrieved` is empty.
    """
    total = len(retrieved)
    if total == 0:
        return 0

    hits = 0
    for doc in retrieved:
        if doc in relevant:
            hits += 1
    return hits / total

def recall(retrieved, relevant):
    """Return the share of relevant documents that were retrieved.

    Each relevant document is counted at most once, so an ID that occurs several
    times in `retrieved` cannot push the result above 1.

    Args:
        retrieved: Iterable of retrieved document IDs.
        relevant: Sized container of relevant document IDs (membership is tested
            with `in`).

    Returns:
        Number of distinct retrieved IDs found in `relevant` divided by
        `len(relevant)`, or 0 when `relevant` is empty.
    """
    if not len(relevant):
        return 0
    # A set collapses repeated occurrences of the same relevant document.
    found = {doc for doc in retrieved if doc in relevant}
    return len(found) / len(relevant)

def f_measure(p, r):
    """Return the harmonic mean of precision `p` and recall `r`.

    Args:
        p: Precision value.
        r: Recall value.

    Returns:
        (2 * p * r) / (p + r), or 0 when p + r equals 0.
    """
    total = p + r
    # Avoid dividing by zero when both inputs cancel out to 0.
    return 0 if total == 0 else (2 * p * r) / total

def e_measure(p, r, beta=1):
    """Return the E-measure: 1 - ((1 + beta^2) * p * r) / (beta^2 * p + r).

    With the default beta=1 this equals 1 minus the F-measure of `p` and `r`.

    Args:
        p: Precision value.
        r: Recall value.
        beta: Weighting factor used in the formula above (default 1).

    Returns:
        The E-measure, or 1 when the denominator beta^2 * p + r is zero.
    """
    beta_sq = beta ** 2
    denominator = beta_sq * p + r
    # Guard on the denominator itself rather than on p == r == 0: with beta == 0
    # and r == 0 the denominator is also zero even though p is not. The result is
    # 1, the same value already returned for p == r == 0.
    if denominator == 0:
        return 1
    return 1 - ((1 + beta_sq) * p * r) / denominator

# ====== Step 4: Compute DCG and NDCG ======
# DCG with binary gain: a relevant document at 1-based rank k adds 1 / log2(k + 1).
dcg = sum(
    1 / math.log2(rank + 1)
    for rank, doc in enumerate(retrieved, start=1)
    if doc in relevant
)
# Ideal DCG: the same discount summed over the first
# min(len(retrieved), len(relevant)) ranks.
idcg = sum(
    1 / math.log2(rank + 1)
    for rank in range(1, min(len(retrieved), len(relevant)) + 1)
)
# NDCG falls back to 0 when the ideal DCG is zero.
if idcg:
    ndcg = dcg / idcg
else:
    ndcg = 0

# ====== Step 5: Compute Metrics ======
# Precision and recall come first; the F- and E-measures are derived from them.
p, r = precision(retrieved, relevant), recall(retrieved, relevant)
f, e = f_measure(p, r), e_measure(p, r)

# ====== Step 6: Display Results ======
# One metric per line, each formatted to three decimal places.
print(
    f"Precision: {p:.3f}",
    f"Recall: {r:.3f}",
    f"F-Measure: {f:.3f}",
    f"E-Measure: {e:.3f}",
    f"NDCG: {ndcg:.3f}",
    sep="\n",
)


Precision: 0.500
Recall: 1.000
F-Measure: 0.667
E-Measure: 0.333
NDCG: 0.889



[notice] A new release of pip is available: 25.1.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip
