In [7]:
import pandas as pd
import math

# Read the spreadsheet of ranked results from the working directory.
df = pd.read_excel("cranfield_q1_dataset.xlsx")

# Keep the rows whose "Query ID" is "q1", ordered by the "Rank" column.
df_q1 = df.loc[lambda frame: frame["Query ID"].eq("q1")].sort_values(by="Rank")

# `retrieved`: document IDs in rank order (plain list).
# `relevant`: IDs of the rows flagged "Yes" in "Is_Relevant" (set).
# NOTE(review): `relevant` is taken from the same rows as `retrieved`, so every
# relevant document is also a retrieved one here - confirm that is intended.
retrieved = df_q1["Document ID"].tolist()
relevant = set(df_q1.loc[df_q1["Is_Relevant"].eq("Yes"), "Document ID"].tolist())

# Precision
def precision(retrieved, relevant):
    """Return the fraction of retrieved documents that are relevant.

    Args:
        retrieved: Sized iterable of retrieved document IDs (list, tuple,
            pandas Series, NumPy array, ...).
        relevant: Container of relevant document IDs; membership is tested
            with ``in``.

    Returns:
        The number of items of ``retrieved`` found in ``relevant`` divided by
        ``len(retrieved)``, or 0 when ``retrieved`` is empty.
    """
    # Test emptiness through len(): `not retrieved` raises ValueError for a
    # pandas Series or NumPy array, while len() works for every sized input.
    total = len(retrieved)
    if total == 0:
        return 0
    hits = sum(1 for doc in retrieved if doc in relevant)
    return hits / total

# Recall
def recall(retrieved, relevant):
    """Return the fraction of relevant documents that were retrieved.

    Args:
        retrieved: Iterable of retrieved document IDs (hashable).
        relevant: Iterable of relevant document IDs (hashable).

    Returns:
        The number of distinct relevant IDs present in ``retrieved`` divided
        by the number of distinct relevant IDs, or 0 when there are none.
    """
    relevant_ids = set(relevant)
    if not relevant_ids:
        return 0
    # Count every relevant document once, even when its ID is repeated in the
    # ranking; counting repeats would let recall grow beyond 1.0.
    found = relevant_ids.intersection(retrieved)
    return len(found) / len(relevant_ids)

# F-measure
def f_measure(p, r):
    """Return the harmonic mean of precision ``p`` and recall ``r``.

    Yields 0 when ``p + r`` equals 0, so the division is never attempted
    with a zero denominator.
    """
    total = p + r
    return 2 * p * r / total if total != 0 else 0

# E-measure
def e_measure(p, r, beta=1):
    """Return the E-measure ``1 - (1 + beta^2) * p * r / (beta^2 * p + r)``.

    Args:
        p: Precision.
        r: Recall.
        beta: Weighting parameter used as ``beta ** 2`` in the formula
            (default 1).

    Returns:
        The E-measure, or 1 whenever the denominator ``beta^2 * p + r`` is 0.
    """
    weight = beta ** 2
    denominator = weight * p + r
    # Guard on the denominator itself: besides p == r == 0 it also covers
    # beta == 0 with r == 0, which would otherwise raise ZeroDivisionError.
    if denominator == 0:
        return 1
    return 1 - ((1 + weight) * p * r) / denominator

# DCG with binary gains: a relevant document at 0-based index i adds
# 1 / log2(i + 2); here `position` already equals i + 2.
dcg = sum(
    1 / math.log2(position)
    for position, doc_id in enumerate(retrieved, start=2)
    if doc_id in relevant
)
# IDCG: the same discount summed over the first min(|relevant|, |retrieved|)
# positions, i.e. as if those top slots were all relevant.
idcg = sum(
    1 / math.log2(position)
    for position in range(2, min(len(relevant), len(retrieved)) + 2)
)
# NDCG falls back to 0 when IDCG is 0 (nothing relevant or nothing retrieved).
if idcg:
    ndcg = dcg / idcg
else:
    ndcg = 0

# Evaluate the set-based metrics for the ranked list built above.
p, r = precision(retrieved, relevant), recall(retrieved, relevant)
f, e = f_measure(p, r), e_measure(p, r)

# Print every score on its own line, rounded to three decimals.
print(
    f"Precision: {p:.3f}",
    f"Recall: {r:.3f}",
    f"F-measure: {f:.3f}",
    f"E-measure: {e:.3f}",
    f"NDCG: {ndcg:.3f}",
    sep="\n",
)

Precision: 0.500
Recall: 1.000
F-measure: 0.667
E-measure: 0.333
NDCG: 0.889


In [None]:
import pandas as pd
import math

# Read the spreadsheet of ranked results from the working directory.
df = pd.read_excel("cranfield_q1_dataset.xlsx")

# Rows of query "q1", ordered by "Rank".
df_q1 = df.loc[df["Query ID"] == "q1"].sort_values(by="Rank")

# `retrieved` stays a pandas Series of document IDs in rank order;
# `relevant` is the set of IDs whose "Is_Relevant" value is "Yes".
retrieved = df_q1.loc[:, "Document ID"]
relevant = set(df_q1.loc[df_q1["Is_Relevant"] == "Yes", "Document ID"])

def precision(retrieved, relevant):
    """Share of the items in ``retrieved`` that are members of ``relevant``.

    Returns 0 when ``retrieved`` has length 0.
    """
    n_retrieved = len(retrieved)
    if n_retrieved == 0:
        return 0
    n_hits = 0
    for doc in retrieved:
        if doc in relevant:
            n_hits += 1
    return n_hits / n_retrieved

def recall(retrieved, relevant):
    """Return the share of relevant documents that appear in ``retrieved``.

    Args:
        retrieved: Iterable of retrieved document IDs (hashable).
        relevant: Iterable of relevant document IDs (hashable).

    Returns:
        Distinct relevant IDs found in ``retrieved`` divided by the number of
        distinct relevant IDs; 0 when there are no relevant IDs.
    """
    wanted = set(relevant)
    if len(wanted) == 0:
        return 0
    # Intersect instead of counting hits per ranking entry: a repeated ID in
    # `retrieved` must not be credited twice, or recall could exceed 1.0.
    matched = wanted & set(retrieved)
    return len(matched) / len(wanted)

def f_measure(p, r):
    """Combine precision ``p`` and recall ``r`` as ``2pr / (p + r)``.

    Returns 0 when ``p + r`` is 0.
    """
    denominator = p + r
    if denominator == 0:
        return 0
    numerator = 2 * p * r
    return numerator / denominator

def e_measure(p, r, beta=1):
    """Return ``1 - (1 + beta^2) * p * r / (beta^2 * p + r)``.

    Args:
        p: Precision.
        r: Recall.
        beta: Weighting parameter; only ``beta ** 2`` enters the formula
            (default 1).

    Returns:
        The E-measure, or 1 when the denominator ``beta^2 * p + r`` is 0.
    """
    beta_sq = beta ** 2
    bottom = beta_sq * p + r
    # A zero denominator happens for p == r == 0, but also for beta == 0 with
    # r == 0; checking it directly avoids a ZeroDivisionError in both cases.
    if bottom == 0:
        return 1
    top = (1 + beta_sq) * p * r
    return 1 - top / bottom

# DCG with binary gains; `rank` is the 1-based position, so the discount
# 1 / log2(rank + 1) matches 1 / log2(i + 2) for a 0-based index i.
dcg = sum(
    1 / math.log2(rank + 1)
    for rank, doc in enumerate(retrieved, start=1)
    if doc in relevant
)
# IDCG sums the same discount over the first min(|retrieved|, |relevant|) ranks.
idcg = sum(
    1 / math.log2(rank + 1)
    for rank in range(1, min(len(retrieved), len(relevant)) + 1)
)
# A zero IDCG gives an NDCG of 0 instead of a division by zero.
ndcg = 0 if not idcg else dcg / idcg

# Evaluate the set-based metrics for the ranked list of query "q1".
p = precision(retrieved, relevant)
r = recall(retrieved, relevant)
f = f_measure(p, r)
e = e_measure(p, r)

# Report each score with three decimals. The labels are spelled the same way
# as in the notebook's other cells (they previously contained typos such as
# "Precisionn" and "F_Mesaureeee"); re-run the cell to refresh its output.
print(f"Precision: {p:.3f}")
print(f"Recall: {r:.3f}")
print(f"F-measure: {f:.3f}")
print(f"E-measure: {e:.3f}")
print(f"NDCG: {ndcg:.3f}")



Precisionn: 0.500
Recalll: 1.000
F_Mesaureeee: 0.667
E_Measureee: 0.333
NDCGGG: 0.889


In [2]:
import pandas as pd
import math

# Read the spreadsheet of ranked results from the working directory.
df = pd.read_excel("cranfield_q1_dataset.xlsx")

# Restrict to query "q1" and order the rows by "Rank".
df_q1 = df.loc[df["Query ID"].eq("q1")].sort_values(by="Rank")

# Ranked document IDs (pandas Series) and the set of IDs flagged "Yes".
retrieved = df_q1.loc[:, "Document ID"]
relevant = set(df_q1.loc[df_q1["Is_Relevant"].eq("Yes"), "Document ID"])

# Precision, recall, F-measure and E-measure (beta = 1), computed inline.
# `tp` counts ranking entries whose ID is relevant (vectorised sum of the mask).
tp = int(retrieved.isin(relevant).sum())
p = tp / len(retrieved) if len(retrieved) else 0
# Recall counts each relevant ID once, so a repeated ID in the ranking
# cannot push the value above 1.0.
r = len(relevant.intersection(retrieved)) / len(relevant) if len(relevant) else 0
f = 2*p*r/(p+r) if p+r else 0
# With beta = 1 the E-measure is the complement of F; when p + r == 0, f is 0
# and e is therefore 1.
e = 1 - f

# NDCG with binary gains: each relevant document at 0-based index i
# contributes 1 / log2(i + 2); `slot` below already equals i + 2.
dcg = sum(
    1 / math.log2(slot)
    for slot, doc_id in enumerate(retrieved, start=2)
    if doc_id in relevant
)
# Ideal DCG: the same discount over the first min(|relevant|, |retrieved|) slots.
idcg = sum(
    1 / math.log2(slot)
    for slot in range(2, min(len(relevant), len(retrieved)) + 2)
)
# Use 0 when IDCG is 0 to avoid dividing by zero.
if idcg:
    ndcg = dcg / idcg
else:
    ndcg = 0

# Print the five scores, one per line, each rounded to three decimals.
print(
    "\n".join(
        (
            f"Precision: {p:.3f}",
            f"Recall: {r:.3f}",
            f"F-measure: {f:.3f}",
            f"E-measure: {e:.3f}",
            f"NDCG: {ndcg:.3f}",
        )
    )
)


Precision: 0.500
Recall: 1.000
F-measure: 0.667
E-measure: 0.333
NDCG: 0.889
