In [1]:
import json
import numpy as np

embeded_queries = np.load('embeddings/PlotQA_queries_with_instruction_embeddings.npy')
embeded_info_json = json.load(open('encoded_PlotQA_image_keywords.json', 'r'))
embeded_image = np.load('embeddings/PlotQA_corpus_embeddings.npy')
image_id = np.load('embeddings/PlotQA_corpus_corpus_ids.npy')


FileNotFoundError: [Errno 2] No such file or directory: 'encoded_PlotQA_image_keywords.json'

In [7]:
import csv  
import pytrec_eval  
import logging  
import numpy as np  
import json

import torch
from tqdm import tqdm
  
logging.basicConfig(  
    format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",  
    datefmt="%m/%d/%Y %H:%M:%S",  
    level=logging.INFO  
)  
logger = logging.getLogger(__name__)  
  
def eval_mrr(qrel, run, cutoff=None):  
    """  
    Compute MRR@cutoff manually.  
    """  
    mrr = 0.0  
    num_ranked_q = 0  
    results = {}  
    for qid in qrel:  
        if qid not in run:  
            continue  
        num_ranked_q += 1  
        docid_and_score = [(docid, score) for docid, score in run[qid].items()]  
        docid_and_score.sort(key=lambda x: x[1], reverse=True)  
        for i, (docid, _) in enumerate(docid_and_score):  
            rr = 0.0  
            if cutoff is None or i < cutoff:  
                if docid in qrel[qid] and qrel[qid][docid] > 0:  
                    rr = 1.0 / (i + 1)  
                    break  
        results[qid] = rr  
        mrr += rr  
    mrr /= num_ranked_q  
    results["all"] = mrr  
    return results  

def retrieve_and_evaluate(query_embeddings, query_ids, corpus_embeddings, keyword_embeddings, corpus_ids, qrels):  
    try: 
        query_embeddings = torch.tensor(query_embeddings)
        corpus_embeddings = torch.tensor(corpus_embeddings)
        keyword_embeddings = torch.tensor(keyword_embeddings)
        # 定义cutoff数组
        cutoffs = [10]
        
        # 打开CSV文件以写入结果
        with open('PlotQA_evaluation_results.csv', mode='w', newline='') as file:
            writer = csv.writer(file)
            # 写入表头
            header = ['image_weight', 'keyword_weight']
            for cutoff in cutoffs:
                header.append(f'ndcg@{cutoff}')
            for cutoff in cutoffs:
                header.append(f'recall@{cutoff}')
            for cutoff in cutoffs:
                header.append(f'mrr@{cutoff}')
            writer.writerow(header)
            
            for weight_i in tqdm(np.arange(0, 1.0, 0.1), desc="Weight Iteration", leave=True):
                weight_k = 1 - weight_i
                weight_i = round(weight_i, 2)
                weight_k = round(weight_k, 2)
                # weight_matrix = np.array([[weight_i, weight_k]])
                weight_matrix = torch.tensor([[weight_i, weight_k]], dtype=torch.float32)
                run = {}  

                for q_idx, q_emb in enumerate(query_embeddings):  
                    qid = query_ids[q_idx]  
                    # scores_i = np.dot(corpus_embeddings, q_emb)  
                    # scores_k = np.dot(keyword_embeddings, q_emb)
                    # scores_k = np.array([item[0] for item in scores_k])

                    # scores_matrix = np.array([scores_i, scores_k])
                    # scores = np.dot(weight_matrix, scores_matrix)
                    
                    # top_k_indices = np.argsort(scores.flatten())[::-1][:10]  # 取前10个  

                    # run[qid] = {corpus_ids[idx]: float(scores.flatten()[idx]) for idx in top_k_indices}  
                    
                    scores_i = torch.matmul(corpus_embeddings, q_emb)
                    scores_k = torch.matmul(keyword_embeddings, q_emb)
                    scores_k = scores_k.squeeze()

                    scores_matrix = torch.stack([scores_i, scores_k])
                    scores = torch.matmul(weight_matrix, scores_matrix)

                    top_k_indices = torch.argsort(scores.flatten(), descending=True)[:10]

                    run[qid] = {corpus_ids[idx]: float(scores.flatten()[idx]) for idx in top_k_indices}

                # 评估
                row = [weight_i, weight_k]
                evaluator = pytrec_eval.RelevanceEvaluator(qrels, {"ndcg_cut", "recall"}) 
                
                eval_results = evaluator.evaluate(run)
                
                for cutoff in cutoffs:
                    ndcg_measure = f"ndcg_cut_{cutoff}"
                    
                    evaluator = pytrec_eval.RelevanceEvaluator(qrels, {ndcg_measure}) 
                    eval_results = evaluator.evaluate(run)
                    
                    # recall_measure = f"recall_{cutoff}"
                    
                    ndcg_value = pytrec_eval.compute_aggregated_measure(
                        ndcg_measure, [query_measures[ndcg_measure] for query_measures in eval_results.values()]
                    )
                    
                    # recall_value = pytrec_eval.compute_aggregated_measure(
                    #     recall_measure, [query_measures[recall_measure] for query_measures in eval_results.values()]
                    # )
                    
                    row.append(ndcg_value)
                
                for cutoff in cutoffs:
                    recall_measure = f"recall_{cutoff}"
                    
                    evaluator = pytrec_eval.RelevanceEvaluator(qrels, {recall_measure}) 
                    eval_results = evaluator.evaluate(run)
                
                    recall_value = pytrec_eval.compute_aggregated_measure(
                        recall_measure, [query_measures[recall_measure] for query_measures in eval_results.values()]
                    )
                    # recall_value = pytrec_eval.compute_aggregated_measure(
                    #     f"recall_{cutoff}", [query_measures[f"recall_{cutoff}"] for query_measures in eval_results.values()]
                    # )
                    row.append(recall_value)
                
                for cutoff in cutoffs:
                    mrr_value = eval_mrr(qrels, run, cutoff)['all']
                    row.append(mrr_value)
                
                writer.writerow(row)

    except Exception as e:  
        logger.error(f"Error during retrieval and evaluation: {e}") 
  
def load_beir_qrels(qrels_file):  
    qrels = {}  
    try:  
        with open(qrels_file) as f:  
            tsvreader = csv.DictReader(f, delimiter="\t")  
            for row in tsvreader:  
                qid = row["query-id"]  
                pid = row["corpus-id"]  
                rel = int(row["score"])  
                if qid in qrels:  
                    qrels[qid][pid] = rel  
                else:  
                    qrels[qid] = {pid: rel}  
    except Exception as e:  
        logger.error(f"Error loading qrels file: {e}")  
    return qrels 
 
def load_embeddings_and_ids(embeddings_path, ids_path):
    embeddings = np.load(embeddings_path)
    ids = np.load(ids_path).astype(str)
    return embeddings, ids

datasets = [
    # {
    #     "name": "SlideVQA",
    #     "query_embeddings_path": "embeddings/SlideVQA_queries_with_instruction_embeddings.npy",
    #     "query_ids_path": "embeddings/SlideVQA_queries_query_ids.npy",
    #     "corpus_embeddings_path": "embeddings/SlideVQA_corpus_embeddings.npy",
    #     "corpus_ids_path": "embeddings/SlideVQA_corpus_corpus_ids.npy",
    #     "qrels_path": "dataset/VisRAG-Ret-Test-SlideVQA/qrels/slidevqa-eval-qrels.tsv"
    # },
    # {
    #     "name": "MP_DocVQA",
    #     "query_embeddings_path": "embeddings/MP_DocVQA_queries_with_instruction_embeddings.npy",
    #     "query_ids_path": "embeddings/MP_DocVQA_queries_query_ids.npy",
    #     "corpus_embeddings_path": "embeddings/MP_DocVQA_corpus_embeddings.npy",
    #     "corpus_ids_path": "embeddings/MP_DocVQA_corpus_corpus_ids.npy",
    #     "qrels_path": "dataset/VisRAG-Ret-Test-MP-DocVQA/qrels/docvqa_mp-eval-qrels.tsv"
    # },
    # {
    #     "name": "ArxivQA",
    #     "query_embeddings_path": "embeddings/ArxivQA_queries_with_instruction_embeddings.npy",
    #     "query_ids_path": "embeddings/ArxivQA_queries_query_ids.npy",
    #     "corpus_embeddings_path": "embeddings/ArxivQA_corpus_embeddings.npy",
    #     "corpus_ids_path": "embeddings/ArxivQA_corpus_corpus_ids.npy",
    #     "qrels_path": "dataset/VisRAG-Ret-Test-ArxivQA/qrels/arxivqa-eval-qrels.tsv"
    # },
    # {
    #     "name": "ChartQA",
    #     "query_embeddings_path": "embeddings/ChartQA_queries_with_instruction_embeddings.npy",
    #     "query_ids_path": "embeddings/ChartQA_queries_query_ids.npy",
    #     "corpus_embeddings_path": "embeddings/ChartQA_corpus_summary_embeddings.npy",
    #     "corpus_ids_path": "embeddings/ChartQA_corpus_corpus_ids.npy",
    #     "qrels_path": "dataset/VisRAG-Ret-Test-ChartQA/qrels/chartqa-eval-qrels.tsv"
    # },
    # {
    #     "name": "InfoVQA",
    #     "query_embeddings_path": "embeddings/InfoVQA_queries_with_instruction_embeddings.npy",
    #     "query_ids_path": "embeddings/InfoVQA_queries_query_ids.npy",
    #     "corpus_embeddings_path": "embeddings/InfoVQA_corpus_embeddings.npy",
    #     "corpus_ids_path": "embeddings/InfoVQA_corpus_corpus_ids.npy",
    #     "qrels_path": "dataset/VisRAG-Ret-Test-InfoVQA/qrels/infographicsvqa-eval-qrels.tsv"
    # },
    {
        "name": "PlotQA",
        "query_embeddings_path": "embeddings/PlotQA_queries_with_instruction_embeddings.npy",
        "query_ids_path": "embeddings/PlotQA_queries_query_ids.npy",
        "corpus_embeddings_path": "embeddings/PlotQA_corpus_embeddings.npy",
        "corpus_ids_path": "embeddings/PlotQA_corpus_corpus_ids.npy",
        "qrels_path": "dataset/VisRAG-Ret-Test-PlotQA/qrels/plotqa-eval-qrels.tsv"
    }
]

# 循环评估每个数据集
for dataset in datasets:
    logger.info(f"Evaluating {dataset['name']} dataset")
    query_embeddings, query_ids = load_embeddings_and_ids(dataset["query_embeddings_path"], dataset["query_ids_path"])
    corpus_embeddings, corpus_ids = load_embeddings_and_ids(dataset["corpus_embeddings_path"], dataset["corpus_ids_path"])
    
    # embeded_info_json = json.load(open('encoded_PlotQA_image_keywords.json', 'r'))
    # INSTRUCTION = 'Represent these key words extracted from image: '
    # keyword_embeddings_list = []
    # keyword_embeddings_list = [embeded_info_json[id]['keywords_vector'] for id in tqdm(list(corpus_ids))]
    # keyword_embeddings = np.array(keyword_embeddings_list)
    # np.save('embeddings/PlotQA_keyword_embeddings_with_instruction.npy', keyword_embeddings)
    
    keyword_embeddings = np.load('embeddings/PlotQA_keyword_embeddings_with_instruction.npy')
    
    
    qrels = load_beir_qrels(dataset["qrels_path"])
    retrieve_and_evaluate(query_embeddings, query_ids, corpus_embeddings, keyword_embeddings, corpus_ids, qrels)



12/30/2024 13:53:56 - INFO - __main__ -   Evaluating PlotQA dataset
Weight Iteration: 100%|██████████| 10/10 [10:38<00:00, 63.88s/it]
