## For reference, see the Haystack evaluation tutorial

- https://github.com/deepset-ai/haystack/blob/master/tutorials/Tutorial5_Evaluation.ipynb

In [None]:
!pip install git+https://github.com/deepset-ai/haystack.git
!pip install urllib3==1.25.4
!pip install torch==1.6.0+cu101 torchvision==0.6.1+cu101 -f https://download.pytorch.org/whl/torch_stable.html

In [None]:
# Download and unpack Elasticsearch 7.9.2, then give the directory to the `daemon` user
# (the server process below switches to uid 1 before it starts).
! wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.9.2-linux-x86_64.tar.gz -q
! tar -xzf elasticsearch-7.9.2-linux-x86_64.tar.gz
! chown -R daemon:daemon elasticsearch-7.9.2

import os
from subprocess import Popen, PIPE, STDOUT
# Launch Elasticsearch as a background child process; stderr is merged into the stdout pipe.
# NOTE(review): the pipe is never read in the visible cells - confirm the server output
# cannot fill it up during a long run.
es_server = Popen(['elasticsearch-7.9.2/bin/elasticsearch'],
                   stdout=PIPE, stderr=STDOUT,
                   preexec_fn=lambda: os.setuid(1)  # as daemon
                  )
# wait until ES has started
# NOTE(review): this is a fixed 30 second delay; nothing here checks that the server is up.
! sleep 30

In [None]:
from haystack.preprocessor.cleaning import clean_wiki_text
from haystack.preprocessor.utils import convert_files_to_dicts, fetch_archive_from_http

In [None]:
from farm.utils import initialize_device_settings

# Ask FARM for the compute device (CUDA requested). `device` is passed to the reader's
# eval() further down; `n_gpu` is not used in the visible cells.
device, n_gpu = initialize_device_settings(use_cuda=True)

In [None]:
# Connect to the local Elasticsearch instance.
from haystack.document_store.elasticsearch import ElasticsearchDocumentStore

# Embeddings live in the "emb" field (768 dimensions); that field is also listed in
# excluded_meta_data.
document_store = ElasticsearchDocumentStore(
    host="localhost",
    username="",
    password="",
    index="document",
    create_index=False,
    embedding_field="emb",
    embedding_dim=768,
    excluded_meta_data=["emb"],
)

In [None]:
# Names of the Elasticsearch indices used for this evaluation. Pick names that do not
# collide with existing indices: both are wiped clean before the data is inserted.
doc_index, label_index = "tutorial5_docs", "tutorial5_labels"

In [None]:
# Add evaluation data to the Elasticsearch database.
# We first delete the custom tutorial indices so that no duplicate elements remain.
# Mount the Google Drive that holds your evaluation files before running this cell
# (the paths below point into /content/drive/MyDrive).
document_store.delete_all_documents(index=doc_index)
document_store.delete_all_documents(index=label_index)

# For overall evaluation (LONG+SHORT)
document_store.add_eval_data(filename="/content/drive/MyDrive/eval_with_title.json", doc_index=doc_index, label_index=label_index)

# For Question with Long Answer evaluation
#document_store.add_eval_data(filename="/content/drive/MyDrive/eval_with_title_long.json", doc_index=doc_index, label_index=label_index)

# For Question with Short Answer evaluation
#document_store.add_eval_data(filename="/content/drive/MyDrive/eval_with_title_short.json", doc_index=doc_index, label_index=label_index)

# It is recommended to load only 'one' of these documents for a particular evaluation.

In [None]:
# Alias only: document_store_dpr refers to the same store object as document_store, so the
# DPR retriever and every other retriever below work against one and the same store.
document_store_dpr = document_store

In [None]:
from haystack.retriever.dense import DensePassageRetriever,EmbeddingRetriever
from haystack.retriever.sparse import ElasticsearchRetriever, TfidfRetriever

In [None]:
# Dense Passage Retriever built from the two single-NQ DPR encoders (question + context).
dp_retriever = DensePassageRetriever(
    document_store=document_store_dpr,
    query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
    passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base",
    use_gpu=True,
    embed_title=True,
    batch_size=16,
)

# Have the document store update the embeddings of the evaluation documents with this retriever.
document_store_dpr.update_embeddings(dp_retriever, index=doc_index)

In [None]:
# Sparse retriever (imported from haystack.retriever.sparse) over the same document store.
es_retriever = ElasticsearchRetriever(document_store=document_store)

In [None]:
# Two embedding retrievers over the same "deepset/sentence_bert" model; they differ only
# in the model format used to load it ("farm" vs. "transformers").
embedded_retriever_farm = EmbeddingRetriever(
    document_store=document_store,
    embedding_model="deepset/sentence_bert",
    model_format="farm",
)
embedded_retriever_trans = EmbeddingRetriever(
    document_store=document_store,
    embedding_model="deepset/sentence_bert",
    model_format="transformers",
)

In [None]:
# Initialize Reader
from haystack.reader.farm import FARMReader

In [None]:
# Load the custom reader model stored on the mounted Google Drive.
custom_farm_model_minilm = FARMReader(
    model_name_or_path="/content/drive/MyDrive/my_model_lm_mini",
    top_k_per_candidate=4,
    use_gpu=True,
)

In [None]:
# Initialize Finder which sticks together Reader and Retriever
from haystack.finder import Finder

#finder = Finder(reader, retriever)

## Evaluation of Reader

In [None]:
#Function to get evaluation result for a Retriever
def get_retriever_eval_results(retriever_obj,top_k_var,label_index_var,doc_index_var):
    """Run a retriever's own evaluation and return whatever it reports.

    Args:
        retriever_obj: Object exposing ``eval(top_k=..., label_index=..., doc_index=...)``.
        top_k_var: Value forwarded as ``top_k``.
        label_index_var: Value forwarded as ``label_index``.
        doc_index_var: Value forwarded as ``doc_index``.

    Returns:
        The unmodified return value of ``retriever_obj.eval``.
    """
    eval_kwargs = {
        "top_k": top_k_var,
        "label_index": label_index_var,
        "doc_index": doc_index_var,
    }
    return retriever_obj.eval(**eval_kwargs)

In [None]:
# Evaluate the reader on its own against the labels and documents held in the document store.
reader_eval_results_minilm = custom_farm_model_minilm.eval(
    document_store=document_store,
    device=device,
    label_index=label_index,
    doc_index=doc_index,
)

# Top-N-Accuracy: proportion of predicted answers that match with their corresponding correct answer.
# Exact Match: proportion of questions where the predicted answer is exactly the same as the correct answer.
# F1-Score: average overlap between the predicted answers and the correct answers.
for metric_title, metric_key in (
    ("Reader Top-N-Accuracy:", "top_n_accuracy"),
    ("Reader Exact Match:", "EM"),
    ("Reader F1-Score:", "f1"),
):
    print(metric_title, reader_eval_results_minilm[metric_key])

## Evaluation of Retriever.

- We evaluate four kinds of retriever on our evaluation data.

In [None]:
#Function to get evaluation result for a Reader
def get_reader_eval_results(reader_obj,document_store_obj,device_var,label_index_var,doc_index_var):
    """Run a reader's own evaluation and return whatever it reports.

    Args:
        reader_obj: Object exposing
            ``eval(document_store=..., device=..., label_index=..., doc_index=...)``.
        document_store_obj: Value forwarded as ``document_store``.
        device_var: Value forwarded as ``device``.
        label_index_var: Value forwarded as ``label_index``.
        doc_index_var: Value forwarded as ``doc_index``.

    Returns:
        The unmodified return value of ``reader_obj.eval``.
    """
    # doc_index must receive the document index; it was previously handed the label index.
    reader_eval_results = reader_obj.eval(
        document_store=document_store_obj,
        device=device_var,
        label_index=label_index_var,
        doc_index=doc_index_var,
    )
    return reader_eval_results

In [None]:
# Evaluation of the ElasticSearch retriever with top_k = 5
get_retriever_eval_results(es_retriever,5,label_index,doc_index)

In [None]:
# Evaluation of the Dense Passage Retriever with top_k = 5
get_retriever_eval_results(dp_retriever,5,label_index,doc_index)

In [None]:
# Evaluation of the embedding retriever (FARM model format) with top_k = 5
# NOTE(review): the only update_embeddings call in the visible cells uses dp_retriever on the
# same store - confirm whether embeddings must be refreshed with this retriever first.
get_retriever_eval_results(embedded_retriever_farm,5,label_index,doc_index)

In [None]:
# Evaluation of the embedding retriever (transformers model format) with top_k = 5
# NOTE(review): the only update_embeddings call in the visible cells uses dp_retriever on the
# same store - confirm whether embeddings must be refreshed with this retriever first.
get_retriever_eval_results(embedded_retriever_trans,5,label_index,doc_index)

## Evaluation of Finder (Reader+Retriever)

In [None]:
# Pair the custom reader with each retriever under test:
# finder00 uses the ElasticSearch retriever, finder01 the Dense Passage Retriever.
finder00 = Finder(custom_farm_model_minilm, es_retriever)
finder01 = Finder(custom_farm_model_minilm, dp_retriever)

In [None]:
def create_finder_obj(reader_obj,retriever_obj):
    """Build a Finder from the given reader and retriever.

    Args:
        reader_obj: Reader passed as the first positional argument to ``Finder``.
        retriever_obj: Retriever passed as the second positional argument to ``Finder``.

    Returns:
        The newly constructed ``Finder`` instance.
    """
    finder_obj = Finder(reader_obj, retriever_obj)
    return finder_obj

In [None]:
def calc_finder_eval(finder_obj, top_k_retriever=10, top_k_reader=5,
                     label_index_var=None, doc_index_var=None):
    """Evaluate a Finder, print its evaluation report and return the raw results.

    Args:
        finder_obj: Object exposing ``eval(...)`` and ``print_eval_results(results)``.
        top_k_retriever: Value forwarded as ``top_k_retriever`` (default 10).
        top_k_reader: Value forwarded as ``top_k_reader`` (default 5).
        label_index_var: Label index name; when ``None`` the notebook-level
            ``label_index`` is used.
        doc_index_var: Document index name; when ``None`` the notebook-level
            ``doc_index`` is used.

    Returns:
        The unmodified return value of ``finder_obj.eval``.
    """
    # Fall back to the notebook globals so existing calc_finder_eval(finder) calls keep working.
    if label_index_var is None:
        label_index_var = label_index
    if doc_index_var is None:
        doc_index_var = doc_index

    finder_eval_results = finder_obj.eval(
        top_k_retriever=top_k_retriever,
        top_k_reader=top_k_reader,
        label_index=label_index_var,
        doc_index=doc_index_var,
    )
    finder_obj.print_eval_results(finder_eval_results)
    return finder_eval_results

In [None]:
# Evaluation of minilm + ElasticSearch Retriever with top-k-retriever = 10 and top-k-reader = 5
# calc_finder_eval(finder) evaluates with top_k_retriever=10 and top_k_reader=5 on
# label_index / doc_index and prints the results.
finder_eval_results001 = calc_finder_eval(finder00)

In [None]:
# Evaluation of minilm + ElasticSearch Retriever with top-k-retriever = 10 and top-k-reader = 3
finder_eval_results002 = finder00.eval(top_k_retriever=10, top_k_reader=3, label_index=label_index, doc_index=doc_index)
finder00.print_eval_results(finder_eval_results002)

In [None]:
# Evaluation of minilm + ElasticSearch Retriever with top-k-retriever = 5 and top-k-reader = 3
finder_eval_results003 = finder00.eval(top_k_retriever=5, top_k_reader=3, label_index=label_index, doc_index=doc_index)
finder00.print_eval_results(finder_eval_results003)

In [None]:
# Evaluation of minilm + Dense Passage Retriever with top-k-retriever = 10 and top-k-reader = 5
# calc_finder_eval(finder) evaluates with top_k_retriever=10 and top_k_reader=5 on
# label_index / doc_index and prints the results.
finder_eval_results011 = calc_finder_eval(finder01)

In [None]:
# Evaluation of minilm + Dense Passage Retriever with top-k-retriever = 10 and top-k-reader = 3
finder_eval_results012 = finder01.eval(top_k_retriever=10, top_k_reader=3, label_index=label_index, doc_index=doc_index)
finder01.print_eval_results(finder_eval_results012)

In [None]:
# Evaluation of minilm + Dense Passage Retriever with top-k-retriever = 5 and top-k-reader = 3
finder_eval_results013 = finder01.eval(top_k_retriever=5, top_k_reader=3, label_index=label_index, doc_index=doc_index)
finder01.print_eval_results(finder_eval_results013)