In [2]:
import pandas as pd
from tqdm import tqdm
import re
from ba_data_extraction.banking_act_preprocessor import BankingActPreprocessor
from mas_data_extraction.mas_preprocessor import MASPreprocessor
from vectordb_retriever import VectorDbRetriever, GraphDbRetriever, Reranker
from weighted_vectordb_retriever import WeightedGraphDbRetriever, WeightedReranker
from dfs_vectordb_retriever import DFSRetriever
from classify_query import QueryClassifierAgent
from summary_and_output import SummaryAgent, OutputAgent
from source_router import SourceRouterAgent
from llm_as_judge import LLMAsJudge
from sentence_transformers import SentenceTransformer, util

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Notebook display configuration: never truncate the column list when a
# DataFrame is rendered (None removes the column limit).
pd.set_option('display.max_columns', None)

In [62]:
# Banking Act corpus: map each chunk id to its text.
ba_preprocessor = BankingActPreprocessor()
ba_chunks = ba_preprocessor.get_chunks()
ba_dict = {entry['id']: entry['text'] for entry in ba_chunks}

# MAS corpus: same id -> text mapping.
mas_preprocessor = MASPreprocessor()
mas_chunks = mas_preprocessor.get_chunks()
mas_dict = {entry['id']: entry['text'] for entry in mas_chunks}

# One lookup table over both corpora; if an id exists in both, the MAS text
# replaces the Banking Act text.
combined_dict = dict(ba_dict)
combined_dict.update(mas_dict)

def get_text(id, dict=combined_dict):
    """Look up the text of a chunk by its id.

    Args:
        id: Chunk identifier used as the lookup key.
        dict: Mapping of chunk id to chunk text; defaults to the combined
            Banking Act + MAS table built above.

    Returns:
        The value stored under ``id``. With the default plain-dict mapping a
        missing id raises ``KeyError``.
    """
    text = dict[id]
    return text

# Sentence-embedding model used by get_similarity_score; created once at
# notebook scope so every similarity call reuses the same instance.
model = SentenceTransformer('all-MiniLM-L6-v2')
def get_similarity_score(chunk1, chunk2):
    """Return the cosine similarity between the embeddings of two texts.

    Both texts are encoded together in one call to the module-level ``model``
    and the resulting pair of embeddings is compared with ``util.cos_sim``.

    Args:
        chunk1: First text to compare.
        chunk2: Second text to compare.

    Returns:
        The similarity as a plain Python number (``.item()`` of the
        ``cos_sim`` result).
    """
    first_embedding, second_embedding = model.encode([chunk1, chunk2])
    return util.cos_sim(first_embedding, second_embedding).item()

def get_ranking_score(retrieved):
    """Measure how far a list of relevance values is from descending order.

    Every value is replaced by its rank in the ideal (descending) ordering.
    Each pair that appears in the wrong order (a worse-ranked item placed
    before a better-ranked one) contributes the rank distance between the two
    items to the penalty. The total is normalised by the penalty of the fully
    reversed list.

    Args:
        retrieved: Sequence of comparable relevance values (e.g. similarity
            scores) in the order the items were retrieved.

    Returns:
        A number in [0, 1]: 0 means the list is already in ideal order,
        1 means it is completely reversed. Lists for which no ordering
        mistake is possible (fewer than two items, or all values tied)
        return 0.0 instead of raising ``ZeroDivisionError``.
    """
    ideal_order = sorted(retrieved, reverse=True)

    # Convert values to ranks. ``list.index`` returns the first occurrence, so
    # tied values share the rank of the first of them in the ideal order.
    retrieved_ranks = [ideal_order.index(value) for value in retrieved]
    ideal_ranks = [ideal_order.index(value) for value in ideal_order]
    n = len(ideal_ranks)

    total_penalty = 0
    max_penalty = 0
    for i in range(n):
        for j in range(i + 1, n):
            # Inversion: the item at position i has a worse (larger) rank than
            # the item after it. Comparing ranks directly is equivalent to
            # comparing their positions in the ideal rank list.
            if retrieved_ranks[i] > retrieved_ranks[j]:
                total_penalty += retrieved_ranks[i] - retrieved_ranks[j]
            # Penalty this pair would add if the list were fully reversed.
            max_penalty += abs(ideal_ranks[i] - ideal_ranks[j])

    # Nothing can be out of order: avoid dividing by zero.
    if max_penalty == 0:
        return 0.0

    # Normalised penalty (1 is the worst, 0 is the best).
    return min(1, total_penalty / max_penalty)

def get_retriever_score(reference_ids, top_k_chunks):
    """Score a list of retrieved chunk ids against the reference chunk ids.

    For every reference id three scores are computed:

    * position score: ``(k - position) / k`` where ``k`` is the number of
      retrieved chunks and ``position`` is the index of the reference id in
      ``top_k_chunks`` (``k`` when absent, giving 0.0);
    * max similarity score: the highest semantic similarity between the
      reference chunk text and any retrieved chunk text;
    * rank score: ``get_ranking_score`` of the similarity list, i.e. a
      penalty where 0 is the best ordering and 1 the worst.

    Args:
        reference_ids: List of expected chunk ids,
            e.g. ``['ba-1970-2.1', 'ba-1970-4.2']``.
        top_k_chunks: List of retrieved chunk ids in retrieval order.

    Returns:
        Tuple ``(agg_position_score, agg_max_score, agg_rank_score,
        position_scores, max_scores, rank_scores)``. The aggregates are the
        maximum of the corresponding per-reference lists.
        When ``top_k_chunks`` is empty every per-reference score is 0.0, and
        when ``reference_ids`` is empty the aggregates are 0.0; previously
        both cases raised (``ZeroDivisionError`` / ``ValueError``).
    """
    position_scores = []
    max_scores = []
    rank_scores = []
    n_retrieved = len(top_k_chunks)

    for ref_id in reference_ids:
        # Position score: 1.0 when the reference is retrieved first, 0.0 when
        # it is not retrieved at all (or nothing was retrieved).
        if ref_id in top_k_chunks:
            position = top_k_chunks.index(ref_id)
        else:
            position = n_retrieved
        position_score = (n_retrieved - position) / n_retrieved if n_retrieved else 0.0
        position_scores.append(position_score)

        # Semantic similarity between the reference chunk and each retrieved chunk.
        if n_retrieved:
            ref_text = get_text(ref_id)  # looked up once per reference
            retrieved_chunks_score = [
                get_similarity_score(ref_text, get_text(chunk_id))
                for chunk_id in top_k_chunks
            ]
        else:
            retrieved_chunks_score = []
        print('Retrieved chunks similarity score:', retrieved_chunks_score)

        # Best similarity (cosine range -1 to 1); 0.0 when nothing was retrieved.
        max_score = max(retrieved_chunks_score, default=0.0)
        max_scores.append(max_score)

        # Ordering penalty; with fewer than two chunks there is nothing to
        # order, so report no penalty instead of calling the ranking helper.
        if len(retrieved_chunks_score) > 1:
            rank_score = get_ranking_score(retrieved_chunks_score)
        else:
            rank_score = 0.0
        rank_scores.append(rank_score)

    print('Position scores:', position_scores)
    print('Max scores:', max_scores)
    print('Rank scores:', rank_scores)

    # NOTE(review): the rank score is a penalty (0 is best), so max() selects
    # the worst reference here while it selects the best for the other two
    # metrics - confirm this asymmetry is intended.
    agg_position_score = max(position_scores, default=0.0)
    agg_max_score = max(max_scores, default=0.0)
    agg_rank_score = max(rank_scores, default=0.0)

    return agg_position_score, agg_max_score, agg_rank_score, position_scores, max_scores, rank_scores

def _parse_query_type(raw_classification):
    """Map the classifier's free-text reply to 'factual' or 'reasoning'.

    The label mentioned first in the reply wins, so an explanation such as
    "classified as 'Reasoning' ... rather than a factual answer" is read as
    'reasoning'. Replies mentioning neither label fall back to 'factual'.
    """
    lowered = raw_classification.lower()
    first_seen = {}
    for label in ('factual', 'reasoning'):
        position = lowered.find(label)
        if position != -1:
            first_seen[label] = position
    if not first_seen:
        # assign misclassification as factual for simplicity
        return 'factual'
    return min(first_seen, key=first_seen.get)


def _extract_chunk_id(chunk, retriever_method):
    """Return the chunk id embedded in a reranked chunk string, or '' if absent.

    Chunks from the 'weighted' method are parsed as starting with
    ``ID: <id>`` on the first line; all other chunks as starting with ``(<id>)``.
    """
    if retriever_method == 'weighted':
        id_pattern = re.search(r'^ID:\s(.*?)(?=\n)', chunk)
    else:
        id_pattern = re.match(r"\(([^)]+)\)", chunk)
    return id_pattern.group(1) if id_pattern else ''


def _format_chunk_label(chunk_id, data_source):
    """Turn a raw chunk id into the human-readable label used in summaries."""
    if chunk_id == '':
        return ''
    if data_source == 'ba':
        chunk_id = chunk_id.replace('ba', 'Banking Act')
    elif data_source == 'mas':
        chunk_id = chunk_id.replace('mas', 'MAS')

    # Separate the last id component with a comma, the rest with spaces.
    before, sep, after = chunk_id.rpartition('-')
    chunk_id = before + ', ' + after
    # NOTE(review): str.title() also turns 'MAS' into 'Mas' - confirm whether
    # that is the desired label.
    return chunk_id.replace('-', ' ').title()


def _first_int(text):
    """Return the first integer found in ``text``, or 0 if there is none."""
    numbers = re.findall(r'\d+', text)
    return int(numbers[0]) if numbers else 0


def get_evaluation(query, correct_source, correct_type, reference_retrieval, reference_generation, n_hop, retriever_method):
    """Run one query through the whole pipeline and collect evaluation metrics.

    Stages: source routing, query classification, vector retrieval, graph
    expansion + reranking, per-chunk summarisation (scored by the LLM judge)
    and final answer generation (scored by the LLM judge).

    Args:
        query: The user question to evaluate.
        correct_source: Expected data source ('ba' or 'mas').
        correct_type: Expected query type ('factual' or 'reasoning').
        reference_retrieval: List of chunk ids that should be retrieved.
        reference_generation: Reference answer text for the judge.
        n_hop: Hop count / path length passed to the graph retriever.
        retriever_method: One of 'bfs', 'weighted' or 'dfs'.

    Returns:
        dict of inputs, intermediate results and scores. If the router's reply
        names neither source, only 'query', 'correct_source', 'source' and
        'is_source_correct' are present.

    Raises:
        ValueError: If ``retriever_method`` is not a supported method.
    """
    user_query = query

    output = {}
    output['query'] = query
    print('Query:', query)

    # Determine data source
    output['correct_source'] = correct_source
    router = SourceRouterAgent()
    data_source = router.get_source(user_query)
    print("Raw Data Source:", data_source)

    # Cleaning data source
    # NOTE(review): plain substring checks - a reply containing e.g. the word
    # "database" would match 'ba'; confirm the router's reply format.
    if 'ba' in data_source.lower():
        data_source = 'ba'
    elif 'mas' in data_source.lower():
        data_source = 'mas'
    else:
        # Unrecognised source: nothing downstream can run, report and stop.
        output['source'] = data_source
        output['is_source_correct'] = 0
        return output

    output['source'] = data_source
    output['is_source_correct'] = 1 if data_source == correct_source else 0

    # Classify the query
    output['correct_type'] = correct_type
    classifier_agent = QueryClassifierAgent()
    classification = classifier_agent.classify_query(user_query)
    print("Raw Classification:", classification)

    classification = _parse_query_type(classification)
    output['type'] = classification
    output['is_type_correct'] = 1 if classification == correct_type else 0

    # Retrieve top-k chunks from VectorDB
    vectorretriever = VectorDbRetriever(top_k=10)
    top_k_chunks = vectorretriever.get_top_k_chunks(user_query, data_source)
    print("VectorDB Chunks:", top_k_chunks)
    agg_position_score, agg_max_score, agg_rank_score, position_scores, max_scores, rank_scores = get_retriever_score(reference_retrieval, top_k_chunks)

    output['reference_retrieval'] = reference_retrieval
    output['generated_retrieval'] = top_k_chunks
    output['retrieval_position_scores'] = position_scores
    output['retrieval_max_position_score'] = agg_position_score
    output['retrieval_best_similarity_scores'] = max_scores
    output['retrieval_max_similarity_score'] = agg_max_score
    output['retrieval_rank_similarity_scores'] = rank_scores
    output['retrieval_max_rank_similarity_score'] = agg_rank_score

    # GraphDB Retriever
    output['graphdb_retrieval_method'] = retriever_method
    output['n_hops'] = n_hop
    if retriever_method == 'bfs':
        graphretriever = GraphDbRetriever(hops=n_hop)
        appended_chunks = graphretriever.get_appended_chunks(top_k_chunks, data_source)
        reranker = Reranker(top_k=5)
    elif retriever_method == 'weighted':
        graphretriever = WeightedGraphDbRetriever(hops=n_hop)
        appended_chunks = graphretriever.get_appended_chunks(top_k_chunks, data_source)
        reranker = WeightedReranker(top_k=5)
    elif retriever_method == 'dfs':
        graphretriever = DFSRetriever(path_length=n_hop)
        appended_chunks = graphretriever.run_DFS(user_query, top_k_chunks, data_source)
        reranker = Reranker(top_k=5)
    else:
        # Fail with a clear message instead of an UnboundLocalError below.
        raise ValueError(
            f"Unknown retriever_method {retriever_method!r}; expected 'bfs', 'weighted' or 'dfs'"
        )

    # Rerank top_k appended chunks
    reranked_chunks = reranker.rerank(user_query, appended_chunks)
    print(reranked_chunks)
    print('Length Reranked chunks:', len(reranked_chunks))

    # Extract reranked chunk ids (chunks without a recognisable id are skipped)
    reranked_ids = []
    for chunk in reranked_chunks:
        chunk_id = _extract_chunk_id(chunk, retriever_method)
        if chunk_id != '':
            reranked_ids.append(chunk_id)

    print('Reranker ids:', reranked_ids)

    agg_position_score, agg_max_score, agg_rank_score, position_scores, max_scores, rank_scores = get_retriever_score(reference_retrieval, reranked_ids)
    output['reranker_generated_retrieval'] = reranked_ids
    output['reranker_position_scores'] = position_scores
    output['reranker_max_position_score'] = agg_position_score
    output['reranker_best_similarity_scores'] = max_scores
    output['reranker_max_similarity_score'] = agg_max_score
    output['reranker_rank_similarity_scores'] = rank_scores
    output['reranker_max_rank_similarity_score'] = agg_rank_score

    # Summary reranked appended chunks
    summary_agent = SummaryAgent()
    judge = LLMAsJudge()
    summarized_chunks = []
    summary_scores = []
    for chunk in reranked_chunks:
        # Use the same id extraction as above so 'weighted' chunks are
        # labelled too, not only the parenthesised-id format.
        chunk_id = _format_chunk_label(_extract_chunk_id(chunk, retriever_method), data_source)

        summary = summary_agent.summarize_text(chunk, user_query)
        summary = summary.replace('\n', ' ')
        summarized_chunks.append(f'{chunk_id}: {summary}')

        # Evaluate summary using LLM As Judge (first integer in the reply, else 0)
        summary_score = _first_int(judge.judge_summary(user_query, chunk, summary))
        summary_scores.append(summary_score)
    final_chunks = '\n\n'.join(summarized_chunks)
    # Mean judge score; 0 when there were no chunks to summarise.
    output['summary_score'] = sum(summary_scores) / len(summary_scores) if summary_scores else 0

    # Get final answer
    output_agent = OutputAgent()
    final_answer = output_agent.output_response(data_source, classification, user_query, summarized_chunks=final_chunks)
    output['reference_answer'] = reference_generation
    output['generated_answer'] = final_answer

    # Evaluate output agent with LLM As Judge
    answer_score = _first_int(judge.judge_answer(user_query, reference_generation, final_answer))
    output['answer_score'] = answer_score
    print('Answer score:', answer_score)

    print('---------------------------------------------------\n')
    return output

In [5]:
# Load the evaluation set. The evaluation loop below reads the columns
# query, correct_source, correct_type, reference_retrieval and
# reference_generation from each row.
df_queries = pd.read_csv('victoria_queries.csv')
df_queries

Unnamed: 0,query,correct_source,correct_type,reference_retrieval,reference_generation
0,What is the minimum paid-up capital required f...,ba,factual,ba-1970-9.1,A bank incorporated in Singapore must have a m...
1,What is the minimum capital requirement for a ...,ba,factual,ba-1970-9.1,A foreign bank must have head office capital f...
2,What are the penalties for individuals who ill...,ba,factual,ba-1970-4a.4,For an individual who contravenes these provis...
3,What are the restrictions on advertising credi...,ba,factual,"ba-1970-57.1, ba-1970-57.2, ba-1970-57.3, ba-1...","In Singapore, any advertisement offering or in..."
4,Is a bank allowed to appeal a licence revocati...,ba,factual,ba-1970-20.4,"Yes, a bank whose licence has been revoked has..."
5,Our bank has suffered significant losses in a ...,ba,reasoning,"ba-1970-9.3a, ba-1970-58.8",When a bank’s losses reduce its capital funds ...
6,A bank promotes its credit card services by ru...,ba,reasoning,"ba-1970-57.1, ba-1970-57.2, ba-1970-57.3, ba-1...","Under the regulations, only a licensed entity ..."
7,Imagine a bank that has repeatedly delayed the...,ba,reasoning,"ba-1970-25.2, ba-1970-20.5",The failure to timely publish audited financia...
8,A bank plans to repurchase its shares to reduc...,ba,reasoning,"ba-1970-9.3, ba-1970-25.1",The bank must not reduce its paid-up capital o...
9,Consider a scenario where a bank repeatedly en...,ba,reasoning,"ba-1970-57.3, ba-1970-57.4, ba-1970-57.5, ba-1...",Unauthorized advertising for credit card servi...


In [None]:
# Get result for BFS first: evaluate every query at several hop counts.
bfs_queries_evaluation = []
bfs_retriever_method = 'bfs'
# `row_idx` instead of `id` so the builtin id() is not shadowed in the notebook.
for row_idx, row in df_queries.iterrows():
    # Row fields do not depend on the hop count, so read them once per query.
    query = row['query']
    correct_source = row['correct_source']
    correct_type = row['correct_type']
    # Comma-separated chunk ids; tolerate missing or extra spaces around commas.
    reference_retrieval = [ref.strip() for ref in row['reference_retrieval'].split(',') if ref.strip()]
    reference_generation = row['reference_generation']
    for n_hop in [1, 3, 5]:
        # Pass a fresh list per run so each result row owns its reference ids.
        evaluation = get_evaluation(query, correct_source, correct_type, list(reference_retrieval), reference_generation, n_hop, bfs_retriever_method)
        bfs_queries_evaluation.append(evaluation)

df_bfs = pd.DataFrame(bfs_queries_evaluation)

Raw Data Source: ba
Raw Classification: Factual
VectorDB Chunks: ['ba-1970-9.1', 'ba-1970-9.2', 'ba-1970-9.3a', 'ba-1970-9.2a', 'ba-1970-55t.6', 'ba-1970-10.1', 'ba-1970-55t.2', 'ba-1970-55t.1', 'ba-1970-9a.3a', 'ba-1970-40.1']
Retrieved chunks similarity score: [0.9999998807907104, 0.8203828930854797, 0.7704546451568604, 0.7668531537055969, 0.75835782289505, 0.7692022919654846, 0.7747294902801514, 0.8659762740135193, 0.7862464785575867, 0.7480430006980896]
Position scores: [1.0]
Max scores: [0.9999998807907104]
Rank scores: [0.32727272727272727]


Reranking chunks: 100%|██████████| 10/10 [00:11<00:00,  1.14s/it]


Retrieved chunks similarity score: [0.9999998807907104, 0.7668531537055969, 0.8203828930854797, 0.7704546451568604, 0.7692022919654846]
Position scores: [1.0]
Max scores: [0.9999998807907104]
Rank scores: [0.3]
Raw Data Source: ba
Raw Classification: Factual
VectorDB Chunks: ['ba-1970-9.1', 'ba-1970-9.2', 'ba-1970-9.3a', 'ba-1970-9.2a', 'ba-1970-55t.6', 'ba-1970-10.1', 'ba-1970-55t.2', 'ba-1970-55t.1', 'ba-1970-9a.3a', 'ba-1970-40.1']
Retrieved chunks similarity score: [0.9999998807907104, 0.8203828930854797, 0.7704546451568604, 0.7668531537055969, 0.75835782289505, 0.7692022919654846, 0.7747294902801514, 0.8659762740135193, 0.7862464785575867, 0.7480430006980896]
Position scores: [1.0]
Max scores: [0.9999998807907104]
Rank scores: [0.32727272727272727]


Reranking chunks: 100%|██████████| 10/10 [00:06<00:00,  1.51it/s]


Retrieved chunks similarity score: [0.7668531537055969, 0.7692022919654846, 0.7704546451568604, 0.9999998807907104, 0.8203828930854797]
Position scores: [0.4]
Max scores: [0.9999998807907104]
Rank scores: [0.95]
Raw Data Source: ba
Raw Classification: Factual
VectorDB Chunks: ['ba-1970-9.1', 'ba-1970-9.2', 'ba-1970-9.3a', 'ba-1970-9.2a', 'ba-1970-55t.6', 'ba-1970-10.1', 'ba-1970-55t.2', 'ba-1970-55t.1', 'ba-1970-9a.3a', 'ba-1970-40.1']
Retrieved chunks similarity score: [0.9999998807907104, 0.8203828930854797, 0.7704546451568604, 0.7668531537055969, 0.75835782289505, 0.7692022919654846, 0.7747294902801514, 0.8659762740135193, 0.7862464785575867, 0.7480430006980896]
Position scores: [1.0]
Max scores: [0.9999998807907104]
Rank scores: [0.32727272727272727]


Reranking chunks: 100%|██████████| 10/10 [00:07<00:00,  1.35it/s]


Retrieved chunks similarity score: [0.7668531537055969, 0.7692022919654846, 0.7704546451568604, 0.9999998807907104, 0.8203828930854797]
Position scores: [0.4]
Max scores: [0.9999998807907104]
Rank scores: [0.95]
Raw Data Source: ba
Raw Classification: Factual
VectorDB Chunks: ['ba-1970-9.1', 'ba-1970-55t.1', 'ba-1970-12.3', 'ba-1970-9.3a', 'ba-1970-40.1', 'ba-1970-38.1', 'ba-1970-10.1', 'ba-1970-31.1a', 'ba-1970-10.2', 'ba-1970-9a.1']
Retrieved chunks similarity score: [0.9999998807907104, 0.8659762740135193, 0.7440372109413147, 0.7704546451568604, 0.7480430006980896, 0.6859292984008789, 0.7692022919654846, 0.8021577596664429, 0.7345101833343506, 0.8171002864837646]
Position scores: [1.0]
Max scores: [0.9999998807907104]
Rank scores: [0.3393939393939394]


Reranking chunks: 100%|██████████| 10/10 [00:06<00:00,  1.65it/s]


Retrieved chunks similarity score: [0.9999998807907104, 0.7692022919654846, 0.8021577596664429, 0.7345101833343506, 0.8171002864837646]
Position scores: [1.0]
Max scores: [0.9999998807907104]
Rank scores: [0.35]
Raw Data Source: ba
Raw Classification: Factual
VectorDB Chunks: ['ba-1970-9.1', 'ba-1970-55t.1', 'ba-1970-12.3', 'ba-1970-9.3a', 'ba-1970-40.1', 'ba-1970-38.1', 'ba-1970-10.1', 'ba-1970-31.1a', 'ba-1970-10.2', 'ba-1970-9a.1']
Retrieved chunks similarity score: [0.9999998807907104, 0.8659762740135193, 0.7440372109413147, 0.7704546451568604, 0.7480430006980896, 0.6859292984008789, 0.7692022919654846, 0.8021577596664429, 0.7345101833343506, 0.8171002864837646]
Position scores: [1.0]
Max scores: [0.9999998807907104]
Rank scores: [0.3393939393939394]


Reranking chunks: 100%|██████████| 10/10 [00:07<00:00,  1.36it/s]


Retrieved chunks similarity score: [0.9999998807907104, 0.7704546451568604, 0.7692022919654846, 0.8021577596664429, 0.7440372109413147]
Position scores: [1.0]
Max scores: [0.9999998807907104]
Rank scores: [0.15]
Raw Data Source: ba
Raw Classification: Factual
VectorDB Chunks: ['ba-1970-9.1', 'ba-1970-55t.1', 'ba-1970-12.3', 'ba-1970-9.3a', 'ba-1970-40.1', 'ba-1970-38.1', 'ba-1970-10.1', 'ba-1970-31.1a', 'ba-1970-10.2', 'ba-1970-9a.1']
Retrieved chunks similarity score: [0.9999998807907104, 0.8659762740135193, 0.7440372109413147, 0.7704546451568604, 0.7480430006980896, 0.6859292984008789, 0.7692022919654846, 0.8021577596664429, 0.7345101833343506, 0.8171002864837646]
Position scores: [1.0]
Max scores: [0.9999998807907104]
Rank scores: [0.3393939393939394]


Reranking chunks: 100%|██████████| 10/10 [00:11<00:00,  1.18s/it]


Retrieved chunks similarity score: [0.9999998807907104, 0.7704546451568604, 0.7692022919654846, 0.8021577596664429, 0.7440372109413147]
Position scores: [1.0]
Max scores: [0.9999998807907104]
Rank scores: [0.15]
Raw Data Source: ba
Raw Classification: Factual
VectorDB Chunks: ['ba-1970-53a.9', 'ba-1970-53a.8', 'ba-1970-19.3', 'ba-1970-29a.6', 'ba-1970-12.7', 'ba-1970-25.5', 'ba-1970-9.5a', 'ba-1970-26.8', 'ba-1970-39a.8', 'ba-1970-71']
Retrieved chunks similarity score: [0.6360379457473755, 0.7495587468147278, 0.7880329489707947, 0.7216784954071045, 0.7694778442382812, 0.708010733127594, 0.7088097929954529, 0.7915149927139282, 0.6949601173400879, 0.697676956653595]
Position scores: [0.0]
Max scores: [0.7915149927139282]
Rank scores: [0.44242424242424244]


Reranking chunks: 100%|██████████| 10/10 [00:04<00:00,  2.03it/s]


Retrieved chunks similarity score: [0.7495587468147278, 0.6949601173400879, 0.7216784954071045, 0.6360379457473755, 0.7915149927139282]
Position scores: [0.0]
Max scores: [0.7915149927139282]
Rank scores: [0.55]
Raw Data Source: ba
Raw Classification: Factual
VectorDB Chunks: ['ba-1970-53a.9', 'ba-1970-53a.8', 'ba-1970-19.3', 'ba-1970-29a.6', 'ba-1970-12.7', 'ba-1970-25.5', 'ba-1970-9.5a', 'ba-1970-26.8', 'ba-1970-39a.8', 'ba-1970-71']
Retrieved chunks similarity score: [0.6360379457473755, 0.7495587468147278, 0.7880329489707947, 0.7216784954071045, 0.7694778442382812, 0.708010733127594, 0.7088097929954529, 0.7915149927139282, 0.6949601173400879, 0.697676956653595]
Position scores: [0.0]
Max scores: [0.7915149927139282]
Rank scores: [0.44242424242424244]


Reranking chunks: 100%|██████████| 10/10 [00:05<00:00,  1.72it/s]


Retrieved chunks similarity score: [0.7216784954071045, 0.6949601173400879, 0.7495587468147278, 0.7694778442382812, 0.7088097929954529]
Position scores: [0.0]
Max scores: [0.7694778442382812]
Rank scores: [0.6]
Raw Data Source: ba
Raw Classification: Factual
VectorDB Chunks: ['ba-1970-53a.9', 'ba-1970-53a.8', 'ba-1970-19.3', 'ba-1970-29a.6', 'ba-1970-12.7', 'ba-1970-25.5', 'ba-1970-9.5a', 'ba-1970-26.8', 'ba-1970-39a.8', 'ba-1970-71']
Retrieved chunks similarity score: [0.6360379457473755, 0.7495587468147278, 0.7880329489707947, 0.7216784954071045, 0.7694778442382812, 0.708010733127594, 0.7088097929954529, 0.7915149927139282, 0.6949601173400879, 0.697676956653595]
Position scores: [0.0]
Max scores: [0.7915149927139282]
Rank scores: [0.44242424242424244]


Reranking chunks: 100%|██████████| 10/10 [00:05<00:00,  1.89it/s]


Retrieved chunks similarity score: [0.7216784954071045, 0.6949601173400879, 0.7495587468147278, 0.7694778442382812, 0.7088097929954529]
Position scores: [0.0]
Max scores: [0.7694778442382812]
Rank scores: [0.6]
Raw Data Source: ba
Raw Classification: Factual
VectorDB Chunks: ['ba-1970-57.3', 'ba-1970-57.2', 'ba-1970-57.1', 'ba-1970-57a.1', 'ba-1970-56', 'ba-1970-57.4', 'ba-1970-55u.1', 'ba-1970-4a.2', 'ba-1970-57eb.1', 'ba-1970-29.3']
Retrieved chunks similarity score: [0.9257653951644897, 0.944344699382782, 1.0, 0.8453415632247925, 0.6562513709068298, 0.3507389724254608, 0.6262465119361877, 0.6328372359275818, 0.6587145328521729, 0.6097064018249512]
Retrieved chunks similarity score: [0.9266220927238464, 1.0, 0.944344699382782, 0.8002603054046631, 0.6180703639984131, 0.3429679274559021, 0.6466114521026611, 0.6394248604774475, 0.6653096675872803, 0.6695326566696167]
Retrieved chunks similarity score: [1.0000001192092896, 0.9266220927238464, 0.9257653951644897, 0.7861167788505554, 0.656

Reranking chunks: 100%|██████████| 10/10 [00:04<00:00,  2.09it/s]


Retrieved chunks similarity score: [0.3507389724254608, 1.0, 0.944344699382782, 0.8453415632247925, 0.9257653951644897]
Retrieved chunks similarity score: [0.3429679274559021, 0.944344699382782, 1.0, 0.8002603054046631, 0.9266220927238464]
Retrieved chunks similarity score: [0.4267919659614563, 0.9257653951644897, 0.9266220927238464, 0.7861167788505554, 1.0000001192092896]
Retrieved chunks similarity score: [0.9999999403953552, 0.3507389724254608, 0.3429679274559021, 0.42473557591438293, 0.4267919659614563]
Retrieved chunks similarity score: [0.43780088424682617, 0.4584461450576782, 0.4684891402721405, 0.44004711508750916, 0.5350202918052673]
Retrieved chunks similarity score: [0.5777889490127563, 0.5072958469390869, 0.5241296291351318, 0.5120460391044617, 0.6020056009292603]
Retrieved chunks similarity score: [0.1479547768831253, 0.18320052325725555, 0.17905430495738983, 0.20667056739330292, 0.12315110117197037]
Position scores: [0.8, 0.6, 0.2, 1.0, 0.0, 0.0, 0.0]
Max scores: [1.0, 1.

Reranking chunks: 100%|██████████| 10/10 [00:10<00:00,  1.03s/it]


Retrieved chunks similarity score: [1.0, 0.944344699382782, 0.9257653951644897, 0.8453415632247925, 0.3507389724254608]
Retrieved chunks similarity score: [0.944344699382782, 1.0, 0.9266220927238464, 0.8002603054046631, 0.3429679274559021]
Retrieved chunks similarity score: [0.9257653951644897, 0.9266220927238464, 1.0000001192092896, 0.7861167788505554, 0.4267919659614563]
Retrieved chunks similarity score: [0.3507389724254608, 0.3429679274559021, 0.4267919659614563, 0.42473557591438293, 0.9999999403953552]
Retrieved chunks similarity score: [0.4584461450576782, 0.4684891402721405, 0.5350202918052673, 0.44004711508750916, 0.43780088424682617]
Retrieved chunks similarity score: [0.5072958469390869, 0.5241296291351318, 0.6020056009292603, 0.5120460391044617, 0.5777889490127563]
Retrieved chunks similarity score: [0.18320052325725555, 0.17905430495738983, 0.12315110117197037, 0.20667056739330292, 0.1479547768831253]
Position scores: [1.0, 0.8, 0.6, 0.2, 0.0, 0.0, 0.0]
Max scores: [1.0, 1.

Reranking chunks: 100%|██████████| 10/10 [00:05<00:00,  1.75it/s]


Retrieved chunks similarity score: [1.0, 0.944344699382782, 0.9257653951644897, 0.8453415632247925, 0.3507389724254608]
Retrieved chunks similarity score: [0.944344699382782, 1.0, 0.9266220927238464, 0.8002603054046631, 0.3429679274559021]
Retrieved chunks similarity score: [0.9257653951644897, 0.9266220927238464, 1.0000001192092896, 0.7861167788505554, 0.4267919659614563]
Retrieved chunks similarity score: [0.3507389724254608, 0.3429679274559021, 0.4267919659614563, 0.42473557591438293, 0.9999999403953552]
Retrieved chunks similarity score: [0.4584461450576782, 0.4684891402721405, 0.5350202918052673, 0.44004711508750916, 0.43780088424682617]
Retrieved chunks similarity score: [0.5072958469390869, 0.5241296291351318, 0.6020056009292603, 0.5120460391044617, 0.5777889490127563]
Retrieved chunks similarity score: [0.18320052325725555, 0.17905430495738983, 0.12315110117197037, 0.20667056739330292, 0.1479547768831253]
Position scores: [1.0, 0.8, 0.6, 0.2, 0.0, 0.0, 0.0]
Max scores: [1.0, 1.

Reranking chunks: 100%|██████████| 10/10 [00:05<00:00,  1.73it/s]


Retrieved chunks similarity score: [0.6987694501876831, 0.5953507423400879, 0.7817621827125549, 1.0000001192092896, 0.7303391098976135]
Position scores: [0.4]
Max scores: [1.0000001192092896]
Rank scores: [0.8]
Raw Data Source: ba
Raw Classification: Factual
VectorDB Chunks: ['ba-1970-11a', 'ba-1970-20.4', 'ba-1970-55za.6', 'ba-1970-55za.5', 'ba-1970-55za.2', 'ba-1970-20.7', 'ba-1970-7.5', 'ba-1970-20.2', 'ba-1970-20.3', 'ba-1970-20.1']
Retrieved chunks similarity score: [0.6964688897132874, 1.0000001192092896, 0.6987694501876831, 0.7007160782814026, 0.7169982194900513, 0.5953507423400879, 0.7303391098976135, 0.7817621827125549, 0.8143907785415649, 0.683028519153595]
Position scores: [0.9]
Max scores: [1.0000001192092896]
Rank scores: [0.5151515151515151]


Reranking chunks: 100%|██████████| 10/10 [00:06<00:00,  1.54it/s]


Retrieved chunks similarity score: [0.7817621827125549, 1.0000001192092896, 0.5953507423400879, 0.7007160782814026, 0.7303391098976135]
Position scores: [0.8]
Max scores: [1.0000001192092896]
Rank scores: [0.25]
Raw Data Source: ba
Raw Classification: Factual
VectorDB Chunks: ['ba-1970-11a', 'ba-1970-20.4', 'ba-1970-55za.6', 'ba-1970-55za.5', 'ba-1970-55za.2', 'ba-1970-20.7', 'ba-1970-7.5', 'ba-1970-20.2', 'ba-1970-20.3', 'ba-1970-20.1']
Retrieved chunks similarity score: [0.6964688897132874, 1.0000001192092896, 0.6987694501876831, 0.7007160782814026, 0.7169982194900513, 0.5953507423400879, 0.7303391098976135, 0.7817621827125549, 0.8143907785415649, 0.683028519153595]
Position scores: [0.9]
Max scores: [1.0000001192092896]
Rank scores: [0.5151515151515151]


Reranking chunks: 100%|██████████| 10/10 [00:06<00:00,  1.49it/s]


Retrieved chunks similarity score: [0.7817621827125549, 1.0000001192092896, 0.5953507423400879, 0.7007160782814026, 0.7303391098976135]
Position scores: [0.8]
Max scores: [1.0000001192092896]
Rank scores: [0.25]
Raw Data Source: ba
Raw Classification: The question is classified as 'Reasoning'. It requires inference and analysis of the relationships between the bank's financial situation, capital adequacy management strategies, and the auditor's responsibilities.
VectorDB Chunks: ['ba-1970-58.8', 'ba-1970-10.2a', 'ba-1970-9a.3', 'ba-1970-10.3', 'ba-1970-48aa.1', 'ba-1970-48aa.2', 'ba-1970-39a.5', 'ba-1970-10.2', 'ba-1970-58.1', 'ba-1970-46b.4']
Retrieved chunks similarity score: [0.5648605227470398, 0.503929615020752, 0.6651657223701477, 0.7024672627449036, 0.5496634840965271, 0.5882453322410583, 0.45479586720466614, 0.6955890655517578, 0.424751877784729, 0.5163326859474182]
Retrieved chunks similarity score: [1.0, 0.5552387833595276, 0.4099576473236084, 0.6795032620429993, 0.6823636889

Reranking chunks: 100%|██████████| 10/10 [00:05<00:00,  1.89it/s]


Retrieved chunks similarity score: [0.5648605227470398, 0.424751877784729, 0.6955890655517578, 0.7024672627449036, 0.5163326859474182]
Retrieved chunks similarity score: [1.0, 0.6227365136146545, 0.5997809171676636, 0.6795032620429993, 0.7509393095970154]
Position scores: [0.0, 1.0]
Max scores: [0.7024672627449036, 1.0]
Rank scores: [0.6, 0.45]
Raw Data Source: ba
Raw Classification: The question is classified as 'Reasoning' because it requires an analysis of the relationship between the bank's significant losses, capital adequacy management, and the auditor's responsibilities, which involves drawing conclusions and applying logical steps rather than just retrieving facts.
VectorDB Chunks: ['ba-1970-58.8', 'ba-1970-10.2a', 'ba-1970-9a.3', 'ba-1970-10.3', 'ba-1970-48aa.1', 'ba-1970-48aa.2', 'ba-1970-39a.5', 'ba-1970-10.2', 'ba-1970-58.1', 'ba-1970-46b.4']
Retrieved chunks similarity score: [0.5648605227470398, 0.503929615020752, 0.6651657223701477, 0.7024672627449036, 0.5496634840965271

Reranking chunks: 100%|██████████| 10/10 [00:06<00:00,  1.60it/s]


Retrieved chunks similarity score: [0.5648605227470398, 0.5163326859474182, 0.424751877784729, 0.6955890655517578, 0.7024672627449036]
Retrieved chunks similarity score: [1.0, 0.7509393095970154, 0.6227365136146545, 0.5997809171676636, 0.6795032620429993]
Position scores: [0.0, 1.0]
Max scores: [0.7024672627449036, 1.0]
Rank scores: [0.8, 0.15]
Raw Data Source: ba
Raw Classification: The question is classified as 'Reasoning'. It requires inference and analysis of the relationships between capital management, capital adequacy regulations, and the auditor's responsibilities in the context of the bank's significant losses.
VectorDB Chunks: ['ba-1970-58.8', 'ba-1970-10.2a', 'ba-1970-9a.3', 'ba-1970-10.3', 'ba-1970-48aa.1', 'ba-1970-48aa.2', 'ba-1970-39a.5', 'ba-1970-10.2', 'ba-1970-58.1', 'ba-1970-46b.4']
Retrieved chunks similarity score: [0.5648605227470398, 0.503929615020752, 0.6651657223701477, 0.7024672627449036, 0.5496634840965271, 0.5882453322410583, 0.45479586720466614, 0.695589065

Reranking chunks: 100%|██████████| 10/10 [00:06<00:00,  1.53it/s]


Retrieved chunks similarity score: [0.5648605227470398, 0.5163326859474182, 0.424751877784729, 0.6955890655517578, 0.7024672627449036]
Retrieved chunks similarity score: [1.0, 0.7509393095970154, 0.6227365136146545, 0.5997809171676636, 0.6795032620429993]
Position scores: [0.0, 1.0]
Max scores: [0.7024672627449036, 1.0]
Rank scores: [0.8, 0.15]
Raw Data Source: ba
Raw Classification: The question is classified as 'Reasoning'. It requires inference about how the Authority might address the situation and the broader implications for the bank, rather than seeking a straightforward factual answer.
VectorDB Chunks: ['ba-1970-57.3', 'ba-1970-29a.4', 'ba-1970-5a.2', 'ba-1970-57.1', 'ba-1970-5.2b', 'ba-1970-57a.1', 'ba-1970-5.1', 'ba-1970-56', 'ba-1970-57.2', 'ba-1970-47a.4']
Retrieved chunks similarity score: [0.9257653951644897, 0.6209722757339478, 0.6025153994560242, 1.0, 0.4976455867290497, 0.8453415632247925, 0.5088929533958435, 0.6562513709068298, 0.944344699382782, 0.5586485862731934]
R

Reranking chunks: 100%|██████████| 10/10 [00:05<00:00,  1.83it/s]


Retrieved chunks similarity score: [0.4976455867290497, 0.9257653951644897, 0.6209722757339478, 0.5586485862731934, 0.5088929533958435]
Retrieved chunks similarity score: [0.5283324122428894, 0.9266220927238464, 0.6828244924545288, 0.5620046854019165, 0.48966214060783386]
Retrieved chunks similarity score: [0.47180256247520447, 1.0000001192092896, 0.6217442750930786, 0.5088993906974792, 0.45543551445007324]
Retrieved chunks similarity score: [0.4102723002433777, 0.06654896587133408, 0.3320867717266083, 0.30304089188575745, 0.27275416254997253]
Position scores: [0.0, 0.0, 0.8, 0.0]
Max scores: [0.9257653951644897, 0.9266220927238464, 1.0000001192092896, 0.4102723002433777]
Rank scores: [0.5, 0.3, 0.3, 0.3]
Raw Data Source: ba
Raw Classification: The question is classified as 'Reasoning' because it requires inference and analysis of potential regulatory actions by the Authority and the broader implications on the bank. It involves evaluating the situation and making logical deductions ra

Reranking chunks: 100%|██████████| 10/10 [00:05<00:00,  1.71it/s]


Retrieved chunks similarity score: [1.0, 0.9257653951644897, 0.944344699382782, 0.6562513709068298, 0.6209722757339478]
Retrieved chunks similarity score: [0.944344699382782, 0.9266220927238464, 1.0, 0.6180703639984131, 0.6828244924545288]
Retrieved chunks similarity score: [0.9257653951644897, 1.0000001192092896, 0.9266220927238464, 0.6566728949546814, 0.6217442750930786]
Retrieved chunks similarity score: [0.08238939940929413, 0.06654896587133408, 0.12851502001285553, 0.08431846648454666, 0.3320867717266083]
Position scores: [1.0, 0.6, 0.8, 0.0]
Max scores: [1.0, 1.0, 1.0000001192092896, 0.3320867717266083]
Rank scores: [0.05, 0.2, 0.15, 0.9]
Raw Data Source: ba
Raw Classification: The question is classified as 'Reasoning.' It requires analyzing the implications of the bank's actions and potential regulatory responses, as well as considering the broader impact on the bank, which involves inferential thinking.
VectorDB Chunks: ['ba-1970-57.3', 'ba-1970-29a.4', 'ba-1970-5a.2', 'ba-1970

Reranking chunks: 100%|██████████| 10/10 [00:06<00:00,  1.58it/s]


Retrieved chunks similarity score: [1.0, 0.9257653951644897, 0.944344699382782, 0.6562513709068298, 0.6209722757339478]
Retrieved chunks similarity score: [0.944344699382782, 0.9266220927238464, 1.0, 0.6180703639984131, 0.6828244924545288]
Retrieved chunks similarity score: [0.9257653951644897, 1.0000001192092896, 0.9266220927238464, 0.6566728949546814, 0.6217442750930786]
Retrieved chunks similarity score: [0.08238939940929413, 0.06654896587133408, 0.12851502001285553, 0.08431846648454666, 0.3320867717266083]
Position scores: [1.0, 0.6, 0.8, 0.0]
Max scores: [1.0, 1.0, 1.0000001192092896, 0.3320867717266083]
Rank scores: [0.05, 0.2, 0.15, 0.9]
Raw Data Source: ba
Raw Classification: The question is classified as 'Reasoning'. It requires inference and deduction to analyze the implications of the bank's actions and how they relate to regulatory consequences.
VectorDB Chunks: ['ba-1970-58.8', 'ba-1970-48.1', 'ba-1970-38.6e', 'ba-1970-49.1', 'ba-1970-48aa.2', 'ba-1970-39a.6', 'ba-1970-9.4

Reranking chunks: 100%|██████████| 10/10 [00:05<00:00,  1.86it/s]


Retrieved chunks similarity score: [0.5045673847198486, 0.5712306499481201, 0.5076613426208496, 0.6309671998023987, 0.5213866829872131]
Retrieved chunks similarity score: [0.3984113335609436, 0.32835015654563904, 0.39746519923210144, 0.3860553503036499, 0.3653934895992279]
Position scores: [0.0, 0.0]
Max scores: [0.6309671998023987, 0.3984113335609436]
Rank scores: [0.75, 0.3]
Raw Data Source: ba
Raw Classification: Reasoning
VectorDB Chunks: ['ba-1970-58.8', 'ba-1970-48.1', 'ba-1970-38.6e', 'ba-1970-49.1', 'ba-1970-48aa.2', 'ba-1970-39a.6', 'ba-1970-9.4', 'ba-1970-9a.5', 'ba-1970-9.5a', 'ba-1970-10.5']
Retrieved chunks similarity score: [0.5268644094467163, 0.5057857036590576, 0.5076613426208496, 0.4692282974720001, 0.48199963569641113, 0.5213866829872131, 0.6309671998023987, 0.5712306499481201, 0.46957793831825256, 0.5045673847198486]
Retrieved chunks similarity score: [0.2974216341972351, 0.4188958406448364, 0.39746519923210144, 0.42581290006637573, 0.2859366834163666, 0.36539348959

Reranking chunks: 100%|██████████| 10/10 [00:05<00:00,  1.69it/s]


Retrieved chunks similarity score: [0.5213866829872131, 0.5712306499481201, 0.4692282974720001, 0.5076613426208496, 0.5045673847198486]
Retrieved chunks similarity score: [0.3653934895992279, 0.32835015654563904, 0.42581290006637573, 0.39746519923210144, 0.3984113335609436]
Position scores: [0.0, 0.0]
Max scores: [0.5712306499481201, 0.42581290006637573]
Rank scores: [0.2, 0.8]
Raw Data Source: ba
Raw Classification: The question is classified as 'Reasoning'. It requires inference and analysis of the relationships between the delayed financial statements and the capital maintenance requirements to determine the regulatory consequences.
VectorDB Chunks: ['ba-1970-58.8', 'ba-1970-48.1', 'ba-1970-38.6e', 'ba-1970-49.1', 'ba-1970-48aa.2', 'ba-1970-39a.6', 'ba-1970-9.4', 'ba-1970-9a.5', 'ba-1970-9.5a', 'ba-1970-10.5']
Retrieved chunks similarity score: [0.5268644094467163, 0.5057857036590576, 0.5076613426208496, 0.4692282974720001, 0.48199963569641113, 0.5213866829872131, 0.6309671998023987

Reranking chunks: 100%|██████████| 10/10 [00:06<00:00,  1.49it/s]


Retrieved chunks similarity score: [0.5213866829872131, 0.5712306499481201, 0.4692282974720001, 0.5076613426208496, 0.5045673847198486]
Retrieved chunks similarity score: [0.3653934895992279, 0.32835015654563904, 0.42581290006637573, 0.39746519923210144, 0.3984113335609436]
Position scores: [0.0, 0.0]
Max scores: [0.5712306499481201, 0.42581290006637573]
Rank scores: [0.2, 0.8]
Raw Data Source: ba
Raw Classification: The question is classified as 'Reasoning'. It requires inference about the regulatory requirements that must be met based on the bank's plans to repurchase shares and update its board of directors, which involves understanding relationships and applying logical steps related to financial regulations.
VectorDB Chunks: ['ba-1970-9a.2', 'ba-1970-9.3', 'ba-1970-10.2a', 'ba-1970-55t.4', 'ba-1970-16.3', 'ba-1970-9a.3b', 'ba-1970-10a.3', 'ba-1970-9a.3', 'ba-1970-55y.3', 'ba-1970-65.2']
Retrieved chunks similarity score: [0.7386410236358643, 1.000000238418579, 0.4182462990283966, 

Reranking chunks: 100%|██████████| 10/10 [00:06<00:00,  1.47it/s]


Retrieved chunks similarity score: [0.7386410236358643, 1.000000238418579, 0.41788288950920105, 0.8395041227340698, 0.5256891250610352]
Retrieved chunks similarity score: [0.4366194009780884, 0.6134555339813232, 0.5404318571090698, 0.5816356539726257, 0.3835964500904083]
Position scores: [0.8, 0.0]
Max scores: [1.000000238418579, 0.6134555339813232]
Rank scores: [0.35, 0.35]
Raw Data Source: ba
Raw Classification: The question is classified as 'Factual'. It asks for specific regulatory requirements that the bank must fulfill, which can be answered with retrieved information about regulations regarding share repurchase and changes in the board of directors.
VectorDB Chunks: ['ba-1970-9a.2', 'ba-1970-9.3', 'ba-1970-10.2a', 'ba-1970-55t.4', 'ba-1970-16.3', 'ba-1970-9a.3b', 'ba-1970-10a.3', 'ba-1970-9a.3', 'ba-1970-55y.3', 'ba-1970-65.2']
Retrieved chunks similarity score: [0.7386410236358643, 1.000000238418579, 0.4182462990283966, 0.8395041227340698, 0.7424059510231018, 0.5256891250610352

Reranking chunks: 100%|██████████| 10/10 [00:12<00:00,  1.22s/it]


Retrieved chunks similarity score: [1.000000238418579, 0.7386410236358643, 0.8395041227340698, 0.5256891250610352, 0.41788288950920105]
Retrieved chunks similarity score: [0.6134555339813232, 0.4366194009780884, 0.5816356539726257, 0.3835964500904083, 0.5404318571090698]
Position scores: [1.0, 0.0]
Max scores: [1.000000238418579, 0.6134555339813232]
Rank scores: [0.05, 0.25]
Raw Data Source: ba
Raw Classification: The question is classified as 'Reasoning'. It requires an analysis of the relationships between the bank's actions (repurchasing shares and updating the board of directors) and the relevant regulatory requirements. This involves deducing what specific laws and regulations apply in this context rather than simply retrieving facts.
VectorDB Chunks: ['ba-1970-9a.2', 'ba-1970-9.3', 'ba-1970-10.2a', 'ba-1970-55t.4', 'ba-1970-16.3', 'ba-1970-9a.3b', 'ba-1970-10a.3', 'ba-1970-9a.3', 'ba-1970-55y.3', 'ba-1970-65.2']
Retrieved chunks similarity score: [0.7386410236358643, 1.0000002384

Reranking chunks: 100%|██████████| 10/10 [00:11<00:00,  1.19s/it]


Retrieved chunks similarity score: [1.000000238418579, 0.7386410236358643, 0.8395041227340698, 0.5256891250610352, 0.41788288950920105]
Retrieved chunks similarity score: [0.6134555339813232, 0.4366194009780884, 0.5816356539726257, 0.3835964500904083, 0.5404318571090698]
Position scores: [1.0, 0.0]
Max scores: [1.000000238418579, 0.6134555339813232]
Rank scores: [0.05, 0.25]
Raw Data Source: ba
Raw Classification: Reasoning
VectorDB Chunks: ['ba-1970-10a.3', 'ba-1970-38.4', 'ba-1970-9.5a', 'ba-1970-58.8', 'ba-1970-65.2', 'ba-1970-38.6e', 'ba-1970-39a.3', 'ba-1970-49.1', 'ba-1970-10.5', 'ba-1970-65.5']
Retrieved chunks similarity score: [0.42812538146972656, 0.2989825904369354, 0.22537986934185028, 0.3516567349433899, 0.1421690434217453, 0.19440172612667084, 0.18041980266571045, 0.18082334101200104, 0.2329459935426712, 0.22033041715621948]
Retrieved chunks similarity score: [0.300748735666275, 0.2957405149936676, 0.2945484519004822, 0.24637092649936676, 0.39583849906921387, 0.3419288694

Reranking chunks: 100%|██████████| 10/10 [00:06<00:00,  1.61it/s]


Retrieved chunks similarity score: [0.42812538146972656, 0.2329459935426712, 0.22033041715621948, 0.18082334101200104, 0.19440172612667084]
Retrieved chunks similarity score: [0.300748735666275, 0.25961166620254517, 0.23610691726207733, 0.31206443905830383, 0.3419288694858551]
Retrieved chunks similarity score: [0.31372636556625366, 0.0790400430560112, 0.042067643254995346, 0.1580982655286789, 0.14811725914478302]
Retrieved chunks similarity score: [0.8216290473937988, 0.43865031003952026, 0.39979326725006104, 0.40181443095207214, 0.492710679769516]
Retrieved chunks similarity score: [0.8002648949623108, 0.3904597759246826, 0.3419172763824463, 0.47735658288002014, 0.5833427906036377]
Retrieved chunks similarity score: [1.0000001192092896, 0.5585511326789856, 0.4766210615634918, 0.5772002339363098, 0.6651701927185059]
Retrieved chunks similarity score: [0.35539206862449646, 0.3984113335609436, 0.36052432656288147, 0.42581290006637573, 0.39746519923210144]
Position scores: [0.0, 0.0, 0.0

Reranking chunks: 100%|██████████| 10/10 [00:06<00:00,  1.55it/s]


Retrieved chunks similarity score: [0.42812538146972656, 0.2989825904369354, 0.18082334101200104, 0.22033041715621948, 0.2329459935426712]
Retrieved chunks similarity score: [0.300748735666275, 0.2957405149936676, 0.31206443905830383, 0.23610691726207733, 0.25961166620254517]
Retrieved chunks similarity score: [0.31372636556625366, 0.0781601071357727, 0.1580982655286789, 0.042067643254995346, 0.0790400430560112]
Retrieved chunks similarity score: [0.8216290473937988, 0.4982829689979553, 0.40181443095207214, 0.39979326725006104, 0.43865031003952026]
Retrieved chunks similarity score: [0.8002648949623108, 0.4514560103416443, 0.47735658288002014, 0.3419172763824463, 0.3904597759246826]
Retrieved chunks similarity score: [1.0000001192092896, 0.5522184371948242, 0.5772002339363098, 0.4766210615634918, 0.5585511326789856]
Retrieved chunks similarity score: [0.35539206862449646, 0.4170403480529785, 0.42581290006637573, 0.36052432656288147, 0.3984113335609436]
Position scores: [0.0, 0.0, 0.0, 

Reranking chunks: 100%|██████████| 10/10 [00:07<00:00,  1.29it/s]


Retrieved chunks similarity score: [0.42812538146972656, 0.2989825904369354, 0.18082334101200104, 0.22033041715621948, 0.2329459935426712]
Retrieved chunks similarity score: [0.300748735666275, 0.2957405149936676, 0.31206443905830383, 0.23610691726207733, 0.25961166620254517]
Retrieved chunks similarity score: [0.31372636556625366, 0.0781601071357727, 0.1580982655286789, 0.042067643254995346, 0.0790400430560112]
Retrieved chunks similarity score: [0.8216290473937988, 0.4982829689979553, 0.40181443095207214, 0.39979326725006104, 0.43865031003952026]
Retrieved chunks similarity score: [0.8002648949623108, 0.4514560103416443, 0.47735658288002014, 0.3419172763824463, 0.3904597759246826]
Retrieved chunks similarity score: [1.0000001192092896, 0.5522184371948242, 0.5772002339363098, 0.4766210615634918, 0.5585511326789856]
Retrieved chunks similarity score: [0.35539206862449646, 0.4170403480529785, 0.42581290006637573, 0.36052432656288147, 0.3984113335609436]
Position scores: [0.0, 0.0, 0.0, 

In [None]:
# Persist df_bfs to 'df_bfs.csv' (relative path; the row index is written as the first column).
df_bfs.to_csv(path_or_buf='df_bfs.csv')

In [70]:
# Find the best hop
# Average answer_score per n_hops value; df_hop_score is kept as a two-column
# frame (n_hops, answer_score) so it can be displayed afterwards.
df_hop_score = (
    df_bfs
    .groupby('n_hops')['answer_score']
    .mean()
    .reset_index()
)
# Highest mean score wins; on a tie the smaller hop count is preferred
# because n_hops is the ascending secondary sort key.
bfs_best_hop = int(
    df_hop_score
    .sort_values(by=['answer_score', 'n_hops'], ascending=[False, True])['n_hops']
    .iloc[0]
)
print('BFS best hop:', bfs_best_hop)

BFS best hop: 3


In [72]:
# Show the mean answer_score for each n_hops value computed from df_bfs.
df_hop_score

Unnamed: 0,n_hops,answer_score
0,1,4.1
1,3,4.8
2,5,4.7


In [None]:
# Get result for DFS
# Runs get_evaluation once per row of df_queries with retriever_method='dfs',
# passing bfs_best_hop as n_hop, and collects the results into df_dfs.
dfs_queries_evaluation = []
# The row label is not used; it is bound to a private name so the builtin
# `id` is not overwritten in the notebook's global namespace.
for _query_idx, row in df_queries.iterrows():
    query = row['query']
    correct_source = row['correct_source']
    correct_type = row['correct_type']
    # 'reference_retrieval' holds one or more comma-separated reference ids
    # (presumably chunk ids -- verify against df_queries). Splitting on the bare
    # comma and stripping whitespace makes "a,b" and "a, b" parse identically;
    # a value with no comma still yields a single-element list.
    reference_retrieval = [ref.strip() for ref in row['reference_retrieval'].split(',')]
    reference_generation = row['reference_generation']
    evaluation = get_evaluation(
        query,
        correct_source,
        correct_type,
        reference_retrieval,
        reference_generation,
        n_hop=bfs_best_hop,
        retriever_method='dfs',
    )
    dfs_queries_evaluation.append(evaluation)

# One row per evaluated query.
df_dfs = pd.DataFrame(dfs_queries_evaluation)

Query: What is the minimum paid-up capital required for a bank incorporated in Singapore?
Raw Data Source: ba
Raw Classification: Factual
VectorDB Chunks: ['ba-1970-9.1', 'ba-1970-9.2', 'ba-1970-9.3a', 'ba-1970-9.2a', 'ba-1970-55t.6', 'ba-1970-10.1', 'ba-1970-55t.2', 'ba-1970-55t.1', 'ba-1970-9a.3a', 'ba-1970-40.1']
Retrieved chunks similarity score: [0.9999998807907104, 0.8203828930854797, 0.7704546451568604, 0.7668531537055969, 0.75835782289505, 0.7692022919654846, 0.7747294902801514, 0.8659762740135193, 0.7862464785575867, 0.7480430006980896]
Position scores: [1.0]
Max scores: [0.9999998807907104]
Rank scores: [0.32727272727272727]


Reranking chunks: 100%|██████████| 8/8 [00:03<00:00,  2.11it/s]s]
Reranking chunks: 100%|██████████| 8/8 [00:02<00:00,  3.34it/s]
Reranking chunks: 100%|██████████| 8/8 [00:02<00:00,  3.38it/s]
Reranking chunks: 100%|██████████| 7/7 [00:01<00:00,  3.69it/s]
Reranking chunks: 100%|██████████| 5/5 [00:01<00:00,  3.13it/s]
Reranking chunks: 100%|██████████| 28/28 [00:08<00:00,  3.32it/s]
Reranking chunks: 100%|██████████| 6/6 [00:01<00:00,  3.87it/s]
Reranking chunks: 100%|██████████| 4/4 [00:01<00:00,  2.83it/s]
Reranking chunks: 100%|██████████| 6/6 [00:01<00:00,  3.57it/s]
Reranking chunks: 100%|██████████| 7/7 [00:02<00:00,  2.93it/s]
Reranking chunks: 100%|██████████| 23/23 [00:05<00:00,  4.22it/s].28s/it]
Reranking chunks: 100%|██████████| 23/23 [00:06<00:00,  3.68it/s]
Reranking chunks: 100%|██████████| 8/8 [00:03<00:00,  2.06it/s]
Reranking chunks: 100%|██████████| 8/8 [00:02<00:00,  2.74it/s]
Reranking chunks: 100%|██████████| 4/4 [00:01<00:00,  3.29it/s]
Reranking chunks: 100%|█

Root: ba-1970-9.1//Path_id: ['ba-1970-9.1', 'ba-1970-9a.1', 'ba-1970-9.3', 'ba-1970-10.1']//Text: ['9(1) Subject to this Act, a company must not be granted a bank licence unless — (a) where the company intends to carry on banking business in Singapore as a bank incorporated in Singapore, its paid‑up capital is at least $1,500 million or such other amount as may be prescribed, and its capital funds are at least that amount; or (b) where the company intends to carry on banking business in Singapore through a branch or office located in Singapore, its head office capital funds are at least the equivalent of $200 million. [5/2016; 1/2020]', '9A(1) Despite section 9, a company incorporated in Singapore which is a qualifying subsidiary may be granted a bank licence under section 7 if its paid‑up capital is at least $100 million. [5/2016; 1/2020]', '9(3) A bank incorporated in Singapore must not reduce its paid‑up capital, or purchase or otherwise acquire shares issued by the bank if such sha

Reranking chunks: 100%|██████████| 10/10 [00:08<00:00,  1.14it/s]


['(ba-1970-9.2) 9(2) Subject to subsection (2A), the paid‑up capital and capital funds of a bank incorporated in Singapore must be denominated in Singapore dollars or any currency approved by the Authority, and must be in ordinary shares. [5/2016]   (ba-1970-9a.1) 9A(1) Despite section 9, a company incorporated in Singapore which is a qualifying subsidiary may be granted a bank licence under section 7 if its paid‑up capital is at least $100 million. [5/2016; 1/2020]   (ba-1970-9.3) 9(3) A bank incorporated in Singapore must not reduce its paid‑up capital, or purchase or otherwise acquire shares issued by the bank if such shares are to be held as treasury shares, without the approval of the Authority.   (ba-1970-10.1) 10(1) The Authority may, by written notice, require any bank in Singapore or class of banks in Singapore to maintain capital funds in Singapore of such amount (not being less than the minimum prescribed in section 9 or 9A, as the case may be) and in such manner as the Auth

In [None]:
# Persist df_dfs to 'df_dfs.csv' (relative path; the row index is written as the first column).
df_dfs.to_csv(path_or_buf='df_dfs.csv')

In [61]:
# Get result for weighted
# Runs get_evaluation once per row of df_queries with retriever_method='weighted',
# passing bfs_best_hop as n_hop, and collects the results into df_weighted.
weighted_queries_evaluation = []
# The row label is not used; it is bound to a private name so the builtin
# `id` is not overwritten in the notebook's global namespace.
for _query_idx, row in df_queries.iterrows():
    query = row['query']
    correct_source = row['correct_source']
    correct_type = row['correct_type']
    # 'reference_retrieval' holds one or more comma-separated reference ids
    # (presumably chunk ids -- verify against df_queries). Splitting on the bare
    # comma and stripping whitespace makes "a,b" and "a, b" parse identically;
    # a value with no comma still yields a single-element list.
    reference_retrieval = [ref.strip() for ref in row['reference_retrieval'].split(',')]
    reference_generation = row['reference_generation']
    evaluation = get_evaluation(
        query,
        correct_source,
        correct_type,
        reference_retrieval,
        reference_generation,
        n_hop=bfs_best_hop,
        retriever_method='weighted',
    )
    weighted_queries_evaluation.append(evaluation)

# One row per evaluated query.
df_weighted = pd.DataFrame(weighted_queries_evaluation)

Query: What is the minimum paid-up capital required for a bank incorporated in Singapore?
Raw Data Source: ba
Raw Classification: Factual
VectorDB Chunks: ['ba-1970-9.1', 'ba-1970-9.2', 'ba-1970-9.3a', 'ba-1970-9.2a', 'ba-1970-55t.6', 'ba-1970-10.1', 'ba-1970-55t.2', 'ba-1970-55t.1', 'ba-1970-9a.3a', 'ba-1970-40.1']
Retrieved chunks similarity score: [0.9999998807907104, 0.8203828930854797, 0.7704546451568604, 0.7668531537055969, 0.75835782289505, 0.7692022919654846, 0.7747294902801514, 0.8659762740135193, 0.7862464785575867, 0.7480430006980896]
Position scores: [1.0]
Max scores: [0.9999998807907104]
Rank scores: [0.32727272727272727]
Document: ID: ba-1970-9.1
9(1) Subject to this Act, a company must not be granted a bank licence unless — (a) ...
Combined Score: 0.8994671702384949
--------------------------------------------------
Document: ID: ba-1970-9.2
9(2) Subject to subsection (2A), the paid‑up capital and capital funds of a bank inc...
Combined Score: 0.8976037502288818
--------

In [None]:
# Persist df_weighted to 'df_weighted.csv' (relative path; the row index is written as the first column).
df_weighted.to_csv(path_or_buf='df_weighted.csv')

In [None]:
# Stack the BFS, DFS and weighted result frames row-wise, in that order.
# Each frame keeps its own row labels, so index values may repeat in df_combined.
df_combined = pd.concat(objs=[df_bfs, df_dfs, df_weighted])
# Save the stacked frame (index included) next to the per-method CSVs.
df_combined.to_csv(path_or_buf='df_combined.csv')

In [69]:
# Mean answer_score per graphdb_retrieval_method, restricted to the rows whose
# n_hops equals bfs_best_hop.
df_method_score = (
    df_combined[df_combined['n_hops'] == bfs_best_hop]
    .groupby('graphdb_retrieval_method')['answer_score']
    .mean()
    .reset_index()
)
# The method with the highest mean score comes first after the descending sort.
best_method = (
    df_method_score
    .sort_values(by=['answer_score'], ascending=[False])['graphdb_retrieval_method']
    .iloc[0]
)
print('Best retrieval method:', best_method)

Best retrieval method: bfs


In [None]:
# Show the mean answer_score per graphdb_retrieval_method at n_hops == bfs_best_hop.
df_method_score