In [1]:
import os
import fitz
import re

from ollama import Client
import faiss
import pandas as pd
import numpy as np
import Stemmer
from tqdm import tqdm
import gradio as gr

from llama_index.core import Document
from llama_index.core.node_parser import TokenTextSplitter
from llama_index.core.retrievers import BaseRetriever, QueryFusionRetriever
from llama_index.core.schema import TextNode, NodeWithScore
from llama_index.retrievers.bm25 import BM25Retriever
from llama_index.core.indices.query.query_transform import HyDEQueryTransform
from llama_index.core.query_engine import TransformQueryEngine
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.vector_stores.faiss import FaissVectorStore
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings
from llama_index.core.storage import StorageContext
from llama_index.llms.ollama import Ollama

from IPython.display import Markdown, display

# Connect to Ollama Server

In [2]:
client = Client(
  host='http://localhost:11434',
)

# Settings

In [3]:
Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/94.8k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

# Ingestion

In [4]:
import json
# Path to the dataset folder
DATASET_PATH = r'/home/jeryl4913/lto_rag_reviewer/notebooks/extracted_text.json'

def get_text_and_metadata(input_path):
    """Load text and metadata from a file and perform chunking."""
    with open(input_path, "r", encoding="utf-8") as f:
        extracted_data = json.load(f)

    texts = []
    metadata = []

    for entry in tqdm(extracted_data, desc="Processing entries"):
        text = entry["text"]
        source_metadata = {
            "source": entry["source"],
            "folder": entry["folder"],
            "file_name": entry["file_name"],
            "page": entry["page"],
            "title": entry["title"],
            "url": entry["url"]
        }
        texts.append(text)
        metadata.append(source_metadata)

    return texts, metadata


In [5]:
docs, metadatas = get_text_and_metadata(DATASET_PATH)

Processing entries: 100%|██████████| 7815/7815 [00:00<00:00, 1008320.59it/s]


In [6]:
documents = [Document(text=docs[t], metadata=metadatas[t]) for t in range(len(docs))]
splitter = TokenTextSplitter(
    chunk_size=512,
    chunk_overlap=20,
    separator=" ",
)
nodes = splitter.get_nodes_from_documents(documents)

# Embedding and Retrieval

## Dense via FAISS

In [7]:
def generate_embeddings(nodes, client, model):
    # Generate embeddings for documents using Ollama
    for doc in tqdm(nodes):
        response = client.embeddings(prompt=doc.text, model=model)
        doc.embedding = response["embedding"]
    return nodes

In [8]:
class FaissIndexer:
    """
    Faiss-based indexer for efficient similarity search using inner-product (cosine) similarity.

    This class handles the creation and management of a FAISS index from node embeddings.
    
    :ivar faiss_index: The FAISS index for storing and querying embeddings.
    :vartype faiss_index: faiss.IndexFlatIP
    :ivar embedding_dim: Dimensionality of the embeddings.
    :vartype embedding_dim: int
    """

    def __init__(self):
        """
        Initialize the FaissIndexer class.

        :ivar faiss_index: The FAISS index, initialized as None.
        :ivar embedding_dim: The dimension of embeddings, initialized as None.
        """
        self.faiss_index = None
        self.embedding_dim = None

    def normalize_embeddings(self, embeddings):
        """
        Normalize embeddings to have unit L2 norm.

        :param embeddings: Array of embeddings to normalize.
        :type embeddings: np.ndarray
        :return: Normalized embeddings.
        :rtype: np.ndarray
        """
        return embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)

    def build_index(self, nodes):
        """
        Build the FAISS index from a list of nodes containing embeddings.

        :param nodes: List of nodes, where each node contains an `embedding` attribute.
        :type nodes: list
        :raises ValueError: If the nodes list is empty or embeddings are inconsistent.
        """
        if not nodes:
            raise ValueError("Nodes list cannot be empty.")
        
        embeddings = np.array([np.array(node.embedding) for node in nodes])
        normalized_embeddings = self.normalize_embeddings(embeddings)

        self.embedding_dim = normalized_embeddings[0].shape[0]
        self.faiss_index = faiss.IndexFlatIP(self.embedding_dim)  # Inner-product similarity
        self.faiss_index.add(normalized_embeddings)

    def get_index(self):
        """
        Get the FAISS index instance.

        :return: The FAISS index used for similarity search.
        :rtype: faiss.IndexFlatIP
        :raises ValueError: If the index has not been built.
        """
        if self.faiss_index is None:
            raise ValueError("Index has not been built yet. Call 'build_index' first.")
        return self.faiss_index

In [9]:
class FAISSVectorStoreRetriever(BaseRetriever):
    def __init__(self, faiss_index, documents):
        """
        Initialize the FAISS retriever.
        :param faiss_index: The FAISS index containing precomputed embeddings.
        :param documents: List of document chunks.
        :param embeddings: Precomputed embeddings corresponding to the document chunks.
        """
        self.faiss_index = faiss_index
        self.documents = documents

    def _retrieve(self, query_embedding, top_k=5):
        """
        Retrieve the top-k nearest neighbors using the FAISS index.
        :param query_embedding: The embedding of the query.
        :param top_k: Number of top results to retrieve.
        """

        norm_query_embedding = np.array([query_embedding])
        norm_query_embedding /= np.linalg.norm(norm_query_embedding, axis=1, keepdims=True)

        distances, indices = self.faiss_index.search(norm_query_embedding, top_k)
        retrieved_docs = [
            NodeWithScore(node=self.documents[idx], score=1 - dist)
            for idx, dist in zip(indices[0], distances[0])
            if idx != -1
        ]
        return retrieved_docs

In [10]:
# embedding
nodes_embed = generate_embeddings(nodes, client, "mxbai-embed-large")

100%|██████████| 12302/12302 [08:21<00:00, 24.52it/s] 


In [11]:
index = FaissIndexer()
index.build_index(nodes_embed)  # Build index from embeddings
faiss_index = index.get_index()  # Retrieve the FAISS index

# Pass the faiss_index during FaissVectorStore initialization
vector_store = FaissVectorStore(faiss_index=faiss_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index_q = VectorStoreIndex.from_documents(
    documents    # Don’t re-embed doc nodes
)
ollama_llm = Ollama(model="llama3.3", request_timeout=300)
query_engine = index_q.as_query_engine(llm=ollama_llm)

faiss_retriever = FAISSVectorStoreRetriever(
    faiss_index=faiss_index,  # Pass the same faiss_index to the retriever
    documents=nodes_embed
)

## Sparse Embedding via BM25

In [12]:
# bm25_retriever = BM25Retriever.from_defaults(
#     nodes=nodes,
#     similarity_top_k=5,
#     stemmer=Stemmer.Stemmer("english"),
#     language="english",
# )

## Hybrid Retrieval via Reciprocal Rank

In [13]:
def hybrid_embedding(results: dict, top_k: int):
    x = QueryFusionRetriever
    ranked_results = QueryFusionRetriever._reciprocal_rerank_fusion(x, results)
    return ranked_results[:top_k]

# Post Retrieval

## Summarization

In [14]:
def summarize_each_chunk(nodes, client, query, model="llama3.3", parent=False):
    if parent:
        chunks = [doc.text for doc in nodes]
    else:
        chunks = [doc.node.text for doc in nodes]
    summaries = []
    
    for i, chunk in enumerate(chunks):
        prompt = f"""
        Summarize the following text in one concise paragraph, focusing on key points relevant to the query: "{query}".
        
        - Emphasize information directly related to the query.
        - Exclude unrelated, redundant, or speculative details.
        - Do NOT introduce new information or answer the query itself. 
        
        Text:
        {chunk}
        
        Summary:
        """
        
        response = client.generate(model=model, prompt=prompt)
        summary = response['response'].strip()
        summaries.append(summary)

    return summaries

# Generation

In [15]:
def generate_response_with_notice(summaries, query, choices, client, model="llama3.3"):
    # Combine summaries into context block
    context = "\n".join(summaries)
    
    # Create prompt to answer based on summarized text
    prompt = f"""
    Use the following summarized information to answer the query accurately and concisely. 
    DO NOT USE BACKGROUND KNOWLEDGE OUTSIDE THE CONTEXT PROVIDED.
    If the information is not sufficient to fully address the query, respond ONLY with:
    "The available information is insufficient to provide a complete answer to this query."
    \nPlease answer only in letters and put them inside a bracket '[]'. If the question contains the statement 'Check all that apply' then add comma separator if there are multiple answers ONLY IF ALLOWED.

    Summarized Context:
    {context}
    
    Query:
    {query}
    {choices}
    
    Response:
    """
    
    # Send the prompt to Ollama
    response = client.generate(
        model=model,
        prompt=prompt
    )
    
    return response['response'].strip()

# Querying

## Query Transforms

In [16]:
hyde = HyDEQueryTransform(include_original=True, llm=ollama_llm)

## Query Generation

In [17]:
docstore = {}

# Store documents using full metadata as the key
for doc in documents:
    key = tuple(doc.metadata.items())  # Convert metadata to tuple for hashable key
    docstore[key] = doc

In [18]:
def get_document_by_chunk_metadata(chunk_node):
    # Convert chunk metadata to tuple for matching
    metadata_key = tuple(chunk_node.metadata.items())

    # Retrieve document from docstore
    document = docstore.get(metadata_key)
    return document

In [19]:
def remove_duplicate_documents(doc_list):
    seen_ids = set()
    unique_docs = []

    for doc in doc_list:
        if doc.doc_id not in seen_ids:
            seen_ids.add(doc.doc_id)
            unique_docs.append(doc)

    return unique_docs

In [20]:
def gen_query(query, choices, top_k, client, mode='dense', summary=False, model="llama3.3", chunks_only=True):
    hyde_query_engine = TransformQueryEngine(query_engine, hyde)
    response_hyde = hyde_query_engine.query(query)
    post_processed_response = f"""
    Query: {query}
    Additional Query: {response_hyde.response}
    """
    display(Markdown(f"<b>{post_processed_response}</b>"))
    response = client.embeddings(prompt=post_processed_response, model="mxbai-embed-large")
    query_embedding = response["embedding"]

    top_k_docs = faiss_retriever._retrieve(query_embedding, top_k=top_k)

    bm25_retriever = BM25Retriever.from_defaults(
    nodes=nodes,
    similarity_top_k=top_k,
    stemmer=Stemmer.Stemmer("english"),
    language="english",
    )
    retrieved_nodes = bm25_retriever.retrieve(query)

    results = {'faiss': top_k_docs, 'bm25':retrieved_nodes}
    ranked_results = hybrid_embedding(results, top_k=top_k)

    if mode == 'dense':
        print('using FAISS')
        ans_nodes =top_k_docs
    elif mode == 'sparse':
        print('using BM25')
        ans_nodes = retrieved_nodes
    else:
        print('using Hybrid')
        ans_nodes = ranked_results

    parent_flag = True
    context = set([get_document_by_chunk_metadata(docs).text for docs in ans_nodes])
    if chunks_only:
        parent_flag = False
        print('using chunks only')
        context = [docs.node.text for docs in ans_nodes]
        
    if summary:
        print('using summaries')
        context_nodes = remove_duplicate_documents([get_document_by_chunk_metadata(docs) for docs in ans_nodes])

        if chunks_only:
            context_nodes=ans_nodes
        summaries = summarize_each_chunk(context_nodes, client, model='llama3.3', query=query,parent=parent_flag)
        context = summaries

    answer = generate_response_with_notice(context, query, choices, client, model=model)

    # Format the references
    references = []
    for i, doc in enumerate(ranked_results[:top_k], start=1):
        metadata = doc.metadata
        source_info = f"Source {i}: {metadata['title']} (Page {metadata['page']}, Folder: {metadata['folder']})"
        references.append(source_info)

    return answer, "\n".join(references), context

# Evaluation

In [21]:
# Generate prompts dynamically
def generate_choices(row):
    options = []
    for choice in ['A', 'B', 'C', 'D', 'E']:
        # Check for NaN or blank values
        if pd.notna(row[choice]) and row[choice] != '':
            options.append(f"{choice}. {row[choice]}")
    
    # Construct the prompt with few-shot examples
    choices = "\n".join(options)
    
    return choices

In [22]:
from sklearn.model_selection import train_test_split
# Load the Excel file
file_path = '/home/jeryl4913/lto_rag_reviewer/notebooks/eval/LTO_EXAM.csv'
df = pd.read_csv(file_path)

# Display the first few rows of each set
print("Testing Data:")
print(len(df))
display(df.head())

Testing Data:
60


Unnamed: 0,Question,A,B,C,D,E,Answer,Choices
0,"If the driver is turning left, he must: (U-tur...",have the right of way,do so slowly with caution,yield to approaching cars,,,C,A. have the right of way\nB. do so slowly with...
1,"When a vehicle starts to skid, what should the...",Immediately step on the brakes,Hold firmly on to the wheel while slowing down...,Turn the wheels tp the opposite the direction ...,,,B,A. Immediately step on the brakes\nB. Hold fir...
2,"If you are parking uphill without a curb, turn...",edge of the street,other side of the street,middle of the street,,,A,A. edge of the street\nB. other side of the st...
3,"When parking downhill, you should turn your fr...",toward the curb of the sidewalk,away from the curb,any direction will do,,,A,A. toward the curb of the sidewalk\nB. away fr...
4,Which of the following is the maximum speed li...,60 kph,80 kph,100 kph,,,C,A. 60 kph\nB. 80 kph\nC. 100 kph


In [23]:
import time

df["AI"] = np.nan
ai_answer = []
context_answer = []
inference_times = []
for i in tqdm(range(len(df))):
    start_time = time.time()
    answ = gen_query(df["Question"].iloc[i], df["Choices"].iloc[i], top_k=15, client=client, mode='dense', model="llama3.3")
    ai_answer.append(answ[0])
    context_answer.append(answ[2])
    end_time = time.time()
    inference_times.append(end_time - start_time)

df["AI"] = ai_answer
df["Inference_times"] = inference_times
df["Context"] = context_answer
print("Updated Testing Data with AI Answers:")
display(df.head())

  0%|          | 0/60 [00:00<?, ?it/s]

<b>
    Query: If the driver is turning left, he must: (U-turn / Left Rule)
    Additional Query: Left Rule
    </b>

using FAISS
using chunks only


  2%|▏         | 1/60 [01:05<1:04:18, 65.40s/it]

<b>
    Query: When a vehicle starts to skid, what should the driver do?
    Additional Query: The driver should remain calm and avoid sudden movements. The specific steps to recover from a skid depend on the type of skid. For a front-wheel skid, the driver should lift off the gas, shift to neutral, steer once traction returns, and then resume driving. For a rear-wheel skid, the driver should stay calm, take their foot off the accelerator, steer in the direction they want to go, ease the steering wheel if the wheels start to slide the other way, and brake appropriately.
    </b>

using FAISS
using chunks only


  3%|▎         | 2/60 [02:54<1:28:06, 91.14s/it]

<b>
    Query: If you are parking uphill without a curb, turn the wheels towards the:
    Additional Query: road or the direction where the vehicle will roll out off the road and not into it if the parking brake fails.
    </b>

using FAISS
using chunks only


  5%|▌         | 3/60 [04:11<1:20:14, 84.47s/it]

<b>
    Query: When parking downhill, you should turn your front wheels:
    Additional Query: into the curb or toward the sidewalk.
    </b>

using FAISS
using chunks only


  5%|▌         | 3/60 [04:52<1:32:39, 97.54s/it]


KeyboardInterrupt: 

In [None]:
import re


def process_answers(answers):
    formatted_answers = []
    
    for a in answers:
        
        matches = re.findall(r'\[?\s*([A-E](?:\s*,\s*[A-E])*)\s*\]?', str(a)) # Extract answers like [A, C, D] or [A] or [B, D]
        answers = []
        for match in matches:
            answers.extend(re.split(r'\s*,\s*', match))  # Split by comma and remove spaces
        unique_sorted_answers = sorted(set(answers), key=lambda x: ['A', 'B', 'C', 'D', 'E'].index(x))
        if not unique_sorted_answers:
            formatted_answers.append(None)
        else:
            formatted_answers.append(unique_sorted_answers)
    return formatted_answers

df_results = df[["Question", "Answer", "AI", "Inference_times"]].copy()
df_results['Answer'] = df_results['Answer'].apply(lambda x: x.split(', '))
df_results['AI'] = process_answers(df_results["AI"])
df_results['Answer'] = process_answers(df_results["Answer"])



def calculate_scores(df):
    scores = []
    for index, row in df.iterrows():
        correct_answers = set(row['Answer'] if row['Answer'] is not None else [])
        ai_answers = set(row['AI'] if row['AI'] is not None else [])
        if ai_answers == correct_answers:
            score = 1.0
        else:
            score = 0.0
        scores.append(score)
    
    df['Score'] = scores
    accuracy = scores.count(1.0) / len(scores)
    print(f'Final Score: {scores.count(1.0):.2f}/{len(scores):.2f}')
    print(f'Accuracy: {accuracy:.2f}%')
    return df

# Apply the scoring function
scored_df = calculate_scores(df_results)

# Display the dataframe to verify the results
display(scored_df[['Question', 'Answer', 'AI', 'Score', 'Inference_times']])

Final Score: 13.00/60.00
Accuracy: 0.22%


Unnamed: 0,Question,Answer,AI,Score,Inference_times
0,"If the driver is turning left, he must: (U-tur...",[C],"[A, B, C]",0.0,121.788177
1,"When a vehicle starts to skid, what should the...",[B],"[A, B, C]",0.0,167.223632
2,"If you are parking uphill without a curb, turn...",[A],"[A, B, C]",0.0,126.676048
3,"When parking downhill, you should turn your fr...",[A],[A],1.0,114.733462
4,Which of the following is the maximum speed li...,[C],"[A, B]",0.0,149.391973
5,Never park or stop at the side of the road wit...,[C],"[A, B]",0.0,112.751427
6,While driving with maximum speed and you have ...,[C],"[A, B, C]",0.0,153.455341
7,What will happen when your rear tire blows out?,[B],[A],0.0,178.702681
8,Operating a public utility vehicle equipped wi...,[A],"[A, B, C]",0.0,130.630817
9,Your speed while driving at night should keep on:,[A],"[A, B]",0.0,153.482348


In [None]:
# Generate prompts dynamically
def generate_prompt(row):
    
    # Construct the prompt with few-shot examples
    prompt = f"\nActual Question: {row['Question']}\n" 
    prompt += "\nPlease answer the question based on the given context."
    
    return prompt

In [None]:
# Load the Excel file
file_path = '/home/jeryl4913/lto_rag_reviewer/notebooks/eval/LTO_EXAM_QnA.csv'
df = pd.read_csv(file_path, encoding='ISO-8859-1')
df['Prompt'] = df.apply(generate_prompt, axis=1)
display(df.head())

Unnamed: 0,Question,Answer,Prompt
0,Traffic Jam can be prevented if you,Keep opposing lanes open,\nActual Question: Traffic Jam can be prevente...
1,When making a right turn you should,Stay on the outermost lane of the road then si...,\nActual Question: When making a right turn yo...
2,"When you intend to turn right or left, signal ...",25 meters before you intend to make your turn,\nActual Question: When you intend to turn rig...
3,"At an intersection with a traffic light, make ...",The green light is on and there is a left turn...,\nActual Question: At an intersection with a t...
4,Graft and corruption in the traffic enforcemen...,Self disciplined by drivers and obeying traffi...,\nActual Question: Graft and corruption in the...


In [None]:
def generate_response_with_notice(summaries, query, client, model="llama3.3"):
    # Combine summaries into context block
    context = "\n".join(summaries)
    
    # Create prompt to answer based on summarized text
    prompt = f"""
    Use the following summarized information to answer the query accurately and concisely. 
    DO NOT USE BACKGROUND KNOWLEDGE OUTSIDE THE CONTEXT PROVIDED.
    If the information is not sufficient to fully address the query, respond ONLY with:
    "The available information is insufficient to provide a complete answer to this query."

    Summarized Context:
    {context}
    
    Query:
    {query}
    
    Response:
    """
    
    # Send the prompt to Ollama
    response = client.generate(
        model=model,
        prompt=prompt
    )
    
    return response['response'].strip()

In [None]:
def gen_query(query, top_k, client, mode='dense', summary=False, model="llama3.3", chunks_only=True):
    hyde_query_engine = TransformQueryEngine(query_engine, hyde)
    response_hyde = hyde_query_engine.query(query)
    post_processed_response = f"""
    Query: {query}
    Additional Query: {response_hyde.response}
    """
    display(Markdown(f"<b>{post_processed_response}</b>"))
    response = client.embeddings(prompt=post_processed_response, model="mxbai-embed-large")
    query_embedding = response["embedding"]

    top_k_docs = faiss_retriever._retrieve(query_embedding, top_k=top_k)

    bm25_retriever = BM25Retriever.from_defaults(
    nodes=nodes,
    similarity_top_k=top_k,
    stemmer=Stemmer.Stemmer("english"),
    language="english",
    )
    retrieved_nodes = bm25_retriever.retrieve(query)

    results = {'faiss': top_k_docs, 'bm25':retrieved_nodes}
    ranked_results = hybrid_embedding(results, top_k=top_k)

    if mode == 'dense':
        print('using FAISS')
        ans_nodes =top_k_docs
    elif mode == 'sparse':
        print('using BM25')
        ans_nodes = retrieved_nodes
    else:
        print('using Hybrid')
        ans_nodes = ranked_results

    parent_flag = True
    context = set([get_document_by_chunk_metadata(docs).text for docs in ans_nodes])
    if chunks_only:
        parent_flag = False
        print('using chunks only')
        context = [docs.node.text for docs in ans_nodes]
        
    if summary:
        print('using summaries')
        context_nodes = remove_duplicate_documents([get_document_by_chunk_metadata(docs) for docs in ans_nodes])

        if chunks_only:
            context_nodes=ans_nodes
        summaries = summarize_each_chunk(context_nodes, client, model='llama3.3', query=query,parent=parent_flag)
        context = summaries

    answer = generate_response_with_notice(context, query, client, model=model)

    # Format the references
    references = []
    for i, doc in enumerate(ranked_results[:top_k], start=1):
        metadata = doc.metadata
        source_info = f"Source {i}: {metadata['title']} (Page {metadata['page']}, Folder: {metadata['folder']})"
        references.append(source_info)

    return answer, "\n".join(references), context

In [None]:
import time
qr_range = (0,60)
df["AI"] = np.nan
ai_answer = []
inference_times = []

for i in tqdm(range(*qr_range)):
    start_time = time.time()
    ai_answer.append(gen_query(df.loc[i,"Prompt"],  top_k=15, client=client, mode='dense', model="llama3.3"))
    end_time = time.time()
    inference_times.append(end_time - start_time)



  0%|          | 0/60 [00:00<?, ?it/s]

<b>
    Query: 
Actual Question: Traffic Jam can be prevented if you

Please answer the question based on the given context.
    Additional Query: drive safely and follow defensive driving techniques, such as maintaining a safe following distance, signaling your intentions, stopping smoothly, and obeying right-of-way rules, which can help reduce the risk of collisions and contribute to a smoother traffic flow.
    </b>

using FAISS
using chunks only


  2%|▏         | 1/60 [02:47<2:45:04, 167.88s/it]

<b>
    Query: 
Actual Question: When making a right turn you should

Please answer the question based on the given context.
    Additional Query: When making a right turn, you should position your vehicle close to the curb or edge of the road, signal early, check for oncoming traffic and pedestrians, yield to others as necessary, maintain a safe speed, avoid cutting corners, and follow traffic signals and signs. Additionally, use your mirrors and check blind spots to ensure no vehicles or cyclists are in your path.
    </b>

using FAISS
using chunks only


  3%|▎         | 2/60 [05:23<2:35:12, 160.56s/it]

<b>
    Query: 
Actual Question: When you intend to turn right or left, signal your intention at least

Please answer the question based on the given context.
    Additional Query: 30 meters before the turn.
    </b>

using FAISS
using chunks only


  5%|▌         | 3/60 [07:11<2:09:38, 136.47s/it]

<b>
    Query: 
Actual Question: At an intersection with a traffic light, make left turn only when

Please answer the question based on the given context.
    Additional Query: it's safe and the traffic light or intersection rules permit.
    </b>

using FAISS
using chunks only


  7%|▋         | 4/60 [09:03<1:58:39, 127.13s/it]

<b>
    Query: 
Actual Question: Graft and corruption in the traffic enforcement system can be eliminated by

Please answer the question based on the given context.
    Additional Query: Graft and corruption in the traffic enforcement system can be eliminated by strictly enforcing existing traffic laws and regulations, and by motorists following road safety rules at all times and refraining from offering bribes to traffic enforcers. Additionally, law enforcers should remain committed to their duties without condoning any form of corruption within their ranks.
    </b>

using FAISS
using chunks only


  8%|▊         | 5/60 [11:55<2:11:12, 143.13s/it]

<b>
    Query: 
Actual Question: On a four(4) lane road with single white line, you can

Please answer the question based on the given context.
    Additional Query: overtake when it is safe and legal to do so, such as when there is clear visibility ahead and no oncoming traffic is present.
    </b>

using FAISS
using chunks only


 10%|█         | 6/60 [13:55<2:01:49, 135.37s/it]

<b>
    Query: 
Actual Question: A double solid yellow line with broken white line in between means

Please answer the question based on the given context.
    Additional Query: A double solid yellow line signifies an absolute prohibition on passing or crossing the line at any point, indicating a no-passing zone and prohibiting drivers from overtaking vehicles or crossing into oncoming traffic. However, when there's a broken white line in between, it allows overtaking from one side indicated by the broken line while restricting overtaking from the other side with the solid line.
    </b>

using FAISS
using chunks only


 12%|█▏        | 7/60 [15:59<1:56:17, 131.64s/it]

<b>
    Query: 
Actual Question: When making a U-Turn, you should

Please answer the question based on the given context.
    Additional Query: When making a U-turn, you should position your vehicle correctly to provide ample space and visibility for the maneuver, signal your intention to make a U-turn, check for traffic coming from both directions, and exercise patience while being vigilant for pedestrians and cyclists. Additionally, you should choose the right spot, avoiding areas where U-turns are prohibited, and maintain your lane after completing the turn. It's also essential to stay focused on the road, anticipate the actions of other drivers, and respect any signage or markings that prohibit U-turns in specific areas.
    </b>

using FAISS
using chunks only


 13%|█▎        | 8/60 [18:18<1:56:08, 134.01s/it]

<b>
    Query: 
Actual Question: Signs that are triangular in shape and with a red colored border are called

Please answer the question based on the given context.
    Additional Query: The provided context does not mention signs that are triangular in shape with a red-colored border. However, it discusses various types of road and traffic signs, including hazard markers and chevron signs, but does not provide information about triangular signs with a red border.
    </b>

using FAISS
using chunks only


 15%|█▌        | 9/60 [19:30<1:37:25, 114.61s/it]

<b>
    Query: 
Actual Question: Signs that are round, inverted triangle or octagonal and with red colored boarders are called

Please answer the question based on the given context.
    Additional Query: Advance Warning/Traffic Control Device Signs, however the actual name of these signs is not explicitly stated in the provided context. But it can be inferred that these shapes and colors might be related to warning or traffic control signs.
    </b>

using FAISS
using chunks only


 17%|█▋        | 10/60 [20:53<1:27:19, 104.79s/it]

<b>
    Query: 
Actual Question: Signs that are round, rectangular with white and blue background are called

Please answer the question based on the given context.
    Additional Query: There is no information provided about signs being round or rectangular with a white and blue background. The text only discusses road and traffic rules, types of road curves, intersections, and markings such as transition lines and give way lines, but does not describe specific sign shapes or colors.
    </b>

using FAISS
using chunks only


 18%|█▊        | 11/60 [22:10<1:18:36, 96.26s/it] 

<b>
    Query: 
Actual Question: Single with broken line on a two-lane road means

Please answer the question based on the given context.
    Additional Query: Single solid yellow lines with a broken line on a two-lane road permit passing with caution when safe and legal.
    </b>

using FAISS
using chunks only


 20%|██        | 12/60 [23:27<1:12:19, 90.41s/it]

<b>
    Query: 
Actual Question: What to do when you approach a crosswalk or pedestrian lane?

Please answer the question based on the given context.
    Additional Query: When approaching a crosswalk or pedestrian lane, you should yield to pedestrians. It is your responsibility to ensure their safety by allowing them to cross before you proceed. Additionally, be aware of other vehicles and watch out for pedestrians who may be crossing the road. Always slow down and observe your surroundings carefully when approaching an intersection or crosswalk.
    </b>

using FAISS
using chunks only


 22%|██▏       | 13/60 [24:54<1:10:06, 89.50s/it]

<b>
    Query: 
Actual Question: Driving an unregistered motor vehicle is a violation with a fine of

Please answer the question based on the given context.
    Additional Query: Php 10,000.00
    </b>

using FAISS
using chunks only


 23%|██▎       | 14/60 [25:54<1:01:42, 80.48s/it]

<b>
    Query: 
Actual Question: Considered a parking violation?

Please answer the question based on the given context.
    Additional Query: Blocking a driveway, parking on sidewalks, parking in residential areas without permission, parking at or inside an intersection, parking near pedestrian crossings, double parking, parking near footbridges, parking along major national roads, and parking too close to fire hydrants or fire station entrances are all considered parking violations. Additionally, attended illegal parking, obstruction, unattended illegal parking, and other forms of unauthorized parking can also be considered as such.
    </b>

using FAISS
using chunks only


 25%|██▌       | 15/60 [27:02<57:35, 76.78s/it]  

<b>
    Query: 
Actual Question: Green light at an intersection means

Please answer the question based on the given context.
    Additional Query: Green light indicates permission to proceed, yet drivers must exercise caution, particularly concerning pedestrian safety and when navigating turns or intersections. While the green signal grants the right of way, drivers are expected to remain vigilant and prioritize the safety of pedestrians and cyclists crossing the intersection.
    </b>

using FAISS
using chunks only


 27%|██▋       | 16/60 [28:17<55:51, 76.17s/it]

<b>
    Query: 
Actual Question: Steady red light at an intersection means

Please answer the question based on the given context.
    Additional Query: The steady red traffic light signifies more than just stopping; it mandates drivers to come to a full halt, ensuring the safety of pedestrians crossing the street. While it’s imperative to obey the signal, drivers may still be permitted to turn, contingent upon the absence of pedestrian and vehicle traffic and any specific signage indicating restrictions. Thus, while red denotes a mandatory stop, it also entails a cautious evaluation of the intersection’s conditions before proceeding.
    </b>

using FAISS
using chunks only


 28%|██▊       | 17/60 [29:36<55:08, 76.94s/it]

<b>
    Query: 
Actual Question: The penalty of driving a motor vehicle while under the influence of alcohol for the first offense

Please answer the question based on the given context.
    Additional Query: For Non-Professional Driver's License (NPDL) holders, the penalty is confiscation and suspension of the license for a period of twelve months upon final conviction by a regular court. For Professional Driver's License (PDL) holders, the penalty is cancellation of the license upon the commission of the first offense. Additionally, fines ranging from P50,000 to P100,000 with a minimum 6-month jail term may be imposed for driving under the influence of alcohol with no homicide or physical injury.
    </b>

using FAISS
using chunks only


 30%|███       | 18/60 [31:07<56:55, 81.33s/it]

<b>
    Query: 
Actual Question: When a driver of PUV refuses to render service, convey passengers, such violation is penalized with

Please answer the question based on the given context.
    Additional Query: Fine: Php 5,000.00 (first offense), Php 10,000.00 (second offense), Php 15,000.00 (third and subsequent offense) 
Penalty: Cancellation of CPC where the unit is authorized (third and subsequent offense)
    </b>

using FAISS
using chunks only


 32%|███▏      | 19/60 [32:12<52:16, 76.49s/it]

<b>
    Query: 
Actual Question: If the driver is using a motor vehicle in committing a crime and is convicted, his driver's license shall be

Please answer the question based on the given context.
    Additional Query: revoked. Additionally, the subject vehicle will be impounded, the violator penalized with P10,000.00, and the driver will be disqualified to apply for another driver's license for a period of five years, reckoned from the date of payment of penalty.
    </b>

using FAISS
using chunks only


 33%|███▎      | 20/60 [33:23<49:45, 74.63s/it]

<b>
    Query: 
Actual Question: The minimum distance away from the vehicle are following

Please answer the question based on the given context.
    Additional Query: The minimum distance is not explicitly stated in terms of a specific measurement, but rather it's recommended to maintain a gap that allows for at least two to three seconds of reaction time. This can be determined by counting "one thousand and one, one thousand and two" (for two seconds) or "one thousand and one, one thousand and two, one thousand and three" (for three seconds) after the vehicle ahead passes a fixed point, and adjusting for conditions such as speed and road conditions.
    </b>

using FAISS
using chunks only


 35%|███▌      | 21/60 [34:36<48:09, 74.09s/it]

<b>
    Query: 
Actual Question: When do you have a complete/full stop?

Please answer the question based on the given context.
    Additional Query: You have a complete or full stop when your vehicle comes to a standstill after applying the brakes, covering the distance known as the braking distance. This occurs at the end of the total stopping distance, which combines both the braking distance and the perception and reaction distance.
    </b>

using FAISS
using chunks only


 37%|███▋      | 22/60 [35:42<45:28, 71.82s/it]

<b>
    Query: 
Actual Question: On a two-lane road, overtaking is only allowed only at the

Please answer the question based on the given context.
    Additional Query: left lane.
    </b>

using FAISS
using chunks only


 38%|███▊      | 23/60 [37:01<45:31, 73.81s/it]

<b>
    Query: 
Actual Question: Parking is considered as a violation when a motor vehicle

Please answer the question based on the given context.
    Additional Query: is parked at or inside an intersection, near pedestrian crossings, on sidewalks, in residential areas without permission, double parking, near footbridges, along major national roads, too close to a fire hydrant (less than 4 meters away), or too close to a fire station entrance.
    </b>

using FAISS
using chunks only


 40%|████      | 24/60 [38:14<44:07, 73.54s/it]

<b>
    Query: 
Actual Question: Parking is prohibited

Please answer the question based on the given context.
    Additional Query: in several areas, including blocking a driveway, on sidewalks, in residential areas without permission, at or inside an intersection, near pedestrian crossings, by double parking or on the driver's side of a parked vehicle, in the vicinity of footbridges, along major national roads, and within 4 meters of a fire hydrant.
    </b>

using FAISS
using chunks only


 42%|████▏     | 25/60 [39:45<46:00, 78.86s/it]

<b>
    Query: 
Actual Question: When the traffic light is steady green and steady left/right arrow

Please answer the question based on the given context.
    Additional Query: When the traffic light is steady green, it indicates permission to proceed, but drivers must exercise caution, particularly concerning pedestrian safety and when navigating turns or intersections. Additionally, when there is a steady left or right arrow, it grants drivers permission to make turns in the designated direction, but they should proceed with caution and yield to pedestrians and oncoming traffic, prioritizing road safety at intersections.
    </b>

using FAISS
using chunks only


 43%|████▎     | 26/60 [41:23<47:56, 84.61s/it]

<b>
    Query: 
Actual Question: What is the maximum penalty for driving under the influence of liquor or prohibited drugs?

Please answer the question based on the given context.
    Additional Query: The maximum penalty for driving under the influence of alcohol (DUIA) and driving under the influence of dangerous drugs and other similar substances (DUID) resulting in homicide is a penalty of imprisonment under Article 249 of the Revised Penal Code plus a fine ranging from P350,000 to P500,000.
    </b>

using FAISS
using chunks only


 45%|████▌     | 27/60 [42:32<44:00, 80.01s/it]

<b>
    Query: 
Actual Question: On a wet road, you must

Please answer the question based on the given context.
    Additional Query: reduce your speed by gently lifting off the accelerator to maintain control of your vehicle. Additionally, avoid sudden braking and instead apply gentle pressure on the brake pedal to prevent uncontrollable skidding. It's also important to steer smoothly, maintain a safe following distance, and use your vehicle's safety features to navigate safely.
    </b>

using FAISS
using chunks only


 47%|████▋     | 28/60 [43:31<39:16, 73.63s/it]

<b>
    Query: 
Actual Question: While driving with a maximum speed and you have to stop suddenly, you should

Please answer the question based on the given context.
    Additional Query: Avoid abrupt braking and instead apply gentle pressure on the brake pedal. Additionally, if your vehicle is equipped with an Anti-lock Braking System (ABS), it will help prevent wheel lockup and maintain traction. If you have a manual hand brake, raise it gradually to avoid locking the wheels, and for cars with electronic parking brakes, engage them by pulling the lever or pressing the button. It's also important to downshift to lower gears to use engine braking if possible.
    </b>

using FAISS
using chunks only


 48%|████▊     | 29/60 [45:02<40:46, 78.93s/it]

<b>
    Query: 
Actual Question: When another vehicle is following you too closely, you should

Please answer the question based on the given context.
    Additional Query: When another vehicle is following you too closely, you should signal your intentions and allow them to pass, or maintain a safe speed and position to reduce the risk of collision. Additionally, you can stop smoothly and avoid sudden maneuvers to help prevent rear-end accidents. It's also essential to stay alert and observant of your surroundings, watching for signals, brake lights, and any signs of the driver's intentions.
    </b>

using FAISS
using chunks only


 50%|█████     | 30/60 [46:11<37:54, 75.83s/it]

<b>
    Query: 
Actual Question: The driver of the vehicle behind you should always practice the 3-second rule to prevent

Please answer the question based on the given context.
    Additional Query: tailgating or following too closely, which can lead to rear-end accidents and reduce the risk of road crashes.
    </b>

using FAISS
using chunks only


 52%|█████▏    | 31/60 [47:14<34:51, 72.11s/it]

<b>
    Query: 
Actual Question: When parking uphill without a curb, turn your wheels

Please answer the question based on the given context.
    Additional Query: so that the vehicle will roll out off the road and not into the road if the parking brake is inadvertently released.
    </b>

using FAISS
using chunks only


 53%|█████▎    | 32/60 [48:07<30:57, 66.34s/it]

<b>
    Query: 
Actual Question: When parking downhill, you must

Please answer the question based on the given context.
    Additional Query: turn the front wheels into the curb or toward the sidewalk. Always engage the parking brakes. If possible, leave the vehicle in gear position.
    </b>

using FAISS
using chunks only


 55%|█████▌    | 33/60 [49:05<28:45, 63.92s/it]

<b>
    Query: 
Actual Question: When parking uphill, you must

Please answer the question based on the given context.
    Additional Query: turn the front wheels into the curb or toward the sidewalk. Always engage the parking brakes. If possible, leave the vehicle in gear position.
    </b>

using FAISS
using chunks only


 57%|█████▋    | 34/60 [49:57<26:03, 60.13s/it]

<b>
    Query: 
Actual Question: When you make an abrupt move especially when you are on a wet and possibly slippery road, the following action can cause you to skid and lose control

Please answer the question based on the given context.
    Additional Query: When you make an abrupt move, especially when you are on a wet and possibly slippery road, the following action can cause you to skid and lose control: sudden turns or hard braking.
    </b>

using FAISS
using chunks only


 58%|█████▊    | 35/60 [50:58<25:12, 60.51s/it]

<b>
    Query: 
Actual Question: Whenever you are driving, especially when overtaking

Please answer the question based on the given context.
    Additional Query: it's essential to keep a safe distance between your vehicle and the one in front, check your surroundings, signal your intention, and execute the maneuver smoothly. Additionally, be aware of areas where overtaking is prohibited, such as roads with "No Overtaking" signs, approaching crests or curves, and railroad crossings. Always respect traffic signs and road markings, like solid yellow lines, which indicate no overtaking zones. It's also crucial to be patient and courteous when driving, especially when overtaking large vehicles, and maintain a safe following distance to allow for adequate reaction time.
    </b>

using FAISS
using chunks only


 60%|██████    | 36/60 [52:37<28:49, 72.06s/it]

<b>
    Query: 
Actual Question: When driving on a highway, do not stare at the vehicle in front of you, instead you should

Please answer the question based on the given context.
    Additional Query: look ahead and be aware of your surroundings. 

Additionally, it is mentioned to "Avoid Staring at Oncoming Headlights" which implies that staring at any single point, including the vehicle in front, for an extended period can be distracting and potentially hazardous. Instead, drivers should maintain a safe following distance and stay alert to the road conditions ahead.
    </b>

using FAISS
using chunks only


 62%|██████▏   | 37/60 [53:49<27:38, 72.12s/it]

<b>
    Query: 
Actual Question: When driving on a mountain roads during daytime, you should

Please answer the question based on the given context.
    Additional Query: The provided context does not specifically address driving on mountain roads during daytime. However, general safety tips can be inferred. It's essential to follow traffic rules, stay focused on driving, and avoid distractions. Additionally, ensuring your vehicle is well-maintained, including proper tire condition and functioning brakes, is crucial for safe driving. While the context doesn't directly answer the question about mountain roads during daytime, these general safety measures can be applicable.
    </b>

using FAISS
using chunks only


 63%|██████▎   | 38/60 [55:10<27:25, 74.80s/it]

<b>
    Query: 
Actual Question: When driving downhill on a mountain road always

Please answer the question based on the given context.
    Additional Query: reduce speed when approaching curves, turns, or areas prone to slippery surfaces.
    </b>

using FAISS
using chunks only


 65%|██████▌   | 39/60 [56:19<25:35, 73.12s/it]

<b>
    Query: 
Actual Question: The headlights should be used often as needed to

Please answer the question based on the given context.
    Additional Query: maximize their use and ensure safety while driving, especially in low-light conditions. This includes turning them on during dawn, dusk, or inclement weather, and considering using them 24/7 to reduce the risk of daytime collisions.
    </b>

using FAISS
using chunks only


 67%|██████▋   | 40/60 [57:23<23:25, 70.28s/it]

<b>
    Query: 
Actual Question: At an intersection, if two (2) vehicles arrived at the same time, which vehicle has the right of way?

Please answer the question based on the given context.
    Additional Query: The vehicle on the right has the right of way. This means that if two vehicles arrive at an intersection simultaneously, the driver on the left must yield to the driver on the right.
    </b>

using FAISS
using chunks only


 68%|██████▊   | 41/60 [58:39<22:47, 71.99s/it]

<b>
    Query: 
Actual Question: When negotiating a curve on a highway at a relatively high speed, you should

Please answer the question based on the given context.
    Additional Query: The provided context doesn't specifically address the scenario of negotiating a curve on a highway at a relatively high speed. However, it does offer general guidance on safe driving practices, such as easing off the accelerator to reduce speed, avoiding abrupt braking, and steering smoothly. 

Given this information, when approaching a curve, it would be advisable to ease off the accelerator to reduce speed before entering the curve, avoid making sudden turns or applying the brakes abruptly, and steer smoothly through the curve. This approach can help maintain vehicle stability and control. Additionally, reducing speed appropriately for the road conditions is crucial for safe navigation of curves at higher speeds.
    </b>

using FAISS
using chunks only


 70%|███████   | 42/60 [1:00:02<22:36, 75.36s/it]

<b>
    Query: 
Actual Question: When planning to overtake a slower vehicle in front of you at night, you should

Please answer the question based on the given context.
    Additional Query: Use your low beam headlights to avoid dazzling the driver of the vehicle in front and to ensure you can see the road ahead clearly. However, this specific information is not directly mentioned in the provided context. 

From the context, when planning to overtake, steps include signaling intention, checking visibility, assessing road conditions, and executing the maneuver smoothly while maintaining a safe distance. It's also crucial to be aware of areas where overtaking is prohibited, such as "No Overtaking" zones or near intersections and curves with limited visibility.

Therefore, the general advice would be to follow these steps: signal your intention, ensure there's no oncoming traffic, assess road conditions carefully, and execute the maneuver safely. Additionally, being patient, courteous, and mindful of weather and road conditions is essential for safe overtaking.
    </b>

using FAISS
using chunks only


 72%|███████▏  | 43/60 [1:01:37<22:58, 81.11s/it]

<b>
    Query: 
Actual Question: Driving in heavy rains can be extremely dangerous because visibility is limited. What should you do?

Please answer the question based on the given context.
    Additional Query: To drive safely in heavy rains, increase your stopping distances by following the two- or three-second rule and adjust it further if you're unsure about your tire condition. Step on the brake gently to avoid abrupt braking, and use your headlights to make yourself visible to other drivers. Additionally, look as far ahead as possible to anticipate large puddles that may cause aquaplaning. It's also essential to reduce your speed during wet conditions and maintain proper tire pressure and adequate tread depth to minimize the risk of losing control on wet roads.
    </b>

using FAISS
using chunks only


 73%|███████▎  | 44/60 [1:03:13<22:51, 85.75s/it]

<b>
    Query: 
Actual Question: A flashing red light means

Please answer the question based on the given context.
    Additional Query: A flashing red light means that drivers must come to a complete stop, and it should be treated as a four-way stop sign, yielding to other vehicles at the intersection before proceeding further.
    </b>

using FAISS
using chunks only


 75%|███████▌  | 45/60 [1:04:26<20:28, 81.89s/it]

<b>
    Query: 
Actual Question: Your speed while driving at night should keep on

Please answer the question based on the given context.
    Additional Query: reduced, as visibility is limited, and unexpected obstacles may appear. You should follow posted speed limits and adjust your speed according to road conditions.
    </b>

using FAISS
using chunks only


 77%|███████▋  | 46/60 [1:05:38<18:22, 78.78s/it]

<b>
    Query: 
Actual Question: One that affects your visibility?

Please answer the question based on the given context.
    Additional Query: Fog and sun glare can affect your visibility. Additionally, cracked pavement, collapsing shoulders, and construction or road work can also create hazardous conditions that may reduce visibility. Furthermore, blind spots around a vehicle can obstruct the driver's view, leading to potential dangers on the road.
    </b>

using FAISS
using chunks only


 78%|███████▊  | 47/60 [1:06:58<17:09, 79.20s/it]

<b>
    Query: 
Actual Question: When driving at night, you should

Please answer the question based on the given context.
    Additional Query: When driving at night, you should ensure proper lighting, drive slower, stay alert and focused, keep a safe following distance, use high beams wisely, and be mindful of other drivers. Additionally, you should clean your headlights regularly, maintain a safe distance from the vehicle in front, avoid distractions like mobile phones or eating while driving, and adjust your speed according to road conditions. It's also important to watch for animals, especially in rural areas, and use GPS for navigation to stay informed about upcoming turns and exits. Furthermore, you should minimize glare from oncoming vehicles, keep an emergency kit in your vehicle, and stay sober to avoid impaired judgment and coordination.
    </b>

using FAISS
using chunks only


 80%|████████  | 48/60 [1:08:16<15:44, 78.68s/it]

<b>
    Query: 
Actual Question: Being passed is normal part of driving and should not be taken as an insult to ones ability, you should

Please answer the question based on the given context.
    Additional Query: stay calm and aware, move to the side, maintain a steady speed, and check mirrors.
    </b>

using FAISS
using chunks only


 82%|████████▏ | 49/60 [1:09:21<13:42, 74.79s/it]

<b>
    Query: 
Actual Question: When oncoming vehicle deliberately crosses the centerline to pass another vehicle, you should be

Please answer the question based on the given context.
    Additional Query: Move to the side of your lane or the road to provide space for the passing vehicle and maintain a steady speed to allow it to pass safely.
    </b>

using FAISS
using chunks only


 83%|████████▎ | 50/60 [1:11:52<16:16, 97.62s/it]

<b>
    Query: 
Actual Question: When approaching a flooded area and you have to go through it, what should you do?

Please answer the question based on the given context.
    Additional Query: The provided context does not directly address the question of what to do when approaching a flooded area that must be crossed. However, it offers guidance on dealing with a submerged vehicle and preventive measures for flood-prone areas, emphasizing the importance of staying informed about weather forecasts and considering comprehensive auto insurance. If the intention is to protect your vehicle from potential flood damage while navigating through a flooded area, moving to higher ground or safer locations before heavy rains begin is advised. Additionally, having comprehensive auto insurance that covers flood damage can provide financial protection in case of an accident. Nonetheless, specific steps for safely crossing a flooded area are not outlined in the given context.
    </b>

using FAISS
using chunks only


 85%|████████▌ | 51/60 [1:14:43<17:55, 119.47s/it]

<b>
    Query: 
Actual Question: A single solid yellow or white line means

Please answer the question based on the given context.
    Additional Query: A single solid yellow line permits passing with caution when safe and legal.
    </b>

using FAISS
using chunks only


 87%|████████▋ | 52/60 [1:16:58<16:34, 124.37s/it]

<b>
    Query: 
Actual Question: Passing/overtaking is allowed

Please answer the question based on the given context.
    Additional Query: Passing or overtaking is generally allowed in certain situations. For instance, if you're on a road with two or more lanes going in the same direction, you can overtake another vehicle, even on a hill or a curve, as long as it's safe to do so. Additionally, in business or residential districts with multiple lanes, passing on the right side is allowed. It's also permissible to pass when the line on the road is broken, but vehicles proceeding straight have the right of way. However, there are specific areas where overtaking is not allowed, such as near railway crossings, intersections without traffic signals, and roads marked with "No Overtaking" signs or solid yellow lines.
    </b>

using FAISS
using chunks only


 88%|████████▊ | 53/60 [1:19:32<15:32, 133.19s/it]

<b>
    Query: 
Actual Question: A driver on a highway shall yield the right of way to

Please answer the question based on the given context.
    Additional Query: A driver on a highway shall yield the right of way to pedestrians, especially when they are crossing within a crosswalk, as well as to vehicles already on the highway when merging. Additionally, drivers must also yield to emergency vehicles with sirens or lights and to traffic on the public road when pulling out of a driveway.
    </b>

using FAISS
using chunks only


 90%|█████████ | 54/60 [1:21:37<13:04, 130.80s/it]

<b>
    Query: 
Actual Question: A driver on a highway shall yield the right of way to

Please answer the question based on the given context.
    Additional Query: A driver on a highway shall yield the right of way to pedestrians, especially when they are crossing within a crosswalk.
    </b>

using FAISS
using chunks only


 92%|█████████▏| 55/60 [1:23:28<10:24, 124.83s/it]

<b>
    Query: 
Actual Question: This traffic signs warns you that school children may be present in the vicinity

Please answer the question based on the given context.
    Additional Query: School zone signage.
    </b>

using FAISS
using chunks only


 93%|█████████▎| 56/60 [1:25:34<08:20, 125.13s/it]

<b>
    Query: 
Actual Question: The signal warns of a hazard ahead

Please answer the question based on the given context.
    Additional Query: A blinking yellow traffic light serves as a warning to drivers to slow down and proceed with caution, indicating potential hazards or the need for increased vigilance, especially at intersections where traffic conditions are light.
    </b>

using FAISS
using chunks only


 95%|█████████▌| 57/60 [1:27:43<06:19, 126.40s/it]

<b>
    Query: 
Actual Question: This traffic sign states direction and distances

Please answer the question based on the given context.
    Additional Query: Advance Direction Signs, Intersection Direction Signs, Reassurance Direction Signs, and Fingerboard & Direction Signs all provide direction and some also indicate distance. Specifically, Reassurance Direction Signs are mentioned to provide approximate distance to a particular exit, while Advance Direction Signs and Intersection Direction Signs guide the motorist towards certain directions, which may include information on distances.
    </b>

using FAISS
using chunks only


 97%|█████████▋| 58/60 [1:29:50<04:13, 126.55s/it]

<b>
    Query: 
Actual Question: The mark for a railway crossing

Please answer the question based on the given context.
    Additional Query: The mark for a railway crossing is a sign that indicates an upcoming railroad crossing, often accompanied by additional signs such as "Railroad Crossing Ahead" or warning signals. These signs are positioned in areas where they can be easily seen and may include instructions such as stopping or giving the right of way.
    </b>

using FAISS
using chunks only


 98%|█████████▊| 59/60 [1:32:16<02:12, 132.33s/it]

<b>
    Query: 
Actual Question: Children must be held in approved child restraint if there are

Please answer the question based on the given context.
    Additional Query: twelve years or younger, unless they are at least 4’11” (150 cm) in height.
    </b>

using FAISS
using chunks only


100%|██████████| 60/60 [1:34:18<00:00, 94.32s/it] 
  df.loc[qr_range[0]:qr_range[1]-1, "AI"] = [answ[0] for answ in ai_answer]


ValueError: Must have equal len keys and value when setting with an ndarray

In [None]:
df.loc[qr_range[0]:qr_range[1]-1, "AI"] = [answ[0] for answ in ai_answer]
df.loc[qr_range[0]:qr_range[1]-1, "Context"] = [" ".join(map(str, answ[2])) for answ in ai_answer]
df.loc[qr_range[0]:qr_range[1]-1, "Inference_Time"] = inference_times 
df_new = df.loc[qr_range[0]:qr_range[1]-1].copy()
df = df_new.copy()

# 10. Similarity Evaluation

In [None]:
from llama_index.core.evaluation import SemanticSimilarityEvaluator
from llama_index.core.base.embeddings.base import BaseEmbedding
import asyncio
from llama_index.core.embeddings import resolve_embed_model
from pydantic import PrivateAttr

class OllamaEmbeddingModel(BaseEmbedding):
    _client: Client = PrivateAttr()

    def __init__(self, model_name: str = "mxbai-embed-large", timeout: int = 300):
        super().__init__()
        self.model_name = model_name
        self._client = Client() 

    async def _aget_query_embedding(self, query: str) -> list[float]:
        return await self._aget_text_embedding(query)

    async def _aget_text_embedding(self, text: str) -> list[float]:
        loop = asyncio.get_event_loop()
        embedding_response = await loop.run_in_executor(
            None, self._client.embeddings, self.model_name, text
        )
        return embedding_response['embedding']  

    def _get_query_embedding(self, query: str) -> list[float]:
        return self._get_text_embedding(query)

    def _get_text_embedding(self, text: str) -> list[float]:
        embedding_response = self._client.embeddings(
            model=self.model_name,
            prompt=text
        )
        return embedding_response['embedding']


embed_model = OllamaEmbeddingModel(model_name="mxbai-embed-large")
evaluator = SemanticSimilarityEvaluator(
    embed_model=embed_model,
    similarity_threshold=0.6
)

results_scores = []
results_passing = []
for i in tqdm(range(len(df))):
    response = df.loc[i, "AI"]
    reference = df.loc[i, "Answer"]

    result = await evaluator.aevaluate(
    response=response,
    reference=reference,
    )
    results_scores.append(result.score)
    results_passing.append(result.passing)
    
df['Score'] = results_scores
df['Passing'] = results_passing

average_score = df['Score'].mean()
total_items = len(df)
passing_items = df['Passing'].sum()  
print(f"Average Score: {average_score:.4f}")
print(f"Passing: {passing_items}/{total_items}")
display(df[['Question', 'Answer', 'AI', 'Score', 'Passing', 'Inference_Time']])

100%|██████████| 60/60 [00:06<00:00,  9.29it/s]

Average Score: 0.6590
Passing: 48/60





Unnamed: 0,Question,Answer,AI,Score,Passing,Inference_Time
0,Traffic Jam can be prevented if you,Keep opposing lanes open,"To prevent traffic jams, it's essential to pra...",0.696911,True,167.875098
1,When making a right turn you should,Stay on the outermost lane of the road then si...,"When making a right turn, you should drive in ...",0.779041,True,155.435959
2,"When you intend to turn right or left, signal ...",25 meters before you intend to make your turn,"30 meters before turning. \n\nHowever, since t...",0.794384,True,107.802828
3,"At an intersection with a traffic light, make ...",The green light is on and there is a left turn...,"At an intersection with a traffic light, make ...",0.776484,True,112.805041
4,Graft and corruption in the traffic enforcemen...,Self disciplined by drivers and obeying traffi...,To eliminate graft and corruption in the traff...,0.607796,True,171.50372
5,"On a four(4) lane road with single white line,...",Overtake by passing over the solid white line,"On a four-lane road with a single white line, ...",0.827382,True,120.294474
6,A double solid yellow line with broken white l...,Absolutely no overtaking,A double solid yellow line with a broken white...,0.566641,False,123.971421
7,"When making a U-Turn, you should",Check for traffic behind you and indicate your...,The provided text does not specifically discus...,0.6631,True,139.088943
8,Signs that are triangular in shape and with a ...,Caution or warning signs,"Warning signs, which are typically triangular ...",0.731701,True,71.944613
9,"Signs that are round, inverted triangle or oct...",Regulatory signs,Warning signs are typically characterized by a...,0.747711,True,82.784009


# 11. Relevancy Evaluation

In [None]:
from llama_index.core.evaluation import RelevancyEvaluator

ollama_llm = Ollama(model="llama3.3", request_timeout=300)
evaluator = RelevancyEvaluator(llm=ollama_llm)

eval_results = []

for i in tqdm(range(len(df))):
    eval_result = await evaluator.aevaluate(
        query=df.loc[i, "Question"],
        response=df.loc[i, "AI"],
        contexts=[df.loc[i, "Context"]]  
    )
    eval_results.append(eval_result.passing)

df['Eval'] = eval_results

total_items = len(df)
passing_items = df['Eval'].sum()
score = f"Score: {passing_items}/{total_items}"
percentage = passing_items / total_items if total_items > 0 else 0
print(score)
print(f"Percentage: {percentage:.2%}")
display(df[['Question', 'Answer', 'AI', 'Eval', 'Inference_Time']])

100%|██████████| 60/60 [1:50:05<00:00, 110.10s/it]


Score: 46/60
Percentage: 76.67%


Unnamed: 0,Question,Answer,AI,Eval,Inference_Time
0,Traffic Jam can be prevented if you,Keep opposing lanes open,"To prevent traffic jams, it's essential to pra...",True,167.875098
1,When making a right turn you should,Stay on the outermost lane of the road then si...,"When making a right turn, you should drive in ...",False,155.435959
2,"When you intend to turn right or left, signal ...",25 meters before you intend to make your turn,"30 meters before turning. \n\nHowever, since t...",True,107.802828
3,"At an intersection with a traffic light, make ...",The green light is on and there is a left turn...,"At an intersection with a traffic light, make ...",True,112.805041
4,Graft and corruption in the traffic enforcemen...,Self disciplined by drivers and obeying traffi...,To eliminate graft and corruption in the traff...,True,171.50372
5,"On a four(4) lane road with single white line,...",Overtake by passing over the solid white line,"On a four-lane road with a single white line, ...",False,120.294474
6,A double solid yellow line with broken white l...,Absolutely no overtaking,A double solid yellow line with a broken white...,False,123.971421
7,"When making a U-Turn, you should",Check for traffic behind you and indicate your...,The provided text does not specifically discus...,True,139.088943
8,Signs that are triangular in shape and with a ...,Caution or warning signs,"Warning signs, which are typically triangular ...",True,71.944613
9,"Signs that are round, inverted triangle or oct...",Regulatory signs,Warning signs are typically characterized by a...,True,82.784009
