# LTO RAG Table of Contents

1. Import Libraries
2. Connect to Ollama Server
3. Ingestion
4. QA Generation
5. Embedding and Retrieval<br>
 **5.A** Dense via FAISS  
 **5.B** FAISS Retrieval Evaluator  
 **5.C** FAISS Retrieval Evaluation  
 **5.D** Sparse Embedding via BM25  
 **5.E** Hybrid Embedding via Reciprocal Rank Fusion  
 **5.F** Hybrid Retrieval Evaluator  
 **5.G** Hybrid Retrieval Evaluation  
6. Post Retrieval<br>
 **6.A** Summarization  
 **6.B** Evaluation Generation  
7. Querying<br>
 **7.A** Query Transforms  
8. Query Generation
9. TDC Exam Evaluation
10. Similarity Evaluation
11. Relevancy Evaluation


# 1. Import Libraries

In [1]:
import os
import fitz
import re

from ollama import Client
import faiss
import pandas as pd
import numpy as np
import Stemmer
from tqdm import tqdm
import gradio as gr
import json

from llama_index.core import Document
from llama_index.core.node_parser import TokenTextSplitter
from llama_index.core.retrievers import BaseRetriever, QueryFusionRetriever
from llama_index.core.schema import TextNode, NodeWithScore
from llama_index.retrievers.bm25 import BM25Retriever
from llama_index.llms.ollama import Ollama


# For QA generation
import uuid
import warnings
from typing import Dict, List, Optional, Tuple

from llama_index.core.bridge.pydantic import BaseModel
from llama_index.core.llms.utils import LLM
from llama_index.core.schema import MetadataMode, TextNode
from llama_index.core.settings import Settings


# 2. Connect to Ollama Server

In [2]:
client = Client(
  host='http://localhost:11434',
)

# 3. Ingestion

In [3]:
# Path to the dataset folder
DATASET_PATH = r'/home/jeryl4913/lto_rag_reviewer/notebooks/extracted_text.json'

def get_text_and_metadata(input_path):
    """Load text and metadata from a file and perform chunking."""
    with open(input_path, "r", encoding="utf-8") as f:
        extracted_data = json.load(f)

    texts = []
    metadata = []

    for entry in tqdm(extracted_data, desc="Processing entries"):
        text = entry["text"]
        source_metadata = {
            "source": entry["source"],
            "folder": entry["folder"],
            "file_name": entry["file_name"],
            "page": entry["page"],
            "title": entry["title"],
            "url": entry["url"]
        }
        texts.append(text)
        metadata.append(source_metadata)

    return texts, metadata


In [4]:
docs, metadatas = get_text_and_metadata(DATASET_PATH)

Processing entries: 100%|██████████| 7815/7815 [00:00<00:00, 993106.88it/s]




In [5]:
documents = [Document(text=docs[t], metadata=metadatas[t]) for t in range(len(docs))]
splitter = TokenTextSplitter(
    chunk_size=512,
    chunk_overlap=20,
    separator=" ",
)
nodes = splitter.get_nodes_from_documents(documents)

In [6]:
print(nodes[0])

Node ID: 56b06131-42f0-4483-94b9-1cc0d46d62e9
Text: Land Transportation Office (LTO) The concept of land
transportation system in the Philippines started when our ancestors
invented the means of locomotion with the animals in moving people and
goods from place to place. Although the means of land transportation
during the early days were not as sophisticated as the modern vehicles
of today and th...


# 4. QA Generation
Used for Retreival Evaluation:
1. Get all document nodes
2. Generate question for each node (using llama 3.2)
3. Question Answer pairs: Generated Question, Node text

In [7]:
"""Common utils for embeddings."""

import json
import re
import uuid
import warnings
from typing import Dict, List, Optional, Tuple

from llama_index.core.bridge.pydantic import BaseModel
from llama_index.core.llms.utils import LLM
from llama_index.core.schema import MetadataMode, TextNode
from llama_index.core.settings import Settings
from tqdm import tqdm


class EmbeddingQAFinetuneDataset(BaseModel):
    """Embedding QA Finetuning Dataset.

    Args:
        queries (Dict[str, str]): Dict id -> query.
        corpus (Dict[str, str]): Dict id -> string.
        relevant_docs (Dict[str, List[str]]): Dict query id -> list of doc ids.

    """

    queries: Dict[str, str]  # dict id -> query
    corpus: Dict[str, str]  # dict id -> string
    relevant_docs: Dict[str, List[str]]  # query id -> list of doc ids
    mode: str = "text"

    @property
    def query_docid_pairs(self) -> List[Tuple[str, List[str]]]:
        """Get query, relevant doc ids."""
        return [
            (query, self.relevant_docs[query_id])
            for query_id, query in self.queries.items()
        ]

    def save_json(self, path: str) -> None:
        """Save json."""
        with open(path, "w") as f:
            json.dump(self.model_dump(), f, indent=4)

    @classmethod
    def from_json(cls, path: str) -> "EmbeddingQAFinetuneDataset":
        """Load json."""
        with open(path) as f:
            data = json.load(f)
        return cls(**data)


DEFAULT_QA_GENERATE_PROMPT_TMPL = """
Context information is below.

---------------------
{context_str}
---------------------

Based on the above context, generate {num_questions_per_chunk} diverse and specific questions that focus on key details and concepts. \
Do not include prefatory phrases like 'Here are some questions'. Directly list the questions in numerical order.
"""


# generate queries as a convenience function
def generate_qa_embedding_pairs(
    nodes: List[TextNode],
    llm: Optional[LLM] = None,
    qa_generate_prompt_tmpl: str = DEFAULT_QA_GENERATE_PROMPT_TMPL,
    num_questions_per_chunk: int = 2,
) -> EmbeddingQAFinetuneDataset:
    """Generate examples given a set of nodes."""
    llm = llm or Settings.llm
    node_dict = {
        node.node_id: node.get_content(metadata_mode=MetadataMode.NONE)
        for node in nodes
    }

    queries = {}
    relevant_docs = {}
    for node_id, text in tqdm(node_dict.items()):
        query = qa_generate_prompt_tmpl.format(
            context_str=text, num_questions_per_chunk=num_questions_per_chunk
        )
        response = llm.complete(query)

        result = str(response).strip().split("\n")
        questions = [
            re.sub(r"^\d+[\).\s]", "", question).strip() for question in result
        ]
        questions = [
            question
            for question in questions
            if len(question) > 0 and question.endswith("?") and "question" not in question.lower()
        ][:num_questions_per_chunk]

        num_questions_generated = len(questions)
        if num_questions_generated < num_questions_per_chunk:
            warnings.warn(
                f"Fewer questions generated ({num_questions_generated}) "
                f"than requested ({num_questions_per_chunk}) for node {node_id}."
            )

        for question in questions:
            question_id = str(uuid.uuid4())
            queries[question_id] = question
            relevant_docs[question_id] = [node_id]

    # construct dataset
    return EmbeddingQAFinetuneDataset(
        queries=queries, corpus=node_dict, relevant_docs=relevant_docs
    )


In [8]:
import os

access_token = os.popen("echo $OPENAI_API_KEY").read().strip()


if access_token is None:
    raise ValueError("OpenAI API key not found in environment.")

#print(access_token)


In [9]:
from llama_index.core.evaluation import EmbeddingQAFinetuneDataset
from llama_index.llms.ollama import Ollama
from llama_index.llms.openai import OpenAI

# Instantiate LLM
ollama_llm = Ollama(model="llama3.3", request_timeout=300)

# llm = OpenAI(model="gpt-4", access_token=access_token)


# Example usage of the generate_qa_embedding_pairs
qa_dataset = generate_qa_embedding_pairs(
    nodes[:60], llm=ollama_llm, num_questions_per_chunk=1
)

queries = qa_dataset.queries.values()
print(list(queries)[2])

# Save the dataset
qa_dataset.save_json("pg_eval_dataset_FinalBase.json")
print("Successfully created QA dataset")


100%|██████████| 60/60 [16:55<00:00, 16.93s/it]

What specific event or action led to the reorganization of the Land Transportation Commission in 1979, resulting in its renaming to the Bureau of Land Transportation?
Successfully created QA dataset





In [10]:
# Load
qa_dataset = EmbeddingQAFinetuneDataset.from_json("pg_eval_dataset_FinalBase.json")
print("Successfully loaded QA dataset")

Successfully loaded QA dataset


# 5. Embedding and Retrieval

## 5.A. Dense via FAISS

In [11]:
def generate_embeddings(nodes, client, model):
    # Generate embeddings for documents using Ollama
    for doc in tqdm(nodes):
        response = client.embeddings(prompt=doc.text, model=model)
        doc.embedding = response["embedding"]
    return nodes

In [12]:
class FaissIndexer:
    """
    Faiss-based indexer for efficient similarity search using inner-product (cosine) similarity.

    This class handles the creation and management of a FAISS index from node embeddings.
    
    :ivar faiss_index: The FAISS index for storing and querying embeddings.
    :vartype faiss_index: faiss.IndexFlatIP
    :ivar embedding_dim: Dimensionality of the embeddings.
    :vartype embedding_dim: int
    """

    def __init__(self):
        """
        Initialize the FaissIndexer class.

        :ivar faiss_index: The FAISS index, initialized as None.
        :ivar embedding_dim: The dimension of embeddings, initialized as None.
        """
        self.faiss_index = None
        self.embedding_dim = None

    def normalize_embeddings(self, embeddings):
        """
        Normalize embeddings to have unit L2 norm.

        :param embeddings: Array of embeddings to normalize.
        :type embeddings: np.ndarray
        :return: Normalized embeddings.
        :rtype: np.ndarray
        """
        return embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)

    def build_index(self, nodes):
        """
        Build the FAISS index from a list of nodes containing embeddings.

        :param nodes: List of nodes, where each node contains an `embedding` attribute.
        :type nodes: list
        :raises ValueError: If the nodes list is empty or embeddings are inconsistent.
        """
        if not nodes:
            raise ValueError("Nodes list cannot be empty.")
        
        embeddings = np.array([np.array(node.embedding) for node in nodes])
        normalized_embeddings = self.normalize_embeddings(embeddings)

        self.embedding_dim = normalized_embeddings[0].shape[0]
        self.faiss_index = faiss.IndexFlatIP(self.embedding_dim)  # Inner-product similarity
        self.faiss_index.add(normalized_embeddings)

    def get_index(self):
        """
        Get the FAISS index instance.

        :return: The FAISS index used for similarity search.
        :rtype: faiss.IndexFlatIP
        :raises ValueError: If the index has not been built.
        """
        if self.faiss_index is None:
            raise ValueError("Index has not been built yet. Call 'build_index' first.")
        return self.faiss_index

In [13]:
class FAISSVectorStoreRetriever(BaseRetriever):
    def __init__(self, faiss_index, documents):
        """
        Initialize the FAISS retriever.
        :param faiss_index: The FAISS index containing precomputed embeddings.
        :param documents: List of document chunks.
        :param embeddings: Precomputed embeddings corresponding to the document chunks.
        """
        self.faiss_index = faiss_index
        self.documents = documents

    def _retrieve(self, query_embedding, top_k=5):
        """
        Retrieve the top-k nearest neighbors using the FAISS index.
        :param query_embedding: The embedding of the query.
        :param top_k: Number of top results to retrieve.
        """

        norm_query_embedding = np.array([query_embedding])
        norm_query_embedding /= np.linalg.norm(norm_query_embedding, axis=1, keepdims=True)

        distances, indices = self.faiss_index.search(norm_query_embedding, top_k)
        retrieved_docs = [
            NodeWithScore(node=self.documents[idx], score=1 - dist)
            for idx, dist in zip(indices[0], distances[0])
            if idx != -1
        ]
        return retrieved_docs

## 5.B. FAISS Retreival Evaluator

In [14]:
from typing import List, Tuple, Any
from pydantic import Field, ConfigDict
from llama_index.core.evaluation.retrieval.base import (
    BaseRetrievalEvaluator,
    RetrievalEvalMode,
    RetrievalEvalResult
)
from llama_index.core.evaluation.retrieval.metrics import resolve_metrics
from llama_index.core.response.notebook_utils import display_source_node

class FAISSRetrievalEvaluator(BaseRetrievalEvaluator):
    retriever: "FAISSVectorStoreRetriever" = Field(..., description="FAISS Retriever instance")
    Print_Results: bool = Field(default=False, description="Whether to print retrieved results")

    model_config = ConfigDict(extra="forbid")  

    @classmethod
    def from_metric_names(
        cls,
        metric_names: List[str],
        retriever: "FAISSVectorStoreRetriever",
        Print_Results: bool = False,
        **kwargs: Any,
    ) -> "FAISSRetrievalEvaluator":
        metric_types = resolve_metrics(metric_names)
        metrics = [metric() for metric in metric_types]
        return cls(metrics=metrics, retriever=retriever, Print_Results=Print_Results, **kwargs)

    async def _aget_retrieved_ids_and_texts(
        self,
        query: str,
        mode: RetrievalEvalMode = RetrievalEvalMode.TEXT,
    ) -> Tuple[List[str], List[str]]:
        response = client.embeddings(prompt=query, model="mxbai-embed-large")
        query_embedding = response["embedding"]
        retrieved_docs = self.retriever._retrieve(query_embedding, top_k=15)
        
        # Conditionally print results
        if self.Print_Results:
            for doc in retrieved_docs:
                display_source_node(doc, source_length=1000)  # Directly use doc
        
        retrieved_ids = [doc.id_ for doc in retrieved_docs]  # doc, not doc.node
        retrieved_texts = [doc.text for doc in retrieved_docs]
        return retrieved_ids, retrieved_texts

    async def aevaluate(
        self,
        query: str,
        expected_ids: List[str],
        expected_texts: List[str] = [],
        mode: RetrievalEvalMode = RetrievalEvalMode.TEXT,
        **kwargs: Any,
    ) -> RetrievalEvalResult:
        retrieved_ids, retrieved_texts = await self._aget_retrieved_ids_and_texts(query, mode)
        metric_dict = {}
        
        for metric in self.metrics:
            # Call compute instead of evaluate
            result = metric.compute(
                query=query,
                expected_ids=expected_ids,
                retrieved_ids=retrieved_ids,
                expected_texts=expected_texts,
                retrieved_texts=retrieved_texts,
                **kwargs
            )
            metric_dict[metric.metric_name] = result  # Store the whole RetrievalMetricResult object
            print(f"{metric.metric_name}: {result.score}")  # Print each metric result
         
        # Return RetrievalEvalResult with all required fields
        return RetrievalEvalResult(
            query=query,
            retrieved_ids=retrieved_ids,
            retrieved_texts=retrieved_texts,  # Include retrieved texts
            expected_ids=expected_ids,         # Pass expected ids
            expected_texts=expected_texts,      # Include expected texts
            metric_dict=metric_dict            # Pass full RetrievalMetricResult objects
        )


In [15]:
nodes_embed = generate_embeddings(nodes,  client, "mxbai-embed-large")

100%|██████████| 12302/12302 [10:23<00:00, 19.73it/s]


## 5.C. FAISS Retreival Evaluation

In [16]:
indexer = FaissIndexer()

indexer.build_index(nodes)  
faiss_index = indexer.get_index()

retriever = FAISSVectorStoreRetriever(faiss_index=indexer.get_index(), documents=nodes_embed)

metrics = ["hit_rate", "mrr", "precision", "recall", "ap", "ndcg"]
evaluator = FAISSRetrievalEvaluator.from_metric_names(
    metric_names=metrics,
    retriever=retriever,
    Print_Results=True
)


sample_id, sample_query = list(qa_dataset.queries.items())[1]
sample_expected = qa_dataset.relevant_docs[sample_id]

print("Sample Problem:")
print(f"sample id: {sample_id}, text: {sample_query}")
print(f"sample exp: {sample_expected}")

print("\nRetrieval results:")

result = await evaluator.aevaluate(
    query=sample_query,
    expected_ids=sample_expected,
)

print("\n")
print(result)

Sample Problem:
sample id: 6a9ec60c-a926-4d8a-aa4e-53230252f108, text: What significant event occurred on June 20, 1964, that led to the abolition of the Motor Vehicle Office and the creation of a new regulatory body for land transportation in the Philippines?
sample exp: ['f26592d6-6c5d-4db0-ab65-48c70dfe7fd0']

Retrieval results:


**Node ID:** f26592d6-6c5d-4db0-ab65-48c70dfe7fd0<br>**Similarity:** 0.20510762929916382<br>**Text:** Motor Vehicles. The Chief of the Division was called the Superintendent of Division of Motor Vehicles. Act No. 3992 was amended by Commonwealth Act Numbers 123, 548, 556, 652 and Republic Act Numbers 314, 587, and 2383. On June 2, 1945, Department Order No. 4 was issued by the Department of Public Works and Highways reorganizing the Division. This took effect after the liberation of the Philippines from the Japanese invasion. In 1947, Executive Order No. 94 was promulgated reorganizing the different executive departments, bureaus and offices. Under Section 82 of this E.O., the Division of Motor Vehicles was upgraded into the Motor Vehicles Office (MVO) with the category of a Bureau. The Chief of the MVO enjoyed the rights and privileges of a Bureau Director. During the fifties and early sixties, our country started undergoing rapid economic development. Industrialization advanced and as a consequence, more and better roads were constructed. The Filipino then realized the need for mo...<br>

**Node ID:** 13e2e16a-b2eb-4bc6-905a-3c6ea56d473c<br>**Similarity:** 0.2372511625289917<br>**Text:** On June 2, 1980, Batas Pambansa Bilang 43 was passed providing for the issuance of permanent number plates to owners of motor vehicles and trailers, amending for the purpose section 17 of RA 4136. On March 20, 1985, Executive Order 1011 was promulgated. This Executive Order abolished the Board of Transportation and the Bureau of Land Transportation and established the Land Transportation Commission. The defunct BLT and BOT were merged and their powers, functions and responsibilities were transferred to the Land Transportation Commission (LTC) headed by a Chairman, assisted by four Commissioners. The LTC was tasked to perform functions such as registering motor vehicles, licensing of drivers and conductors, franchising of public utility vehicles and enforcing traffic rules and regulations and adjudicating apprehensions. On January 30, 1987, the Land Transportation Commission was abolished and two offices were created, namely: The Land Transportation Office (LTO) and the Land Transpor...<br>

**Node ID:** 56b06131-42f0-4483-94b9-1cc0d46d62e9<br>**Similarity:** 0.23808741569519043<br>**Text:** Land Transportation Office (LTO) The concept of land transportation system in the Philippines started when our ancestors invented the means of locomotion with the animals in moving people and goods from place to place. Although the means of land transportation during the early days were not as sophisticated as the modern vehicles of today and the roads not as well constructed, the early Filipinos also observed some laws to govern their mobility. These laws were as informal and simple as specifying which animal could be used for certain purposes but these showed that our ancestors had already felt the need to regulate the transportation system. As early as 1910, there were already few motor vehicles seen operating in public highways in Manila and suburbs. Better means of transportation were invented and introduced in the country. Gradually, the Filipinos learned to use cars, trucks, jeeps and other types of vehicles. The means of transportation became better and powerful and the laws...<br>

**Node ID:** 32ad32da-b38e-451f-a267-05f8e69c4bb2<br>**Similarity:** 0.2450343370437622<br>**Text:** so in 1964, Republic Act No. 4136 or the Land Transportation and Traffic Code came to life to compile all the laws relative to transportation and traffic rules, and to create a land transportation commission.Legacy and Ongoing RelevanceAs we navigate in the present, it?s essential to recognize the enduring impact of Republic Act No. 3045. The legislation, born in a time of emerging automotive culture, set the stage for the<br>

**Node ID:** bee21284-e441-4e1d-9f4f-d234146c47d8<br>**Similarity:** 0.2503790855407715<br>**Text:** evolution of motor vehicle governance in the Philippines. Its legacy endures in the ongoing efforts to adapt regulations to the changing dynamics of modern transportation.Republic Act No. 3045 in the Philippines was replaced by subsequent legislation due to the evolving needs and complexities of motor vehicle governance. Laws related to traffic, vehicle registration, licensing, and safety measures must adapt to changes in technology, transportation infrastructure, and societal demands. As the automotive landscape evolved, the government deemed it necessary to enact new laws that could address emerging issues, incorporate advancements, and enhance regulatory frameworks.In 1933, Act No. 3992, known as the Revised Motor Vehicle Law, was enacted, amending and updating the provisions of Republic Act No. 3045. This replacement reflected the government?s commitment to staying abreast of developments in the automotive sector and ensuring that regulations remained relevant and effective enou...<br>

**Node ID:** f65efe8d-409f-4cba-b3f7-5a97a69a613a<br>**Similarity:** 0.2647451162338257<br>**Text:** information about the regulations and guidelines established by the legislation which includes control over the registration and operation of motor vehicles; the licensing of owners, dealers, and chauffeurs; the carrying of lights on all vehicles; and all similar matters.Amendments and EvolutionRepublic Act No. 3992 didn?t just get enacted. For over 30 years, the law existed as aguideline for transport regulation.Over the years though, several amendments were introduced to refine and adapt the legislation to the changing needs of society. Some amendments include Commonwealth Act Numbers 123, 548, 556, and 652, along with Republic Act Numbers 314, 587, and 2383, which played crucial roles in shaping the law. These amendments addressed emerging challenges, technological advancements, and the growing complexities of the motor vehicle landscape before it was repealed by Republic Act 4136, otherwise known as the Land Transportation and Traffic Code in June 1964.Commonwealth Act Numbers 1...<br>

**Node ID:** 25291fac-e900-41ab-bbb5-c0a3d0131d83<br>**Similarity:** 0.26883041858673096<br>**Text:** the Philippines. While both played a crucial role in the regulation of motor vehicles and traffic in different eras, there are some slight differences between the new and the old law. Let?s compare the key features of these two acts:Key Information about Republic Act No. 3992Enactment Date: January 1, 1933.Amendment of: Republic Act No. 3045Renaming of Division: The Automobile Division was renamed the Division of Motor Vehicles.Leadership Title: The head of the Division of Motor Vehicles was referred to as the Superintendent.Amendments: Republic Act No. 3992 underwent amendments through Commonwealth Act Numbers 123, 548, 556, 652, and Republic Act Numbers 314, 587, and 2383.Key Information about Republic Act No. 4136Enactment Date: June 20, 1964.Scope: Known as the ?Land Transportation and Traffic Code,? RA 4136 is a comprehensive law covering various aspects of land transportation, traffic rules, and vehicle registration.Creation of Agencies: This act led to the establishment of ke...<br>

**Node ID:** fd1c5565-08d2-4067-9ba8-13c292d99053<br>**Similarity:** 0.274780809879303<br>**Text:** Motor Vehicles reflected a recognition of the need for more nuanced governance.Foundation for Ongoing RegulationsRA 3045 laid the foundation for subsequent regulations and adjustments in response to the evolving dynamics of motor vehicle use. Its principles continue to influence and guide contemporary efforts to ensure road safety, efficient traffic management, and responsible vehicle operation.Enduring LegacyThe enduring legacy of RA 3045 is reflected in the enduring commitment to structured and safe road governance. Its principles resonate in the ongoing efforts to address new challenges brought about by technological advancements and changes in transportation patterns.More than these benefits, the Republic Act No. 3045 has played a pivotal role in shaping the regulatory landscape of motor vehicles in the Philippines. Its influence is evident in the organized traffic systems, enhanced safety measures, and the establishment of foundational structures that persist in today?s road go...<br>

**Node ID:** e69fb08b-63ac-486c-90fd-04d733f87d69<br>**Similarity:** 0.2754054069519043<br>**Text:** as a significant milestone that laid the groundwork for the current transport laws in the Philippines. Enacted in 1926, RA 3045 served as a comprehensive compilation and amendment of existing laws governing motor vehicle traffic, registration, licensing, and safety measures of old.It was the backdrop of a burgeoning automobile culture in 1926, right when the Philippine government first recognized the need for a consolidated legal framework to regulate motor vehicle activities that then existing RA 2159 (?Motor Vehicle Registration Act of<br>

**Node ID:** cc874dd7-ee48-4ddc-b2d9-a7ac9ce3b521<br>**Similarity:** 0.27756690979003906<br>**Text:** RA 3992: The Revised Motor Vehicle LawRepublic Act No. 3992, also known as ?The Revised Motor Vehicle Law,? holds a significant place in the history of the Philippines?Land Transportation Office (LTO)and its regulations. Enacted on January 1, 1933, this legislation marked a crucial amendment to Republic Act No. 3045, reshaping the governance of motor vehicles within the archipelago.In here, we will explore the key provisions, changes, and subsequent amendments that have shaped the landscape of Philippine traffic laws over the years.Table of ContentsToggleThe Birth of Republic Act No. 3992Key Provisions of RA 3992Division of Motor VehiclesLicensing and RegistrationTraffic Rules and RegulationsPenalties and EnforcementAmendments and RevisionsRelationship to Previous LegislationAmendments and EvolutionCommonwealth Act Numbers 123, 548, 556, and 652Republic Act Numbers 314, 587, and 2383Republic Act 4136Republic Act 3992 vs Republic Act 4136SummaryThe Birth of Republic Act No. 3992At th...<br>

**Node ID:** c019de1a-ceb6-49ef-8f2d-fc7977e6d354<br>**Similarity:** 0.2951927185058594<br>**Text:** as far as they apply, the registration and operation of motor vehicles and the licensing of owners, dealers,conductors, drivers and similar matters.? To effectively carry out its mandate, the Land Transportation Commission later became the Land Transportation Office (LTO) and established numerousLTO officesin various parts of the country.Republic Act 3992 vs Republic Act 4136Republic Act No. 3992 and Republic Act No. 4136 are both significant pieces of legislation in<br>

**Node ID:** f77d24d2-bd2a-41ab-ae5e-bbc5525265d8<br>**Similarity:** 0.29670000076293945<br>**Text:** A\\ ~ f. . -  -  --- . - --- . ....,  - . q:~qf(f{I ,1 Republic of the Philippines Depaitment of Transportation ,  . ' LAND TRANSPORTATION OFFf<t,E --'-.. ~ ~ '] ~ .. The Implementing Rules and Regulations of Republic Act No. 11698 otherwise known as the ""Vintage Vehicle Regulation Act" WHEREAS, Article XIV, Section 14 of the 1987 Constitution declares that the State shall foster the preservation, enrichment, and dynamic evolution of a Filipino culture based on the principle of unity in diversity in a climate of free artistic and intellectual expression; WHEREAS, Section 16 of the same Article of the Constitution also affirms that all the country's artistic and historic wealth constitutes the cultural treasure of the nation and shall be under the protection of the State which may regulate its dispo sition ~ WHEREAS, Republic Act No. 11698 otherwise known as the "Vintage Vehicle Regulation Act" (Act) lapsed into law on 15 April 2022 to protect and promote the nation 's heritage by e...<br>

**Node ID:** 2ded8646-b857-45b8-8084-f0dd5613e7df<br>**Similarity:** 0.2969343066215515<br>**Text:** REPUBLIC OF THE PHILIPPINES. DEPARTMENT OF TRANSPORTATION LAND TRANSPORTATION OFFICE mat Addo Honan gaat  Webs: wn oianush MEMORANDUM To All Regional Directors All Assistant Regional Directors All Regional Operations Chiefs All DistricvExtension Office Chiefs  All Concerned DATE 12 April 2023 SUBJECT PILOT IMPLEMENTATION OF VINTAGE VEHICLE REGISTRATION |in compliance to the Implementing Rules and Regulations (IRR) of Republic Act No. 11698 or the Vintage Vehicle Regulation Act, registration of vintage vehicle shall be initially implemented through a pilot run to be conducted at the New Registration Unit, Operations Division ~ Central Office (CO) commencing on 17 April 2023 in preparation for the eventual nationwide implementation, Related thereto and upon determination that the law may seamlessly be implemented nationwide, a Memorandum shall be consequently issued to such effect. For the time being, please advise owners/stakeholders that they may submit their application for registr...<br>

**Node ID:** e8d175c3-a14c-49a4-9583-743d09240fbd<br>**Similarity:** 0.2983020544052124<br>**Text:** . . REPUBLIC OF THE PHILIPPINES DEPARTMENT OF TRANSPORTATION LAND TRANSPORTATION OFFI East Avenue, Quezon City E-mail Address: ltm:mrri!mm@lt~ .. P -.!irlffi oWebsite: www.lto.gov.ph MEMORANDUM TO: DATE: SUBJECT: All Regional Directors All Assistant Regional Directors All Heads of District/Extension Offices, and, All Employees Concerned of the L TO 11 November 2022 ; U.P .. LAW CENTER -w J lQHtCE of lM NA.llONAL ADMINISlRATlVE ilfGfSH R'. i Adm nisl 31rve Rules   nd Re9u1a11on1 I ' ' RESOLUTION OF BACKLOGS IN MOTOR VEHICLE (MV) RENEWAL REGISTRATION AND MOTOR VEHICLE RELATED MISCELLANEOUS TRANSACTIONS WHEREAS, in compliance with the President's and the Department of Transportation's directive to go full speed ahead in public service delivery and promote an agile bureaucracy that is responsive to the needs of the public.: WHEREAS, it is the declared policy of the State to promote integrity, accountability, proper management of public affairs and public property as well as to establish...<br>

**Node ID:** 48e66f31-a2e1-4208-91f5-4b05c74e941e<br>**Similarity:** 0.30049097537994385<br>**Text:** 1912) cannot cover. Republic Act No. 3045 was born out of this necessity, aiming to streamline laws related to not only motor vehicle traffic, but also motor vehicle registration, licensing, and safety measures.Key Provisions and AmendmentsRA 3045, though old, addressed various facets of motor vehicle governance, including regulations for traffic on Philippine roads, the registration process for vehicles, licensing requirements for operators, and guidelines for nighttime driving. Notably, it paved the way for the establishment of the Automobile Division, initially under the supervision of the Bureau of Public Works.Republic Act No. 3045 also holds paramount importance in the Philippines for several reasons, as it has significantly shaped the regulation of motor vehicles and road safety in the country.Here are some of the key points highlighting the significance of RA 3045:Comprehensive RegulationRA 3045 served as a comprehensive compilation of laws related to motor vehicle traffic, ...<br>

hit_rate: 1.0
mrr: 1.0
precision: 0.06666666666666667
recall: 1.0
ap: 1.0
ndcg: 1.0


Query: What significant event occurred on June 20, 1964, that led to the abolition of the Motor Vehicle Office and the creation of a new regulatory body for land transportation in the Philippines?
Metrics: {'hit_rate': 1.0, 'mrr': 1.0, 'precision': 0.06666666666666667, 'recall': 1.0, 'ap': 1.0, 'ndcg': 1.0}



In [17]:
  
evaluator = FAISSRetrievalEvaluator.from_metric_names(
    metric_names=metrics,
    retriever=retriever,
    Print_Results=False
)

eval_results = await evaluator.aevaluate_dataset(qa_dataset)


def display_results(name, eval_results):
    """Display results from evaluate."""

    metric_dicts = []
    for eval_result in eval_results:
        metric_dict = eval_result.metric_vals_dict
        metric_dicts.append(metric_dict)

    full_df = pd.DataFrame(metric_dicts)

    columns = {
        "retrievers": [name],
        **{k: [full_df[k].mean()] for k in metrics},
    }

    metric_df = pd.DataFrame(columns)

    return metric_df


display_results("top-2 eval", eval_results)

hit_rate: 1.0
mrr: 1.0
precision: 0.06666666666666667
recall: 1.0
ap: 1.0
ndcg: 1.0
hit_rate: 1.0
mrr: 1.0
precision: 0.06666666666666667
recall: 1.0
ap: 1.0
ndcg: 1.0
hit_rate: 1.0
mrr: 1.0
precision: 0.06666666666666667
recall: 1.0
ap: 1.0
ndcg: 1.0
hit_rate: 1.0
mrr: 1.0
precision: 0.06666666666666667
recall: 1.0
ap: 1.0
ndcg: 1.0
hit_rate: 0.0
mrr: 0.0
precision: 0.0
recall: 0.0
ap: 0.0
ndcg: 0.0
hit_rate: 0.0
mrr: 0.0
precision: 0.0
recall: 0.0
ap: 0.0
ndcg: 0.0
hit_rate: 1.0
mrr: 1.0
precision: 0.06666666666666667
recall: 1.0
ap: 1.0
ndcg: 1.0
hit_rate: 1.0
mrr: 1.0
precision: 0.06666666666666667
recall: 1.0
ap: 1.0
ndcg: 1.0
hit_rate: 1.0
mrr: 0.25
precision: 0.06666666666666667
recall: 1.0
ap: 0.25
ndcg: 0.43067655807339306
hit_rate: 1.0
mrr: 0.25
precision: 0.06666666666666667
recall: 1.0
ap: 0.25
ndcg: 0.43067655807339306
hit_rate: 0.0
mrr: 0.0
precision: 0.0
recall: 0.0
ap: 0.0
ndcg: 0.0
hit_rate: 0.0
mrr: 0.0
precision: 0.0
recall: 0.0
ap: 0.0
ndcg: 0.0
hit_rate: 1.0
mrr: 0

Unnamed: 0,retrievers,hit_rate,mrr,precision,recall,ap,ndcg
0,top-2 eval,0.716667,0.402428,0.047778,0.716667,0.402428,0.477407


In [18]:
#indexing
index = FaissIndexer()
index.build_index(nodes_embed)
faiss_index = index.get_index()

faiss_retriever = FAISSVectorStoreRetriever(faiss_index=faiss_index,documents=nodes_embed)

## 5.D Sparse Embedding via BM25

In [19]:
bm25_retriever = BM25Retriever.from_defaults(
   nodes=nodes,
   similarity_top_k=5,
   stemmer=Stemmer.Stemmer("english"),
   language="english",
)

## 5.E Hybrid Retrieval via Reciprocal Rank

In [20]:
def hybrid_embedding(results: dict, top_k: int):
    x = QueryFusionRetriever
    ranked_results = QueryFusionRetriever._reciprocal_rerank_fusion(x, results)
    return ranked_results[:top_k]

## 5.F Hybrid Retrieval Evaluator

In [21]:
from typing import List, Tuple, Any, Dict
from pydantic import Field, ConfigDict
from llama_index.core.evaluation.retrieval.base import (
    BaseRetrievalEvaluator,
    RetrievalEvalMode,
    RetrievalEvalResult
)
from llama_index.core.evaluation.retrieval.metrics import resolve_metrics
from llama_index.core.response.notebook_utils import display_source_node

class HybridRetrievalEvaluator(BaseRetrievalEvaluator):
    faiss_retriever: "FAISSVectorStoreRetriever" = Field(..., description="FAISS Retriever instance")
    bm25_retriever: "BM25Retriever" = Field(..., description="BM25 Retriever instance")
    Print_Results: bool = Field(default=False, description="Whether to print retrieved results")

    model_config = ConfigDict(extra="forbid")  

    @classmethod
    def from_metric_names(
        cls,
        metric_names: List[str],
        faiss_retriever: "FAISSVectorStoreRetriever",
        bm25_retriever: "BM25Retriever",
        Print_Results: bool = False,
        **kwargs: Any,
    ) -> "HybridRetrievalEvaluator":
        metric_types = resolve_metrics(metric_names)
        metrics = [metric() for metric in metric_types]
        return cls(metrics=metrics, faiss_retriever=faiss_retriever, bm25_retriever=bm25_retriever, Print_Results=Print_Results, **kwargs)

    async def _aget_retrieved_ids_and_texts(
        self,
        query: str,
        mode: RetrievalEvalMode = RetrievalEvalMode.TEXT,
        top_k: int = 15
    ) -> Tuple[List[str], List[str]]:
        response = client.embeddings(prompt=query, model="mxbai-embed-large")
        query_embedding = response["embedding"]

        # FAISS retrieval
        faiss_docs = self.faiss_retriever._retrieve(query_embedding, top_k=top_k)

        # BM25 retrieval
        bm25_docs = self.bm25_retriever.retrieve(query)

        # Combine results
        results = {'faiss': faiss_docs, 'bm25': bm25_docs}
        ranked_results = QueryFusionRetriever._reciprocal_rerank_fusion(None, results)
        ranked_results = ranked_results[:top_k]

        # Optionally display results
        if self.Print_Results:
            for doc in ranked_results:
                display_source_node(doc, source_length=1000)

        retrieved_ids = [doc.id_ for doc in ranked_results]
        retrieved_texts = [doc.text for doc in ranked_results]
        return retrieved_ids, retrieved_texts

    async def aevaluate(
        self,
        query: str,
        expected_ids: List[str],
        expected_texts: List[str] = [],
        mode: RetrievalEvalMode = RetrievalEvalMode.TEXT,
        **kwargs: Any,
    ) -> RetrievalEvalResult:
        retrieved_ids, retrieved_texts = await self._aget_retrieved_ids_and_texts(query, mode)
        metric_dict = {}
        
        for metric in self.metrics:
            # Call compute instead of evaluate
            result = metric.compute(
                query=query,
                expected_ids=expected_ids,
                retrieved_ids=retrieved_ids,
                expected_texts=expected_texts,
                retrieved_texts=retrieved_texts,
                **kwargs
            )
            metric_dict[metric.metric_name] = result  # Store the whole RetrievalMetricResult object
            print(f"{metric.metric_name}: {result.score}")  # Print each metric result
        
        # Return RetrievalEvalResult with all required fields
        return RetrievalEvalResult(
            query=query,
            retrieved_ids=retrieved_ids,
            retrieved_texts=retrieved_texts,
            expected_ids=expected_ids,
            expected_texts=expected_texts,
            metric_dict=metric_dict
        )


## 5.G BM25 Retreival Evaluation

In [22]:
evaluator = HybridRetrievalEvaluator.from_metric_names(
    metric_names=metrics,
    faiss_retriever=faiss_retriever,
    bm25_retriever=bm25_retriever,
    Print_Results=True  # Toggle as needed
)

sample_id, sample_query = list(qa_dataset.queries.items())[1]
sample_expected = qa_dataset.relevant_docs[sample_id]

print("Sample Problem:")
print(f"sample id: {sample_id}, text: {sample_query}")
print(f"sample exp: {sample_expected}")

print("\nRetrieval results:")

result = await evaluator.aevaluate(
    query=sample_query,
    expected_ids=sample_expected
)

print("\n")
print(result)

Sample Problem:
sample id: 6a9ec60c-a926-4d8a-aa4e-53230252f108, text: What significant event occurred on June 20, 1964, that led to the abolition of the Motor Vehicle Office and the creation of a new regulatory body for land transportation in the Philippines?
sample exp: ['f26592d6-6c5d-4db0-ab65-48c70dfe7fd0']

Retrieval results:


**Node ID:** 25291fac-e900-41ab-bbb5-c0a3d0131d83<br>**Similarity:** 0.03137254901960784<br>**Text:** the Philippines. While both played a crucial role in the regulation of motor vehicles and traffic in different eras, there are some slight differences between the new and the old law. Let?s compare the key features of these two acts:Key Information about Republic Act No. 3992Enactment Date: January 1, 1933.Amendment of: Republic Act No. 3045Renaming of Division: The Automobile Division was renamed the Division of Motor Vehicles.Leadership Title: The head of the Division of Motor Vehicles was referred to as the Superintendent.Amendments: Republic Act No. 3992 underwent amendments through Commonwealth Act Numbers 123, 548, 556, 652, and Republic Act Numbers 314, 587, and 2383.Key Information about Republic Act No. 4136Enactment Date: June 20, 1964.Scope: Known as the ?Land Transportation and Traffic Code,? RA 4136 is a comprehensive law covering various aspects of land transportation, traffic rules, and vehicle registration.Creation of Agencies: This act led to the establishment of ke...<br>

**Node ID:** bee21284-e441-4e1d-9f4f-d234146c47d8<br>**Similarity:** 0.030679156908665108<br>**Text:** evolution of motor vehicle governance in the Philippines. Its legacy endures in the ongoing efforts to adapt regulations to the changing dynamics of modern transportation.Republic Act No. 3045 in the Philippines was replaced by subsequent legislation due to the evolving needs and complexities of motor vehicle governance. Laws related to traffic, vehicle registration, licensing, and safety measures must adapt to changes in technology, transportation infrastructure, and societal demands. As the automotive landscape evolved, the government deemed it necessary to enact new laws that could address emerging issues, incorporate advancements, and enhance regulatory frameworks.In 1933, Act No. 3992, known as the Revised Motor Vehicle Law, was enacted, amending and updating the provisions of Republic Act No. 3045. This replacement reflected the government?s commitment to staying abreast of developments in the automotive sector and ensuring that regulations remained relevant and effective enou...<br>

**Node ID:** f65efe8d-409f-4cba-b3f7-5a97a69a613a<br>**Similarity:** 0.030621785881252923<br>**Text:** information about the regulations and guidelines established by the legislation which includes control over the registration and operation of motor vehicles; the licensing of owners, dealers, and chauffeurs; the carrying of lights on all vehicles; and all similar matters.Amendments and EvolutionRepublic Act No. 3992 didn?t just get enacted. For over 30 years, the law existed as aguideline for transport regulation.Over the years though, several amendments were introduced to refine and adapt the legislation to the changing needs of society. Some amendments include Commonwealth Act Numbers 123, 548, 556, and 652, along with Republic Act Numbers 314, 587, and 2383, which played crucial roles in shaping the law. These amendments addressed emerging challenges, technological advancements, and the growing complexities of the motor vehicle landscape before it was repealed by Republic Act 4136, otherwise known as the Land Transportation and Traffic Code in June 1964.Commonwealth Act Numbers 1...<br>

**Node ID:** f26592d6-6c5d-4db0-ab65-48c70dfe7fd0<br>**Similarity:** 0.029138513513513514<br>**Text:** Motor Vehicles. The Chief of the Division was called the Superintendent of Division of Motor Vehicles. Act No. 3992 was amended by Commonwealth Act Numbers 123, 548, 556, 652 and Republic Act Numbers 314, 587, and 2383. On June 2, 1945, Department Order No. 4 was issued by the Department of Public Works and Highways reorganizing the Division. This took effect after the liberation of the Philippines from the Japanese invasion. In 1947, Executive Order No. 94 was promulgated reorganizing the different executive departments, bureaus and offices. Under Section 82 of this E.O., the Division of Motor Vehicles was upgraded into the Motor Vehicles Office (MVO) with the category of a Bureau. The Chief of the MVO enjoyed the rights and privileges of a Bureau Director. During the fifties and early sixties, our country started undergoing rapid economic development. Industrialization advanced and as a consequence, more and better roads were constructed. The Filipino then realized the need for mo...<br>

**Node ID:** 48e66f31-a2e1-4208-91f5-4b05c74e941e<br>**Similarity:** 0.016666666666666666<br>**Text:** 1912) cannot cover. Republic Act No. 3045 was born out of this necessity, aiming to streamline laws related to not only motor vehicle traffic, but also motor vehicle registration, licensing, and safety measures.Key Provisions and AmendmentsRA 3045, though old, addressed various facets of motor vehicle governance, including regulations for traffic on Philippine roads, the registration process for vehicles, licensing requirements for operators, and guidelines for nighttime driving. Notably, it paved the way for the establishment of the Automobile Division, initially under the supervision of the Bureau of Public Works.Republic Act No. 3045 also holds paramount importance in the Philippines for several reasons, as it has significantly shaped the regulation of motor vehicles and road safety in the country.Here are some of the key points highlighting the significance of RA 3045:Comprehensive RegulationRA 3045 served as a comprehensive compilation of laws related to motor vehicle traffic, ...<br>

**Node ID:** e8d175c3-a14c-49a4-9583-743d09240fbd<br>**Similarity:** 0.01639344262295082<br>**Text:** . . REPUBLIC OF THE PHILIPPINES DEPARTMENT OF TRANSPORTATION LAND TRANSPORTATION OFFI East Avenue, Quezon City E-mail Address: ltm:mrri!mm@lt~ .. P -.!irlffi oWebsite: www.lto.gov.ph MEMORANDUM TO: DATE: SUBJECT: All Regional Directors All Assistant Regional Directors All Heads of District/Extension Offices, and, All Employees Concerned of the L TO 11 November 2022 ; U.P .. LAW CENTER -w J lQHtCE of lM NA.llONAL ADMINISlRATlVE ilfGfSH R'. i Adm nisl 31rve Rules   nd Re9u1a11on1 I ' ' RESOLUTION OF BACKLOGS IN MOTOR VEHICLE (MV) RENEWAL REGISTRATION AND MOTOR VEHICLE RELATED MISCELLANEOUS TRANSACTIONS WHEREAS, in compliance with the President's and the Department of Transportation's directive to go full speed ahead in public service delivery and promote an agile bureaucracy that is responsive to the needs of the public.: WHEREAS, it is the declared policy of the State to promote integrity, accountability, proper management of public affairs and public property as well as to establish...<br>

**Node ID:** 2ded8646-b857-45b8-8084-f0dd5613e7df<br>**Similarity:** 0.016129032258064516<br>**Text:** REPUBLIC OF THE PHILIPPINES. DEPARTMENT OF TRANSPORTATION LAND TRANSPORTATION OFFICE mat Addo Honan gaat  Webs: wn oianush MEMORANDUM To All Regional Directors All Assistant Regional Directors All Regional Operations Chiefs All DistricvExtension Office Chiefs  All Concerned DATE 12 April 2023 SUBJECT PILOT IMPLEMENTATION OF VINTAGE VEHICLE REGISTRATION |in compliance to the Implementing Rules and Regulations (IRR) of Republic Act No. 11698 or the Vintage Vehicle Regulation Act, registration of vintage vehicle shall be initially implemented through a pilot run to be conducted at the New Registration Unit, Operations Division ~ Central Office (CO) commencing on 17 April 2023 in preparation for the eventual nationwide implementation, Related thereto and upon determination that the law may seamlessly be implemented nationwide, a Memorandum shall be consequently issued to such effect. For the time being, please advise owners/stakeholders that they may submit their application for registr...<br>

**Node ID:** f77d24d2-bd2a-41ab-ae5e-bbc5525265d8<br>**Similarity:** 0.015873015873015872<br>**Text:** A\\ ~ f. . -  -  --- . - --- . ....,  - . q:~qf(f{I ,1 Republic of the Philippines Depaitment of Transportation ,  . ' LAND TRANSPORTATION OFFf<t,E --'-.. ~ ~ '] ~ .. The Implementing Rules and Regulations of Republic Act No. 11698 otherwise known as the ""Vintage Vehicle Regulation Act" WHEREAS, Article XIV, Section 14 of the 1987 Constitution declares that the State shall foster the preservation, enrichment, and dynamic evolution of a Filipino culture based on the principle of unity in diversity in a climate of free artistic and intellectual expression; WHEREAS, Section 16 of the same Article of the Constitution also affirms that all the country's artistic and historic wealth constitutes the cultural treasure of the nation and shall be under the protection of the State which may regulate its dispo sition ~ WHEREAS, Republic Act No. 11698 otherwise known as the "Vintage Vehicle Regulation Act" (Act) lapsed into law on 15 April 2022 to protect and promote the nation 's heritage by e...<br>

**Node ID:** bdbb9e32-a35a-4a6c-a608-2eb110e18a86<br>**Similarity:** 0.015873015873015872<br>**Text:** penalties, and the creation of key agencies, providing a more detailed regulatory framework.EvolutionWhile RA 3992 underwent amendments to adapt to changing circumstances, RA 4136 represents a more substantial legislative effort, incorporating a wider array of provisions to address the complexities of modern transportation.SummaryAs a predecessor to the current Land Transportation and Traffic Code (RA 4136), Republic Act No. 3992 has contributed significantly to the regulation of motor vehicles in the Philippines. For over 30 years, RA 3992, along with its amendments, laid the foundation, which later served as the basis for RA 4136 to expand and modernize the regulatory framework, and shape the contemporary landscape of traffic laws in the<br>

**Node ID:** c019de1a-ceb6-49ef-8f2d-fc7977e6d354<br>**Similarity:** 0.015625<br>**Text:** as far as they apply, the registration and operation of motor vehicles and the licensing of owners, dealers,conductors, drivers and similar matters.? To effectively carry out its mandate, the Land Transportation Commission later became the Land Transportation Office (LTO) and established numerousLTO officesin various parts of the country.Republic Act 3992 vs Republic Act 4136Republic Act No. 3992 and Republic Act No. 4136 are both significant pieces of legislation in<br>

**Node ID:** cc874dd7-ee48-4ddc-b2d9-a7ac9ce3b521<br>**Similarity:** 0.015384615384615385<br>**Text:** RA 3992: The Revised Motor Vehicle LawRepublic Act No. 3992, also known as ?The Revised Motor Vehicle Law,? holds a significant place in the history of the Philippines?Land Transportation Office (LTO)and its regulations. Enacted on January 1, 1933, this legislation marked a crucial amendment to Republic Act No. 3045, reshaping the governance of motor vehicles within the archipelago.In here, we will explore the key provisions, changes, and subsequent amendments that have shaped the landscape of Philippine traffic laws over the years.Table of ContentsToggleThe Birth of Republic Act No. 3992Key Provisions of RA 3992Division of Motor VehiclesLicensing and RegistrationTraffic Rules and RegulationsPenalties and EnforcementAmendments and RevisionsRelationship to Previous LegislationAmendments and EvolutionCommonwealth Act Numbers 123, 548, 556, and 652Republic Act Numbers 314, 587, and 2383Republic Act 4136Republic Act 3992 vs Republic Act 4136SummaryThe Birth of Republic Act No. 3992At th...<br>

**Node ID:** e69fb08b-63ac-486c-90fd-04d733f87d69<br>**Similarity:** 0.015151515151515152<br>**Text:** as a significant milestone that laid the groundwork for the current transport laws in the Philippines. Enacted in 1926, RA 3045 served as a comprehensive compilation and amendment of existing laws governing motor vehicle traffic, registration, licensing, and safety measures of old.It was the backdrop of a burgeoning automobile culture in 1926, right when the Philippine government first recognized the need for a consolidated legal framework to regulate motor vehicle activities that then existing RA 2159 (?Motor Vehicle Registration Act of<br>

**Node ID:** fd1c5565-08d2-4067-9ba8-13c292d99053<br>**Similarity:** 0.014925373134328358<br>**Text:** Motor Vehicles reflected a recognition of the need for more nuanced governance.Foundation for Ongoing RegulationsRA 3045 laid the foundation for subsequent regulations and adjustments in response to the evolving dynamics of motor vehicle use. Its principles continue to influence and guide contemporary efforts to ensure road safety, efficient traffic management, and responsible vehicle operation.Enduring LegacyThe enduring legacy of RA 3045 is reflected in the enduring commitment to structured and safe road governance. Its principles resonate in the ongoing efforts to address new challenges brought about by technological advancements and changes in transportation patterns.More than these benefits, the Republic Act No. 3045 has played a pivotal role in shaping the regulatory landscape of motor vehicles in the Philippines. Its influence is evident in the organized traffic systems, enhanced safety measures, and the establishment of foundational structures that persist in today?s road go...<br>

**Node ID:** 32ad32da-b38e-451f-a267-05f8e69c4bb2<br>**Similarity:** 0.014084507042253521<br>**Text:** so in 1964, Republic Act No. 4136 or the Land Transportation and Traffic Code came to life to compile all the laws relative to transportation and traffic rules, and to create a land transportation commission.Legacy and Ongoing RelevanceAs we navigate in the present, it?s essential to recognize the enduring impact of Republic Act No. 3045. The legislation, born in a time of emerging automotive culture, set the stage for the<br>

**Node ID:** 56b06131-42f0-4483-94b9-1cc0d46d62e9<br>**Similarity:** 0.013888888888888888<br>**Text:** Land Transportation Office (LTO) The concept of land transportation system in the Philippines started when our ancestors invented the means of locomotion with the animals in moving people and goods from place to place. Although the means of land transportation during the early days were not as sophisticated as the modern vehicles of today and the roads not as well constructed, the early Filipinos also observed some laws to govern their mobility. These laws were as informal and simple as specifying which animal could be used for certain purposes but these showed that our ancestors had already felt the need to regulate the transportation system. As early as 1910, there were already few motor vehicles seen operating in public highways in Manila and suburbs. Better means of transportation were invented and introduced in the country. Gradually, the Filipinos learned to use cars, trucks, jeeps and other types of vehicles. The means of transportation became better and powerful and the laws...<br>

hit_rate: 1.0
mrr: 0.25
precision: 0.06666666666666667
recall: 1.0
ap: 0.25
ndcg: 0.43067655807339306


Query: What significant event occurred on June 20, 1964, that led to the abolition of the Motor Vehicle Office and the creation of a new regulatory body for land transportation in the Philippines?
Metrics: {'hit_rate': 1.0, 'mrr': 0.25, 'precision': 0.06666666666666667, 'recall': 1.0, 'ap': 0.25, 'ndcg': 0.43067655807339306}



In [23]:
evaluator = HybridRetrievalEvaluator.from_metric_names(
    metric_names=metrics,
    faiss_retriever=faiss_retriever,
    bm25_retriever=bm25_retriever,
    Print_Results=False  
)

eval_results = await evaluator.aevaluate_dataset(qa_dataset)

def display_results(name, eval_results):
    """Display results from evaluate."""

    metric_dicts = []
    for eval_result in eval_results:
        metric_dict = eval_result.metric_vals_dict
        metric_dicts.append(metric_dict)

    full_df = pd.DataFrame(metric_dicts)

    columns = {
        "retrievers": [name],
        **{k: [full_df[k].mean()] for k in metrics},
    }

    metric_df = pd.DataFrame(columns)

    return metric_df

display_results("top-2 eval", eval_results)


hit_rate: 1.0
mrr: 0.3333333333333333
precision: 0.06666666666666667
recall: 1.0
ap: 0.3333333333333333
ndcg: 0.5
hit_rate: 1.0
mrr: 0.25
precision: 0.06666666666666667
recall: 1.0
ap: 0.25
ndcg: 0.43067655807339306
hit_rate: 1.0
mrr: 0.3333333333333333
precision: 0.06666666666666667
recall: 1.0
ap: 0.3333333333333333
ndcg: 0.5
hit_rate: 1.0
mrr: 0.5
precision: 0.06666666666666667
recall: 1.0
ap: 0.5
ndcg: 0.6309297535714575
hit_rate: 0.0
mrr: 0.0
precision: 0.0
recall: 0.0
ap: 0.0
ndcg: 0.0
hit_rate: 1.0
mrr: 0.5
precision: 0.06666666666666667
recall: 1.0
ap: 0.5
ndcg: 0.6309297535714575
hit_rate: 0.0
mrr: 0.0
precision: 0.0
recall: 0.0
ap: 0.0
ndcg: 0.0
hit_rate: 1.0
mrr: 0.5
precision: 0.06666666666666667
recall: 1.0
ap: 0.5
ndcg: 0.6309297535714575
hit_rate: 1.0
mrr: 0.3333333333333333
precision: 0.06666666666666667
recall: 1.0
ap: 0.3333333333333333
ndcg: 0.5
hit_rate: 1.0
mrr: 0.3333333333333333
precision: 0.06666666666666667
recall: 1.0
ap: 0.3333333333333333
ndcg: 0.5
hit_rate:

Unnamed: 0,retrievers,hit_rate,mrr,precision,recall,ap,ndcg
0,top-2 eval,0.8,0.388074,0.053333,0.8,0.388074,0.488465


# 6. Post Retrieval

## 6.A Summarization

In [24]:
def summarize_each_chunk(nodes, client, query, model="llama3.1:8b", parent=False):
    if parent:
        chunks = [doc.text for doc in nodes]
    else:
        chunks = [doc.node.text for doc in nodes]
    summaries = []
    
    for i, chunk in enumerate(chunks):
        prompt = f"""
        Summarize the following text in one concise paragraph, focusing on key points relevant to the query: "{query}".
        
        - Emphasize information directly related to the query.
        - Exclude unrelated, redundant, or speculative details.
        - Do NOT introduce new information or answer the query itself. 
        
        Text:
        {chunk}
        
        Summary:
        """
        
        response = client.generate(model=model, prompt=prompt)
        summary = response['response'].strip()
        summaries.append(summary)

    return summaries

# 6.B Evaluation Generation

In [25]:
def generate_response_with_notice(summaries, query, client, model="llama3.3"):
    # Combine summaries into context block
    context = "\n".join(summaries)
    
    # Create prompt to answer based on summarized text
    prompt = f"""
    Use the following summarized information to answer the query accurately and concisely. 
    DO NOT USE BACKGROUND KNOWLEDGE OUTSIDE THE CONTEXT PROVIDED.
    If the information is not sufficient to fully address the query, respond ONLY with:
    "The available information is insufficient to provide a complete answer to this query."

    Summarized Context:
    {context}
    
    Query:
    {query}
    
    Response:
    """
    
    # Send the prompt to Ollama
    response = client.generate(
        model=model,
        prompt=prompt
    )
    
    return response['response'].strip()

# 7. Querying

## 7.A Query Transforms

# 8. Query Generation

In [26]:
docstore = {}

# Store documents using full metadata as the key
for doc in documents:
    key = tuple(doc.metadata.items())  # Convert metadata to tuple for hashable key
    docstore[key] = doc

In [27]:
def get_document_by_chunk_metadata(chunk_node):
    # Convert chunk metadata to tuple for matching
    metadata_key = tuple(chunk_node.metadata.items())

    # Retrieve document from docstore
    document = docstore.get(metadata_key)
    return document

In [28]:
def remove_duplicate_documents(doc_list):
    seen_ids = set()
    unique_docs = []

    for doc in doc_list:
        if doc.doc_id not in seen_ids:
            seen_ids.add(doc.doc_id)
            unique_docs.append(doc)

    return unique_docs

In [29]:
def gen_query(query, top_k, client, mode='dense', summary=False, model="llama3.1:8b", chunks_only=True):
    response = client.embeddings(prompt=query, model="mxbai-embed-large")
    query_embedding = response["embedding"]

    top_k_docs = faiss_retriever._retrieve(query_embedding, top_k=top_k)

    bm25_retriever = BM25Retriever.from_defaults(
    nodes=nodes,
    similarity_top_k=top_k,
    stemmer=Stemmer.Stemmer("english"),
    language="english",
    )
    retrieved_nodes = bm25_retriever.retrieve(query)

    results = {'faiss': top_k_docs, 'bm25':retrieved_nodes}
    ranked_results = hybrid_embedding(results, top_k=top_k)

    if mode == 'dense':
        print('using FAISS')
        ans_nodes =top_k_docs
    elif mode == 'sparse':
        print('using BM25')
        ans_nodes = retrieved_nodes
    else:
        print('using Hybrid')
        ans_nodes = ranked_results

    parent_flag = True
    context = set([get_document_by_chunk_metadata(docs).text for docs in ans_nodes])
    if chunks_only:
        parent_flag = False
        print('using chunks only')
        context = [docs.node.text for docs in ans_nodes]
        
    if summary:
        print('using summaries')
        context_nodes = remove_duplicate_documents([get_document_by_chunk_metadata(docs) for docs in ans_nodes])

        if chunks_only:
            context_nodes=ans_nodes
        summaries = summarize_each_chunk(context_nodes, client, model='llama3.1:8b', query=query,parent=parent_flag)
        context = summaries

    answer = generate_response_with_notice(context, query, client, model=model)

    # Format the references
    references = []
    for i, doc in enumerate(ranked_results[:top_k], start=1):
        metadata = doc.metadata
        source_info = f"Source {i}: {metadata['title']} (Page {metadata['page']}, Folder: {metadata['folder']})"
        references.append(source_info)

    return answer, "\n".join(references), "\n".join(context)

# 9. TDC Exam Evaluation

In [30]:
# Generate prompts dynamically
def generate_prompt(row):
    options = []
    for choice in ['A', 'B', 'C', 'D', 'E']:
        # Check for NaN or blank values
        if pd.notna(row[choice]) and row[choice] != '':
            options.append(f"{choice}. {row[choice]}")
    
    # Construct the prompt with few-shot examples
    prompt = f"\nActual Question: {row['Question']}\n" + "\n".join(options)
    prompt += "\nPlease answer only in letters and put them inside a bracket '[]'. If the question contains the statement 'Check all that apply' then add comma separator if there are multiple answers ONLY IF ALLOWED."
    
    return prompt

In [31]:
# Load the Excel file
file_path = '/mnt/c/Users/Jeryl Salas/Documents/AI 351/Project/LTO_EXAM.csv'
df = pd.read_csv(file_path)
df['Prompt'] = df.apply(generate_prompt, axis=1)
display(df.head())

Unnamed: 0,Question,A,B,C,D,E,Answer,Prompt
0,What should you do in case your vehicle breaks...,Open your trunk and hood,Stand on the expressway and flag down passing ...,Call for help using a mobile phone or an expre...,Park as far to the right as possible,Put your hazard warning light on,"A, C, D, E",\nActual Question: What should you do in case ...
1,What will happen when your front tire blows out?,The back end will sway towards the side of the...,The back end will sway away from the blowout,The front end will pull towards the side of th...,The front end will pull to the opposite side o...,,C,\nActual Question: What will happen when your ...
2,What should you do when an ambulance comes up ...,Stop as soon as you can,"Maintain your speed, let the ambulance driver ...",Speed up so that you don't hold the ambulance,Pull over to the right and slow down or even s...,,D,\nActual Question: What should you do when an ...
3,While driving the hood of your car lifts up bl...,Look through the gap underneath the hood or ou...,Brake suddenly so you don't leave the road,Pull to the side of the road and refasten the ...,Turn your headlights on and look out of the si...,,"A,C",\nActual Question: While driving the hood of y...
4,"In case of an accident, the first duty of the ...",pick-up the injured person and take him to the...,report the accident to the nearest hospital,report the accident to the nearest police station,,,A,"\nActual Question: In case of an accident, the..."


In [32]:
import time
qr_range = (0,60)
df["AI"] = np.nan
ai_answer = []
inference_times = []
for i in tqdm(range(*qr_range)):
    start_time = time.time()
    ai_answer.append(gen_query(df.loc[i,"Prompt"], top_k=15, client=client, mode='dense', model="llama3.3"))
    end_time = time.time()
    inference_times.append( end_time - start_time)

df.loc[qr_range[0]:qr_range[1]-1, "AI"] = [answ[0] for answ in ai_answer]
df.loc[qr_range[0]:qr_range[1]-1, "Context"] = [answ[2] for answ in ai_answer]
df.loc[qr_range[0]:qr_range[1]-1, "Inference_Time"] = inference_times

  0%|          | 0/60 [00:00<?, ?it/s]

using FAISS
using chunks only


  2%|▏         | 1/60 [00:22<21:57, 22.32s/it]

using FAISS
using chunks only


  3%|▎         | 2/60 [00:46<22:44, 23.52s/it]

using FAISS
using chunks only


  5%|▌         | 3/60 [01:10<22:25, 23.61s/it]

using FAISS
using chunks only


  7%|▋         | 4/60 [01:38<23:36, 25.29s/it]

using FAISS
using chunks only


  8%|▊         | 5/60 [02:05<23:43, 25.88s/it]

using FAISS
using chunks only


 10%|█         | 6/60 [02:32<23:34, 26.20s/it]

using FAISS
using chunks only


 12%|█▏        | 7/60 [02:57<22:59, 26.03s/it]

using FAISS
using chunks only


 13%|█▎        | 8/60 [03:24<22:46, 26.29s/it]

using FAISS
using chunks only


 15%|█▌        | 9/60 [03:50<22:11, 26.11s/it]

using FAISS
using chunks only


 17%|█▋        | 10/60 [04:17<22:02, 26.45s/it]

using FAISS
using chunks only


 18%|█▊        | 11/60 [04:43<21:28, 26.30s/it]

using FAISS
using chunks only


 20%|██        | 12/60 [05:10<21:13, 26.53s/it]

using FAISS
using chunks only


 22%|██▏       | 13/60 [05:38<21:02, 26.86s/it]

using FAISS
using chunks only


 23%|██▎       | 14/60 [06:04<20:23, 26.59s/it]

using FAISS
using chunks only


 25%|██▌       | 15/60 [06:30<19:55, 26.57s/it]

using FAISS
using chunks only


 27%|██▋       | 16/60 [06:56<19:25, 26.49s/it]

using FAISS
using chunks only


 28%|██▊       | 17/60 [07:22<18:51, 26.32s/it]

using FAISS
using chunks only


 30%|███       | 18/60 [07:49<18:27, 26.37s/it]

using FAISS
using chunks only


 32%|███▏      | 19/60 [08:14<17:51, 26.14s/it]

using FAISS
using chunks only


 33%|███▎      | 20/60 [08:41<17:27, 26.18s/it]

using FAISS
using chunks only


 35%|███▌      | 21/60 [09:07<17:05, 26.29s/it]

using FAISS
using chunks only


 37%|███▋      | 22/60 [09:33<16:37, 26.25s/it]

using FAISS
using chunks only


 38%|███▊      | 23/60 [09:59<16:04, 26.07s/it]

using FAISS
using chunks only


 40%|████      | 24/60 [10:25<15:42, 26.19s/it]

using FAISS
using chunks only


 42%|████▏     | 25/60 [10:51<15:13, 26.09s/it]

using FAISS
using chunks only


 43%|████▎     | 26/60 [11:19<15:03, 26.56s/it]

using FAISS
using chunks only


 45%|████▌     | 27/60 [11:45<14:33, 26.46s/it]

using FAISS
using chunks only


 47%|████▋     | 28/60 [12:12<14:05, 26.41s/it]

using FAISS
using chunks only


 48%|████▊     | 29/60 [12:38<13:37, 26.37s/it]

using FAISS
using chunks only


 50%|█████     | 30/60 [13:04<13:09, 26.32s/it]

using FAISS
using chunks only


 52%|█████▏    | 31/60 [13:30<12:41, 26.25s/it]

using FAISS
using chunks only


 53%|█████▎    | 32/60 [13:57<12:16, 26.30s/it]

using FAISS
using chunks only


 55%|█████▌    | 33/60 [14:23<11:49, 26.26s/it]

using FAISS
using chunks only


 57%|█████▋    | 34/60 [14:49<11:24, 26.35s/it]

using FAISS
using chunks only


 58%|█████▊    | 35/60 [15:15<10:54, 26.18s/it]

using FAISS
using chunks only


 60%|██████    | 36/60 [15:42<10:32, 26.37s/it]

using FAISS
using chunks only


 62%|██████▏   | 37/60 [16:08<10:04, 26.26s/it]

using FAISS
using chunks only


 63%|██████▎   | 38/60 [16:34<09:39, 26.34s/it]

using FAISS
using chunks only


 65%|██████▌   | 39/60 [17:01<09:11, 26.28s/it]

using FAISS
using chunks only


 67%|██████▋   | 40/60 [17:27<08:45, 26.27s/it]

using FAISS
using chunks only


 68%|██████▊   | 41/60 [17:53<08:18, 26.25s/it]

using FAISS
using chunks only


 70%|███████   | 42/60 [18:20<07:55, 26.40s/it]

using FAISS
using chunks only


 72%|███████▏  | 43/60 [18:46<07:27, 26.30s/it]

using FAISS
using chunks only


 73%|███████▎  | 44/60 [19:12<07:00, 26.31s/it]

using FAISS
using chunks only


 75%|███████▌  | 45/60 [19:38<06:34, 26.32s/it]

using FAISS
using chunks only


 77%|███████▋  | 46/60 [20:05<06:08, 26.32s/it]

using FAISS
using chunks only


 78%|███████▊  | 47/60 [20:31<05:42, 26.33s/it]

using FAISS
using chunks only


 80%|████████  | 48/60 [20:57<05:15, 26.32s/it]

using FAISS
using chunks only


 82%|████████▏ | 49/60 [21:23<04:47, 26.11s/it]

using FAISS
using chunks only


 83%|████████▎ | 50/60 [21:49<04:20, 26.07s/it]

using FAISS
using chunks only


 85%|████████▌ | 51/60 [22:15<03:55, 26.12s/it]

using FAISS
using chunks only


 87%|████████▋ | 52/60 [22:42<03:29, 26.23s/it]

using FAISS
using chunks only


 88%|████████▊ | 53/60 [23:08<03:03, 26.16s/it]

using FAISS
using chunks only


 90%|█████████ | 54/60 [23:34<02:37, 26.19s/it]

using FAISS
using chunks only


 92%|█████████▏| 55/60 [24:00<02:10, 26.04s/it]

using FAISS
using chunks only


 93%|█████████▎| 56/60 [24:26<01:44, 26.19s/it]

using FAISS
using chunks only


 95%|█████████▌| 57/60 [24:53<01:18, 26.24s/it]

using FAISS
using chunks only


 97%|█████████▋| 58/60 [25:19<00:52, 26.32s/it]

using FAISS
using chunks only


 98%|█████████▊| 59/60 [25:45<00:26, 26.27s/it]

using FAISS
using chunks only


100%|██████████| 60/60 [26:11<00:00, 26.20s/it]
  df.loc[qr_range[0]:qr_range[1]-1, "AI"] = [answ[0] for answ in ai_answer]


In [33]:
import re


def process_answers(answers):
    formatted_answers = []
    
    for a in answers:
        
        matches = re.findall(r'\[?\s*([A-E](?:\s*,\s*[A-E])*)\s*\]?', str(a)) # Extract answers like [A, C, D] or [A] or [B, D]
        answers = []
        for match in matches:
            answers.extend(re.split(r'\s*,\s*', match))  # Split by comma and remove spaces
        unique_sorted_answers = sorted(set(answers), key=lambda x: ['A', 'B', 'C', 'D', 'E'].index(x))
        if not unique_sorted_answers:
            formatted_answers.append(None)
        else:
            formatted_answers.append(unique_sorted_answers)
    return formatted_answers

df_results = df.loc[qr_range[0]:qr_range[1]-1, ["Question","Answer","AI", "Inference_Time"]]
df_results['Answer'] = df_results['Answer'].apply(lambda x: x.split(', '))
df_results['AI'] = process_answers(df_results["AI"])
df_results['Answer'] = process_answers(df_results["Answer"])



def calculate_scores(df):
    scores = []
    for index, row in df.iterrows():
        correct_answers = set(row['Answer'] if row['Answer'] is not None else [])
        ai_answers = set(row['AI'] if row['AI'] is not None else [])
        if ai_answers == correct_answers:
            score = 1.0
        else:
            score = 0.0
        scores.append(score)
    
    df['Score'] = scores
    accuracy = scores.count(1.0) / len(scores)
    print(f'Final Score: {scores.count(1.0):.2f}/{len(scores):.2f}')
    print(f'Accuracy: {accuracy:.2f}%')
    return df

# Apply the scoring function
scored_df = calculate_scores(df_results)

# Display the dataframe to verify the results
display(scored_df[['Question', 'Answer', 'AI', 'Score', 'Inference_Time']])

Final Score: 49.00/60.00
Accuracy: 0.82%


Unnamed: 0,Question,Answer,AI,Score,Inference_Time
0,What should you do in case your vehicle breaks...,"[A, C, D, E]","[C, D, E]",0.0,22.32306
1,What will happen when your front tire blows out?,[C],[C],1.0,24.361086
2,What should you do when an ambulance comes up ...,[D],[D],1.0,23.71798
3,While driving the hood of your car lifts up bl...,"[A, C]","[A, C]",1.0,27.85082
4,"In case of an accident, the first duty of the ...",[A],[C],0.0,26.940197
5,"When a vehicle starts to skid, what should the...",[B],[B],1.0,26.811013
6,"In case of injuries caused by an accident, the...",[A],[A],1.0,25.692555
7,What will happen when your rear tire blows out?,[B],[A],0.0,26.82762
8,"When a vehicle is stalled or disabled, the dri...",[C],[C],1.0,25.719195
9,If you are the first to arrive at the scene of...,[B],[B],1.0,27.199358


In [34]:
# Generate prompts dynamically
def generate_prompt(row):
    
    # Construct the prompt with few-shot examples
    prompt = f"\nActual Question: {row['Question']}\n" 
    prompt += "\nPlease answer the question based on the given context."
    
    return prompt

In [35]:
# Load the Excel file
file_path = '/mnt/c/Users/Jeryl Salas/Documents/AI 351/Project/LTO_EXAM_QnA.csv'
df = pd.read_csv(file_path, encoding='ISO-8859-1')
df['Prompt'] = df.apply(generate_prompt, axis=1)
display(df.head())

Unnamed: 0,Question,Answer,Prompt
0,Traffic Jam can be prevented if you,Keep opposing lanes open,\nActual Question: Traffic Jam can be prevente...
1,When making a right turn you should,Stay on the outermost lane of the road then si...,\nActual Question: When making a right turn yo...
2,"When you intend to turn right or left, signal ...",25 meters before you intend to make your turn,\nActual Question: When you intend to turn rig...
3,"At an intersection with a traffic light, make ...",The green light is on and there is a left turn...,\nActual Question: At an intersection with a t...
4,Graft and corruption in the traffic enforcemen...,Self disciplined by drivers and obeying traffi...,\nActual Question: Graft and corruption in the...


In [36]:
import time
qr_range = (0,60)
df["AI"] = np.nan
ai_answer = []
inference_times = []

for i in tqdm(range(*qr_range)):
    start_time = time.time()
    ai_answer.append(gen_query(df.loc[i,"Prompt"], top_k=15, client=client, mode='hybrid', model="llama3.1:8b"))
    end_time = time.time()
    inference_times.append(end_time - start_time)

df.loc[qr_range[0]:qr_range[1]-1, "AI"] = [answ[0] for answ in ai_answer]
df.loc[qr_range[0]:qr_range[1]-1, "Context"] = [answ[2] for answ in ai_answer]
df.loc[qr_range[0]:qr_range[1]-1, "Inference_Time"] = inference_times 
df_new = df.loc[qr_range[0]:qr_range[1]-1].copy()
df = df_new.copy()

  0%|          | 0/60 [00:00<?, ?it/s]

using Hybrid
using chunks only


  2%|▏         | 1/60 [00:12<12:28, 12.69s/it]

using Hybrid
using chunks only


  3%|▎         | 2/60 [00:18<08:16,  8.56s/it]

using Hybrid
using chunks only


  5%|▌         | 3/60 [00:22<06:10,  6.50s/it]

using Hybrid
using chunks only


  7%|▋         | 4/60 [00:26<05:15,  5.64s/it]

using Hybrid
using chunks only


  8%|▊         | 5/60 [00:31<04:58,  5.43s/it]

using Hybrid
using chunks only


 10%|█         | 6/60 [00:38<05:15,  5.83s/it]

using Hybrid
using chunks only


 12%|█▏        | 7/60 [00:42<04:40,  5.30s/it]

using Hybrid
using chunks only


 13%|█▎        | 8/60 [00:50<05:12,  6.00s/it]

using Hybrid
using chunks only


 15%|█▌        | 9/60 [00:54<04:37,  5.44s/it]

using Hybrid
using chunks only


 17%|█▋        | 10/60 [00:58<04:10,  5.00s/it]

using Hybrid
using chunks only


 18%|█▊        | 11/60 [01:04<04:19,  5.30s/it]

using Hybrid
using chunks only


 20%|██        | 12/60 [01:08<04:00,  5.02s/it]

using Hybrid
using chunks only


 22%|██▏       | 13/60 [01:13<03:57,  5.06s/it]

using Hybrid
using chunks only


 23%|██▎       | 14/60 [01:19<03:59,  5.21s/it]

using Hybrid
using chunks only


 25%|██▌       | 15/60 [01:24<03:59,  5.31s/it]

using Hybrid
using chunks only


 27%|██▋       | 16/60 [01:29<03:46,  5.14s/it]

using Hybrid
using chunks only


 28%|██▊       | 17/60 [01:34<03:37,  5.05s/it]

using Hybrid
using chunks only


 30%|███       | 18/60 [01:40<03:46,  5.39s/it]

using Hybrid
using chunks only


 32%|███▏      | 19/60 [01:45<03:28,  5.09s/it]

using Hybrid
using chunks only


 33%|███▎      | 20/60 [01:49<03:18,  4.95s/it]

using Hybrid
using chunks only


 35%|███▌      | 21/60 [01:54<03:05,  4.76s/it]

using Hybrid
using chunks only


 37%|███▋      | 22/60 [02:00<03:17,  5.21s/it]

using Hybrid
using chunks only


 38%|███▊      | 23/60 [02:06<03:21,  5.45s/it]

using Hybrid
using chunks only


 40%|████      | 24/60 [02:12<03:20,  5.56s/it]

using Hybrid
using chunks only


 42%|████▏     | 25/60 [02:17<03:13,  5.53s/it]

using Hybrid
using chunks only


 43%|████▎     | 26/60 [02:23<03:08,  5.53s/it]

using Hybrid
using chunks only


 45%|████▌     | 27/60 [02:27<02:52,  5.24s/it]

using Hybrid
using chunks only


 47%|████▋     | 28/60 [02:31<02:35,  4.87s/it]

using Hybrid
using chunks only


 48%|████▊     | 29/60 [02:36<02:34,  5.00s/it]

using Hybrid
using chunks only


 50%|█████     | 30/60 [02:41<02:26,  4.90s/it]

using Hybrid
using chunks only


 52%|█████▏    | 31/60 [02:46<02:21,  4.89s/it]

using Hybrid
using chunks only


 53%|█████▎    | 32/60 [02:50<02:13,  4.76s/it]

using Hybrid
using chunks only


 55%|█████▌    | 33/60 [02:56<02:18,  5.12s/it]

using Hybrid
using chunks only


 57%|█████▋    | 34/60 [03:02<02:17,  5.30s/it]

using Hybrid
using chunks only


 58%|█████▊    | 35/60 [03:07<02:07,  5.11s/it]

using Hybrid
using chunks only


 60%|██████    | 36/60 [03:12<02:05,  5.24s/it]

using Hybrid
using chunks only


 62%|██████▏   | 37/60 [03:21<02:20,  6.12s/it]

using Hybrid
using chunks only


 63%|██████▎   | 38/60 [03:26<02:07,  5.80s/it]

using Hybrid
using chunks only


 65%|██████▌   | 39/60 [03:31<02:02,  5.83s/it]

using Hybrid
using chunks only


 67%|██████▋   | 40/60 [03:37<01:53,  5.65s/it]

using Hybrid
using chunks only


 68%|██████▊   | 41/60 [03:42<01:47,  5.64s/it]

using Hybrid
using chunks only


 70%|███████   | 42/60 [03:48<01:40,  5.56s/it]

using Hybrid
using chunks only


 72%|███████▏  | 43/60 [03:54<01:36,  5.66s/it]

using Hybrid
using chunks only


 73%|███████▎  | 44/60 [04:00<01:31,  5.74s/it]

using Hybrid
using chunks only


 75%|███████▌  | 45/60 [04:04<01:22,  5.51s/it]

using Hybrid
using chunks only


 77%|███████▋  | 46/60 [04:10<01:15,  5.38s/it]

using Hybrid
using chunks only


 78%|███████▊  | 47/60 [04:15<01:10,  5.39s/it]

using Hybrid
using chunks only


 80%|████████  | 48/60 [04:20<01:04,  5.36s/it]

using Hybrid
using chunks only


 82%|████████▏ | 49/60 [04:25<00:57,  5.24s/it]

using Hybrid
using chunks only


 83%|████████▎ | 50/60 [04:32<00:57,  5.79s/it]

using Hybrid
using chunks only


 85%|████████▌ | 51/60 [04:39<00:53,  5.93s/it]

using Hybrid
using chunks only


 87%|████████▋ | 52/60 [04:43<00:43,  5.43s/it]

using Hybrid
using chunks only


 88%|████████▊ | 53/60 [04:48<00:38,  5.48s/it]

using Hybrid
using chunks only


 90%|█████████ | 54/60 [04:54<00:32,  5.37s/it]

using Hybrid
using chunks only


 92%|█████████▏| 55/60 [04:59<00:27,  5.52s/it]

using Hybrid
using chunks only


 93%|█████████▎| 56/60 [05:05<00:22,  5.61s/it]

using Hybrid
using chunks only


 95%|█████████▌| 57/60 [05:10<00:16,  5.34s/it]

using Hybrid
using chunks only


 97%|█████████▋| 58/60 [05:16<00:11,  5.53s/it]

using Hybrid
using chunks only


 98%|█████████▊| 59/60 [05:23<00:05,  5.96s/it]

using Hybrid
using chunks only


100%|██████████| 60/60 [05:29<00:00,  5.49s/it]
  df.loc[qr_range[0]:qr_range[1]-1, "AI"] = [answ[0] for answ in ai_answer]


# 10. Similarity Evaluation

In [37]:
from llama_index.core.evaluation import SemanticSimilarityEvaluator
from llama_index.core.base.embeddings.base import BaseEmbedding
import asyncio
from llama_index.core.embeddings import resolve_embed_model
from pydantic import PrivateAttr

class OllamaEmbeddingModel(BaseEmbedding):
    _client: Client = PrivateAttr()

    def __init__(self, model_name: str = "mxbai-embed-large", timeout: int = 300):
        super().__init__()
        self.model_name = model_name
        self._client = Client() 

    async def _aget_query_embedding(self, query: str) -> list[float]:
        return await self._aget_text_embedding(query)

    async def _aget_text_embedding(self, text: str) -> list[float]:
        loop = asyncio.get_event_loop()
        embedding_response = await loop.run_in_executor(
            None, self._client.embeddings, self.model_name, text
        )
        return embedding_response['embedding']  

    def _get_query_embedding(self, query: str) -> list[float]:
        return self._get_text_embedding(query)

    def _get_text_embedding(self, text: str) -> list[float]:
        embedding_response = self._client.embeddings(
            model=self.model_name,
            prompt=text
        )
        return embedding_response['embedding']


embed_model = OllamaEmbeddingModel(model_name="mxbai-embed-large")
evaluator = SemanticSimilarityEvaluator(
    embed_model=embed_model,
    similarity_threshold=0.6
)

results_scores = []
results_passing = []
for i in tqdm(range(len(df))):
    response = df.loc[i, "AI"]
    reference = df.loc[i, "Answer"]

    result = await evaluator.aevaluate(
    response=response,
    reference=reference,
    )
    results_scores.append(result.score)
    results_passing.append(result.passing)
    
df['Score'] = results_scores
df['Passing'] = results_passing

average_score = df['Score'].mean()
total_items = len(df)
passing_items = df['Passing'].sum()  
print(f"Average Score: {average_score:.4f}")
print(f"Passing: {passing_items}/{total_items}")
display(df[['Question', 'Answer', 'AI', 'Score', 'Passing', 'Inference_Time']])

  0%|          | 0/60 [00:00<?, ?it/s]

100%|██████████| 60/60 [00:05<00:00, 11.62it/s]

Average Score: 0.6473
Passing: 43/60





Unnamed: 0,Question,Answer,AI,Score,Passing,Inference_Time
0,Traffic Jam can be prevented if you,Keep opposing lanes open,"The question seems to be incomplete, but I'll ...",0.649728,True,12.690465
1,When making a right turn you should,Stay on the outermost lane of the road then si...,"According to the text, when turning to the lef...",0.761926,True,5.661499
2,"When you intend to turn right or left, signal ...",25 meters before you intend to make your turn,"According to the provided text, when you inten...",0.797366,True,4.041142
3,"At an intersection with a traffic light, make ...",The green light is on and there is a left turn...,"At an intersection with a traffic light, you s...",0.745402,True,4.33286
4,Graft and corruption in the traffic enforcemen...,Self disciplined by drivers and obeying traffi...,The correct answer is:\n\nAutomated issuance o...,0.597946,False,5.045053
5,"On a four(4) lane road with single white line,...",Overtake by passing over the solid white line,"According to the provided text, there is no sp...",0.752399,True,6.621878
6,A double solid yellow line with broken white l...,Absolutely no overtaking,"According to the provided text, a double solid...",0.590069,False,4.196586
7,"When making a U-Turn, you should",Check for traffic behind you and indicate your...,"According to the provided text, when making a ...",0.684109,True,7.499764
8,Signs that are triangular in shape and with a ...,Caution or warning signs,"According to the text, signs that are triangul...",0.685975,True,4.214451
9,"Signs that are round, inverted triangle or oct...",Regulatory signs,REGULATORY SIGNS,1.0,True,4.024455


# 11. Relevancy Evaluation

In [38]:
from llama_index.core.evaluation import RelevancyEvaluator

ollama_llm = Ollama(model="llama3.1:8b", request_timeout=300)
evaluator = RelevancyEvaluator(llm=ollama_llm)

eval_results = []

for i in tqdm(range(len(df))):
    eval_result = await evaluator.aevaluate(
        query=df.loc[i, "Question"],
        response=df.loc[i, "AI"],
        contexts=[df.loc[i, "Context"]]  
    )
    eval_results.append(eval_result.passing)

df['Eval'] = eval_results

total_items = len(df)
passing_items = df['Eval'].sum()
score = f"Score: {passing_items}/{total_items}"
percentage = passing_items / total_items if total_items > 0 else 0
print(score)
print(f"Percentage: {percentage:.2%}")
display(df[['Question', 'Answer', 'AI', 'Eval', 'Inference_Time']])

100%|██████████| 60/60 [04:44<00:00,  4.75s/it]

Score: 55/60
Percentage: 91.67%





Unnamed: 0,Question,Answer,AI,Eval,Inference_Time
0,Traffic Jam can be prevented if you,Keep opposing lanes open,"The question seems to be incomplete, but I'll ...",False,12.690465
1,When making a right turn you should,Stay on the outermost lane of the road then si...,"According to the text, when turning to the lef...",False,5.661499
2,"When you intend to turn right or left, signal ...",25 meters before you intend to make your turn,"According to the provided text, when you inten...",True,4.041142
3,"At an intersection with a traffic light, make ...",The green light is on and there is a left turn...,"At an intersection with a traffic light, you s...",True,4.33286
4,Graft and corruption in the traffic enforcemen...,Self disciplined by drivers and obeying traffi...,The correct answer is:\n\nAutomated issuance o...,True,5.045053
5,"On a four(4) lane road with single white line,...",Overtake by passing over the solid white line,"According to the provided text, there is no sp...",True,6.621878
6,A double solid yellow line with broken white l...,Absolutely no overtaking,"According to the provided text, a double solid...",True,4.196586
7,"When making a U-Turn, you should",Check for traffic behind you and indicate your...,"According to the provided text, when making a ...",True,7.499764
8,Signs that are triangular in shape and with a ...,Caution or warning signs,"According to the text, signs that are triangul...",True,4.214451
9,"Signs that are round, inverted triangle or oct...",Regulatory signs,REGULATORY SIGNS,True,4.024455
