# LTO RAG Table of Contents

1. Import Libraries
2. Connect to Ollama Server
3. Ingestion
4. QA Generation
5. Embedding and Retrieval<br>
 **5.A** Dense via FAISS  
 **5.B** FAISS Retrieval Evaluator  
 **5.C** FAISS Retrieval Evaluation  
 **5.D** Sparse Embedding via BM25  
 **5.E** Hybrid Embedding via Reciprocal Rank Fusion  
 **5.F** Hybrid Retrieval Evaluator  
 **5.G** Hybrid Retrieval Evaluation  
6. Post Retrieval<br>
 **6.A** Summarization  
 **6.B** Evaluation Generation  
7. Querying<br>
 **7.A** Query Transforms  
8. Query Generation
9. TDC Exam Evaluation
10. Similarity Evaluation
11. Relevancy Evaluation


# 1. Import Libraries

In [28]:
import os
import fitz
import re

from ollama import Client
import faiss
import pandas as pd
import numpy as np
import Stemmer
from tqdm import tqdm
import gradio as gr
import json

from llama_index.core import Document
from llama_index.core.node_parser import TokenTextSplitter
from llama_index.core.retrievers import BaseRetriever, QueryFusionRetriever
from llama_index.core.schema import TextNode, NodeWithScore
from llama_index.retrievers.bm25 import BM25Retriever
from llama_index.llms.ollama import Ollama


# For QA generation
import uuid
import warnings
from typing import Dict, List, Optional, Tuple

from llama_index.core.bridge.pydantic import BaseModel
from llama_index.core.llms.utils import LLM
from llama_index.core.schema import MetadataMode, TextNode
from llama_index.core.settings import Settings


# 2. Connect to Ollama Server

In [29]:
client = Client(
  host='http://localhost:11434',
)

# 3. Ingestion

In [30]:
# Path to the dataset folder
DATASET_PATH = r'/home/jeryl4913/lto_rag_reviewer/notebooks/extracted_text.json'

def get_text_and_metadata(input_path):
    """Load text and metadata from a file and perform chunking."""
    with open(input_path, "r", encoding="utf-8") as f:
        extracted_data = json.load(f)

    texts = []
    metadata = []

    for entry in tqdm(extracted_data, desc="Processing entries"):
        text = entry["text"]
        source_metadata = {
            "source": entry["source"],
            "folder": entry["folder"],
            "file_name": entry["file_name"],
            "page": entry["page"],
            "title": entry["title"],
            "url": entry["url"]
        }
        texts.append(text)
        metadata.append(source_metadata)

    return texts, metadata


In [31]:
docs, metadatas = get_text_and_metadata(DATASET_PATH)

Processing entries: 100%|██████████| 7815/7815 [00:00<00:00, 877321.50it/s]


In [32]:
documents = [Document(text=docs[t], metadata=metadatas[t]) for t in range(len(docs))]
splitter = TokenTextSplitter(
    chunk_size=512,
    chunk_overlap=20,
    separator=" ",
)
nodes = splitter.get_nodes_from_documents(documents)

In [33]:
print(nodes[0])

Node ID: a601f4a5-0b2f-4b50-83d1-aced29ca7cb5
Text: Land Transportation Office (LTO) The concept of land
transportation system in the Philippines started when our ancestors
invented the means of locomotion with the animals in moving people and
goods from place to place. Although the means of land transportation
during the early days were not as sophisticated as the modern vehicles
of today and th...


# 4. QA Generation
Used for Retreival Evaluation:
1. Get all document nodes
2. Generate question for each node (using llama 3.2)
3. Question Answer pairs: Generated Question, Node text

In [34]:
"""Common utils for embeddings."""

import json
import re
import uuid
import warnings
from typing import Dict, List, Optional, Tuple

from llama_index.core.bridge.pydantic import BaseModel
from llama_index.core.llms.utils import LLM
from llama_index.core.schema import MetadataMode, TextNode
from llama_index.core.settings import Settings
from tqdm import tqdm


class EmbeddingQAFinetuneDataset(BaseModel):
    """Embedding QA Finetuning Dataset.

    Args:
        queries (Dict[str, str]): Dict id -> query.
        corpus (Dict[str, str]): Dict id -> string.
        relevant_docs (Dict[str, List[str]]): Dict query id -> list of doc ids.

    """

    queries: Dict[str, str]  # dict id -> query
    corpus: Dict[str, str]  # dict id -> string
    relevant_docs: Dict[str, List[str]]  # query id -> list of doc ids
    mode: str = "text"

    @property
    def query_docid_pairs(self) -> List[Tuple[str, List[str]]]:
        """Get query, relevant doc ids."""
        return [
            (query, self.relevant_docs[query_id])
            for query_id, query in self.queries.items()
        ]

    def save_json(self, path: str) -> None:
        """Save json."""
        with open(path, "w") as f:
            json.dump(self.model_dump(), f, indent=4)

    @classmethod
    def from_json(cls, path: str) -> "EmbeddingQAFinetuneDataset":
        """Load json."""
        with open(path) as f:
            data = json.load(f)
        return cls(**data)


DEFAULT_QA_GENERATE_PROMPT_TMPL = """
Context information is below.

---------------------
{context_str}
---------------------

Based on the above context, generate {num_questions_per_chunk} diverse and specific questions that focus on key details and concepts. \
Do not include prefatory phrases like 'Here are some questions'. Directly list the questions in numerical order.
"""


# generate queries as a convenience function
def generate_qa_embedding_pairs(
    nodes: List[TextNode],
    llm: Optional[LLM] = None,
    qa_generate_prompt_tmpl: str = DEFAULT_QA_GENERATE_PROMPT_TMPL,
    num_questions_per_chunk: int = 2,
) -> EmbeddingQAFinetuneDataset:
    """Generate examples given a set of nodes."""
    llm = llm or Settings.llm
    node_dict = {
        node.node_id: node.get_content(metadata_mode=MetadataMode.NONE)
        for node in nodes
    }

    queries = {}
    relevant_docs = {}
    for node_id, text in tqdm(node_dict.items()):
        query = qa_generate_prompt_tmpl.format(
            context_str=text, num_questions_per_chunk=num_questions_per_chunk
        )
        response = llm.complete(query)

        result = str(response).strip().split("\n")
        questions = [
            re.sub(r"^\d+[\).\s]", "", question).strip() for question in result
        ]
        questions = [
            question
            for question in questions
            if len(question) > 0 and question.endswith("?") and "question" not in question.lower()
        ][:num_questions_per_chunk]

        num_questions_generated = len(questions)
        if num_questions_generated < num_questions_per_chunk:
            warnings.warn(
                f"Fewer questions generated ({num_questions_generated}) "
                f"than requested ({num_questions_per_chunk}) for node {node_id}."
            )

        for question in questions:
            question_id = str(uuid.uuid4())
            queries[question_id] = question
            relevant_docs[question_id] = [node_id]

    # construct dataset
    return EmbeddingQAFinetuneDataset(
        queries=queries, corpus=node_dict, relevant_docs=relevant_docs
    )


In [35]:
import os

access_token = os.popen("echo $OPENAI_API_KEY").read().strip()


if access_token is None:
    raise ValueError("OpenAI API key not found in environment.")

#print(access_token)


In [36]:
from llama_index.core.evaluation import EmbeddingQAFinetuneDataset
from llama_index.llms.ollama import Ollama
from llama_index.llms.openai import OpenAI

# Instantiate LLM
ollama_llm = Ollama(model="llama3.1:8b", request_timeout=300)

# llm = OpenAI(model="gpt-4", access_token=access_token)


# Example usage of the generate_qa_embedding_pairs
qa_dataset = generate_qa_embedding_pairs(
    nodes[:60], llm=ollama_llm, num_questions_per_chunk=1
)

queries = qa_dataset.queries.values()
print(list(queries)[2])

# Save the dataset
qa_dataset.save_json("pg_eval_dataset_LLAMA3.1_generated.json")
print("Successfully created QA dataset")


100%|██████████| 60/60 [08:25<00:00,  8.42s/it]

In what year was Executive Order Number 546 promulgated, creating the Ministry of Transportation and Communications (MOTC)?
Successfully created QA dataset





In [37]:
# Load
qa_dataset = EmbeddingQAFinetuneDataset.from_json("pg_eval_dataset_LLAMA3.1_generated.json")
print("Successfully loaded QA dataset")

Successfully loaded QA dataset


# 5. Embedding and Retrieval

## 5.A. Dense via FAISS

In [38]:
def generate_embeddings(nodes, client, model):
    # Generate embeddings for documents using Ollama
    for doc in tqdm(nodes):
        response = client.embeddings(prompt=doc.text, model=model)
        doc.embedding = response["embedding"]
    return nodes

In [39]:
class FaissIndexer:
    """
    Faiss-based indexer for efficient similarity search using inner-product (cosine) similarity.

    This class handles the creation and management of a FAISS index from node embeddings.
    
    :ivar faiss_index: The FAISS index for storing and querying embeddings.
    :vartype faiss_index: faiss.IndexFlatIP
    :ivar embedding_dim: Dimensionality of the embeddings.
    :vartype embedding_dim: int
    """

    def __init__(self):
        """
        Initialize the FaissIndexer class.

        :ivar faiss_index: The FAISS index, initialized as None.
        :ivar embedding_dim: The dimension of embeddings, initialized as None.
        """
        self.faiss_index = None
        self.embedding_dim = None

    def normalize_embeddings(self, embeddings):
        """
        Normalize embeddings to have unit L2 norm.

        :param embeddings: Array of embeddings to normalize.
        :type embeddings: np.ndarray
        :return: Normalized embeddings.
        :rtype: np.ndarray
        """
        return embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)

    def build_index(self, nodes):
        """
        Build the FAISS index from a list of nodes containing embeddings.

        :param nodes: List of nodes, where each node contains an `embedding` attribute.
        :type nodes: list
        :raises ValueError: If the nodes list is empty or embeddings are inconsistent.
        """
        if not nodes:
            raise ValueError("Nodes list cannot be empty.")
        
        embeddings = np.array([np.array(node.embedding) for node in nodes])
        normalized_embeddings = self.normalize_embeddings(embeddings)

        self.embedding_dim = normalized_embeddings[0].shape[0]
        self.faiss_index = faiss.IndexFlatIP(self.embedding_dim)  # Inner-product similarity
        self.faiss_index.add(normalized_embeddings)

    def get_index(self):
        """
        Get the FAISS index instance.

        :return: The FAISS index used for similarity search.
        :rtype: faiss.IndexFlatIP
        :raises ValueError: If the index has not been built.
        """
        if self.faiss_index is None:
            raise ValueError("Index has not been built yet. Call 'build_index' first.")
        return self.faiss_index

In [40]:
class FAISSVectorStoreRetriever(BaseRetriever):
    def __init__(self, faiss_index, documents):
        """
        Initialize the FAISS retriever.
        :param faiss_index: The FAISS index containing precomputed embeddings.
        :param documents: List of document chunks.
        :param embeddings: Precomputed embeddings corresponding to the document chunks.
        """
        self.faiss_index = faiss_index
        self.documents = documents

    def _retrieve(self, query_embedding, top_k=5):
        """
        Retrieve the top-k nearest neighbors using the FAISS index.
        :param query_embedding: The embedding of the query.
        :param top_k: Number of top results to retrieve.
        """

        norm_query_embedding = np.array([query_embedding])
        norm_query_embedding /= np.linalg.norm(norm_query_embedding, axis=1, keepdims=True)

        distances, indices = self.faiss_index.search(norm_query_embedding, top_k)
        retrieved_docs = [
            NodeWithScore(node=self.documents[idx], score=1 - dist)
            for idx, dist in zip(indices[0], distances[0])
            if idx != -1
        ]
        return retrieved_docs

## 5.B. FAISS Retreival Evaluator

In [41]:
from typing import List, Tuple, Any
from pydantic import Field, ConfigDict
from llama_index.core.evaluation.retrieval.base import (
    BaseRetrievalEvaluator,
    RetrievalEvalMode,
    RetrievalEvalResult
)
from llama_index.core.evaluation.retrieval.metrics import resolve_metrics
from llama_index.core.response.notebook_utils import display_source_node

class FAISSRetrievalEvaluator(BaseRetrievalEvaluator):
    retriever: "FAISSVectorStoreRetriever" = Field(..., description="FAISS Retriever instance")
    Print_Results: bool = Field(default=False, description="Whether to print retrieved results")

    model_config = ConfigDict(extra="forbid")  

    @classmethod
    def from_metric_names(
        cls,
        metric_names: List[str],
        retriever: "FAISSVectorStoreRetriever",
        Print_Results: bool = False,
        **kwargs: Any,
    ) -> "FAISSRetrievalEvaluator":
        metric_types = resolve_metrics(metric_names)
        metrics = [metric() for metric in metric_types]
        return cls(metrics=metrics, retriever=retriever, Print_Results=Print_Results, **kwargs)

    async def _aget_retrieved_ids_and_texts(
        self,
        query: str,
        mode: RetrievalEvalMode = RetrievalEvalMode.TEXT,
    ) -> Tuple[List[str], List[str]]:
        response = client.embeddings(prompt=query, model="mxbai-embed-large")
        query_embedding = response["embedding"]
        retrieved_docs = self.retriever._retrieve(query_embedding, top_k=15)
        
        # Conditionally print results
        if self.Print_Results:
            for doc in retrieved_docs:
                display_source_node(doc, source_length=1000)  # Directly use doc
        
        retrieved_ids = [doc.id_ for doc in retrieved_docs]  # doc, not doc.node
        retrieved_texts = [doc.text for doc in retrieved_docs]
        return retrieved_ids, retrieved_texts

    async def aevaluate(
        self,
        query: str,
        expected_ids: List[str],
        expected_texts: List[str] = [],
        mode: RetrievalEvalMode = RetrievalEvalMode.TEXT,
        **kwargs: Any,
    ) -> RetrievalEvalResult:
        retrieved_ids, retrieved_texts = await self._aget_retrieved_ids_and_texts(query, mode)
        metric_dict = {}
        
        for metric in self.metrics:
            # Call compute instead of evaluate
            result = metric.compute(
                query=query,
                expected_ids=expected_ids,
                retrieved_ids=retrieved_ids,
                expected_texts=expected_texts,
                retrieved_texts=retrieved_texts,
                **kwargs
            )
            metric_dict[metric.metric_name] = result  # Store the whole RetrievalMetricResult object
            print(f"{metric.metric_name}: {result.score}")  # Print each metric result
         
        # Return RetrievalEvalResult with all required fields
        return RetrievalEvalResult(
            query=query,
            retrieved_ids=retrieved_ids,
            retrieved_texts=retrieved_texts,  # Include retrieved texts
            expected_ids=expected_ids,         # Pass expected ids
            expected_texts=expected_texts,      # Include expected texts
            metric_dict=metric_dict            # Pass full RetrievalMetricResult objects
        )


In [42]:
nodes_embed = generate_embeddings(nodes,  client, "mxbai-embed-large")

100%|██████████| 12302/12302 [1:04:08<00:00,  3.20it/s]


## 5.C. FAISS Retreival Evaluation

In [43]:
indexer = FaissIndexer()

indexer.build_index(nodes)  
faiss_index = indexer.get_index()

retriever = FAISSVectorStoreRetriever(faiss_index=indexer.get_index(), documents=nodes_embed)

metrics = ["hit_rate", "mrr", "precision", "recall", "ap", "ndcg"]
evaluator = FAISSRetrievalEvaluator.from_metric_names(
    metric_names=metrics,
    retriever=retriever,
    Print_Results=True
)


sample_id, sample_query = list(qa_dataset.queries.items())[1]
sample_expected = qa_dataset.relevant_docs[sample_id]

print("Sample Problem:")
print(f"sample id: {sample_id}, text: {sample_query}")
print(f"sample exp: {sample_expected}")

print("\nRetrieval results:")

result = await evaluator.aevaluate(
    query=sample_query,
    expected_ids=sample_expected,
)

print("\n")
print(result)

Sample Problem:
sample id: 8d0ccead-fb73-4913-8964-e796cab9868b, text: What was the outcome of the issuance by the DPWH on June 5, 1961 of Administrative Regulation No. 1?
sample exp: ['247c50d0-ac9c-4bfd-bd76-366cd2c930aa']

Retrieval results:


**Node ID:** 2c4694fe-2012-4a39-83e7-23e62ce7a366<br>**Similarity:** 0.3319814205169678<br>**Text:** ' I . ' "' ' . '' (_ , ... ) int DPWH and DoTC !RR ofR.A. 8794 " ' '    ~ ... i .12 .. , . Establishment of the DPWH Road Program Office. Pursuant to the provisions of Section 9 of .R.A. 8794 which authorizes the DPWH Seci;-etary to undertake such structural and procedural improvements in the systems and agencies concerned as may be necessary to ensure the prudent, wise, effective and efficient utilization of the Special Funds, DPWH shall establish a Road   Program Office. The head of the Road Program Office shall be appointed by the DPWH Secretary. He may attend the meetings of the Board as a non-voting resource person. Staff of the Road Program Office would be drawn from the Bureau of Maintenance and other DPWH offices and units, and the private sector (subject to approval by the DPWH Secretary). The DPWH Secretary may also assign personnel, either on a temporary or permanent basis as the case may be, from other office and units ofDPWH. The functions, duties and responsibilities o...<br>

**Node ID:** 519cbf5c-c172-4eec-80a6-5dda3ef48a23<br>**Similarity:** 0.33920133113861084<br>**Text:** in their respective localit:i(:;.s: e.. Issue permits for the construction or renovation of buildings or establishments pursuant to the guidelines promulgated by the DPVVH in Section i 2 of the EVIDA; f. Ensure compliance of pubHc and private builclinqs and establishments \Nith Section 18 of the EVIDA; g. Issue necessary poncies or legislations regulating tt1e operations of EVs: F'rovidf)d, That it is in accordance with the EVIDA and EVIDA-IRR; and h. Provide designated loading and unloading stations for EVs. Section 16. Pow<~rs and Functions of the Department of Public Works and Highways {DPWH). In addition to its functions under existing laws and pursuant to Section 12 of the EVlDA, the DPWH shall establish guidelines on the following: a. Construction or lnstallation of EVCS and other related equiprnent in buildings ancl other establishments; and t). Issuance of permit5 for tho construction or renovation of buildin95 or establishment"s-&t;J;'.-  tJlWt~oN~;~if:;t+0n -l7 of thi:: EV...<br>

**Node ID:** 247c50d0-ac9c-4bfd-bd76-366cd2c930aa<br>**Similarity:** 0.34546053409576416<br>**Text:** Motor Vehicles. The Chief of the Division was called the Superintendent of Division of Motor Vehicles. Act No. 3992 was amended by Commonwealth Act Numbers 123, 548, 556, 652 and Republic Act Numbers 314, 587, and 2383. On June 2, 1945, Department Order No. 4 was issued by the Department of Public Works and Highways reorganizing the Division. This took effect after the liberation of the Philippines from the Japanese invasion. In 1947, Executive Order No. 94 was promulgated reorganizing the different executive departments, bureaus and offices. Under Section 82 of this E.O., the Division of Motor Vehicles was upgraded into the Motor Vehicles Office (MVO) with the category of a Bureau. The Chief of the MVO enjoyed the rights and privileges of a Bureau Director. During the fifties and early sixties, our country started undergoing rapid economic development. Industrialization advanced and as a consequence, more and better roads were constructed. The Filipino then realized the need for mo...<br>

**Node ID:** 181e1e9d-fe2e-4cea-aaef-a1d46ad2bbce<br>**Similarity:** 0.3541192412376404<br>**Text:** days prior to date of registration; WHEREAS, Under the Administrative Code of 1987 as amended, the DOTr shall be the primary policy, planning, programming, coordinating , implementing, regulating and administrative entity of the Executive Branch of the government in the promotion, development and regulation of dependable and coordinated networks of transportation systems as well as in the fast, safe, efficient and reliable postal, transportation services; WHEREAS, Executive Order No. 125-A mandates the DOTr to establish and prescribe rules and regulations for the inspection and registration of air and land transportation facilities, such as motor vehicles, tri mobiles, railways, and aircraft; WHEREAS, Section 1 of Republic Act No. 7718 (An Act Authorizing the Financing, Construction, Operation and Maintenance of Infrastructure Projects by the Private Sector, and for Other Purposes) mandates that it is the declared policy of the State to recognize the 2 ,.--~.::~~:1~ -- ~::~r :1~-   ...<br>

**Node ID:** 7a07191e-9ecf-4c20-82a1-a313b01b5f3b<br>**Similarity:** 0.3610658049583435<br>**Text:** / )' . . ,, . ' . .. ,  .... t OPWH and OoTC IRR.ofR.A. 8794   ' . t ' Article II. Provision'i Pertaining to DPWH \ . 19. Pursuant to Section 9 of R.A. 8794, the following provisions pertain to the functions, duties and responsibilities of DPWH with respect to the management and implementation of the Special   Road Support Fund, the Special Local Road Fund, and the Special Road Safety Fund. I I. Definition of Terms (a) National Roads - roads which have been classified through Laws, Executive Orders and Department Orders, as falling under the responsibility of the national government, and are further classified as either national primary (arterial) or national secondary roads. (b) National Primary Roads - roads which comprise the primary (arterial) h_ighway system of the country which interconn~cts the regions, provinces and major urban centers. The system is composed of a north-south backbone, east-west laterals and other roads of strategic importance. These are roads identified as ...<br>

**Node ID:** 5cdfae5a-4bd1-45ee-8f08-9f3a6326e70a<br>**Similarity:** 0.36325812339782715<br>**Text:** .. ' ., .  .. r . , '\ ' \ t \ I   ' \   ! ~ r   .. '  . . . c,.. \ '\. , ., ' . I   ' -.- Joint DPWH and Do TC IRR of R.A. 8794 the Philippine Clean Air Act of 1999 and its Implementing Rules and Regulations; and (h) To establish and implement the appropriate structural and procedural improvements to carry out these policies. 2. Establishment of Special Accounts. Pursuant to Section 7 of R.A. 8794, there shall be established four separate Special Trust Accounts in the National Treasury. DPWH shall, through the Department of Budget and Management (DBM), establish one separate Special Trust Account for each of the Special Road S~pport Fund, Special Local Road Fund and Special Road Safety Fund. Do TC shall, through DBM, establish one Special Trust Account for the Special Vehicle Pollution Control Fund. 3. Establishment and Composition of the Road Board. Pursuant to Section 7 of R.A. 8794, there has been established a Road Board (the Board) to manage in a prudent and efficient manner t...<br>

**Node ID:** b24bd770-f8c3-474f-8609-2919a1f0fb4c<br>**Similarity:** 0.364032506942749<br>**Text:** \. l.\   .;: ._ ,..-"j  " ~i ~1 , I ..,..--, ~....,_ _I ,- ,   ! ' ,/' i . )\ L ~ ~   . < . --<! 1 # l t. " ' .> '\?.E~..,AR,lfME~T OF PUBLIC WORKS . , .Go: AND HIGHWAYS DEPARTMENT OF TRANSPORTATION AND COMMUNICATIONS l ~ ' '. ' / IMPLEMENTING RULES AND REGULATIONS OF REPUBLIC ACT NO. 8794 (AN ACT IMPOSING A MOTOR VEHICLE USER'S CHARGE ON OWNERS OF ALL TYPES OF MOTOR VEHICLES AND FOR OTHER PURPOSES) The following joint Implementing Rules and Regulations, herein after referred to as IRR, are hereby issued by the Department of Public Works and Highways, hereinafter referred to as DPWH, and the Department of Transportation and Communications, hereinafter referred to as DoTC, pursuant to the provisions of Section 9 of Republic Act (R.A.) No. 8794, which provides, among others, for the imposition of a motor vehicle user's charge as well as the creation of a Board. Article I. Common Provisions 1. Declaration of Policy. Pursuant to Section 2 of R.A. 8794, it is the policy of the State to p...<br>

**Node ID:** 2bad6f02-0050-4b3e-9d69-e54085b11c62<br>**Similarity:** 0.36821043491363525<br>**Text:** Section 1. Administration and Enforcement These Implementing Rules and _Regulations shall be administered by the.,be- _ partment and/or its authorized representatives or through other government , agencies designated or deputized by the qepariment: or by this act, executive,. orders or memorandum circulars, and others. Section 2. Rules and Regulations of other Government Agencies The rules and regulations issued by other government agencies and. instru ~ mentalities for the prevention and/or abatement of pollution not consistent with this Act shall supplement the rules and regulations issu~d by the Department through the Bureau. Section 3. Authentication with Official Seal All decisions, orders and appropriate legal documents hereinafter pro_i'nul- gated shall be issued and authenticated with the official seal of the Department or other government agencies designated by this A~t. Section 4. Jurisdiction The Department through the Bureau shall have exclusive and original jurisdic- ti...<br>

**Node ID:** af465d0b-a998-4f80-8d67-c28c228261c1<br>**Similarity:** 0.3686549663543701<br>**Text:** Department of Finance, Budget and management, an the Transportation and Communication, as ex officio members. The remaining three (3) members shall come from transport and motorist organization, which have been in existence and active for the last five (5) years prior to this Act. They shall be appointed for the term of two (2) years each by the President of the Philippines upon the recommendation of the secretaries of the DPWH and the DOTC. Section 8. Status of the Special Funds. - The four (4) special funds established under this Act shall be distinct and separate from and in addition in any appropriation authorized and granted yearly to the DPWH and the DOTC. To cover expenditures for the identified objects of expenditures under this Act. Congress shall continue to appropriate an amount in the General appropriations Act for road maintenance of the DPWH: Provided, however, That any savings for each year out of such appropriation shall revert to the General Fund. Any savings from t...<br>

**Node ID:** f0a91c16-1fa1-42c1-9005-c3e3cdc6d699<br>**Similarity:** 0.3722567558288574<br>**Text:** Republic ofthe Philippines DEPARTMENT OF TRANSPORTATION SUBJECT + AMENDED GUIDELINES GOVERNING THE USE OR [ATTACHMENT OF SIRENS, BELLS, HORNS, WHISTLES (OR SIMILAR GADGETS THAT EMIT EXCEPTIONALLY [LOUD OR STARTLING SOUNDS AND DOME LIGHTS AND  OTHER SIMILAR SIGNALING OR FLASHING DEVICES ON MOTOR VEHICLES DATE 18 pprn 2006 WHEREAS, the Deparment of Tonsporaton (007?) the primary pokey, plannog. programming, coordinating, implementing,  and administrative enity of the  administer and enforce al laws, rules and regulation relative to transportation, WHEREAS, Presidential Decree No. 96 (PO 6), directed the Commissioner ofthe Land  Transportation to draw and promulgate rules and regulations necessary to give effect to  THE USE OR ATTACHMENT OF SIRENS, BELLS, HORNS, WHISTLES OR SIMILAR GADGETS WHEREAS, adhering to the said directv, the Bureau of Land Transportation (now the Land Transportation fice) issued Administrative Order No 1, series 1973 which set the  ules in regulations set forth ...<br>

**Node ID:** f13de0a0-e65d-455b-b0b4-ae0d9c1f5500<br>**Similarity:** 0.37257254123687744<br>**Text:** this Act and every five (5) years thereafter, review the applicability and enforcement of all foregoing pecuniary penalties and shall initiate amendment and/or upgrade the same as may be necessary, subject to the approval of the Secretary of the DOTC.1 wphi 1 Section 17. Implementing Rules and Regulations.   The DOTC, the DOH and the NAPOLCOM shall, within three (3) months from the effectivity of this Act, jointly promulgate the necessary implementing rules and regulations to carry out the provisions of this Act. Section 18. Separability Clause.   If, for any reason, any part or provision of this Act is declared invalid, such declaration shall not affect the other provisions of this Act. Section 19. Repealing Clause.   Subparagraph (f), Section 56, Article 1 of Republic Act No. 4136, otherwise known as the "Land Transportation and Traffic Code", as amended; subparagraph (f), Section 5 of Republic Act No. 7924, otherwise known as "An Act Creating the Metropolitan Manila Development A...<br>

**Node ID:** 49cb191e-a234-4931-b2a6-ab2d1e622377<br>**Similarity:** 0.37414413690567017<br>**Text:** REPU13LICOFTHE PtllLIPPINES ! U.P. LAW CENTER~ ! OFFICE of lhe NATIONAL AOMINISTRATIV.E REGISH Adminislra1 1 v~ Rules 3nd Regula11ons DEPARTMENT OF TRANSPORTATION ill AUG 2 4 2021 IB DEPARTMENT ORDER NO. 2 O 2 O;.. O 2 j 1   ~P.2=: SUBJECT GUIDELINES FOR THE SCRAPPING OF OLD PUBLIC UTILITY VEHICLE UNITS (PUVS) UNDER THE DEPARTMENT ORDER 2017-011 OTHERWISE KNOWN AS THE OMNIBUS FHANCHISING GUIDELINES DATE Li NOVEMBER 2020 WHEREAS, The Department of Transportation (DOTr), as the primary policy, planning, programming, coordinating, and implementing agency of the Executive branch of the government, in the regulation of dependable, safe, and reliable transportation system and services, is mandated by laws to ensure the roadworthiness of vehicles operated in the streets and higtnvays of the country; WHEREAS, the Land Transportation Franchising and Regulatory Board (LTFH.13), as a~1 agency under the administrative supervision and control of the DOTr, is mandated to promulgate, administer, e...<br>

**Node ID:** 6ec6e78e-0818-4f47-a34b-bb08f30c3354<br>**Similarity:** 0.3777167797088623<br>**Text:** WHEREAS, Section 21 (c), Article 4 of Republic Act No. 8749 provides that the DOTC (now DOTr), together with the Department of Trade and Industry, and the Department of Environment and Natural Resources shall establish the procedures for the inspection of motor vehicles and the testing of their emissions; WHEREAS, the "Implementing Rules and Regulations of the Philippine Clean Air Act of 1999" provides that all private in-use motor vehicles and vehicles with updated/ enhanced engine whose chassis are pre-registered with L TO will only be allowed renewal of annual registration when, upon inspection by the LTO or other authorized private Motor Vehicle Inspection Station; WHEREAS, the "Implementing Rules and Regulations of the Philippine Clean Air Act of 1999" also provided that the DOTr shall conduct the vehicle test utilizing the Motor Vehicle Inspection System (MVIS) or its duly authorized and accredited inspection centers consistent with Republic Act No. 7394 or the "Consumer Act o...<br>

**Node ID:** 4dad5a6a-2c43-4184-a91e-a5273d320885<br>**Similarity:** 0.37835150957107544<br>**Text:** comfort anxl convenience to parsons and property in their charges as well as the safety of  persons and property within thelr areas of operations; WHEREAS, Section 4 ( ) of Republic Act (RA) No, 10930 provides thatthe Dej t 10" provides that the Department, in coordination with the LTFRB ane! other concerned agencies and private stakeholders, aggressively and regularly conduct a nationwide information, education and  communication campaign on road safety, including the list of measures implemented  pursuant to and violations punishable hereunder; WHEREAS, RA 10930 likewise provides that the Department, withthe assistance of LTFRB, shall promulgate the implementing rules and regulations necessary to ensure its effective implementation; WHEREAS, Section 5 (b) of EO 202 provides that the LTFRB has the power toissue, amend, revise, suspend or cancel Certificates of Public Convenience (CPC) or permits authorizing, the operation of publi land transportation services provilel by motorized ...<br>

**Node ID:** 97deaa8a-4a92-4e9b-8de9-4884f8ca8cc9<br>**Similarity:** 0.37851130962371826<br>**Text:** Law Enforcement Agency Involved in Traffic Administration DOTr DOTr is the primary policy, planning, programming, coordinating, implementing, regulating and administrative entity of the executive branch of the government in the promotion, development, regulation of a dependable and coordinated network of transportation. LTFRB Certificate of Public Convenience (franchising of public utility motor vehicle) including enforcement of the Public Service Law (CA 146). PNP-HPG Enforcement of Republic Act No. 4136 MMDA/LGUs Responsible for traffic direction and control within Metro Manila and its Localities DPWH Responsible for road engineering and infrastructure as well as construction, maintenance and repairs of roads including installation of traffic signs and signals and pavement markings. Relationship with Other Agencies<br>

hit_rate: 1.0
mrr: 0.3333333333333333
precision: 0.06666666666666667
recall: 1.0
ap: 0.3333333333333333
ndcg: 0.5


Query: What was the outcome of the issuance by the DPWH on June 5, 1961 of Administrative Regulation No. 1?
Metrics: {'hit_rate': 1.0, 'mrr': 0.3333333333333333, 'precision': 0.06666666666666667, 'recall': 1.0, 'ap': 0.3333333333333333, 'ndcg': 0.5}



In [44]:
  
evaluator = FAISSRetrievalEvaluator.from_metric_names(
    metric_names=metrics,
    retriever=retriever,
    Print_Results=False
)

eval_results = await evaluator.aevaluate_dataset(qa_dataset)


def display_results(name, eval_results):
    """Display results from evaluate."""

    metric_dicts = []
    for eval_result in eval_results:
        metric_dict = eval_result.metric_vals_dict
        metric_dicts.append(metric_dict)

    full_df = pd.DataFrame(metric_dicts)

    columns = {
        "retrievers": [name],
        **{k: [full_df[k].mean()] for k in metrics},
    }

    metric_df = pd.DataFrame(columns)

    return metric_df


display_results("top-2 eval", eval_results)

hit_rate: 1.0
mrr: 0.2
precision: 0.06666666666666667
recall: 1.0
ap: 0.2
ndcg: 0.38685280723454163
hit_rate: 1.0
mrr: 0.3333333333333333
precision: 0.06666666666666667
recall: 1.0
ap: 0.3333333333333333
ndcg: 0.5
hit_rate: 1.0
mrr: 1.0
precision: 0.06666666666666667
recall: 1.0
ap: 1.0
ndcg: 1.0
hit_rate: 1.0
mrr: 1.0
precision: 0.06666666666666667
recall: 1.0
ap: 1.0
ndcg: 1.0
hit_rate: 1.0
mrr: 1.0
precision: 0.06666666666666667
recall: 1.0
ap: 1.0
ndcg: 1.0
hit_rate: 0.0
mrr: 0.0
precision: 0.0
recall: 0.0
ap: 0.0
ndcg: 0.0
hit_rate: 1.0
mrr: 0.5
precision: 0.06666666666666667
recall: 1.0
ap: 0.5
ndcg: 0.6309297535714575
hit_rate: 1.0
mrr: 0.25
precision: 0.06666666666666667
recall: 1.0
ap: 0.25
ndcg: 0.43067655807339306
hit_rate: 1.0
mrr: 1.0
precision: 0.06666666666666667
recall: 1.0
ap: 1.0
ndcg: 1.0
hit_rate: 1.0
mrr: 1.0
precision: 0.06666666666666667
recall: 1.0
ap: 1.0
ndcg: 1.0
hit_rate: 1.0
mrr: 0.5
precision: 0.06666666666666667
recall: 1.0
ap: 0.5
ndcg: 0.630929753571457

Unnamed: 0,retrievers,hit_rate,mrr,precision,recall,ap,ndcg
0,top-2 eval,0.711864,0.366524,0.047458,0.711864,0.366524,0.445828


In [45]:
#indexing
index = FaissIndexer()
index.build_index(nodes_embed)
faiss_index = index.get_index()

faiss_retriever = FAISSVectorStoreRetriever(faiss_index=faiss_index,documents=nodes_embed)

## 5.D Sparse Embedding via BM25

In [46]:
bm25_retriever = BM25Retriever.from_defaults(
   nodes=nodes,
   similarity_top_k=5,
   stemmer=Stemmer.Stemmer("english"),
   language="english",
)

## 5.E Hybrid Retrieval via Reciprocal Rank

In [47]:
def hybrid_embedding(results: dict, top_k: int):
    x = QueryFusionRetriever
    ranked_results = QueryFusionRetriever._reciprocal_rerank_fusion(x, results)
    return ranked_results[:top_k]

## 5.F Hybrid Retrieval Evaluator

In [48]:
from typing import List, Tuple, Any, Dict
from pydantic import Field, ConfigDict
from llama_index.core.evaluation.retrieval.base import (
    BaseRetrievalEvaluator,
    RetrievalEvalMode,
    RetrievalEvalResult
)
from llama_index.core.evaluation.retrieval.metrics import resolve_metrics
from llama_index.core.response.notebook_utils import display_source_node

class HybridRetrievalEvaluator(BaseRetrievalEvaluator):
    faiss_retriever: "FAISSVectorStoreRetriever" = Field(..., description="FAISS Retriever instance")
    bm25_retriever: "BM25Retriever" = Field(..., description="BM25 Retriever instance")
    Print_Results: bool = Field(default=False, description="Whether to print retrieved results")

    model_config = ConfigDict(extra="forbid")  

    @classmethod
    def from_metric_names(
        cls,
        metric_names: List[str],
        faiss_retriever: "FAISSVectorStoreRetriever",
        bm25_retriever: "BM25Retriever",
        Print_Results: bool = False,
        **kwargs: Any,
    ) -> "HybridRetrievalEvaluator":
        metric_types = resolve_metrics(metric_names)
        metrics = [metric() for metric in metric_types]
        return cls(metrics=metrics, faiss_retriever=faiss_retriever, bm25_retriever=bm25_retriever, Print_Results=Print_Results, **kwargs)

    async def _aget_retrieved_ids_and_texts(
        self,
        query: str,
        mode: RetrievalEvalMode = RetrievalEvalMode.TEXT,
        top_k: int = 15
    ) -> Tuple[List[str], List[str]]:
        response = client.embeddings(prompt=query, model="mxbai-embed-large")
        query_embedding = response["embedding"]

        # FAISS retrieval
        faiss_docs = self.faiss_retriever._retrieve(query_embedding, top_k=top_k)

        # BM25 retrieval
        bm25_docs = self.bm25_retriever.retrieve(query)

        # Combine results
        results = {'faiss': faiss_docs, 'bm25': bm25_docs}
        ranked_results = QueryFusionRetriever._reciprocal_rerank_fusion(None, results)
        ranked_results = ranked_results[:top_k]

        # Optionally display results
        if self.Print_Results:
            for doc in ranked_results:
                display_source_node(doc, source_length=1000)

        retrieved_ids = [doc.id_ for doc in ranked_results]
        retrieved_texts = [doc.text for doc in ranked_results]
        return retrieved_ids, retrieved_texts

    async def aevaluate(
        self,
        query: str,
        expected_ids: List[str],
        expected_texts: List[str] = [],
        mode: RetrievalEvalMode = RetrievalEvalMode.TEXT,
        **kwargs: Any,
    ) -> RetrievalEvalResult:
        retrieved_ids, retrieved_texts = await self._aget_retrieved_ids_and_texts(query, mode)
        metric_dict = {}
        
        for metric in self.metrics:
            # Call compute instead of evaluate
            result = metric.compute(
                query=query,
                expected_ids=expected_ids,
                retrieved_ids=retrieved_ids,
                expected_texts=expected_texts,
                retrieved_texts=retrieved_texts,
                **kwargs
            )
            metric_dict[metric.metric_name] = result  # Store the whole RetrievalMetricResult object
            print(f"{metric.metric_name}: {result.score}")  # Print each metric result
        
        # Return RetrievalEvalResult with all required fields
        return RetrievalEvalResult(
            query=query,
            retrieved_ids=retrieved_ids,
            retrieved_texts=retrieved_texts,
            expected_ids=expected_ids,
            expected_texts=expected_texts,
            metric_dict=metric_dict
        )


## 5.G BM25 Retreival Evaluation

In [49]:
evaluator = HybridRetrievalEvaluator.from_metric_names(
    metric_names=metrics,
    faiss_retriever=faiss_retriever,
    bm25_retriever=bm25_retriever,
    Print_Results=True  # Toggle as needed
)

sample_id, sample_query = list(qa_dataset.queries.items())[1]
sample_expected = qa_dataset.relevant_docs[sample_id]

print("Sample Problem:")
print(f"sample id: {sample_id}, text: {sample_query}")
print(f"sample exp: {sample_expected}")

print("\nRetrieval results:")

result = await evaluator.aevaluate(
    query=sample_query,
    expected_ids=sample_expected
)

print("\n")
print(result)

Sample Problem:
sample id: 8d0ccead-fb73-4913-8964-e796cab9868b, text: What was the outcome of the issuance by the DPWH on June 5, 1961 of Administrative Regulation No. 1?
sample exp: ['247c50d0-ac9c-4bfd-bd76-366cd2c930aa']

Retrieval results:


**Node ID:** 247c50d0-ac9c-4bfd-bd76-366cd2c930aa<br>**Similarity:** 0.030555555555555555<br>**Text:** Motor Vehicles. The Chief of the Division was called the Superintendent of Division of Motor Vehicles. Act No. 3992 was amended by Commonwealth Act Numbers 123, 548, 556, 652 and Republic Act Numbers 314, 587, and 2383. On June 2, 1945, Department Order No. 4 was issued by the Department of Public Works and Highways reorganizing the Division. This took effect after the liberation of the Philippines from the Japanese invasion. In 1947, Executive Order No. 94 was promulgated reorganizing the different executive departments, bureaus and offices. Under Section 82 of this E.O., the Division of Motor Vehicles was upgraded into the Motor Vehicles Office (MVO) with the category of a Bureau. The Chief of the MVO enjoyed the rights and privileges of a Bureau Director. During the fifties and early sixties, our country started undergoing rapid economic development. Industrialization advanced and as a consequence, more and better roads were constructed. The Filipino then realized the need for mo...<br>

**Node ID:** 97deaa8a-4a92-4e9b-8de9-4884f8ca8cc9<br>**Similarity:** 0.016666666666666666<br>**Text:** Law Enforcement Agency Involved in Traffic Administration DOTr DOTr is the primary policy, planning, programming, coordinating, implementing, regulating and administrative entity of the executive branch of the government in the promotion, development, regulation of a dependable and coordinated network of transportation. LTFRB Certificate of Public Convenience (franchising of public utility motor vehicle) including enforcement of the Public Service Law (CA 146). PNP-HPG Enforcement of Republic Act No. 4136 MMDA/LGUs Responsible for traffic direction and control within Metro Manila and its Localities DPWH Responsible for road engineering and infrastructure as well as construction, maintenance and repairs of roads including installation of traffic signs and signals and pavement markings. Relationship with Other Agencies<br>

**Node ID:** 4dad5a6a-2c43-4184-a91e-a5273d320885<br>**Similarity:** 0.01639344262295082<br>**Text:** comfort anxl convenience to parsons and property in their charges as well as the safety of  persons and property within thelr areas of operations; WHEREAS, Section 4 ( ) of Republic Act (RA) No, 10930 provides thatthe Dej t 10" provides that the Department, in coordination with the LTFRB ane! other concerned agencies and private stakeholders, aggressively and regularly conduct a nationwide information, education and  communication campaign on road safety, including the list of measures implemented  pursuant to and violations punishable hereunder; WHEREAS, RA 10930 likewise provides that the Department, withthe assistance of LTFRB, shall promulgate the implementing rules and regulations necessary to ensure its effective implementation; WHEREAS, Section 5 (b) of EO 202 provides that the LTFRB has the power toissue, amend, revise, suspend or cancel Certificates of Public Convenience (CPC) or permits authorizing, the operation of publi land transportation services provilel by motorized ...<br>

**Node ID:** a98a6958-593e-4cee-a212-44d808936b60<br>**Similarity:** 0.01639344262295082<br>**Text:** '-.. ' ... . , \' Jint DPWH and DoTC IRR of R.A. 8794 . (f) To review and update the standards and procedures required of all local governments in their road maintenance operations, and to submit the findings and recommendations to the Board for appropriate action; and to provide technical assistance to local governments in preparing an inventory of the existing local road network and their conditions. (g) To coordinate its program of work at all times with such other units within and outside of DPWH und~rtaking activities which may be related to road maintenance and road safety. (h) To maintain the separate and distinct nature and accountabilities of monies received from each of the Special Road Funds, namely, the Special Road Support Fund, the Special Local Road Fund and the Special Road Safety Fund. The regular maintenance (GAA) fund shall likewise remain separate and distinct in nature and accountability from the Special Road Funds. (i) To expedite implementation of the approved...<br>

**Node ID:** 6ec6e78e-0818-4f47-a34b-bb08f30c3354<br>**Similarity:** 0.016129032258064516<br>**Text:** WHEREAS, Section 21 (c), Article 4 of Republic Act No. 8749 provides that the DOTC (now DOTr), together with the Department of Trade and Industry, and the Department of Environment and Natural Resources shall establish the procedures for the inspection of motor vehicles and the testing of their emissions; WHEREAS, the "Implementing Rules and Regulations of the Philippine Clean Air Act of 1999" provides that all private in-use motor vehicles and vehicles with updated/ enhanced engine whose chassis are pre-registered with L TO will only be allowed renewal of annual registration when, upon inspection by the LTO or other authorized private Motor Vehicle Inspection Station; WHEREAS, the "Implementing Rules and Regulations of the Philippine Clean Air Act of 1999" also provided that the DOTr shall conduct the vehicle test utilizing the Motor Vehicle Inspection System (MVIS) or its duly authorized and accredited inspection centers consistent with Republic Act No. 7394 or the "Consumer Act o...<br>

**Node ID:** ae87ffda-97e4-41de-abf5-2acb565b1062<br>**Similarity:** 0.016129032258064516<br>**Text:** .-, t1' M~~~ Secreta1y General House of Representatives 0  clRTJFIED COPY:<br>

**Node ID:** 49cb191e-a234-4931-b2a6-ab2d1e622377<br>**Similarity:** 0.015873015873015872<br>**Text:** REPU13LICOFTHE PtllLIPPINES ! U.P. LAW CENTER~ ! OFFICE of lhe NATIONAL AOMINISTRATIV.E REGISH Adminislra1 1 v~ Rules 3nd Regula11ons DEPARTMENT OF TRANSPORTATION ill AUG 2 4 2021 IB DEPARTMENT ORDER NO. 2 O 2 O;.. O 2 j 1   ~P.2=: SUBJECT GUIDELINES FOR THE SCRAPPING OF OLD PUBLIC UTILITY VEHICLE UNITS (PUVS) UNDER THE DEPARTMENT ORDER 2017-011 OTHERWISE KNOWN AS THE OMNIBUS FHANCHISING GUIDELINES DATE Li NOVEMBER 2020 WHEREAS, The Department of Transportation (DOTr), as the primary policy, planning, programming, coordinating, and implementing agency of the Executive branch of the government, in the regulation of dependable, safe, and reliable transportation system and services, is mandated by laws to ensure the roadworthiness of vehicles operated in the streets and higtnvays of the country; WHEREAS, the Land Transportation Franchising and Regulatory Board (LTFH.13), as a~1 agency under the administrative supervision and control of the DOTr, is mandated to promulgate, administer, e...<br>

**Node ID:** df60fb1d-cc36-4e75-ae21-1f29e1db8565<br>**Similarity:** 0.015873015873015872<br>**Text:** .. - If for any reason any section or provision of this Act fa declared unconstitutional, other provisions hereof which are not affected thereby shall continue to be in foll force and effect. SEC. 13. Repealing Clause. -All l~ws, decrees, or rules and regulations which are inconsistent with or contrary to the provisions of this Act are hereby amended or repealed. SEC. 14. Effectiuity. -This Act shall take effect fifteen (15) days after its publication ~n the Official Gazette or in two (2) national newspapers of general circulation. Approved. ~~,.~ - FELICIANO~~  Speaker of the House of Representatives . 5 This Act which is a consolidation of. Senate Bill No. 2488 and House Bill No. 4462 was finally passed by the Senate and   the House of Representatives on May 27, 2Dl5. .~ tf M~~~ Se_cretary General House. of Representatives Approved: l'JOC 2~l' ..   ~ '~ BE:N10Iro s. '.A..QUINO n1 . President of t e Philippines ~~ 0 1 11iioorfl 1 liiu 1namlm~i&11   PNOYOF649<br>

**Node ID:** f13de0a0-e65d-455b-b0b4-ae0d9c1f5500<br>**Similarity:** 0.015625<br>**Text:** this Act and every five (5) years thereafter, review the applicability and enforcement of all foregoing pecuniary penalties and shall initiate amendment and/or upgrade the same as may be necessary, subject to the approval of the Secretary of the DOTC.1 wphi 1 Section 17. Implementing Rules and Regulations.   The DOTC, the DOH and the NAPOLCOM shall, within three (3) months from the effectivity of this Act, jointly promulgate the necessary implementing rules and regulations to carry out the provisions of this Act. Section 18. Separability Clause.   If, for any reason, any part or provision of this Act is declared invalid, such declaration shall not affect the other provisions of this Act. Section 19. Repealing Clause.   Subparagraph (f), Section 56, Article 1 of Republic Act No. 4136, otherwise known as the "Land Transportation and Traffic Code", as amended; subparagraph (f), Section 5 of Republic Act No. 7924, otherwise known as "An Act Creating the Metropolitan Manila Development A...<br>

**Node ID:** ec3b0b78-9b66-433b-bed1-081263e34660<br>**Similarity:** 0.015625<br>**Text:** S. No. 2488 H. No. 44 62 21\r.publit of fqe Jl1Hippines filon2rrss of  4r J4Hippinrs cfrJHefro cfrJHauila Begun and held in Metro Manila, on Monday, the twenty-eighth day of July, two thousand fourteen .   l REPUBLIC Acr No. 10 6 6 6] AN ACT PROVIDING FOR THE SAFETY OF CHILDREN ABOARD MOTORCYLES Be ii enacled by tire Senate and House of Representatives of the Philippines in Congress assembled: SECTION l. Short Title. - This Act shall be known as "Children's Safety on Motorcycles Act of 2015". SEC. 2. Declaration of Policy. -It is the policy of the State to defend the right of children to assistance, including proper care and nutrition, and special protection from all forms of neglect, abuse, cruelty, exploitation, and other conditions prejudicial to their developmem. Towards this end, the State shall pursue a more proactive and preventive approach to secui:e the safety of passengers, especially children, by regulating the operation of motorcycles along roads and highways.<br>

**Node ID:** f0a91c16-1fa1-42c1-9005-c3e3cdc6d699<br>**Similarity:** 0.015384615384615385<br>**Text:** Republic ofthe Philippines DEPARTMENT OF TRANSPORTATION SUBJECT + AMENDED GUIDELINES GOVERNING THE USE OR [ATTACHMENT OF SIRENS, BELLS, HORNS, WHISTLES (OR SIMILAR GADGETS THAT EMIT EXCEPTIONALLY [LOUD OR STARTLING SOUNDS AND DOME LIGHTS AND  OTHER SIMILAR SIGNALING OR FLASHING DEVICES ON MOTOR VEHICLES DATE 18 pprn 2006 WHEREAS, the Deparment of Tonsporaton (007?) the primary pokey, plannog. programming, coordinating, implementing,  and administrative enity of the  administer and enforce al laws, rules and regulation relative to transportation, WHEREAS, Presidential Decree No. 96 (PO 6), directed the Commissioner ofthe Land  Transportation to draw and promulgate rules and regulations necessary to give effect to  THE USE OR ATTACHMENT OF SIRENS, BELLS, HORNS, WHISTLES OR SIMILAR GADGETS WHEREAS, adhering to the said directv, the Bureau of Land Transportation (now the Land Transportation fice) issued Administrative Order No 1, series 1973 which set the  ules in regulations set forth ...<br>

**Node ID:** af465d0b-a998-4f80-8d67-c28c228261c1<br>**Similarity:** 0.015151515151515152<br>**Text:** Department of Finance, Budget and management, an the Transportation and Communication, as ex officio members. The remaining three (3) members shall come from transport and motorist organization, which have been in existence and active for the last five (5) years prior to this Act. They shall be appointed for the term of two (2) years each by the President of the Philippines upon the recommendation of the secretaries of the DPWH and the DOTC. Section 8. Status of the Special Funds. - The four (4) special funds established under this Act shall be distinct and separate from and in addition in any appropriation authorized and granted yearly to the DPWH and the DOTC. To cover expenditures for the identified objects of expenditures under this Act. Congress shall continue to appropriate an amount in the General appropriations Act for road maintenance of the DPWH: Provided, however, That any savings for each year out of such appropriation shall revert to the General Fund. Any savings from t...<br>

**Node ID:** 2bad6f02-0050-4b3e-9d69-e54085b11c62<br>**Similarity:** 0.014925373134328358<br>**Text:** Section 1. Administration and Enforcement These Implementing Rules and _Regulations shall be administered by the.,be- _ partment and/or its authorized representatives or through other government , agencies designated or deputized by the qepariment: or by this act, executive,. orders or memorandum circulars, and others. Section 2. Rules and Regulations of other Government Agencies The rules and regulations issued by other government agencies and. instru ~ mentalities for the prevention and/or abatement of pollution not consistent with this Act shall supplement the rules and regulations issu~d by the Department through the Bureau. Section 3. Authentication with Official Seal All decisions, orders and appropriate legal documents hereinafter pro_i'nul- gated shall be issued and authenticated with the official seal of the Department or other government agencies designated by this A~t. Section 4. Jurisdiction The Department through the Bureau shall have exclusive and original jurisdic- ti...<br>

**Node ID:** b24bd770-f8c3-474f-8609-2919a1f0fb4c<br>**Similarity:** 0.014705882352941176<br>**Text:** \. l.\   .;: ._ ,..-"j  " ~i ~1 , I ..,..--, ~....,_ _I ,- ,   ! ' ,/' i . )\ L ~ ~   . < . --<! 1 # l t. " ' .> '\?.E~..,AR,lfME~T OF PUBLIC WORKS . , .Go: AND HIGHWAYS DEPARTMENT OF TRANSPORTATION AND COMMUNICATIONS l ~ ' '. ' / IMPLEMENTING RULES AND REGULATIONS OF REPUBLIC ACT NO. 8794 (AN ACT IMPOSING A MOTOR VEHICLE USER'S CHARGE ON OWNERS OF ALL TYPES OF MOTOR VEHICLES AND FOR OTHER PURPOSES) The following joint Implementing Rules and Regulations, herein after referred to as IRR, are hereby issued by the Department of Public Works and Highways, hereinafter referred to as DPWH, and the Department of Transportation and Communications, hereinafter referred to as DoTC, pursuant to the provisions of Section 9 of Republic Act (R.A.) No. 8794, which provides, among others, for the imposition of a motor vehicle user's charge as well as the creation of a Board. Article I. Common Provisions 1. Declaration of Policy. Pursuant to Section 2 of R.A. 8794, it is the policy of the State to p...<br>

**Node ID:** 5cdfae5a-4bd1-45ee-8f08-9f3a6326e70a<br>**Similarity:** 0.014492753623188406<br>**Text:** .. ' ., .  .. r . , '\ ' \ t \ I   ' \   ! ~ r   .. '  . . . c,.. \ '\. , ., ' . I   ' -.- Joint DPWH and Do TC IRR of R.A. 8794 the Philippine Clean Air Act of 1999 and its Implementing Rules and Regulations; and (h) To establish and implement the appropriate structural and procedural improvements to carry out these policies. 2. Establishment of Special Accounts. Pursuant to Section 7 of R.A. 8794, there shall be established four separate Special Trust Accounts in the National Treasury. DPWH shall, through the Department of Budget and Management (DBM), establish one separate Special Trust Account for each of the Special Road S~pport Fund, Special Local Road Fund and Special Road Safety Fund. Do TC shall, through DBM, establish one Special Trust Account for the Special Vehicle Pollution Control Fund. 3. Establishment and Composition of the Road Board. Pursuant to Section 7 of R.A. 8794, there has been established a Road Board (the Board) to manage in a prudent and efficient manner t...<br>

hit_rate: 1.0
mrr: 1.0
precision: 0.06666666666666667
recall: 1.0
ap: 1.0
ndcg: 1.0


Query: What was the outcome of the issuance by the DPWH on June 5, 1961 of Administrative Regulation No. 1?
Metrics: {'hit_rate': 1.0, 'mrr': 1.0, 'precision': 0.06666666666666667, 'recall': 1.0, 'ap': 1.0, 'ndcg': 1.0}



In [50]:
evaluator = HybridRetrievalEvaluator.from_metric_names(
    metric_names=metrics,
    faiss_retriever=faiss_retriever,
    bm25_retriever=bm25_retriever,
    Print_Results=False  
)

eval_results = await evaluator.aevaluate_dataset(qa_dataset)

def display_results(name, eval_results):
    """Display results from evaluate."""

    metric_dicts = []
    for eval_result in eval_results:
        metric_dict = eval_result.metric_vals_dict
        metric_dicts.append(metric_dict)

    full_df = pd.DataFrame(metric_dicts)

    columns = {
        "retrievers": [name],
        **{k: [full_df[k].mean()] for k in metrics},
    }

    metric_df = pd.DataFrame(columns)

    return metric_df

display_results("top-2 eval", eval_results)


hit_rate: 1.0
mrr: 0.5
precision: 0.06666666666666667
recall: 1.0
ap: 0.5
ndcg: 0.6309297535714575
hit_rate: 1.0
mrr: 1.0
precision: 0.06666666666666667
recall: 1.0
ap: 1.0
ndcg: 1.0
hit_rate: 1.0
mrr: 0.3333333333333333
precision: 0.06666666666666667
recall: 1.0
ap: 0.3333333333333333
ndcg: 0.5
hit_rate: 1.0
mrr: 1.0
precision: 0.06666666666666667
recall: 1.0
ap: 1.0
ndcg: 1.0
hit_rate: 0.0
mrr: 0.0
precision: 0.0
recall: 0.0
ap: 0.0
ndcg: 0.0
hit_rate: 1.0
mrr: 0.5
precision: 0.06666666666666667
recall: 1.0
ap: 0.5
ndcg: 0.6309297535714575
hit_rate: 1.0
mrr: 0.5
precision: 0.06666666666666667
recall: 1.0
ap: 0.5
ndcg: 0.6309297535714575
hit_rate: 1.0
mrr: 0.5
precision: 0.06666666666666667
recall: 1.0
ap: 0.5
ndcg: 0.6309297535714575
hit_rate: 1.0
mrr: 0.5
precision: 0.06666666666666667
recall: 1.0
ap: 0.5
ndcg: 0.6309297535714575
hit_rate: 0.0
mrr: 0.0
precision: 0.0
recall: 0.0
ap: 0.0
ndcg: 0.0
hit_rate: 1.0
mrr: 1.0
precision: 0.06666666666666667
recall: 1.0
ap: 1.0
ndcg: 1.0
hit

Unnamed: 0,retrievers,hit_rate,mrr,precision,recall,ap,ndcg
0,top-2 eval,0.728814,0.363079,0.048588,0.728814,0.363079,0.450275


# 6. Post Retrieval

## 6.A Summarization

In [51]:
def summarize_each_chunk(nodes, client, query, model="llama3.1:8b", parent=False):
    if parent:
        chunks = [doc.text for doc in nodes]
    else:
        chunks = [doc.node.text for doc in nodes]
    summaries = []
    
    for i, chunk in enumerate(chunks):
        prompt = f"""
        Summarize the following text in one concise paragraph, focusing on key points relevant to the query: "{query}".
        
        - Emphasize information directly related to the query.
        - Exclude unrelated, redundant, or speculative details.
        - Do NOT introduce new information or answer the query itself. 
        
        Text:
        {chunk}
        
        Summary:
        """
        
        response = client.generate(model=model, prompt=prompt)
        summary = response['response'].strip()
        summaries.append(summary)

    return summaries

# 6.B Evaluation Generation

In [52]:
def generate_response_with_notice(summaries, query, client, model="llama3.1:8b"):
    # Combine summaries into context block
    context = "\n".join(summaries)
    
    # Create prompt to answer based on summarized text
    prompt = f"""
    Use the following summarized information to answer the query accurately and concisely. 
    DO NOT USE BACKGROUND KNOWLEDGE OUTSIDE THE CONTEXT PROVIDED.
    If the information is not sufficient to fully address the query, respond ONLY with:
    "The available information is insufficient to provide a complete answer to this query."

    Summarized Context:
    {context}
    
    Query:
    {query}
    
    Response:
    """
    
    # Send the prompt to Ollama
    response = client.generate(
        model=model,
        prompt=prompt
    )
    
    return response['response'].strip()

# 7. Querying

## 7.A Query Transforms

# 8. Query Generation

In [53]:
docstore = {}

# Store documents using full metadata as the key
for doc in documents:
    key = tuple(doc.metadata.items())  # Convert metadata to tuple for hashable key
    docstore[key] = doc

In [54]:
def get_document_by_chunk_metadata(chunk_node):
    # Convert chunk metadata to tuple for matching
    metadata_key = tuple(chunk_node.metadata.items())

    # Retrieve document from docstore
    document = docstore.get(metadata_key)
    return document

In [55]:
def remove_duplicate_documents(doc_list):
    seen_ids = set()
    unique_docs = []

    for doc in doc_list:
        if doc.doc_id not in seen_ids:
            seen_ids.add(doc.doc_id)
            unique_docs.append(doc)

    return unique_docs

In [56]:
def gen_query(query, top_k, client, mode='dense', summary=False, model="llama3.1:8b", chunks_only=False):
    response = client.embeddings(prompt=query, model="mxbai-embed-large")
    query_embedding = response["embedding"]

    top_k_docs = faiss_retriever._retrieve(query_embedding, top_k=top_k)

    bm25_retriever = BM25Retriever.from_defaults(
    nodes=nodes,
    similarity_top_k=top_k,
    stemmer=Stemmer.Stemmer("english"),
    language="english",
    )
    retrieved_nodes = bm25_retriever.retrieve(query)

    results = {'faiss': top_k_docs, 'bm25':retrieved_nodes}
    ranked_results = hybrid_embedding(results, top_k=top_k)

    if mode == 'dense':
        print('using FAISS')
        ans_nodes =top_k_docs
    elif mode == 'sparse':
        print('using BM25')
        ans_nodes = retrieved_nodes
    else:
        print('using Hybrid')
        ans_nodes = ranked_results

    parent_flag = True
    context = set([get_document_by_chunk_metadata(docs).text for docs in ans_nodes])
    if chunks_only:
        parent_flag = False
        print('using chunks only')
        context = [docs.node.text for docs in ans_nodes]
        
    if summary:
        print('using summaries')
        context_nodes = remove_duplicate_documents([get_document_by_chunk_metadata(docs) for docs in ans_nodes])

        if chunks_only:
            context_nodes=ans_nodes
        summaries = summarize_each_chunk(context_nodes, client, model='llama3.1:8b', query=query,parent=parent_flag)
        context = summaries

    answer = generate_response_with_notice(context, query, client, model=model)

    # Format the references
    references = []
    for i, doc in enumerate(ranked_results[:top_k], start=1):
        metadata = doc.metadata
        source_info = f"Source {i}: {metadata['title']} (Page {metadata['page']}, Folder: {metadata['folder']})"
        references.append(source_info)

    return answer, "\n".join(references), "\n".join(context)

# 9. TDC Exam Evaluation

In [57]:
# Generate prompts dynamically
def generate_prompt(row):
    options = []
    for choice in ['A', 'B', 'C', 'D', 'E']:
        # Check for NaN or blank values
        if pd.notna(row[choice]) and row[choice] != '':
            options.append(f"{choice}. {row[choice]}")
    
    # Construct the prompt with few-shot examples
    prompt = f"\nActual Question: {row['Question']}\n" + "\n".join(options)
    prompt += "\nPlease answer only in letters and put them inside a bracket '[]'. If the question contains the statement 'Check all that apply' then add comma separator if there are multiple answers ONLY IF ALLOWED."
    
    return prompt

In [58]:
# Load the Excel file
file_path = '/mnt/c/Users/Jeryl Salas/Documents/AI 351/Project/LTO_EXAM.csv'
df = pd.read_csv(file_path)
df['Prompt'] = df.apply(generate_prompt, axis=1)
display(df.head())

Unnamed: 0,Question,A,B,C,D,E,Answer,Prompt
0,What should you do in case your vehicle breaks...,Open your trunk and hood,Stand on the expressway and flag down passing ...,Call for help using a mobile phone or an expre...,Park as far to the right as possible,Put your hazard warning light on,"A, C, D, E",\nActual Question: What should you do in case ...
1,What will happen when your front tire blows out?,The back end will sway towards the side of the...,The back end will sway away from the blowout,The front end will pull towards the side of th...,The front end will pull to the opposite side o...,,C,\nActual Question: What will happen when your ...
2,What should you do when an ambulance comes up ...,Stop as soon as you can,"Maintain your speed, let the ambulance driver ...",Speed up so that you don't hold the ambulance,Pull over to the right and slow down or even s...,,D,\nActual Question: What should you do when an ...
3,While driving the hood of your car lifts up bl...,Look through the gap underneath the hood or ou...,Brake suddenly so you don't leave the road,Pull to the side of the road and refasten the ...,Turn your headlights on and look out of the si...,,"A,C",\nActual Question: While driving the hood of y...
4,"In case of an accident, the first duty of the ...",pick-up the injured person and take him to the...,report the accident to the nearest hospital,report the accident to the nearest police station,,,A,"\nActual Question: In case of an accident, the..."


In [59]:
qr_range = (0,60)
df["AI"] = np.nan
ai_answer = []
for i in tqdm(range(*qr_range)):
    ai_answer.append(gen_query(df.loc[i,"Prompt"], top_k=15, client=client, mode='hybrid', model="llama3.1:8b"))

df.loc[qr_range[0]:qr_range[1]-1, "AI"] = [answ[0] for answ in ai_answer]
df.loc[qr_range[0]:qr_range[1]-1, "Context"] = [answ[2] for answ in ai_answer]

  0%|          | 0/60 [00:00<?, ?it/s]

using Hybrid


  2%|▏         | 1/60 [00:25<25:32, 25.98s/it]

using Hybrid


  3%|▎         | 2/60 [00:49<23:39, 24.48s/it]

using Hybrid


  5%|▌         | 3/60 [01:14<23:37, 24.87s/it]

using Hybrid


  7%|▋         | 4/60 [01:37<22:35, 24.21s/it]

using Hybrid


  8%|▊         | 5/60 [02:01<22:01, 24.02s/it]

using Hybrid


 10%|█         | 6/60 [02:29<22:44, 25.27s/it]

using Hybrid


 12%|█▏        | 7/60 [02:54<22:14, 25.18s/it]

using Hybrid


 13%|█▎        | 8/60 [03:19<21:53, 25.25s/it]

using Hybrid


 15%|█▌        | 9/60 [03:42<20:49, 24.50s/it]

using Hybrid


 17%|█▋        | 10/60 [04:10<21:12, 25.44s/it]

using Hybrid


 18%|█▊        | 11/60 [04:34<20:36, 25.24s/it]

using Hybrid


 20%|██        | 12/60 [05:01<20:27, 25.58s/it]

using Hybrid


 22%|██▏       | 13/60 [05:25<19:44, 25.19s/it]

using Hybrid


 23%|██▎       | 14/60 [05:50<19:08, 24.97s/it]

using Hybrid


 25%|██▌       | 15/60 [05:55<14:12, 18.95s/it]

using Hybrid


 27%|██▋       | 16/60 [06:20<15:25, 21.04s/it]

using Hybrid


 28%|██▊       | 17/60 [06:42<15:06, 21.08s/it]

using Hybrid


 30%|███       | 18/60 [07:10<16:15, 23.23s/it]

using Hybrid


 32%|███▏      | 19/60 [07:36<16:25, 24.05s/it]

using Hybrid


 33%|███▎      | 20/60 [08:01<16:18, 24.45s/it]

using Hybrid


 35%|███▌      | 21/60 [08:24<15:31, 23.89s/it]

using Hybrid


 37%|███▋      | 22/60 [08:50<15:36, 24.64s/it]

using Hybrid


 38%|███▊      | 23/60 [09:15<15:19, 24.84s/it]

using Hybrid


 40%|████      | 24/60 [09:40<14:54, 24.85s/it]

using Hybrid


 42%|████▏     | 25/60 [10:05<14:24, 24.70s/it]

using Hybrid


 43%|████▎     | 26/60 [10:30<14:04, 24.85s/it]

using Hybrid


 45%|████▌     | 27/60 [10:52<13:15, 24.10s/it]

using Hybrid


 47%|████▋     | 28/60 [11:15<12:39, 23.74s/it]

using Hybrid


 48%|████▊     | 29/60 [11:39<12:18, 23.82s/it]

using Hybrid


 50%|█████     | 30/60 [12:07<12:28, 24.96s/it]

using Hybrid


 52%|█████▏    | 31/60 [12:31<12:00, 24.84s/it]

using Hybrid


 53%|█████▎    | 32/60 [12:56<11:36, 24.88s/it]

using Hybrid


 55%|█████▌    | 33/60 [13:20<11:02, 24.55s/it]

using Hybrid


 57%|█████▋    | 34/60 [13:43<10:24, 24.03s/it]

using Hybrid


 58%|█████▊    | 35/60 [14:10<10:21, 24.87s/it]

using Hybrid


 60%|██████    | 36/60 [14:35<10:02, 25.11s/it]

using Hybrid


 62%|██████▏   | 37/60 [15:00<09:33, 24.94s/it]

using Hybrid


 63%|██████▎   | 38/60 [15:22<08:48, 24.04s/it]

using Hybrid


 65%|██████▌   | 39/60 [15:49<08:44, 24.98s/it]

using Hybrid


 67%|██████▋   | 40/60 [16:14<08:19, 24.96s/it]

using Hybrid


 68%|██████▊   | 41/60 [16:39<07:56, 25.08s/it]

using Hybrid


 70%|███████   | 42/60 [17:04<07:30, 25.05s/it]

using Hybrid


 72%|███████▏  | 43/60 [17:09<05:21, 18.94s/it]

using Hybrid


 73%|███████▎  | 44/60 [17:33<05:26, 20.43s/it]

using Hybrid


 75%|███████▌  | 45/60 [18:00<05:37, 22.53s/it]

using Hybrid


 77%|███████▋  | 46/60 [18:30<05:46, 24.74s/it]

using Hybrid


 78%|███████▊  | 47/60 [18:56<05:26, 25.11s/it]

using Hybrid


 80%|████████  | 48/60 [19:22<05:04, 25.41s/it]

using Hybrid


 82%|████████▏ | 49/60 [19:53<04:55, 26.85s/it]

using Hybrid


 83%|████████▎ | 50/60 [20:18<04:25, 26.56s/it]

using Hybrid


 85%|████████▌ | 51/60 [20:49<04:10, 27.80s/it]

using Hybrid


 87%|████████▋ | 52/60 [21:15<03:38, 27.29s/it]

using Hybrid


 88%|████████▊ | 53/60 [21:19<02:21, 20.23s/it]

using Hybrid


 90%|█████████ | 54/60 [21:23<01:32, 15.39s/it]

using Hybrid


 92%|█████████▏| 55/60 [21:27<00:59, 11.91s/it]

using Hybrid


 93%|█████████▎| 56/60 [21:49<01:00, 15.07s/it]

using Hybrid


 95%|█████████▌| 57/60 [22:12<00:52, 17.36s/it]

using Hybrid


 97%|█████████▋| 58/60 [22:17<00:27, 13.64s/it]

using Hybrid


 98%|█████████▊| 59/60 [22:45<00:17, 17.87s/it]

using Hybrid


100%|██████████| 60/60 [23:05<00:00, 23.09s/it]
  df.loc[qr_range[0]:qr_range[1]-1, "AI"] = [answ[0] for answ in ai_answer]


In [60]:
import re


def process_answers(answers):
    formatted_answers = []
    
    for a in answers:
        
        matches = re.findall(r'\[?\s*([A-E](?:\s*,\s*[A-E])*)\s*\]?', str(a)) # Extract answers like [A, C, D] or [A] or [B, D]
        answers = []
        for match in matches:
            answers.extend(re.split(r'\s*,\s*', match))  # Split by comma and remove spaces
        unique_sorted_answers = sorted(set(answers), key=lambda x: ['A', 'B', 'C', 'D', 'E'].index(x))
        if not unique_sorted_answers:
            formatted_answers.append(None)
        else:
            formatted_answers.append(unique_sorted_answers)
    return formatted_answers

df_results = df.loc[qr_range[0]:qr_range[1]-1, ["Question","Answer","AI"]]
df_results['Answer'] = df_results['Answer'].apply(lambda x: x.split(', '))
df_results['AI'] = process_answers(df_results["AI"])
df_results['Answer'] = process_answers(df_results["Answer"])



def calculate_scores(df):
    scores = []
    for index, row in df.iterrows():
        correct_answers = set(row['Answer'] if row['Answer'] is not None else [])
        ai_answers = set(row['AI'] if row['AI'] is not None else [])
        if ai_answers == correct_answers:
            score = 1.0
        else:
            score = 0.0
        scores.append(score)
    
    df['Score'] = scores
    accuracy = scores.count(1.0) / len(scores)
    print(f'Final Score: {scores.count(1.0):.2f}/{len(scores):.2f}')
    print(f'Accuracy: {accuracy:.2f}%')
    return df

# Apply the scoring function
scored_df = calculate_scores(df_results)

# Display the dataframe to verify the results
display(scored_df[['Question', 'Answer', 'AI', 'Score']])

Final Score: 33.00/60.00
Accuracy: 0.55%


Unnamed: 0,Question,Answer,AI,Score
0,What should you do in case your vehicle breaks...,"[A, C, D, E]","[C, D, E]",0.0
1,What will happen when your front tire blows out?,[C],[B],0.0
2,What should you do when an ambulance comes up ...,[D],[A],0.0
3,While driving the hood of your car lifts up bl...,"[A, C]","[A, C, D]",0.0
4,"In case of an accident, the first duty of the ...",[A],[C],0.0
5,"When a vehicle starts to skid, what should the...",[B],"[B, C]",0.0
6,"In case of injuries caused by an accident, the...",[A],"[A, B]",0.0
7,What will happen when your rear tire blows out?,[B],[A],0.0
8,"When a vehicle is stalled or disabled, the dri...",[C],[C],1.0
9,If you are the first to arrive at the scene of...,[B],[B],1.0


In [61]:
# Generate prompts dynamically
def generate_prompt(row):
    
    # Construct the prompt with few-shot examples
    prompt = f"\nActual Question: {row['Question']}\n" 
    prompt += "\nPlease answer the question based on the given context."
    
    return prompt

In [62]:
# Load the Excel file
file_path = '/mnt/c/Users/Jeryl Salas/Documents/AI 351/Project/LTO_EXAM_QnA.csv'
df = pd.read_csv(file_path, encoding='ISO-8859-1')
df['Prompt'] = df.apply(generate_prompt, axis=1)
display(df.head())

Unnamed: 0,Question,Answer,Prompt
0,Traffic Jam can be prevented if you,Keep opposing lanes open,\nActual Question: Traffic Jam can be prevente...
1,When making a right turn you should,Stay on the outermost lane of the road then si...,\nActual Question: When making a right turn yo...
2,"When you intend to turn right or left, signal ...",25 meters before you intend to make your turn,\nActual Question: When you intend to turn rig...
3,"At an intersection with a traffic light, make ...",The green light is on and there is a left turn...,\nActual Question: At an intersection with a t...
4,Graft and corruption in the traffic enforcemen...,Self disciplined by drivers and obeying traffi...,\nActual Question: Graft and corruption in the...


In [63]:
qr_range = (0,60)
df["AI"] = np.nan
ai_answer = []
for i in tqdm(range(*qr_range)):
    ai_answer.append(gen_query(df.loc[i,"Prompt"], top_k=15, client=client, mode='hybrid', model="llama3.1:8b"))

df.loc[qr_range[0]:qr_range[1]-1, "AI"] = [answ[0] for answ in ai_answer]
df.loc[qr_range[0]:qr_range[1]-1, "Context"] = [answ[2] for answ in ai_answer]
df_new = df.loc[qr_range[0]:qr_range[1]-1].copy()
df = df_new.copy()

  0%|          | 0/60 [00:00<?, ?it/s]

using Hybrid


  2%|▏         | 1/60 [00:07<07:21,  7.48s/it]

using Hybrid


  3%|▎         | 2/60 [00:36<19:21, 20.03s/it]

using Hybrid


  5%|▌         | 3/60 [01:03<22:18, 23.48s/it]

using Hybrid


  7%|▋         | 4/60 [01:33<24:02, 25.75s/it]

using Hybrid


  8%|▊         | 5/60 [02:03<24:58, 27.25s/it]

using Hybrid


 10%|█         | 6/60 [02:30<24:40, 27.43s/it]

using Hybrid


 12%|█▏        | 7/60 [02:59<24:31, 27.76s/it]

using Hybrid


 13%|█▎        | 8/60 [03:24<23:21, 26.95s/it]

using Hybrid


 15%|█▌        | 9/60 [03:55<23:59, 28.22s/it]

using Hybrid


 17%|█▋        | 10/60 [04:20<22:47, 27.34s/it]

using Hybrid


 18%|█▊        | 11/60 [04:49<22:32, 27.60s/it]

using Hybrid


 20%|██        | 12/60 [05:14<21:39, 27.07s/it]

using Hybrid


 22%|██▏       | 13/60 [05:40<20:50, 26.61s/it]

using Hybrid


 23%|██▎       | 14/60 [06:08<20:40, 26.96s/it]

using Hybrid


 25%|██▌       | 15/60 [06:36<20:37, 27.50s/it]

using Hybrid


 27%|██▋       | 16/60 [07:01<19:31, 26.62s/it]

using Hybrid


 28%|██▊       | 17/60 [07:29<19:26, 27.12s/it]

using Hybrid


 30%|███       | 18/60 [07:55<18:42, 26.72s/it]

using Hybrid


 32%|███▏      | 19/60 [08:22<18:18, 26.80s/it]

using Hybrid


 33%|███▎      | 20/60 [08:50<18:07, 27.20s/it]

using Hybrid


 35%|███▌      | 21/60 [09:18<17:46, 27.34s/it]

using Hybrid


 37%|███▋      | 22/60 [09:48<17:53, 28.26s/it]

using Hybrid


 38%|███▊      | 23/60 [10:18<17:38, 28.62s/it]

using Hybrid


 40%|████      | 24/60 [10:46<17:05, 28.50s/it]

using Hybrid


 42%|████▏     | 25/60 [11:12<16:14, 27.85s/it]

using Hybrid


 43%|████▎     | 26/60 [11:39<15:39, 27.63s/it]

using Hybrid


 45%|████▌     | 27/60 [12:13<16:07, 29.33s/it]

using Hybrid


 47%|████▋     | 28/60 [12:40<15:22, 28.83s/it]

using Hybrid


 48%|████▊     | 29/60 [13:07<14:33, 28.17s/it]

using Hybrid


 50%|█████     | 30/60 [13:34<13:57, 27.92s/it]

using Hybrid


 52%|█████▏    | 31/60 [14:02<13:30, 27.95s/it]

using Hybrid


 53%|█████▎    | 32/60 [14:27<12:38, 27.09s/it]

using Hybrid


 55%|█████▌    | 33/60 [14:55<12:14, 27.21s/it]

using Hybrid


 57%|█████▋    | 34/60 [15:19<11:19, 26.13s/it]

using Hybrid


 58%|█████▊    | 35/60 [15:47<11:12, 26.89s/it]

using Hybrid


 60%|██████    | 36/60 [16:13<10:35, 26.49s/it]

using Hybrid


 62%|██████▏   | 37/60 [16:42<10:30, 27.43s/it]

using Hybrid


 63%|██████▎   | 38/60 [17:07<09:42, 26.48s/it]

using Hybrid


 65%|██████▌   | 39/60 [17:33<09:12, 26.30s/it]

using Hybrid


 67%|██████▋   | 40/60 [18:00<08:52, 26.63s/it]

using Hybrid


 68%|██████▊   | 41/60 [18:24<08:12, 25.92s/it]

using Hybrid


 70%|███████   | 42/60 [18:48<07:35, 25.32s/it]

using Hybrid


 72%|███████▏  | 43/60 [18:54<05:29, 19.39s/it]

using Hybrid


 73%|███████▎  | 44/60 [19:19<05:39, 21.24s/it]

using Hybrid


 75%|███████▌  | 45/60 [19:24<04:04, 16.30s/it]

using Hybrid


 77%|███████▋  | 46/60 [19:50<04:29, 19.27s/it]

using Hybrid


 78%|███████▊  | 47/60 [20:10<04:13, 19.49s/it]

using Hybrid


 80%|████████  | 48/60 [20:16<03:04, 15.36s/it]

using Hybrid


 82%|████████▏ | 49/60 [20:40<03:17, 17.96s/it]

using Hybrid


 83%|████████▎ | 50/60 [21:06<03:24, 20.47s/it]

using Hybrid


 85%|████████▌ | 51/60 [21:35<03:25, 22.83s/it]

using Hybrid


 87%|████████▋ | 52/60 [22:01<03:10, 23.80s/it]

using Hybrid


 88%|████████▊ | 53/60 [22:21<02:39, 22.82s/it]

using Hybrid


 90%|█████████ | 54/60 [22:46<02:20, 23.49s/it]

using Hybrid


 92%|█████████▏| 55/60 [23:15<02:04, 24.94s/it]

using Hybrid


 93%|█████████▎| 56/60 [23:36<01:35, 23.92s/it]

using Hybrid


 95%|█████████▌| 57/60 [23:42<00:55, 18.39s/it]

using Hybrid


 97%|█████████▋| 58/60 [24:07<00:40, 20.44s/it]

using Hybrid


 98%|█████████▊| 59/60 [24:32<00:21, 21.94s/it]

using Hybrid


100%|██████████| 60/60 [24:56<00:00, 24.94s/it]
  df.loc[qr_range[0]:qr_range[1]-1, "AI"] = [answ[0] for answ in ai_answer]


# 10. Similarity Evaluation

In [64]:
from llama_index.core.evaluation import SemanticSimilarityEvaluator
from llama_index.core.base.embeddings.base import BaseEmbedding
import asyncio
from llama_index.core.embeddings import resolve_embed_model
from pydantic import PrivateAttr

class OllamaEmbeddingModel(BaseEmbedding):
    _client: Client = PrivateAttr()

    def __init__(self, model_name: str = "mxbai-embed-large", timeout: int = 300):
        super().__init__()
        self.model_name = model_name
        self._client = Client() 

    async def _aget_query_embedding(self, query: str) -> list[float]:
        return await self._aget_text_embedding(query)

    async def _aget_text_embedding(self, text: str) -> list[float]:
        loop = asyncio.get_event_loop()
        embedding_response = await loop.run_in_executor(
            None, self._client.embeddings, self.model_name, text
        )
        return embedding_response['embedding']  

    def _get_query_embedding(self, query: str) -> list[float]:
        return self._get_text_embedding(query)

    def _get_text_embedding(self, text: str) -> list[float]:
        embedding_response = self._client.embeddings(
            model=self.model_name,
            prompt=text
        )
        return embedding_response['embedding']


embed_model = OllamaEmbeddingModel(model_name="mxbai-embed-large")
evaluator = SemanticSimilarityEvaluator(
    embed_model=embed_model,
    similarity_threshold=0.6
)

results_scores = []
results_passing = []
for i in tqdm(range(len(df))):
    response = df.loc[i, "AI"]
    reference = df.loc[i, "Answer"]

    result = await evaluator.aevaluate(
    response=response,
    reference=reference,
    )
    results_scores.append(result.score)
    results_passing.append(result.passing)
    
df['Score'] = results_scores
df['Passing'] = results_passing

average_score = df['Score'].mean()
total_items = len(df)
passing_items = df['Passing'].sum()  
print(f"Average Score: {average_score:.4f}")
print(f"Passing: {passing_items}/{total_items}")
display(df[['Question', 'Answer', 'AI', 'Score', 'Passing']])

  0%|          | 0/60 [00:00<?, ?it/s]

100%|██████████| 60/60 [00:23<00:00,  2.58it/s]

Average Score: 0.6398
Passing: 45/60





Unnamed: 0,Question,Answer,AI,Score,Passing
0,Traffic Jam can be prevented if you,Keep opposing lanes open,The actual question is not directly stated in ...,0.62576,True
1,When making a right turn you should,Stay on the outermost lane of the road then si...,"According to the given text, when making a rig...",0.760502,True
2,"When you intend to turn right or left, signal ...",25 meters before you intend to make your turn,"According to Article IV, Section 45 of the Tra...",0.663455,True
3,"At an intersection with a traffic light, make ...",The green light is on and there is a left turn...,"Based on the provided information, at an inter...",0.700548,True
4,Graft and corruption in the traffic enforcemen...,Self disciplined by drivers and obeying traffi...,The correct answer is:\n\nUsing a digitalized ...,0.614937,True
5,"On a four(4) lane road with single white line,...",Overtake by passing over the solid white line,"According to the provided text, on a multi-lan...",0.756958,True
6,A double solid yellow line with broken white l...,Absolutely no overtaking,"According to the provided text, a double solid...",0.607177,True
7,"When making a U-Turn, you should",Check for traffic behind you and indicate your...,"When making a U-turn, you should start turning...",0.76511,True
8,Signs that are triangular in shape and with a ...,Caution or warning signs,Horizontal Alignment Markers.,0.550258,False
9,"Signs that are round, inverted triangle or oct...",Regulatory signs,"Based on the provided text, it appears to be a...",0.681984,True


# 11. Relevancy Evaluation

In [65]:
from llama_index.core.evaluation import RelevancyEvaluator

ollama_llm = Ollama(model="llama3.1:8b", request_timeout=300)
evaluator = RelevancyEvaluator(llm=ollama_llm)

eval_results = []

for i in tqdm(range(len(df))):
    eval_result = await evaluator.aevaluate(
        query=df.loc[i, "Question"],
        response=df.loc[i, "AI"],
        contexts=[df.loc[i, "Context"]]  
    )
    eval_results.append(eval_result.passing)

df['Eval'] = eval_results

total_items = len(df)
passing_items = df['Eval'].sum()
score = f"Score: {passing_items}/{total_items}"
percentage = passing_items / total_items if total_items > 0 else 0
print(score)
print(f"Percentage: {percentage:.2%}")
display(df[['Question', 'Answer', 'AI', 'Eval']])

100%|██████████| 60/60 [30:32<00:00, 30.55s/it]

Score: 50/60
Percentage: 83.33%





Unnamed: 0,Question,Answer,AI,Eval
0,Traffic Jam can be prevented if you,Keep opposing lanes open,The actual question is not directly stated in ...,True
1,When making a right turn you should,Stay on the outermost lane of the road then si...,"According to the given text, when making a rig...",True
2,"When you intend to turn right or left, signal ...",25 meters before you intend to make your turn,"According to Article IV, Section 45 of the Tra...",True
3,"At an intersection with a traffic light, make ...",The green light is on and there is a left turn...,"Based on the provided information, at an inter...",True
4,Graft and corruption in the traffic enforcemen...,Self disciplined by drivers and obeying traffi...,The correct answer is:\n\nUsing a digitalized ...,True
5,"On a four(4) lane road with single white line,...",Overtake by passing over the solid white line,"According to the provided text, on a multi-lan...",False
6,A double solid yellow line with broken white l...,Absolutely no overtaking,"According to the provided text, a double solid...",True
7,"When making a U-Turn, you should",Check for traffic behind you and indicate your...,"When making a U-turn, you should start turning...",False
8,Signs that are triangular in shape and with a ...,Caution or warning signs,Horizontal Alignment Markers.,True
9,"Signs that are round, inverted triangle or oct...",Regulatory signs,"Based on the provided text, it appears to be a...",True
