#### Setup

In [2]:
from fastembed import TextEmbedding, SparseTextEmbedding, LateInteractionTextEmbedding

#### Embeddings

In [None]:
# One fastembed model per vector type stored in the collection:
# dense sentence embeddings, sparse BM25 weights, and ColBERT late-interaction (multi-vector) embeddings.
dense_embedding_model=TextEmbedding("sentence-transformers/all-MiniLM-L6-v2")
sparse_embedding_model=SparseTextEmbedding("Qdrant/bm25")
late_interaction_embedding_model=LateInteractionTextEmbedding("colbert-ir/colbertv2.0")

#### Chunking the dataset

In [4]:
import json 
import re
import os

In [5]:
# Input file to ingest; its extension-less base name is stored with every point as `file_name`.
file_name = "cctv_camera.json"
file_basename = os.path.basename(file_name)
base_name = os.path.splitext(file_basename)[0]

In [6]:
# Read the whole JSON export into memory; `data` is consumed by the following cells.
with open(file_name, mode="r", encoding="utf-8") as source_file:
    data = json.loads(source_file.read())

   



In [9]:
# The entries to index live under data["mods"]["listItems"]; each entry is chunked separately below.
item_list=data["mods"]["listItems"]

In [7]:
def chunk_each_docs(doc,chunk_size=128):
    """Serialize ``doc`` to JSON and split it into space-joined token chunks.

    The JSON text is tokenized into runs of word characters and single
    punctuation characters from the set ``{ } [ ] : , "``; every other
    character (e.g. whitespace) is dropped. Consecutive tokens are grouped
    into chunks of at most ``chunk_size`` tokens and joined with one space.

    Args:
        doc: Any JSON-serializable object.
        chunk_size: Maximum number of tokens per chunk. Must be positive.

    Returns:
        List of chunk strings in document order. The last chunk may hold
        fewer than ``chunk_size`` tokens; no empty chunk is ever produced.

    Raises:
        ValueError: If ``chunk_size`` is not positive (the original loop
            would never terminate in that case).
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    serialized = json.dumps(doc)
    tokens = re.findall(r'\w+|[{}[\]:,",]', serialized)

    # range() stops before len(tokens), so a token count that is an exact
    # multiple of chunk_size no longer yields a trailing empty chunk
    # (the previous `start <= len(tokens)` condition did).
    return [
        " ".join(tokens[start:start + chunk_size])
        for start in range(0, len(tokens), chunk_size)
    ]






In [10]:
# One list of chunk strings per item, in the same order as `item_list`.
all_chunks = [chunk_each_docs(item) for item in item_list]
        

In [11]:
# Number of chunked documents (one entry per item in item_list).
len(all_chunks)

40

In [12]:
# Sanity check: first chunk of the first document.
all_chunks[0][0]

'{ " name " : " V380 WiFi PTZ Security Camera Triple Lens , 3 Screens , Wireless CCTV IP Camera with Night Vision And Motion Detection " , " nid " : " 366335641 " , " itemId " : " 366335641 " , " icons " : [ { " domClass " : " 150565 " , " type " : " img " , " group " : " 6 " , " showType " : " 0 " } , { " domClass " : " 150318 " , " text " : " Gems save Rs 44 " , " type " : " text " , " group " : " 2 " , " showType " : " 0 " }'

In [13]:
from qdrant_client import QdrantClient, models
from sentence_transformers import SentenceTransformer


In [14]:
# Connect to the Qdrant instance at localhost:6333; every collection call below goes through this client.
client=QdrantClient("http://localhost:6333")

#### Creating Collection

In [197]:
# Create the collection only when it is missing, so re-running this cell is harmless.
collection_missing = not client.collection_exists("reranking_hybridsearch")
if collection_missing:
    cosine = models.Distance.COSINE

    # Single dense vector per point, sized from the dense model.
    dense_params = models.VectorParams(
        size=dense_embedding_model.embedding_size,
        distance=cosine,
    )

    # Multi-vector field compared with MAX_SIM.
    # NOTE(review): m=0 presumably disables HNSW graph building for this field
    # (it is only used to re-score prefetched candidates) - confirm against the Qdrant docs.
    late_interaction_params = models.VectorParams(
        size=late_interaction_embedding_model.embedding_size,
        distance=cosine,
        multivector_config=models.MultiVectorConfig(
            comparator=models.MultiVectorComparator.MAX_SIM
        ),
        hnsw_config=models.HnswConfigDiff(m=0),
    )

    # Sparse vectors use the IDF modifier.
    sparse_params = models.SparseVectorParams(modifier=models.Modifier.IDF)

    client.create_collection(
        collection_name="reranking_hybridsearch",
        vectors_config={
            "dense": dense_params,
            "late_interaction": late_interaction_params,
        },
        sparse_vectors_config={"sparse": sparse_params},
    )

In [15]:
# Dimensionality used for the "dense" vector config.
dense_embedding_model.embedding_size

384

In [16]:
# Per-token dimensionality used for the "late_interaction" vector config.
late_interaction_embedding_model.embedding_size

128

In [200]:
# Integer payload index on `doc_id`, the field used later to filter points by document.
doc_id_index = {
    "collection_name": "reranking_hybridsearch",
    "field_name": "doc_id",
    "field_schema": "integer",
}
client.create_payload_index(**doc_id_index)

UpdateResult(operation_id=207, status=<UpdateStatus.COMPLETED: 'completed'>)

In [201]:
# Integer payload index on `chunk_id`, the field the ingestion cell orders by when scrolling.
chunk_id_index = {
    "collection_name": "reranking_hybridsearch",
    "field_name": "chunk_id",
    "field_schema": "integer",
}
client.create_payload_index(**chunk_id_index)

UpdateResult(operation_id=209, status=<UpdateStatus.COMPLETED: 'completed'>)

In [202]:
# Keyword payload index on `file_name` (the source file's base name stored with every point).
file_name_index = {
    "collection_name": "reranking_hybridsearch",
    "field_name": "file_name",
    "field_schema": "keyword",
}
client.create_payload_index(**file_name_index)

UpdateResult(operation_id=211, status=<UpdateStatus.COMPLETED: 'completed'>)

#### Embedding

In [17]:
# Dense embeddings, nested like all_chunks: all_dense_vector[doc][chunk].
all_dense_vector = [
    list(dense_embedding_model.embed(chunk for chunk in doc_chunks))
    for doc_chunks in all_chunks
]

In [18]:
# Should equal len(all_chunks): one list of embeddings per document.
len(all_dense_vector)

40

In [19]:
# Sparse (BM25) embeddings, nested like all_chunks: all_sparse_vector[doc][chunk].
all_sparse_vector = [
    list(sparse_embedding_model.embed(chunk for chunk in doc_chunks))
    for doc_chunks in all_chunks
]


In [20]:
# Late-interaction (ColBERT) embeddings, nested like all_chunks: all_colbert_vector[doc][chunk].
all_colbert_vector = [
    list(late_interaction_embedding_model.embed(chunk for chunk in doc_chunks))
    for doc_chunks in all_chunks
]

#### Inserting points in collection

In [207]:
# Collection that is inspected, scrolled and written to in this cell.
target_collection = "reranking_hybridsearch"

# Point ids / chunk ids (`offset`) and document ids (`doc_offset`) continue
# after whatever is already stored, so ingesting another file appends to the
# collection instead of overwriting existing points.
offset = 0
doc_offset = 0

# Inspect the SAME collection we insert into. This previously looked at
# "hybrid_search_daraz_items", so the emptiness check was made against an
# unrelated collection (or failed if that collection did not exist).
info = client.get_collection(collection_name=target_collection)
count = info.points_count

# If the collection is not empty, fetch the point with the highest chunk_id.
if count != 0:
    res, _ = client.scroll(
        collection_name=target_collection,
        with_payload=True,
        with_vectors=False,
        limit=1,
        order_by={
            "key": "chunk_id",
            "direction": "desc"
        }
    )

    if res:
        doc_number = res[0].payload.get("doc_id")
        last_id = res[0].id
        offset = last_id + 1          # next free point id / chunk id
        doc_offset = doc_number + 1   # next free document id

# Insert new chunks (dense + sparse + late-interaction vectors).
# Points are sent one document at a time rather than one request per chunk.
for doc in range(len(all_chunks)):
    doc_points = []
    for idx in range(len(all_chunks[doc])):
        doc_points.append(
            models.PointStruct(
                id=offset,
                payload={
                    "doc_id": doc_offset,            # index of the source document
                    "chunk_id": offset,              # unique chunk id (same as the point id)
                    "chunk": all_chunks[doc][idx],
                    "file_name": base_name
                },
                vector={
                    "dense": all_dense_vector[doc][idx],
                    "sparse": all_sparse_vector[doc][idx].as_object(),
                    "late_interaction": all_colbert_vector[doc][idx],
                },
            )
        )
        offset += 1
    if doc_points:
        client.upsert(
            collection_name=target_collection,
            points=doc_points,
        )
    doc_offset += 1
    



#### Retrieval

In [69]:
# Example search query used by the exploratory retrieval cells below.
query="CCTV_CAMERA at low cost "

In [70]:
# Embed the query once with each model; query_embed() yields results lazily, so next() takes the single embedding.
dense_vectors = next(dense_embedding_model.query_embed(query))
sparse_vectors = next(sparse_embedding_model.query_embed(query))
late_vectors = next(late_interaction_embedding_model.query_embed(query))

In [71]:
# Length of the dense query embedding.
len(dense_vectors)

384

In [72]:
# Late-interaction query embedding: a 2-D array (see the shape in the output below).
late_vectors

array([[ 0.10201225, -0.01358971, -0.03966511, ...,  0.13676956,
         0.02714994,  0.05499203],
       [-0.09304268,  0.09009052, -0.0726609 , ..., -0.04155932,
        -0.05782515, -0.01928774],
       [-0.1084124 ,  0.07779364, -0.05189863, ..., -0.0998296 ,
        -0.07773299, -0.0130346 ],
       ...,
       [-0.00326232,  0.03520354,  0.05967482, ...,  0.01122641,
         0.00725493,  0.11533666],
       [ 0.07735341,  0.04561583,  0.07559254, ..., -0.03175567,
         0.06811673,  0.18131661],
       [ 0.09695499,  0.05272482,  0.05505125, ..., -0.03351296,
         0.07378528,  0.17181656]], shape=(32, 128), dtype=float32)

In [73]:
# First-stage candidates: up to 20 points from the dense index and up to 20 from the sparse index.
dense_prefetch = models.Prefetch(query=dense_vectors, using="dense", limit=20)
sparse_prefetch = models.Prefetch(
    query=models.SparseVector(**sparse_vectors.as_object()),
    using="sparse",
    limit=20,
)
prefetch = [dense_prefetch, sparse_prefetch]

In [74]:
# Query the prefetched candidates with the late-interaction vectors and keep the 10 best points.
rerank_response = client.query_points(
    collection_name="reranking_hybridsearch",
    prefetch=prefetch,
    query=late_vectors,
    using="late_interaction",
    with_payload=True,
    limit=10,
)
results = rerank_response.points

In [75]:
# Scored points returned by the query above, payload included.
results

[ScoredPoint(id=268, version=280, score=19.625256, payload={'doc_id': 49, 'chunk_id': 268, 'chunk': '" location " : " Bagmati Province " , " description " : [ " Plz read this highlight carefully " , " As it is a low budget product is has no ethernet port in it and the wifi range is only upto 5 metres So if u want to make connection plz keep your router close to this cc camera within 5 metre of distance and both the router and this camera should be in the same room If your router is not in the same room or if your router and this camera are located in different rooms it wont work and connection will be so slow " , " So plz only do purchase only if your router and this camera will be near', 'file_name': 'cctv_camera'}, vector=None, shard_key=None, order_value=None),
 ScoredPoint(id=376, version=388, score=19.335121, payload={'doc_id': 64, 'chunk_id': 376, 'chunk': '7D m tpp_findSimilar q camera similarType findSimilarV1 price 3234 00 spuTriggerItem 0 " , " gridTitleLine " : " 2 " , " isF

In [76]:
# Highest-scoring point.
results[0]

ScoredPoint(id=268, version=280, score=19.625256, payload={'doc_id': 49, 'chunk_id': 268, 'chunk': '" location " : " Bagmati Province " , " description " : [ " Plz read this highlight carefully " , " As it is a low budget product is has no ethernet port in it and the wifi range is only upto 5 metres So if u want to make connection plz keep your router close to this cc camera within 5 metre of distance and both the router and this camera should be in the same room If your router is not in the same room or if your router and this camera are located in different rooms it wont work and connection will be so slow " , " So plz only do purchase only if your router and this camera will be near', 'file_name': 'cctv_camera'}, vector=None, shard_key=None, order_value=None)

In [None]:
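### API key removed - never store credentials in the notebook. Revoke the key that was committed here and load GEMINI_API_KEY from the environment / .env file instead.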

In [77]:
from qdrant_client import QdrantClient, models

In [78]:
# doc_id of every scored point, in ranking order (a document can appear more than once).
all_docs_retrieved = [point.payload.get("doc_id") for point in results]

In [79]:
# Document ids in ranking order; note that the same id can occur several times.
all_docs_retrieved

[49, 64, 49, 47, 77, 73, 51, 50, 71, 78]

In [80]:
# One entry per scored point, so this matches the query limit.
len(all_docs_retrieved)

10

In [81]:
# doc_id of the best-ranked point.
all_docs_retrieved[0]

49

In [82]:
# For every retrieved doc_id, fetch the stored points of that document.
# NOTE(review): limit=10 caps the points returned per document, and a doc_id that
# occurs twice in all_docs_retrieved is scrolled twice.
retrieved_chunks = []
for doc in all_docs_retrieved:
    same_document = models.Filter(
        must=[
            models.FieldCondition(key="doc_id", match=models.MatchValue(value=doc)),
        ]
    )
    scroll_page = client.scroll(
        collection_name="reranking_hybridsearch",
        scroll_filter=same_document,
        limit=10,
        with_payload=True,
        with_vectors=False,
    )
    # scroll() returns (records, next_page_offset); only the records are kept.
    retrieved_chunks.append(scroll_page[0])

In [83]:
# Records grouped per retrieved doc_id.
retrieved_chunks

[[Record(id=266, payload={'doc_id': 49, 'chunk_id': 266, 'chunk': '{ " name " : " 5 or 3 Antenna 2MP With WiFi Wireless Surveillance Night Vision CCTV Camera " , " nid " : " 127458649 " , " itemId " : " 127458649 " , " icons " : [ { " domClass " : " 150565 " , " type " : " img " , " group " : " 6 " , " showType " : " 0 " } , { " domClass " : " 175175 " , " type " : " img " , " group " : " 3 " , " showType " : " 0 " } ] , " image " : " https : np live 21 slatic net kf Sc36a741a49aa4bab8f91a3b654c6fcd4m jpg " ,', 'file_name': 'cctv_camera'}, vector=None, shard_key=None, order_value=None),
  Record(id=267, payload={'doc_id': 49, 'chunk_id': 267, 'chunk': '" isSmartImage " : false , " utLogMap " : { " srp_name " : " LazadaMainSrp " , " x_object_type " : " item " , " src " : " organic " , " trafficType " : " organic " , " x_sku_ids " : " 1034542418 " , " x_item_ids " : " 127458649 " , " iconList " : " 150565 175175 " , " SN " : " f2c4c23a4ec97433ccd27c0620ae54e5 " , " current_price " : " 16

In [84]:
# One group of records per entry in all_docs_retrieved.
len(retrieved_chunks)

10

In [85]:
# Records of the best-ranked document.
retrieved_chunks[0]

[Record(id=266, payload={'doc_id': 49, 'chunk_id': 266, 'chunk': '{ " name " : " 5 or 3 Antenna 2MP With WiFi Wireless Surveillance Night Vision CCTV Camera " , " nid " : " 127458649 " , " itemId " : " 127458649 " , " icons " : [ { " domClass " : " 150565 " , " type " : " img " , " group " : " 6 " , " showType " : " 0 " } , { " domClass " : " 175175 " , " type " : " img " , " group " : " 3 " , " showType " : " 0 " } ] , " image " : " https : np live 21 slatic net kf Sc36a741a49aa4bab8f91a3b654c6fcd4m jpg " ,', 'file_name': 'cctv_camera'}, vector=None, shard_key=None, order_value=None),
 Record(id=267, payload={'doc_id': 49, 'chunk_id': 267, 'chunk': '" isSmartImage " : false , " utLogMap " : { " srp_name " : " LazadaMainSrp " , " x_object_type " : " item " , " src " : " organic " , " trafficType " : " organic " , " x_sku_ids " : " 1034542418 " , " x_item_ids " : " 127458649 " , " iconList " : " 150565 175175 " , " SN " : " f2c4c23a4ec97433ccd27c0620ae54e5 " , " current_price " : " 1665

In [86]:
# Keep only the chunk text of every record, preserving the per-document grouping.
total_chunk = [
    [record.payload.get("chunk") for record in doc_records]
    for doc_records in retrieved_chunks
]

In [87]:
# Chunk texts grouped per retrieved document.
total_chunk

[['{ " name " : " 5 or 3 Antenna 2MP With WiFi Wireless Surveillance Night Vision CCTV Camera " , " nid " : " 127458649 " , " itemId " : " 127458649 " , " icons " : [ { " domClass " : " 150565 " , " type " : " img " , " group " : " 6 " , " showType " : " 0 " } , { " domClass " : " 175175 " , " type " : " img " , " group " : " 3 " , " showType " : " 0 " } ] , " image " : " https : np live 21 slatic net kf Sc36a741a49aa4bab8f91a3b654c6fcd4m jpg " ,',
  '" isSmartImage " : false , " utLogMap " : { " srp_name " : " LazadaMainSrp " , " x_object_type " : " item " , " src " : " organic " , " trafficType " : " organic " , " x_sku_ids " : " 1034542418 " , " x_item_ids " : " 127458649 " , " iconList " : " 150565 175175 " , " SN " : " f2c4c23a4ec97433ccd27c0620ae54e5 " , " current_price " : " 1665 " , " x_object_id " : " 127458649 " } , " originalPriceShow " : " " , " priceShow " : " Rs 1 , 665 " , " ratingScore " : " 2 8518518518518516 " , " review " : " 27 " ,',
  '" location " : " Bagmati Prov

In [88]:
# Chunk texts of the best-ranked document.
total_chunk[0]

['{ " name " : " 5 or 3 Antenna 2MP With WiFi Wireless Surveillance Night Vision CCTV Camera " , " nid " : " 127458649 " , " itemId " : " 127458649 " , " icons " : [ { " domClass " : " 150565 " , " type " : " img " , " group " : " 6 " , " showType " : " 0 " } , { " domClass " : " 175175 " , " type " : " img " , " group " : " 3 " , " showType " : " 0 " } ] , " image " : " https : np live 21 slatic net kf Sc36a741a49aa4bab8f91a3b654c6fcd4m jpg " ,',
 '" isSmartImage " : false , " utLogMap " : { " srp_name " : " LazadaMainSrp " , " x_object_type " : " item " , " src " : " organic " , " trafficType " : " organic " , " x_sku_ids " : " 1034542418 " , " x_item_ids " : " 127458649 " , " iconList " : " 150565 175175 " , " SN " : " f2c4c23a4ec97433ccd27c0620ae54e5 " , " current_price " : " 1665 " , " x_object_id " : " 127458649 " } , " originalPriceShow " : " " , " priceShow " : " Rs 1 , 665 " , " ratingScore " : " 2 8518518518518516 " , " review " : " 27 " ,',
 '" location " : " Bagmati Provinc

#### Gemini API Calls

In [None]:
from google import genai
import os
from dotenv import load_dotenv
from google.genai import types

# Load variables from a local .env file before GEMINI_API_KEY is read below.
load_dotenv()


# NOTE(review): an identical `chat` is defined again in the next cell; one of the two can be removed.
def chat(prompt:str):
    """Send ``prompt`` to the "gemini-2.5-flash-lite" model and return the response object.

    A new ``genai.Client`` is created on every call, using the value of the
    ``GEMINI_API_KEY`` environment variable as the API key.
    """
    gemini_client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
    return gemini_client.models.generate_content(
        model="gemini-2.5-flash-lite",
        contents=prompt,
    )

In [89]:
from google import genai #Gemini/GenAI client library
import os
from dotenv import load_dotenv #Loads variables from a .env file
from google.genai import types #an access GEMINI_API_KEY 

# Load variables from a local .env file before GEMINI_API_KEY is read below.
load_dotenv()


def chat(prompt:str):
    """Send ``prompt`` to the "gemini-2.5-flash-lite" model and return the response object.

    A new ``genai.Client`` is created on every call, using the value of the
    ``GEMINI_API_KEY`` environment variable as the API key. Callers read the
    generated text from the returned object's ``.text`` attribute.
    """
    gemini_client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
    return gemini_client.models.generate_content(
        model="gemini-2.5-flash-lite",
        contents=prompt,
    )

In [93]:
from fastembed import TextEmbedding,SparseTextEmbedding,LateInteractionTextEmbedding
from qdrant_client import QdrantClient, models
# Qdrant connection used by the retrieval helpers defined below in this cell.
client = QdrantClient(url = "http://localhost:6333")
# GLOBAL CONFIGS
# Collection queried by doc_retrieval() and retrieval().
collection_name = "reranking_hybridsearch"
# NOTE(review): these encoders load the same model names as dense_embedding_model,
# sparse_embedding_model and late_interaction_embedding_model created at the top of the notebook.
dense_encoder = TextEmbedding("sentence-transformers/all-MiniLM-L6-v2")
sparse_encoder = SparseTextEmbedding("Qdrant/bm25")
late_colbert_encoder = LateInteractionTextEmbedding("colbert-ir/colbertv2.0")

def embedding_of_query(query:str):
    """Embed ``query`` with the dense, sparse and late-interaction encoders.

    Returns:
        A 3-tuple ``(dense, sparse, late_interaction)`` holding the first
        embedding yielded by each encoder's ``query_embed``.
    """
    encoders = (dense_encoder, sparse_encoder, late_colbert_encoder)
    # query_embed() yields embeddings lazily; a single query gives a single item.
    dense_embeds, sparse_embeds, late_embeds = (
        next(encoder.query_embed(query)) for encoder in encoders
    )
    return dense_embeds, sparse_embeds, late_embeds

def doc_retrieval(query:str, top_k:int = 5, prefetch_limit:int = 20):
    """Return the ids of the documents whose chunks best match ``query``.

    Candidate points are prefetched with the dense and the sparse query
    vectors; the late-interaction vectors are then used as the main query
    over those candidates.

    Args:
        query: Free-text search query.
        top_k: Number of scored points requested from Qdrant
            (default 5, the previously hard-coded value).
        prefetch_limit: Number of candidates requested from each prefetch
            (default 20, the previously hard-coded value).

    Returns:
        List of distinct ``doc_id`` payload values, best match first. It can
        be shorter than ``top_k`` because several scored points may belong to
        the same document.
    """
    dense_vectors,sparse_vectors,late_vectors = embedding_of_query(query)

    prefetch = [
        models.Prefetch(
            query=dense_vectors,
            using="dense",
            limit=prefetch_limit,
        ),
        models.Prefetch(
            query=models.SparseVector(**sparse_vectors.as_object()),
            using="sparse",
            limit=prefetch_limit,
        ),
    ]
    query_results = client.query_points(
        collection_name=collection_name,
        prefetch=prefetch,
        query=late_vectors,
        using="late_interaction",
        limit=top_k,
        with_payload=True,
        with_vectors=False,
    ).points

    # Several top-ranked chunks often come from the same document. Keep each
    # doc_id once (first occurrence = best rank) so callers do not fetch and
    # forward the same document repeatedly.
    retrieved_docs = []
    seen_doc_ids = set()
    for result in query_results:
        doc_id = result.payload.get("doc_id")
        if doc_id is None or doc_id in seen_doc_ids:
            continue
        seen_doc_ids.add(doc_id)
        retrieved_docs.append(doc_id)
    return retrieved_docs

def retrieval(query:str):
    """Return the chunk texts of every document retrieved for ``query``.

    The doc ids come from ``doc_retrieval(query)``; for each one, all points
    whose ``doc_id`` payload matches are scrolled from the collection.

    Args:
        query: Free-text search query.

    Returns:
        A list with one entry per retrieved doc id, each entry being the list
        of that document's ``chunk`` payload strings.
    """
    retrieved_docs = doc_retrieval(query)
    page_size = 10

    all_chunks = []
    for doc in retrieved_docs:
        same_document = models.Filter(
            must=[
                models.FieldCondition(key="doc_id", match=models.MatchValue(value=doc))
            ]
        )
        chunks = []
        next_offset = None
        # A single scroll call returns at most `limit` points, which silently
        # truncated documents with more than 10 chunks. Follow the returned
        # page offset until Qdrant reports that no page is left.
        while True:
            points, next_offset = client.scroll(
                collection_name=collection_name,
                scroll_filter=same_document,
                limit=page_size,
                offset=next_offset,
                with_payload=True,
                with_vectors=False,
            )
            chunks.extend(point.payload.get("chunk") for point in points)
            if next_offset is None:
                break
        all_chunks.append(chunks)

    return all_chunks

def prompt_creation_and_api_calls(query:str = "5 Cheapest tripod to buy"):
    """Retrieve passages for ``query``, ask the model to answer from them and print the answer.

    Args:
        query: Customer question. Defaults to the example question that was
            previously hard-coded, so calling without arguments behaves as before.

    Returns:
        None. The model's answer text is printed.
    """
    # Nested list of chunk strings (one inner list per retrieved document);
    # it is interpolated into the prompt as-is via its Python repr.
    augmented_document = retrieval(query)
    prompt = f"""
    You are a helpful and friendly assistant that answers questions about Daraz products.
    Use the information from the passages below to provide a clear and complete answer.
    Explain things in simple terms for a non-technical customer.
    QUESTION: '{query}'
    PASSAGE: '{augmented_document}' 
    """

    # API call
    response = chat(prompt)
    print(response.text)
        
# Run the full retrieve -> prompt -> generate pipeline and print the model's answer.
prompt_creation_and_api_calls()

Here are the cheapest tripods available:

1.  **Camera Sport SJCAM3 48910 Single Waterproof Mini Can 1 Set Telescopic Pole Selfie Stick Tripod Blue for GoProHero1165 New EKEN** - This tripod is priced at **Rs 3,165**. It seems to be a versatile option that can also be used as a selfie stick.

2.  **103cm Lightweight Camera Tripod For Mobile Tripod Camera Portable SLR Bluetooth compatible Desktop Cam Stand Monopod Smartphone** - This tripod costs **Rs 4,994** and is designed to be lightweight and portable, suitable for both mobile phones and SLR cameras.

3.  **Portable Folding Tripod Stand 2 in 1 Mini Tripod Adjustable Camera Mount Angle Legs For Canon Cameras DV Camcorders** - This tripod is available for **Rs 5,158**. It's a folding tripod that can adjust its angle and is compatible with cameras like Canon DV camcorders.

4.  **PULUZ Mini Octopus Flexible Tripod Holder with Remote Control for SLR Cameras Cellphone GoPro DJI Insta360** - This tripod is priced at **Rs 5,258**. It's a f