In [2]:
from sentence_transformers import SentenceTransformer
model = SentenceTransformer('sentence-transformers/paraphrase-xlm-r-multilingual-v1')

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


In [14]:
with open("demo.txt", "r", encoding="utf-8") as f:
    text = f.read()

documents = text.split(".")   # splits by sentence
documents = [d.strip() for d in documents if d.strip()]

len(documents)

query = "population of India"

docs_embeddings = model.encode(documents)
query_embedding = model.encode(query)

In [15]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
similarities = cosine_similarity(np.array([query_embedding]), docs_embeddings)
similarities

index = np.argsort(similarities[0])[::-1]

ranked_docs = [(documents[i], similarities[0][i]) for i in index]

top_4_docs = [doc[0] for doc in ranked_docs[:4]]

In [17]:
import cohere
import os
from dotenv import load_dotenv

load_dotenv()

co = cohere.ClientV2(os.getenv("COHERE_API_KEY"))

top_4_docs = [
    "visali is good person",
    "India, officially the Republic of India,[j][20] is a country in South Asia. .",
    "It is a pluralistic, multilingual and multi-ethnic society.",
    " India's population grew from 361 million in 1951 to over 1.4 billion in 2023",
]

response = co.rerank(
    model="rerank-v3.5",
    query="population of India",
    documents=top_4_docs,
)

for r in response.results:
    doc_index = r.index       # index of matched document
    doc_text = top_4_docs[doc_index]
    score = r.relevance_score

    print(f"Document: {doc_text}")
    print(f"Relevance Score: {score}")
    print("---")


Document:  India's population grew from 361 million in 1951 to over 1.4 billion in 2023
Relevance Score: 0.82048416
---
Document: India, officially the Republic of India,[j][20] is a country in South Asia. .
Relevance Score: 0.15074033
---
Document: It is a pluralistic, multilingual and multi-ethnic society.
Relevance Score: 0.08306123
---
Document: visali is good person
Relevance Score: 0.021567926
---
