In [2]:
import os
from dotenv import load_dotenv
# Read key=value pairs from a local .env file into the process environment;
# the True displayed as this cell's output is load_dotenv()'s return value.
load_dotenv()

True

In [3]:
# Re-export the Hugging Face token only when one is actually defined.
# os.getenv() returns None for a missing variable, and assigning None into
# os.environ raises "TypeError: str expected, not NoneType", which would stop
# the notebook on any machine without a token configured.
hf_token = os.getenv("HF_TOKEN")
if hf_token is not None:
    os.environ["HF_TOKEN"] = hf_token


## Sentence Transformers on Hugging Face

In [4]:
from langchain_huggingface import HuggingFaceEmbeddings

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Build the embedding client for the all-MiniLM-L6-v2 sentence-transformer model;
# the vectors it produces later in this notebook have 384 dimensions.
embeddings= HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

In [6]:
text= "this is a test documents"

# query_result= embeddings.embed_query(text)        # use this at retrieval time.
query_result = embeddings.embed_documents([text])   # used during data ingestion / preparation: one vector per input text

query_result
# Output: a list holding a single embedding, because exactly one text was passed in.

[[-0.037860263139009476,
  0.11697699129581451,
  -0.035404738038778305,
  0.046963538974523544,
  0.03040660358965397,
  -0.035603415220975876,
  -0.057690054178237915,
  0.05563271790742874,
  -0.024714937433600426,
  0.040872447192668915,
  0.0728103369474411,
  -0.004337641876190901,
  0.010705916211009026,
  0.012321311049163342,
  -0.08437152951955795,
  -0.03895781561732292,
  -0.012011958286166191,
  -0.037018124014139175,
  0.003207421163097024,
  0.07600060850381851,
  0.033895689994096756,
  0.07895711064338684,
  -0.025616414844989777,
  -0.01802067458629608,
  0.01912405900657177,
  0.023786017671227455,
  -0.057602934539318085,
  0.01292523555457592,
  0.07224418222904205,
  -0.036136843264102936,
  0.034909993410110474,
  0.0689869299530983,
  0.0817037969827652,
  0.05590958520770073,
  0.07321035861968994,
  -0.0015631247078999877,
  0.06098972633481026,
  0.02068469673395157,
  0.03279160335659981,
  0.025676729157567024,
  -0.006628983188420534,
  -0.1101616844534874

In [7]:
# Only one text was embedded, so the outer list has length 1.
print(len(query_result))

1


In [8]:
# Embed two texts in one call: the original `text` plus a second sentence.
doc_result = embeddings.embed_documents([text, "This is not a test document."])
doc_result
# Output is 2 x 384: one 384-dimensional vector per input document.


[[-0.03786027431488037,
  0.11697696149349213,
  -0.03540470823645592,
  0.04696355760097504,
  0.03040657378733158,
  -0.035603392869234085,
  -0.05769006535410881,
  0.055632710456848145,
  -0.0247149970382452,
  0.040872447192668915,
  0.0728103369474411,
  -0.0043376232497394085,
  0.010705905966460705,
  0.01232129242271185,
  -0.08437155187129974,
  -0.03895784541964531,
  -0.012011932209134102,
  -0.037018101662397385,
  0.0032073950860649347,
  0.07600057870149612,
  0.033895671367645264,
  0.07895711809396744,
  -0.025616368278861046,
  -0.018020659685134888,
  0.019124062731862068,
  0.023785971105098724,
  -0.05760296434164047,
  0.012925248593091965,
  0.07224420458078384,
  -0.03613685816526413,
  0.03491004556417465,
  0.06898696720600128,
  0.08170382678508759,
  0.055909570306539536,
  0.07321035861968994,
  -0.0015630784910172224,
  0.060989733785390854,
  0.020684685558080673,
  0.0327916145324707,
  0.025676751509308815,
  -0.006628991570323706,
  -0.1101616695523262

In [9]:
# Embedding of the second input text, "This is not a test document."
doc_result[1]

[-0.018281688913702965,
 0.11222250014543533,
 -0.014233234338462353,
 0.023113178089261055,
 0.018091244623064995,
 -0.06463789939880371,
 -0.09148522466421127,
 0.01180267333984375,
 -0.01598641835153103,
 0.0723106786608696,
 0.08414015173912048,
 0.02900099940598011,
 -0.01847774349153042,
 -0.026501813903450966,
 -0.06828758865594864,
 -0.030838482081890106,
 0.0023050811141729355,
 -0.04135826230049133,
 0.04280933737754822,
 0.11545827239751816,
 0.05841617286205292,
 0.04131651669740677,
 0.06467802077531815,
 0.0003132781130261719,
 0.033074114471673965,
 0.03075045719742775,
 -0.054606977850198746,
 0.03554137051105499,
 0.012300824746489525,
 -0.0634215697646141,
 0.01733584702014923,
 0.08036100119352341,
 0.017498211935162544,
 0.02135596238076687,
 0.10046585649251938,
 -0.008925318717956543,
 0.08060932159423828,
 0.011547681875526905,
 0.04908854886889458,
 0.039257727563381195,
 -0.00905328243970871,
 -0.10785306245088577,
 -0.020152850076556206,
 0.03695908933877945,


In [10]:
# Expect 2: embed_documents returned one vector for each of the two input documents.
len(doc_result)

2

In [11]:
# Both embeddings should have the same length (384 in the recorded output).
print(len(doc_result[0]))
print(len(doc_result[1]))

384
384


#### So, in a RAG pipeline:

##### 1. During data ingestion, you call embed_documents() → store vectors in a vector DB.

##### 2. During query time, you call embed_query() → compare that single query vector with stored document vectors to find nearest matches.

1. embed_documents() → indexing
2. embed_query() → retrieval

## Example 1: Cosine similarity between one text and two document embeddings

In [12]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [13]:
# cosine_similarity works on 2-D inputs where every row is one embedding.
# query_result came from embed_documents([text]), so it is already a single
# row; reshape(1, -1) simply pins it to shape (1, n_features).
docs_vecs = np.asarray(doc_result)
query_vec = np.asarray(query_result).reshape(1, -1)

# Entry [0, j] is the cosine similarity between the query row and document j.
similarity = cosine_similarity(X=query_vec, Y=docs_vecs)
print(similarity)


[[1.         0.84902753]]


In [14]:
# Query matrix: a single row, as forced by reshape(1, -1).
query_vec.shape

(1, 384)

In [15]:
# Document matrix: one row per embedded document.
docs_vecs.shape

(2, 384)

## Example 2: Scoring several documents against an `embed_query()` vector

In [16]:

# Candidate documents to score against the query, in the order their
# similarity scores will appear in the printed row.
candidate_texts = [
    "What is AI?",
    "Artificial Intelligent?",
    "What is Cricket?",
    "AI stands for Artificial Intelligence.",
    "Bananas are not blue in color.",
]

# Retrieval-side embedding for the question, indexing-side embeddings for the documents.
query_emb1 = embeddings.embed_query("What is AI?")
doc_embs1 = embeddings.embed_documents(candidate_texts)

# Wrap the query vector in a list so it forms a one-row matrix, then compare it
# with every document row.
scores = cosine_similarity([query_emb1], doc_embs1)

print(scores)


[[1.         0.65027912 0.25649966 0.87556417 0.08138527]]
