In [1]:
from dotenv import load_dotenv
import os

load_dotenv()  # Load environment variables from .env file

# os.environ accepts only str values: assigning the None that os.getenv returns
# for a missing variable raises TypeError, so re-export the token only when set.
hf_token = os.getenv("HF_TOKEN")
if hf_token is not None:
    os.environ["HF_TOKEN"] = hf_token

# Sentence Transformers 

- Sentence Transformers is a Python framework for generating state-of-the-art sentence, text, and image embeddings
- Also known as SBERT (Sentence-BERT), it is designed to map sentences and paragraphs to dense vector representations in a high-dimensional space
*  Here are some key points:
- Embedding Generation: It converts sentences into embeddings, which can be used for tasks like semantic search, clustering, and retrieval
- Model Availability: Hugging Face hosts a variety of pre-trained Sentence Transformer models, such as paraphrase-MiniLM-L6-v2 and all-MiniLM-L6-v2
- Ease of Use: You can easily install and use these models with the sentence-transformers library
- Applications: Common applications include sentence similarity, text classification, and information retrieval

In [1]:
from langchain_huggingface import HuggingFaceEmbeddings

In [6]:
# Build the embedding model wrapper around the "all-MiniLM-L6-v2" checkpoint.
embeddings = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2",
)

  from tqdm.autonotebook import tqdm, trange


In [8]:
# Embed a single sample sentence with the Hugging Face model.
text_example = "This is example on embedding using hugging face"

# embed_query takes one string and returns its embedding as a flat list of floats.
query_result = embeddings.embed_query(text=text_example)
query_result

[-0.025797102600336075,
 0.015364105813205242,
 0.01567201130092144,
 -0.0015951163368299603,
 0.01715136505663395,
 0.010118387639522552,
 -0.023816300556063652,
 -0.06025158613920212,
 -0.010870803147554398,
 -0.04353555664420128,
 0.07643440365791321,
 -0.008433958515524864,
 0.018362822011113167,
 0.04921623691916466,
 0.025185607373714447,
 0.03750022128224373,
 -0.008206035941839218,
 0.11035458743572235,
 -0.025629611685872078,
 0.05437533184885979,
 -0.044469546526670456,
 -0.04117857664823532,
 0.04544368386268616,
 -0.08982104063034058,
 0.002320449333637953,
 -0.07025360316038132,
 -0.0009970295941457152,
 0.023723352700471878,
 0.1794537603855133,
 -0.0505865104496479,
 0.0085959667339921,
 -0.07139820605516434,
 -0.007565204054117203,
 0.06324577331542969,
 -0.09763719886541367,
 0.08820395171642303,
 -0.03507566452026367,
 0.10294324904680252,
 -0.1162930279970169,
 0.014033078216016293,
 -0.045084841549396515,
 0.03748387098312378,
 0.01846240647137165,
 -0.0054486356675

In [9]:
# Number of components in the embedding vector (the recorded output is 384).
len(query_result)

384

In [10]:
# Inspect the first component of the embedding vector.
query_result[0]

-0.025797102600336075

In [12]:
# embed_documents takes a list of texts and returns one embedding per text
# (a list of lists), so a single sentence still comes back nested.
embeddings.embed_documents(texts=[text_example])

[[-0.025797102600336075,
  0.015364105813205242,
  0.01567201130092144,
  -0.0015951163368299603,
  0.01715136505663395,
  0.010118387639522552,
  -0.023816300556063652,
  -0.06025158613920212,
  -0.010870803147554398,
  -0.04353555664420128,
  0.07643440365791321,
  -0.008433958515524864,
  0.018362822011113167,
  0.04921623691916466,
  0.025185607373714447,
  0.03750022128224373,
  -0.008206035941839218,
  0.11035458743572235,
  -0.025629611685872078,
  0.05437533184885979,
  -0.044469546526670456,
  -0.04117857664823532,
  0.04544368386268616,
  -0.08982104063034058,
  0.002320449333637953,
  -0.07025360316038132,
  -0.0009970295941457152,
  0.023723352700471878,
  0.1794537603855133,
  -0.0505865104496479,
  0.0085959667339921,
  -0.07139820605516434,
  -0.007565204054117203,
  0.06324577331542969,
  -0.09763719886541367,
  0.08820395171642303,
  -0.03507566452026367,
  0.10294324904680252,
  -0.1162930279970169,
  0.014033078216016293,
  -0.045084841549396515,
  0.0374838709831237