#### Embedding Techniques Using HuggingFace


In [1]:
import os
from dotenv import load_dotenv

# Load settings via python-dotenv first, so the lookups below can see
# anything it adds to the process environment.
load_dotenv()

# Secrets are read from the environment instead of being hardcoded.
# A variable that is not set yields None.

# LangSmith settings. LANGSMITH_TRACING is the only lookup with a fallback:
# it becomes "true" when the variable is unset.
LANGSMITH_API_KEY = os.environ.get("LANGSMITH_API_KEY")
LANGSMITH_ENDPOINT = os.environ.get("LANGSMITH_ENDPOINT")
LANGSMITH_PROJECT = os.environ.get("LANGSMITH_PROJECT")
LANGSMITH_TRACING = os.environ.get("LANGSMITH_TRACING", "true")

# Provider credentials.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
HF_TOKEN = os.environ.get("HF_TOKEN")
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
TAVILY_API_KEY = os.environ.get("TAVILY_API_KEY")

#### Sentence Transformers on Hugging Face

Hugging Face sentence-transformers is a Python framework for state-of-the-art sentence, text, and image embeddings. Its embedding models can be used through the `HuggingFaceEmbeddings` class. An alias, `SentenceTransformerEmbeddings`, is also available for users who are more familiar with using that package directly.


In [2]:
from langchain_huggingface import HuggingFaceEmbeddings

# Build the embedding wrapper around the "all-MiniLM-L6-v2" model.
# The later cells call embed_query / embed_documents on this object.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Sample input reused by the later cells.
# NOTE(review): "atest" looks like a typo for "a test"; left unchanged here
# because editing the string would change the embedding shown below.
text = "this is atest documents"
# Embed a single string; the result is displayed below as a flat list of floats.
query_result = embeddings.embed_query(text)
# Bare last expression so the notebook renders the vector.
query_result

[-0.04311221092939377,
 0.13562119007110596,
 0.022339938208460808,
 0.00721672922372818,
 0.03421058505773544,
 0.024034056812524796,
 -0.024848803877830505,
 0.04566733539104462,
 0.01885056309401989,
 0.04899343103170395,
 -0.004306023009121418,
 0.05968935415148735,
 0.002952256705611944,
 -0.05999084189534187,
 -0.11980380117893219,
 -0.005690651945769787,
 -0.02096848003566265,
 0.009721233509480953,
 0.04023448005318642,
 0.05047000199556351,
 -0.0021607629023492336,
 0.09888076782226562,
 0.021964700892567635,
 -0.058519911020994186,
 0.029561879113316536,
 0.00411768676713109,
 -0.09333004057407379,
 -0.04305519163608551,
 0.0696839839220047,
 -0.04684080183506012,
 0.04395323991775513,
 0.010073358193039894,
 0.09620824456214905,
 0.027930229902267456,
 0.07333722710609436,
 -0.012976894155144691,
 0.0761367455124855,
 -0.011923196725547314,
 0.011215245351195335,
 -0.008163190446794033,
 -0.010897384956479073,
 -0.07058070600032806,
 -0.027595993131399155,
 -0.00615307781845

In [4]:
# Number of components in the query embedding (its dimensionality).
len(query_result)

384

In [5]:
# Embed two texts in one call: the same `text` used for the query embedding,
# plus a second sentence.
doc_result = embeddings.embed_documents([text, "This is not a test document."])
# Display only the first entry, i.e. the vector produced for `text`.
doc_result[0]

[-0.04311221092939377,
 0.13562121987342834,
 0.0223399605602026,
 0.007216694764792919,
 0.03421052172780037,
 0.024034079164266586,
 -0.024848833680152893,
 0.04566733539104462,
 0.01885056681931019,
 0.048993371427059174,
 -0.004305948503315449,
 0.059689320623874664,
 0.0029523177072405815,
 -0.05999079719185829,
 -0.11980383843183517,
 -0.005690652411431074,
 -0.020968396216630936,
 0.009721244685351849,
 0.04023452475667,
 0.05046999454498291,
 -0.002160744508728385,
 0.098880834877491,
 0.02196468971669674,
 -0.05851993337273598,
 0.029561955481767654,
 0.004117709118872881,
 -0.093330018222332,
 -0.0430552139878273,
 0.06968405842781067,
 -0.04684080183506012,
 0.043953269720077515,
 0.010073449462652206,
 0.09620821475982666,
 0.027930257841944695,
 0.07333717495203018,
 -0.012976775877177715,
 0.07613668590784073,
 -0.011923224665224552,
 0.011215230450034142,
 -0.008163176476955414,
 -0.010897442698478699,
 -0.07058070600032806,
 -0.027596022933721542,
 -0.006153053604066372