# Embedding function

## Method 01: ollama

In [None]:
%pip install -qU langchain-ollama

In [None]:
from langchain_ollama import OllamaEmbeddings

# The model has to be pulled locally before it can be used, e.g.:
#   ollama pull qwen2.5:14b
# Other models work the same way, e.g. llama3 or mxbai-embed-large.
embeddings = OllamaEmbeddings(model="Qwen2.5:14b")

In [None]:
# Embed one query string and preview the start of the resulting vector.
text = "LangChain is the framework for building context-aware reasoning applications"
single_vector = embeddings.embed_query(text)
vector_preview = str(single_vector)[:100]  # first 100 characters of the vector's string form
print(vector_preview)

In [None]:
text2 = "LangGraph is a library for building stateful, multi-actor applications with LLMs"

# Embed both texts in a single call and preview each returned vector.
two_vectors = embeddings.embed_documents([text, text2])
for doc_vector in two_vectors:
    # Only the first 100 characters of the vector's string form are shown.
    print(str(doc_vector)[:100])

In [None]:
# Build an in-memory vector store from one sample text and query it.
from langchain_core.vectorstores import InMemoryVectorStore

text = "LangChain is the framework for building context-aware reasoning applications"

# Index the sample text using the embedding model configured above.
vectorstore = InMemoryVectorStore.from_texts(texts=[text], embedding=embeddings)

# Expose the store through the retriever interface and look up the closest text.
retriever = vectorstore.as_retriever()
retrieved_documents = retriever.invoke("What is LangChain?")

# Cell output: content of the top-ranked document.
retrieved_documents[0].page_content

In [1]:
import ollama
import chromadb

# Sample corpus: one fact per string.
documents = [
    "Llamas are members of the camelid family meaning they're pretty closely related to vicuñas and camels",
    "Llamas were first domesticated and used as pack animals 4,000 to 5,000 years ago in the Peruvian highlands",
    "Llamas can grow as much as 6 feet tall though the average llama between 5 feet 6 inches and 5 feet 9 inches tall",
    "Llamas weigh between 280 and 450 pounds and can carry 25 to 30 percent of their body weight",
    "Llamas are vegetarians and have very efficient digestive systems",
    "Llamas live to be about 20 years old, though some only live for 15 years and others live to be 30 years old",
]

client = chromadb.Client()
# get_or_create_collection keeps this cell re-runnable: create_collection
# raises once a collection named "docs3" already exists.
collection = client.get_or_create_collection(name="docs3")

# Embed the whole corpus in one request instead of one request per document.
# The result is stored under its own name so the `embeddings` model object
# defined earlier in the notebook is not overwritten.
response = ollama.embed(model="Qwen2.5:14b", input=documents)
doc_embeddings = response["embeddings"]
assert len(doc_embeddings) == len(documents), "expected one embedding per document"

# upsert (rather than add) so re-running the cell does not trip over existing ids.
collection.upsert(
    ids=[str(i) for i in range(len(documents))],
    embeddings=doc_embeddings,
    documents=documents,
)

In [5]:
# Example question to answer from the stored documents.
# (`input` shadows the builtin, but the generation cell below reads this name.)
input = "What animals are llamas related to?"

# Embed the question, then ask the collection for the single closest document.
response = ollama.embed(model="Qwen2.5:14b", input=input)
results = collection.query(query_embeddings=response["embeddings"], n_results=1)

# One query was sent and one result requested, so take the first hit of the first query.
data = results["documents"][0][0]

In [7]:
# Combine the retrieved document with the original question and let the model answer.
prompt = f"Using this data: {data}. Respond to this prompt: {input}"
output = ollama.generate(model="Qwen2.5:14b", prompt=prompt)

print(output["response"])

Llamas are part of the Camelidae family, which includes camels, alpacas, guanacos, and vicuñas. They share a common ancestor with these animals and have similar characteristics despite some variations in their habitats and physical appearances. So while they might not look like it at first glance, llamas are closely related to camels as well as the other South American camelids mentioned.


## Method 02: Hugging Face & LangChain

In [None]:
%pip install -U sentence-transformers


### 参考笔记下载模型到本地

```bash
huggingface-cli download --resume-download Qwen/Qwen2.5-0.5B-Instruct --local-dir models/Qwen/Qwen2.5-0.5B-Instruct
huggingface-cli download --resume-download sentence-transformers/all-MiniLM-L6-v2 --local-dir models/sentence-transformers/all-MiniLM-L6-v2
```


#### sentence_transformers

In [9]:
from sentence_transformers import SentenceTransformer
# Sentences to embed.
sentences = [
    "This is an example sentence",
    "Each sentence is converted",
]

# Relative path matching the --local-dir used by the download command above.
# Later cells reuse `model_name`.
model_name = "models/sentence-transformers/all-MiniLM-L6-v2"

# Load the model and encode all sentences in one call.
model = SentenceTransformer(model_name)
embeddings = model.encode(sentences)
print(embeddings)

  from .autonotebook import tqdm as notebook_tqdm


[[ 6.76568523e-02  6.34958521e-02  4.87130806e-02  7.93049783e-02
   3.74480374e-02  2.65278365e-03  3.93749177e-02 -7.09843170e-03
   5.93614466e-02  3.15370150e-02  6.00980893e-02 -5.29051945e-02
   4.06067483e-02 -2.59308573e-02  2.98428051e-02  1.12687412e-03
   7.35149607e-02 -5.03819585e-02 -1.22386590e-01  2.37028394e-02
   2.97265165e-02  4.24768887e-02  2.56337821e-02  1.99519587e-03
  -5.69190867e-02 -2.71598585e-02 -3.29035372e-02  6.60248846e-02
   1.19007148e-01 -4.58791740e-02 -7.26215243e-02 -3.25839706e-02
   5.23413867e-02  4.50552888e-02  8.25298112e-03  3.67024168e-02
  -1.39415041e-02  6.53919354e-02 -2.64272988e-02  2.06364246e-04
  -1.36643481e-02 -3.62809524e-02 -1.95043795e-02 -2.89738458e-02
   3.94270942e-02 -8.84091258e-02  2.62425095e-03  1.36713823e-02
   4.83062975e-02 -3.11565585e-02 -1.17329188e-01 -5.11690527e-02
  -8.85287523e-02 -2.18961928e-02  1.42986625e-02  4.44167890e-02
  -1.34814931e-02  7.43392482e-02  2.66382918e-02 -1.98762193e-02
   1.79191

#### langchain_huggingface

In [11]:
from langchain_huggingface import HuggingFaceEmbeddings


# Options forwarded to the embedding wrapper: run on CPU, with
# normalize_embeddings switched off.
model_kwargs = dict(device="cpu")
encode_kwargs = dict(normalize_embeddings=False)

embedding = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

# Embed a one-element document list with the wrapper.
documents = ["foo bar"]
output = embedding.embed_documents(documents)

In [12]:
# Display the value returned by embed_documents in the previous cell.
print(output)

[[-0.0017989963525906205, -0.013487111777067184, 0.02420108951628208, -0.009858008474111557, -0.09280230849981308, 0.005080039147287607, 0.13498304784297943, 0.005321366246789694, 0.04112976789474487, -0.04029317572712898, 0.015876546502113342, -0.025389710441231728, 0.01163634192198515, -0.050170429050922394, 0.0218398068100214, 0.0593220517039299, -0.010649238713085651, -0.013808458112180233, -0.033850785344839096, -0.03034793771803379, -0.053076982498168945, 0.09256669133901596, -0.05047602578997612, 0.0027815590146929026, -0.036567479372024536, 0.017954794690012932, -0.07646935433149338, 0.004213039763271809, 0.030650123953819275, -0.06343981623649597, 0.06149103865027428, 0.1092502772808075, 0.051992982625961304, -0.01656285673379898, -0.0678321123123169, -0.03733935207128525, 0.04910312220454216, 0.046153414994478226, 0.06622489541769028, 0.055696383118629456, -0.04327422007918358, -0.03246814385056496, -0.03804902359843254, -0.009681589901447296, 0.016374045982956886, 0.05635270

#### HF transformers

In [13]:
from transformers import AutoTokenizer, AutoModel

# Load the tokenizer and model from the path stored in `model_name`
# (assigned in the sentence_transformers cell above).
# NOTE: this rebinds `model`, which previously held the SentenceTransformer instance.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)



In [14]:
import torch

def get_embeddings(text):
    """Embed text by mean-pooling the model's last hidden states.

    Relies on the notebook-level `tokenizer` and `model` loaded in the
    previous cell.

    Args:
        text: A single string or a list of strings.

    Returns:
        A tensor with one row per input text: the average of
        `last_hidden_state` over the token axis, counting only positions
        where the attention mask is 1 (padding is ignored).
    """
    # padding=True lets texts of different lengths share one batch tensor;
    # truncation=True cuts inputs that exceed the tokenizer's maximum length.
    inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)

    hidden = outputs.last_hidden_state
    # Broadcast the mask over the hidden dimension so padded positions contribute zero.
    mask = inputs["attention_mask"].unsqueeze(-1).to(hidden.dtype)
    token_sum = (hidden * mask).sum(dim=1)
    # clamp guards against division by zero for an all-padding row.
    token_count = mask.sum(dim=1).clamp(min=1e-9)
    return token_sum / token_count

# Example usage: embed one sentence and print the resulting tensor.
# NOTE: `embedding` is rebound here; it previously referred to the
# HuggingFaceEmbeddings instance created in the langchain_huggingface cell.
text = "This is an example sentence."
embedding = get_embeddings(text)
print(embedding)

tensor([[ 4.6800e-01,  3.2343e-01,  2.9820e-01,  4.5350e-01,  1.7479e-01,
         -1.9004e-02,  3.5664e-02, -6.3107e-02,  2.9992e-01,  1.0729e-01,
          3.4672e-01, -1.4916e-01,  2.2109e-01, -5.9878e-02,  2.2805e-01,
         -2.3420e-02,  2.3570e-01, -3.0576e-01, -4.6247e-01,  1.5686e-01,
          2.5805e-01,  1.6850e-01,  1.5763e-01,  7.0108e-02, -1.5944e-01,
         -1.2217e-01, -2.4225e-01,  3.4938e-01,  5.2594e-01, -1.4147e-01,
         -3.2221e-01, -1.4581e-01,  1.8868e-01,  2.1689e-01,  7.6292e-02,
          1.8386e-01, -5.2244e-02,  4.0462e-01, -2.1122e-01, -3.2414e-02,
          4.4955e-02,  2.4184e-04,  6.2174e-03, -5.7089e-02,  6.5080e-02,
         -4.0146e-01, -7.8740e-04,  2.6154e-02,  1.2217e-01, -1.5045e-01,
         -5.1197e-01, -2.1838e-01, -4.3485e-01, -1.1974e-02,  8.5842e-02,
          2.3562e-01,  2.9498e-02,  2.8519e-01,  1.2879e-01, -7.6894e-02,
         -8.6563e-02, -1.1272e-01, -4.5261e-01,  3.1581e-01,  7.1173e-01,
          1.1608e-01,  5.7722e-03,  2.