In [2]:
from langchain_elasticsearch import ElasticsearchStore
from langchain_community.embeddings.ollama import OllamaEmbeddings

embedding = OllamaEmbeddings(model="nomic-embed-text")
elastic_vector_search = ElasticsearchStore(
    es_url="http://localhost:9200",
    index_name="test_index",
    embedding=embedding,
    # es_user="elastic",
    # es_password="changeme",
)

In [3]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter

loader = TextLoader("../data/state_of_the_union.txt")
documents = loader.load()
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

In [22]:
db = ElasticsearchStore.from_documents(
    docs,
    embedding,
    es_url="http://localhost:9200",
    index_name="test-basic",
)
db.client.indices.refresh(index="test-basic")

query = "What did the president say about Ketanji Brown Jackson"
results = db.similarity_search(query)
print(results)

# db.client.indices.delete(index="test-basic")
db.client.indices.get(index="test-basic")

[Document(page_content='One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': 'data/state_of_the_union.txt', 'date': '2016-01-01', 'rating': 2, 'author': 'John Doe'}), Document(page_content='One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': 'data/state_of_the_union.txt', 'date': '2016-01-01', 'rating': 2, 'author': 'John Doe'}), Document(page_content='One of the most serious constitution

ObjectApiResponse({'test-basic': {'aliases': {}, 'mappings': {'properties': {'metadata': {'properties': {'author': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}, 'date': {'type': 'date'}, 'rating': {'type': 'long'}, 'source': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}}}, 'text': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}, 'vector': {'type': 'dense_vector', 'dims': 768, 'index': True, 'similarity': 'cosine'}}}, 'settings': {'index': {'routing': {'allocation': {'include': {'_tier_preference': 'data_content'}}}, 'number_of_shards': '1', 'provided_name': 'test-basic', 'creation_date': '1714329725498', 'number_of_replicas': '1', 'uuid': 'V-xI6sQgQkuuk4E04_ye0A', 'version': {'created': '8503000'}}}}})

## Metadata

In [5]:
# Adding metadata to documents
for i, doc in enumerate(docs):
    doc.metadata["date"] = f"{range(2010, 2020)[i % 10]}-01-01"
    doc.metadata["rating"] = range(1, 6)[i % 5]
    doc.metadata["author"] = ["John Doe", "Jane Doe"][i % 2]

db = ElasticsearchStore.from_documents(
    docs, embedding, es_url="http://localhost:9200", index_name="test-metadata"
)
db.client.indices.refresh(index="test-metadata")


query = "What did the president say about Ketanji Brown Jackson"
docs = db.similarity_search(query)
print(docs[0].metadata)

{'source': 'data/state_of_the_union.txt', 'date': '2010-01-01', 'rating': 1, 'author': 'John Doe'}


In [6]:
docs = db.similarity_search(
    query, filter=[{"term": {"metadata.author.keyword": "John Doe"}}]
)
print(docs[0].metadata)

{'source': 'data/state_of_the_union.txt', 'date': '2010-01-01', 'rating': 1, 'author': 'John Doe'}


In [7]:
docs = db.similarity_search(
    query,
    filter=[{"match": {"metadata.author": {"query": "Jon", "fuzziness": "AUTO"}}}],
)
print(docs[0].metadata)

{'source': 'data/state_of_the_union.txt', 'date': '2010-01-01', 'rating': 1, 'author': 'John Doe'}


In [8]:
docs = db.similarity_search(
    "Any mention about Fred?",
    filter=[{"range": {"metadata.date": {"gte": "2010-01-01"}}}],
)
print(docs[0].metadata)

{'source': 'data/state_of_the_union.txt', 'date': '2019-01-01', 'rating': 5, 'author': 'Jane Doe'}


In [9]:
docs = db.similarity_search(
    "Any mention about Fred?", filter=[{"range": {"metadata.rating": {"gte": 2}}}]
)
print(docs[0].metadata)

{'source': 'data/state_of_the_union.txt', 'date': '2019-01-01', 'rating': 5, 'author': 'Jane Doe'}


## Chain

In [19]:
from elasticsearch import Elasticsearch

db = Elasticsearch(
    hosts="http://localhost:9200",
)

customers = [
    {"firstname": "Jennifer", "lastname": "Walters"},
    {"firstname": "Monica","lastname":"Rambeau"},
    {"firstname": "Carol","lastname":"Danvers"},
    {"firstname": "Wanda","lastname":"Maximoff"},
    {"firstname": "Jennifer","lastname":"Takeda"},
]
for i, customer in enumerate(customers):
    db.create(index="test-customers", document=customer, id=i)

In [20]:
from langchain.chains.elasticsearch_database import ElasticsearchDatabaseChain
from langchain.llms.ollama import Ollama

llm = Ollama(model="dolphin-mistral")
chain = ElasticsearchDatabaseChain.from_llm(llm=llm, database=db, verbose=True)

In [21]:
question = "What are the first names of all the customers?"
chain.run(question)



[1m> Entering new ElasticsearchDatabaseChain chain...[0m
What are the first names of all the customers?
ESQuery:[32;1m[1;3m{'query': {'match': {'firstname': 'Jennifer'}}, 'aggs': {'customer_firstnames': {'terms': {'field': 'firstname.keyword', 'size': 10, 'order': {'_key': 'asc'}}}}}[0m
ESResult: [33;1m[1;3m{'took': 3, 'timed_out': False, '_shards': {'total': 4, 'successful': 4, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 4, 'relation': 'eq'}, 'max_score': 0.87546873, 'hits': [{'_index': 'customers', '_id': '0', '_score': 0.87546873, '_source': {'firstname': 'Jennifer', 'lastname': 'Walters'}}, {'_index': 'customers', '_id': '4', '_score': 0.87546873, '_source': {'firstname': 'Jennifer', 'lastname': 'Takeda'}}, {'_index': 'test-customers', '_id': '0', '_score': 0.87546873, '_source': {'firstname': 'Jennifer', 'lastname': 'Walters'}}, {'_index': 'test-customers', '_id': '4', '_score': 0.87546873, '_source': {'firstname': 'Jennifer', 'lastname': 'Takeda'}}]}, 'aggreg

"Question: What are the first names of all the customers?\nData: {'took': 3, 'timed_out': False, '_shards': {'total': 4, 'successful': 4, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 4, 'relation': 'eq'}, 'max_score': 0.87546873, 'hits': [{'_index': 'customers', '_id': '0', '_score': 0.87546873, '_source': {'firstname': 'Jennifer', 'lastname': 'Walters'}}, {'_index': 'customers', '_id': '4', '_score': 0.87546873, '_source': {'firstname': 'Jennifer', 'lastname': 'Takeda'}}, {'_index': 'test-customers', '_id': '0', '_score': 0.87546873, '_source': {'firstname': 'Jennifer', 'lastname': 'Walters'}}, {'_index': 'test-customers', '_id': '4', '_score': 0.87546873, '_source': {'firstname': 'Jennifer', 'lastname': 'Takeda'}}]}, 'aggregations': {'customer_firstnames': {'doc_count_error_upper_bound': 0, 'sum_other_doc_count': 0, 'buckets': [{'key': 'Jennifer', 'doc_count': 4}]}}}\nAnswer: Jennifer"