<a href="https://colab.research.google.com/github/vaishnavimaramreddy/ERP-MANAGEMENT-SYSTEM/blob/main/ChromaDB_and_Metadata.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ChromaDB and Metadata
# vaishnavi


In [5]:
# https://cookbook.chromadb.dev/core/filters
!pip install chromadb

Collecting chromadb
  Downloading chromadb-0.6.3-py3-none-any.whl.metadata (6.8 kB)
Collecting build>=1.0.3 (from chromadb)
  Downloading build-1.2.2.post1-py3-none-any.whl.metadata (6.5 kB)
Collecting chroma-hnswlib==0.7.6 (from chromadb)
  Downloading chroma_hnswlib-0.7.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (252 bytes)
Collecting fastapi>=0.95.2 (from chromadb)
  Downloading fastapi-0.115.11-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn>=0.18.3 (from uvicorn[standard]>=0.18.3->chromadb)
  Downloading uvicorn-0.34.0-py3-none-any.whl.metadata (6.5 kB)
Collecting posthog>=2.4.0 (from chromadb)
  Downloading posthog-3.21.0-py2.py3-none-any.whl.metadata (2.9 kB)
Collecting onnxruntime>=1.14.1 (from chromadb)
  Downloading onnxruntime-1.21.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting opentelemetry-exporter-otlp-proto-grpc>=1.2.0 (from chromadb)
  Downloading opentelemetry_exporter_otlp_proto_grpc-1.31.1-py

In [13]:
import chromadb
from chromadb.utils import embedding_functions
from langchain.schema import Document
import uuid

# Spice catalogue, one row per spice:
# (description text, name, flavor_profile, use, origin)
spice_rows = [
    (
        "Warm, sweet spice with notes of clove and nutmeg, perfect for desserts and chai.",
        "Cinnamon", "sweet, warm, spicy", "desserts, beverages", "Sri Lanka",
    ),
    (
        "Pungent, earthy spice with a strong aroma, essential in savory dishes and garam masala.",
        "Cumin", "earthy, pungent, warm", "curries, stews, spice blends", "Middle East",
    ),
    (
        "Fiery, bright yellow spice with a peppery bite, adds color and depth to curries.",
        "Turmeric", "earthy, bitter, peppery", "curries, rice, health drinks", "India",
    ),
    (
        "Aromatic, sweet spice with a cooling sensation, used in both sweet and savory dishes.",
        "Cardamom", "sweet, aromatic, cooling", "desserts, beverages, curries", "India",
    ),
    (
        "Intensely pungent spice with a sharp, biting flavor, a key ingredient in many curries.",
        "Ginger", "pungent, spicy, warm", "curries, stir-fries, teas", "Southeast Asia",
    ),
    (
        "Hot, fiery spice that adds a strong kick to dishes, available in various colors and intensities.",
        "Chili Pepper", "hot, fiery, pungent", "curries, pickles, sauces", "Americas",
    ),
    (
        "Sweet, warm spice with a nutty flavor, often used in sweet dishes and garam masala.",
        "Nutmeg", "sweet, nutty, warm", "desserts, beverages, spice blends", "Indonesia",
    ),
    (
        "Aromatic, floral spice with a slightly bitter taste, used in spice blends and biryanis.",
        "Saffron", "floral, earthy, slightly bitter", "rice dishes, desserts, beverages", "Iran",
    ),
    (
        "Small, pungent seeds with a sharp, peppery flavor, used in tempering and pickles.",
        "Mustard Seeds", "pungent, sharp, peppery", "tempering, pickles, curries", "Mediterranean",
    ),
    (
        "Aromatic, slightly bitter leaves with a strong, distinct flavor, used in curries and chutneys.",
        "Fenugreek Leaves", "bitter, aromatic, strong", "curries, breads, spice blends", "Mediterranean",
    ),
]

# Wrap every row in a Document: the description becomes the page content,
# the remaining columns become its metadata.
docs = [
    Document(
        page_content=description,
        metadata={"name": name, "flavor_profile": flavor_profile, "use": use, "origin": origin},
    )
    for description, name, flavor_profile, use, origin in spice_rows
]

# Split the Document objects into parallel lists: texts, metadata dicts, and ids.
document_texts = [item.page_content for item in docs]
document_metadatas = [item.metadata for item in docs]
# One freshly generated random UUID string per document.
document_ids = [str(uuid.uuid4()) for _ in docs]

collection_name = "spice_collection"
client = chromadb.Client()

# Drop any collection with this name left over from an earlier run so the cell can be
# re-executed. A ValueError is reported as "does not exist"; any other failure is
# printed and execution continues.
try:
    client.delete_collection(name=collection_name)
    print(f"Collection '{collection_name}' deleted successfully.")
except ValueError:
    print(f"Collection '{collection_name}' does not exist.")
except Exception as e:
    print(f"An error occurred: {e}")

# Reuse collection_name (rather than repeating the literal) so the collection that is
# deleted above and the one created here can never drift apart.
collection = client.create_collection(
    name=collection_name,
    embedding_function=embedding_functions.SentenceTransformerEmbeddingFunction(
        model_name="all-MiniLM-L6-v2"
    ),
)

# Store the texts in the collection together with their metadata and ids.
collection.add(
    ids=document_ids,
    documents=document_texts,
    metadatas=document_metadatas,
)

# Run one example query and ask for the two closest documents.
results = collection.query(
    query_texts=["What is the origin of Nutmeg"],
    n_results=2,
)
print(results)

Collection 'spice_collection' deleted successfully.
{'ids': [['ce0e07be-0764-40f6-9ab3-0d2d6b5a3465', 'd7961940-0169-4baa-95c4-7bcc6fc9693a']], 'embeddings': None, 'documents': [['Small, pungent seeds with a sharp, peppery flavor, used in tempering and pickles.', 'Warm, sweet spice with notes of clove and nutmeg, perfect for desserts and chai.']], 'uris': None, 'data': None, 'metadatas': [[{'flavor_profile': 'pungent, sharp, peppery', 'name': 'Mustard Seeds', 'origin': 'Mediterranean', 'use': 'tempering, pickles, curries'}, {'flavor_profile': 'sweet, warm, spicy', 'name': 'Cinnamon', 'origin': 'Sri Lanka', 'use': 'desserts, beverages'}]], 'distances': [[1.2257800102233887, 1.3819797039031982]], 'included': [<IncludeEnum.distances: 'distances'>, <IncludeEnum.documents: 'documents'>, <IncludeEnum.metadatas: 'metadatas'>]}


In [14]:
# Print every field of the query result, each followed by a dashed separator line.
# (The separator used to be ""*25, which is just an empty string.)
for key, value in results.items():
    print(key, value)
    print("-" * 25)

ids [['ce0e07be-0764-40f6-9ab3-0d2d6b5a3465', 'd7961940-0169-4baa-95c4-7bcc6fc9693a']]

embeddings None

documents [['Small, pungent seeds with a sharp, peppery flavor, used in tempering and pickles.', 'Warm, sweet spice with notes of clove and nutmeg, perfect for desserts and chai.']]

uris None

data None

metadatas [[{'flavor_profile': 'pungent, sharp, peppery', 'name': 'Mustard Seeds', 'origin': 'Mediterranean', 'use': 'tempering, pickles, curries'}, {'flavor_profile': 'sweet, warm, spicy', 'name': 'Cinnamon', 'origin': 'Sri Lanka', 'use': 'desserts, beverages'}]]

distances [[1.2257800102233887, 1.3819797039031982]]

included [<IncludeEnum.distances: 'distances'>, <IncludeEnum.documents: 'documents'>, <IncludeEnum.metadatas: 'metadatas'>]



In [17]:
# Show only the spice name of each hit returned for the first query text.
first_query_metadatas = results["metadatas"][0]
for entry in first_query_metadatas:
    print(entry["name"])

Mustard Seeds
Cinnamon


In [18]:
# Metadata filter: only documents whose "use" field equals this exact string are candidates.
use_filter = {"use": "curries, breads, spice blends"}
results = collection.query(
    query_texts=["I want a spice"],
    where=use_filter,
    n_results=2,
)

results

{'ids': [['e613e035-276c-43a8-9bdb-063030d7f8ac']],
 'embeddings': None,
 'documents': [['Aromatic, slightly bitter leaves with a strong, distinct flavor, used in curries and chutneys.']],
 'uris': None,
 'data': None,
 'metadatas': [[{'flavor_profile': 'bitter, aromatic, strong',
    'name': 'Fenugreek Leaves',
    'origin': 'Mediterranean',
    'use': 'curries, breads, spice blends'}]],
 'distances': [[1.184003233909607]],
 'included': [<IncludeEnum.distances: 'distances'>,
  <IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}

In [8]:
# Document filter: only documents whose text contains "chutney" are candidates.
text_filter = {"$contains": "chutney"}
results = collection.query(
    query_texts=["a spice of delight"],
    where_document=text_filter,
    n_results=2,
)

results

{'ids': [['47408754-0fdd-4909-b547-28aec08fd030']],
 'embeddings': None,
 'documents': [['Aromatic, slightly bitter leaves with a strong, distinct flavor, used in curries and chutneys.']],
 'uris': None,
 'data': None,
 'metadatas': [[{'flavor_profile': 'bitter, aromatic, strong',
    'name': 'Fenugreek Leaves',
    'origin': 'Mediterranean',
    'use': 'curries, breads, spice blends'}]],
 'distances': [[0.9449310302734375]],
 'included': [<IncludeEnum.distances: 'distances'>,
  <IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}

In [20]:
def filter_by_metadata_contains(collection, query_texts, metadata_field, search_term, n_results=10):
    """
    Query a collection, then keep only the hits whose metadata field contains a search term.

    The filtering happens client-side on the rows returned by ``collection.query``, so at
    most ``n_results`` candidates per query text are inspected; a matching document that is
    not among those candidates will not be returned.

    Args:
        collection: Object exposing ``query(query_texts=..., n_results=...)`` that returns a
            dict with "ids", "documents", "metadatas" and "distances", each holding one
            list per query text.
        query_texts: Query text(s) forwarded unchanged to ``collection.query``.
        metadata_field: Metadata key to inspect on every hit.
        search_term: Value tested with ``search_term in metadata[metadata_field]``.
        n_results: Number of candidates requested per query text before filtering.

    Returns:
        dict with the keys "ids", "documents", "metadatas" and "distances". Each value
        contains one list per query text, holding only the matching hits in their
        original order.
    """
    results = collection.query(query_texts=query_texts, n_results=n_results)

    def _matches(metadata):
        # Hits without metadata, or without the requested field, can never match.
        if not metadata or metadata_field not in metadata:
            return False
        try:
            return bool(search_term in metadata[metadata_field])
        except TypeError:
            # The stored value does not support `in` (e.g. a number or None): no match.
            return False

    filtered_results = {"ids": [], "documents": [], "metadatas": [], "distances": []}

    # Filter every query's result list, not just the first one, so the output keeps
    # the one-list-per-query layout of the original query result.
    for query_index, metadatas in enumerate(results["metadatas"]):
        kept = [i for i, metadata in enumerate(metadatas) if _matches(metadata)]
        filtered_results["ids"].append([results["ids"][query_index][i] for i in kept])
        filtered_results["documents"].append([results["documents"][query_index][i] for i in kept])
        filtered_results["metadatas"].append([metadatas[i] for i in kept])
        filtered_results["distances"].append([results["distances"][query_index][i] for i in kept])

    return filtered_results



In [21]:
# Example usage: find documents whose "use" metadata contains "bread".
query_texts = ["Find a spice delight"]
metadata_field = "use"
search_term = "bread"
filtered_results = filter_by_metadata_contains(collection, query_texts, metadata_field, search_term)

# Ids that matched for the first (only) query text. Named matching_ids rather than
# `id` so the builtin id() is not shadowed for the rest of the notebook.
matching_ids = filtered_results["ids"][0]
print(f"Documents with {search_term!r} in {metadata_field}:", filtered_results["ids"])

# Fetch the first match by id; evaluates to None (nothing displayed) when no document matched.
collection.get(ids=matching_ids[:1]) if matching_ids else None

Documents with 'bread' in use: [['e613e035-276c-43a8-9bdb-063030d7f8ac']]


{'ids': ['e613e035-276c-43a8-9bdb-063030d7f8ac'],
 'embeddings': None,
 'documents': ['Aromatic, slightly bitter leaves with a strong, distinct flavor, used in curries and chutneys.'],
 'uris': None,
 'data': None,
 'metadatas': [{'flavor_profile': 'bitter, aromatic, strong',
   'name': 'Fenugreek Leaves',
   'origin': 'Mediterranean',
   'use': 'curries, breads, spice blends'}],
 'included': [<IncludeEnum.documents: 'documents'>,
  <IncludeEnum.metadatas: 'metadatas'>]}