# In [2]:  (Jupyter notebook cell prompt; the cell's code follows)
from typing import Optional, List

from llama_index.core import StorageContext
from llama_index.storage.chat_store.postgres import PostgresChatStore
from llama_index.core.memory import ChatMemoryBuffer
import pandas as pd
from llama_index.experimental.query_engine import PandasQueryEngine
from llama_index.core.tools import QueryEngineTool, ToolMetadata, FunctionTool
from llama_index.core.readers import SimpleDirectoryReader
from llama_index.core.indices import VectorStoreIndex
from llama_index.core.settings import Settings
from llama_index.llms.ollama import Ollama
import nest_asyncio
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.core.query_engine import BaseQueryEngine
import uuid
import chromadb

from llama_index.core.agent import ReActAgent
from llama_index.core.prompts import PromptTemplate
from llama_index.vector_stores.chroma import ChromaVectorStore
from pydantic import BaseModel

from llama_index.core.vector_stores import (
    MetadataFilter,
    MetadataFilters,
    FilterOperator,
)

import os

"""
Init for APP
"""
# Patch asyncio so an already-running event loop can be re-entered
# (presumably because this runs inside a notebook -- confirm).
nest_asyncio.apply()

# In-memory registries. The TODOs further down indicate these are placeholders
# for real database storage.
chats = {}
chat_files = {}

# LLM and Embedding settings
llm = Ollama(model="llama3.1")
embed_model = OllamaEmbedding(model_name="nomic-embed-text")
Settings.llm = llm
Settings.embed_model = embed_model
Settings.chunk_size = 512
Settings.chunk_overlap = 50

# vector DB
# Fail early with a readable message: os.getenv returns None when the variable
# is unset, and None is not a usable collection name.
chroma_collection_name = os.getenv("CHROMA_COLLECTION_NAME")
if not chroma_collection_name:
    raise RuntimeError(
        "The CHROMA_COLLECTION_NAME environment variable must be set to the "
        "name of the Chroma collection to use."
    )
chroma_client = chromadb.HttpClient()
chroma_collection = chroma_client.get_or_create_collection(chroma_collection_name)
chroma_vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

# postgres DB
# NOTE(review): credentials are hard-coded in the URI below; consider loading
# the URI from configuration before this leaves local development.
chat_store = PostgresChatStore.from_uri(
    uri="postgresql+asyncpg://postgres:password@127.0.0.1:5432/llama-rag",
)

class PandasTool(BaseModel):
    """Pairs a pandas query engine with the name of the spreadsheet it wraps.

    ``apandas_tool`` is the coroutine that is exposed to the agent as a tool.
    """

    # The query engine is not a pydantic model, so pydantic cannot generate a
    # schema for the field below unless arbitrary types are allowed.
    # NOTE(review): ``model_config`` is the pydantic v2 spelling -- confirm the
    # installed pydantic major version.
    model_config = {"arbitrary_types_allowed": True}

    pandas_query_engine: PandasQueryEngine
    file_name: str

    async def apandas_tool(self, query: str) -> str:
        """Run ``query`` through the pandas query engine and return the result as text.

        :param query: natural-language question to ask about the spreadsheet.
        :return: the engine's response converted to ``str``; if anything raises,
            the string ``"Error: <message>"`` is returned instead so the calling
            agent receives the failure as a tool observation rather than an
            exception.
        """
        try:
            result = await self.pandas_query_engine.aquery(query)
            return str(result.response)  # Ensures only the output is returned
        except Exception as e:
            return f"Error: {str(e)}"

class ChatFile(BaseModel):
    """Metadata record describing one file attached to a chat."""
    id: str  # identifier of this file; used as the "file_id" metadata filter value
    user_id: str  # owner of the file (currently filled with a placeholder on upload)
    chat_id: str  # identifier of the chat the file belongs to
    file_name: str  # file name; shown in tool descriptions
    path: str  # location the file is read from for indexing and pandas loading
    mime_type: str  # MIME type; decides whether a pandas tool is built for the file

class Chat(BaseModel):
    """A chat session together with the files attached to it."""
    chat_id: str  # identifier of the chat; also used as the chat-store key
    user_id: str  # owner of the chat
    title: str
    description: str
    context: str  # free-form context text handed to the agent in chat_with_llm
    # Files attached to the chat. Defaults to None (not an empty list), so
    # readers must handle the "no files yet" case explicitly.
    chat_files: Optional[List[ChatFile]] = None

class ToolsCollection(BaseModel):
    """Bundle of everything needed to build the agent's tool list for one chat."""

    # BaseQueryEngine (and the engine held by PandasTool) is not a pydantic
    # model, so pydantic needs arbitrary types enabled to accept these fields.
    # NOTE(review): ``model_config`` is the pydantic v2 spelling -- confirm the
    # installed pydantic major version.
    model_config = {"arbitrary_types_allowed": True}

    pd_tools: List[PandasTool]  # pandas-backed tools, one per spreadsheet file
    query_engines: List[BaseQueryEngine]  # query engines; index i pairs with files[i]
    files: List[ChatFile]  # the chat's files, used for tool descriptions

def index_uploaded_file(chat_file: ChatFile) -> None:
    """Load the file at ``chat_file.path`` and index it into the Chroma collection.

    Every loaded document is tagged with ``file_id`` metadata so that the
    per-file ``MetadataFilter`` objects (which match on the ``"file_id"`` key)
    can find the chunks that belong to this file.

    :param chat_file: the file record to index; ``path`` is read from disk and
        ``id`` is stored as the ``file_id`` metadata value.
    """
    documents = SimpleDirectoryReader(input_files=[chat_file.path]).load_data()

    file_id = str(chat_file.id)
    for document in documents:
        document.metadata["file_id"] = file_id
        # The id is only a lookup key; keep it out of the text that is
        # embedded or shown to the LLM.
        document.excluded_embed_metadata_keys.append("file_id")
        document.excluded_llm_metadata_keys.append("file_id")

    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)

    # The vector store is supplied through the storage context; the embedding
    # model keyword is ``embed_model``.
    VectorStoreIndex.from_documents(
        documents=documents,
        storage_context=storage_context,
        show_progress=True,
        embed_model=Settings.embed_model,
    )

def upload_file_to_chat(file, chat_id: str):
    """Attach an uploaded file to a chat, register it, and index it.

    :param file: the uploaded file object; its ``mime_type`` and ``file_name``
        attributes are read.
    :param chat_id: identifier of the chat to attach the file to.
    :return: the updated ``Chat``.
    :raises KeyError: if no chat is registered under ``chat_id``.
    """
    # get chat
    chat = chats.get(chat_id)
    if chat is None:
        raise KeyError(f"No chat found for chat_id {chat_id!r}")

    # ChatFile.id is declared as str, so convert the UUID up front and use the
    # same string as the registry key.
    chat_file_id = str(uuid.uuid4())
    chat_file = ChatFile(
        id=chat_file_id,
        # TODO: change ID to session or Payload
        user_id="",  # get the id by the session or payload
        mime_type=file.mime_type,
        file_name=file.file_name,
        path=f"/uploads/{chat_id}/{file.file_name}",
        chat_id=chat_id,
    )

    # set the file to the chat, updates the chat
    # TODO: change to DB
    # chat_files defaults to None on a freshly created chat.
    if chat.chat_files is None:
        chat.chat_files = []
    # Store the ChatFile record (not the raw upload object): downstream code
    # reads .id, .path and .mime_type from the entries of this list.
    chat.chat_files.append(chat_file)
    chats[chat_id] = chat

    # save the ChatFile
    # TODO: change to DB storage
    chat_files[chat_file_id] = chat_file

    # async, do something behind the request
    # index the file
    index_uploaded_file(chat_file)
    return chat

def create_chat(title: str, description: str, context: str):
    """
    Create a chat entry when the user first starts a chat and register it.

    :param title: title of the chat.
    :param description: description of the chat.
    :param context: context text stored on the chat for later use by the agent.
    :return: the newly created ``Chat``.
    """
    # Use the string form everywhere: Chat.chat_id is a str and the other
    # functions look chats up by that string, so the registry key must match.
    chat_id = str(uuid.uuid4())
    # TODO: get the user id later from the session or payload
    chat = Chat(
        chat_id=chat_id,
        title=title,
        description=description,
        context=context,
        user_id="user-id",
    )
    chats[chat_id] = chat
    return chat

def create_filters_for_files(files: List[ChatFile]):
    """Build one ``MetadataFilters`` per file, each matching ``file_id == file.id``.

    :param files: the chat files to build filters for.
    :return: a list of ``MetadataFilters`` in the same order as ``files``.
    """

    def _filters_for(chat_file):
        # A single equality condition on the "file_id" metadata key.
        condition = MetadataFilter(
            key="file_id",
            value=chat_file.id,
            operator=FilterOperator.EQ,
        )
        return MetadataFilters(filters=[condition])

    per_file_filters = []
    for chat_file in files:
        per_file_filters.append(_filters_for(chat_file))
    return per_file_filters

def get_query_engines_from_filters(filters: List[MetadataFilters]):
    """Create one query engine per filter set over the shared Chroma vector store.

    :param filters: one ``MetadataFilters`` per file.
    :return: a list of query engines in the same order as ``filters``.
    """
    # A single index over the existing vector store is shared by all engines.
    index = VectorStoreIndex.from_vector_store(
        vector_store=chroma_vector_store,
        storage_context=StorageContext.from_defaults(vector_store=chroma_vector_store),
        embed_model=Settings.embed_model,
    )

    engines = []
    for file_filters in filters:
        engines.append(index.as_query_engine(filters=file_filters))
    return engines

def get_pandas_tools_from_files(files: List[ChatFile]):
    """Build a ``PandasTool`` for every CSV or Excel file in ``files``.

    Files whose MIME type is not a recognised spreadsheet type are skipped.

    :param files: the chat files to inspect.
    :return: a list of ``PandasTool`` objects, one per spreadsheet file, in the
        order the files were given.
    """
    # The original "application/csv" / "application/excel" spellings are kept
    # for compatibility; the registered MIME types are accepted as well so
    # ordinary uploads are recognised.
    csv_mime_types = {"application/csv", "text/csv"}
    excel_mime_types = {
        "application/excel",
        "application/vnd.ms-excel",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    }

    pd_tools = []
    for file in files:
        # ``verbose`` only controlled diagnostic output and is deprecated for
        # read_csv, so it is no longer passed.
        if file.mime_type in csv_mime_types:
            df = pd.read_csv(file.path)
        elif file.mime_type in excel_mime_types:
            df = pd.read_excel(file.path)
        else:
            continue
        pd_tools.append(
            PandasTool(
                pandas_query_engine=PandasQueryEngine(df=df),
                file_name=file.file_name,
            )
        )
    return pd_tools

def aggregate_tools_from_collection(collection: ToolsCollection):
    """Turn a ``ToolsCollection`` into the flat list of tools given to the agent.

    :param collection: query engines (paired by index with ``collection.files``)
        and pandas tools.
    :return: the query-engine tools followed by the pandas function tools.
    """
    tools = [
        QueryEngineTool(
            query_engine=query_engine,
            metadata=ToolMetadata(
                name=f"query_engine_{i}",
                description=f"Queries through the document {collection.files[i].file_name}",
            ),
        )
        for i, query_engine in enumerate(collection.query_engines)
    ]
    pd_tools = [
        FunctionTool.from_defaults(
            async_fn=pd_tool.apandas_tool,
            # Interpolate the index itself; "{[i]}" rendered a one-element list
            # and produced names such as "pandas_tool_[0]".
            name=f"pandas_tool_{i}",
            description=f"Pandas query tool for the spreadsheet {pd_tool.file_name}",
        )
        for i, pd_tool in enumerate(collection.pd_tools)
    ]
    return tools + pd_tools

async def chat_with_llm(query: str, chat_id: str):
    """
    Send a query to the LLM agent of a chat and return the agent's response.

    The agent is given one query-engine tool per chat file plus a pandas tool
    for each spreadsheet file, and its memory is backed by the chat store under
    the chat's id.

    :param query: the user's message.
    :param chat_id: identifier of the chat to talk to.
    :return: the response object returned by the agent's ``achat``.
    :raises KeyError: if no chat is registered under ``chat_id``.
    """
    # get the chat by its ID from the DB
    chat = chats[chat_id]
    chat_memory = ChatMemoryBuffer(
        token_limit=5000,
        chat_store=chat_store,
        chat_store_key=chat_id,
    )

    # get the files by the chat_id and create filters
    # chat_files is None until the first upload; treat that as "no files".
    files: List[ChatFile] = chat.chat_files or []
    filters = create_filters_for_files(files)

    # builds index and get query engines from the filters
    query_engines = get_query_engines_from_filters(filters=filters)

    # check if file is csv/xcsl for Pandas Tool
    pd_tools = get_pandas_tools_from_files(files=files)

    tools_collection = ToolsCollection(pd_tools=pd_tools, query_engines=query_engines, files=files)
    tools = aggregate_tools_from_collection(collection=tools_collection)

    # create an agent to work with
    # The chat's context is supplied through ``context`` so it is added to the
    # ReAct system prompt. Overwriting "agent_worker:system_prompt" with the
    # raw context would drop the tool descriptions and output-format
    # instructions the agent needs in order to call its tools.
    agent = ReActAgent.from_tools(
        tools=tools,
        llm=Settings.llm,
        memory=chat_memory,
        verbose=True,
        max_iterations=20,
        context=chat.context,
    )
    # The message is passed positionally: achat's parameter is named
    # ``message``, so the keyword ``query=`` is rejected.
    response = await agent.achat(query)
    return response

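# Recorded cell output (its line numbers do not match the code above, so it
# appears to come from an earlier revision of this cell):
# IndentationError: expected an indented block after function definition on line 153 (2388024663.py, line 156)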