In [None]:
!pip install 'markitdown[all]'

In [None]:
from markitdown import MarkItDown

# Convert the spreadsheet with MarkItDown and show the resulting text.
# NOTE: the path below is a placeholder and has to be replaced with a real file
# under ./data before this cell can succeed.
md = MarkItDown(
    enable_plugins=False,  # Set to True to enable plugins
)
result = md.convert("./data/{REPLACE_WITH_SENSIBLE_DATA.xlsx}")
converted_text = result.text_content
print(converted_text)

In [None]:
# Show the converted text again without re-running the conversion.
markdown_text = result.text_content
print(markdown_text)

In [None]:
# Persist the converted text as UTF-8 so it can be read back from ./data/result.md.
with open("./data/result.md", "w", encoding="utf-8") as markdown_file:
    markdown_file.write(result.text_content)

In [28]:
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.readers import SimpleDirectoryReader
from llama_index.core.indices import VectorStoreIndex
from llama_index.core.settings import Settings
from llama_index.llms.ollama import Ollama
import nest_asyncio
from llama_index.embeddings.ollama import OllamaEmbedding

from llama_index.core import StorageContext, load_index_from_storage
# load index
import os
from llama_index.core.memory import ChatMemoryBuffer
# nest_asyncio is applied first, before any llama-index object is created.
nest_asyncio.apply()

# Ollama-backed models: llama3.1 as the LLM, nomic-embed-text for embeddings.
ollama = Ollama(
    model="llama3.1",
    request_timeout=420,
)
embed_model = OllamaEmbedding(
    model_name="nomic-embed-text",
)

# Register both models and the chunk size on the global llama-index Settings.
Settings.llm = ollama
Settings.embed_model = embed_model
Settings.chunk_size = 2048

# Read the Markdown file that the conversion step wrote to ./data/result.md.
markdown_reader = SimpleDirectoryReader(input_files=['./data/result.md'])
documents = markdown_reader.load_data()

# Obtain the vector index: reuse the copy persisted in ./index when that
# directory exists, otherwise build it from `documents` and persist it there.
# NOTE: an existing ./index is loaded without checking whether ./data/result.md
# has changed since the index was built; delete the directory to force a rebuild.

# Cleared up front so a failed run cannot leave an index from an earlier
# execution of this cell in the kernel namespace.
index = None

try:
    if os.path.isdir('./index'):
        storage_context = StorageContext.from_defaults(persist_dir="./index")
        index = load_index_from_storage(storage_context=storage_context)
    else:
        transformations = [
            SentenceSplitter(
                chunk_size=1024 * 2,
                chunk_overlap=20,
            ),
        ]
        index = VectorStoreIndex.from_documents(documents, transformations=transformations)
        index.storage_context.persist('./index')
except Exception as e:
    # Report the failure and re-raise. Swallowing it would leave `index` as None
    # and make the next statement fail with an unrelated AttributeError that
    # hides the real cause.
    print(e)
    raise

# Query engine over the index, using the Ollama LLM and similarity_top_k=5.
query_engine = index.as_query_engine(llm=ollama, similarity_top_k=5)

# The query engine is the only tool handed to the agent.
query_tool_metadata = ToolMetadata(
    name="query_tool",
    description="A tool that is Useful when you want to query through the documents",
)
tools = [
    QueryEngineTool(query_engine=query_engine, metadata=query_tool_metadata),
]

from llama_index.core.agent import ReActAgent
from llama_index.core.prompts import PromptTemplate

# Conversation memory for the agent, created with token_limit=3000.
chat_memory = ChatMemoryBuffer.from_defaults(token_limit=3000)

# System prompt installed on the agent under the "agent_worker:system_prompt" key.
# `{tool_desc}` and `{tool_names}` are template variables (presumably filled in by
# the agent at run time - confirm against the installed llama-index version); the
# doubled braces in the JSON counter-example are escaped literal braces.
# The text is runtime behavior: edit it deliberately, not cosmetically.
prompt = PromptTemplate(
"""
You are a smart assistant designed to analyze complex insurance-related product data from an Excel file. You support the user by understanding data structures, modeling them, and translating them into suitable formats for stakeholders like domain experts, database admins, or frontend developers.

The Excel file includes structured extracts of insurance product configurations. Each contract component (e.g. savings part, risk module, or additional coverage) is modeled as a separate entity. Inside each entity, there are multiple template types representing grouped technical attributes (e.g. benefits, conditions, calculation parameters). These groups are represented as attribute bundles with validity constraints (temporal and/or logical).

Your job is to reason step-by-step through user queries, potentially using tools in a chain of thought manner to:
- Understand the schema
- Identify entities and relationships
- Recommend modeling approaches (relational, NoSQL, frontend structure, etc.)
- Rephrase for different target audiences

## Tools

You have access to the following tools:
{tool_desc}

You can use the tools in any sequence and combination. Break the user's problem down into sub-tasks as needed. If the user uploads a file or asks a question, think aloud first.

## Output Format

Please answer in the same language as the user's input.
Please use a valid JSON format for the Action Input. Do NOT do this {{'input': 'hello world', 'num_beams': 5}}.
Use the following format strictly:

Thought: The current language of the user is: (user's language). I need to (explain what you're doing step by step).
Action: tool name (one of {tool_names}) if using a tool.
Action Input: the input to the tool, in JSON format

After every tool usage, you'll receive:
Observation: tool response

Repeat the thought → action → observation loop until you have enough to respond.

Once ready, answer like this:

Thought: I can answer without using any more tools. I'll use the user's language to answer
Answer: [your answer here – same language as user]

If you cannot answer:
Thought: I cannot answer the question with the provided tools.
Answer: [your answer here – same language as user]


## Current Conversation
Below is the current conversation consisting of interleaving human and assistant messages.
"""
)

# Build the agent from the tool list, the Ollama LLM and the chat memory, then
# replace its system prompt with the custom template and reset its state.
# NOTE(review): `from_llm` may pick the concrete agent implementation based on the
# LLM's capabilities - confirm it yields a ReAct agent for this model, since the
# prompt key below addresses the agent worker's system prompt.
agent = ReActAgent.from_llm(
    tools=tools,
    llm=ollama,
    memory=chat_memory,
    verbose=True,
    max_iterations=50,
)
prompt_overrides = {"agent_worker:system_prompt": prompt}
agent.update_prompts(prompt_overrides)
agent.reset()

INFO:llama_index.core.indices.loading:Loading all indices.
Loading all indices.


In [None]:
# Ask (in German) what kind of data this is and whether the agent can model it.
response = agent.chat('Was ist das für Daten? Kannst du mir etwas so modellieren?')
print(response)

In [None]:
# Follow-up (in German): ask for a model simple enough to work out on paper.
response = agent.chat("Ja, aber modelliere es für mich so, dass ich auf Blatt papier machen kann-")
print(response)

In [None]:
# Ask (in German) for an ERD sketch or table-structure proposals (name, columns, relations).
response = agent.chat("Zeig mir eine ERD-Skizze dazu oder gib mir Tabellenstruktur-Vorschläge (Name, Spalten, Relationen).")
print(response)

In [None]:
# Ask (in German) for a template of the tables, with table names and their columns.
response = agent.chat("Kannst du mir eine Vorlage machen, wie ich die Tabellen gestalten kann? Mit Tabellenname und ihre Spalten bitte!")
print(response)

In [None]:
# Ask (in German) for a plain-language summary aimed at a senior engineer with Spring Boot experience.
response = agent.chat("Kannst du in einfachen Worten zusammenfasse, wie man das einem Senior Software Engineer mit Spring Boot Erfahrung beschreiben kann, damit er dies entwickeln kann?")
print(response)

In [None]:
# Ask (in German) how a senior developer would implement this with Java Spring Boot.
response = agent.chat("Kannst du das dem Senior Developer eklären, wie man das umsetzt und entwickelt? Mit Java Spring Boot")
print(response)