In [1]:
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    Settings,
    ServiceContext,
    StorageContext,
    load_index_from_storage,
)
from llama_index.core.embeddings.utils import resolve_embed_model
from llama_index.llms.ollama import Ollama

In [2]:
import json

In [3]:
!nvidia-smi

Sun Mar  3 14:28:07 2024       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:01:00.0 Off |                  N/A |
| 26%   34C    P8    13W / 215W |     15MiB /  8192MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [4]:
# %conda install -c conda-forge ipywidgets

In [5]:
# documents = SimpleDirectoryReader("data/docs").load_data()

# embed_model = resolve_embed_model("local:BAAI/bge-small-en-v1.5")

# service_context = ServiceContext.from_defaults(embed_model=embed_model, llm=None)

# index = VectorStoreIndex.from_documents(
#     documents, show_progress=True, service_context=service_context
# )

In [6]:
# index.storage_context.persist(persist_dir="data/persist_dir")

memory consumption when loading from storage: 672MiB \
memory consumption when loading in-memory: 5080MiB

In [7]:
from llama_index.core import Settings

In [8]:
Settings.llm = Ollama(model="mistral", request_timeout=180.0)
Settings.embed_model = resolve_embed_model("local:BAAI/bge-small-en-v1.5")

In [9]:
# embed_model = resolve_embed_model("local:BAAI/bge-small-en-v1.5")

# llm = Ollama(model="mistral", request_timeout=180.0)

# service_context = ServiceContext.from_defaults(embed_model=embed_model, llm=llm)

# rebuild storage context
storage_context = StorageContext.from_defaults(persist_dir="data/persist_dir")

# load index
index = load_index_from_storage(storage_context, show_progress=True) # , service_context=service_context

In [10]:
from llama_index.core import VectorStoreIndex, get_response_synthesizer
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor

In [32]:
retriever = VectorIndexRetriever(
    index=index,
    similarity_top_k=1,
)

In [33]:
nodes = retriever.retrieve("Get temperature in the room")

DEBUG:llama_index.core.indices.utils:> Top 1 nodes:
> [Node 1923e2fe-6356-4826-96cc-2b9946ca1ac3] [Similarity score:             0.730295] Method name: Temperature.GetConfig
Method description: Properties:
{"id": {"type": "number", "des...
> Top 1 nodes:
> [Node 1923e2fe-6356-4826-96cc-2b9946ca1ac3] [Similarity score:             0.730295] Method name: Temperature.GetConfig
Method description: Properties:
{"id": {"type": "number", "des...


In [34]:
for n in nodes:
    print(n.get_content())

Method name: Temperature.GetConfig
Method description: Properties:
{"id": {"type": "number", "description": "Id of the Temperature component instance"}}
Find the Temperature.GetConfig response properties in config section


In [14]:
def escape_curly_braces(input_string: str) -> str:
    # Replace '{' with '{{' and '}' with '}}' to escape curly braces
    escaped_string = input_string.replace("{", "{{").replace("}", "}}")
    return escaped_string

In [15]:
user_prompts = ["""Devices: Cover id=1
Functions: {context_str}
Command: {query_str}
JSON:
"""]

In [16]:
# user_prompt_long = """Sentence: Set the kitchen temperature to 25 degrees, adjust the humidity in the living room to 50%,
# turn on the lights in the bedroom, and turn off the fan in the bathroom.
# JSON: """

# user_prompt_very_long = """Sentence: Turn on the lights in the kitchen, set the living room temperature to 22 degrees,
# close the curtains in the bedroom, set the bathroom fan to auto mode, turn off the desk lamp in the office,
# close the garage door, water the garden manually, dim the lights on the bookshelves in the library,
# turn on the air conditioning in the gym, and put the computer in the study to sleep.
# JSON: """

json_scheme_general = {
    "method": {
        "type": "string"
    },
    "params": {
        "type": "object"
    }
}

example_1_json = {
  "method":"Cover.Open",
  "params":
  {
    "id":2
  }
}

example_2_json = {
  "method":"Cover.GetStatus",
  "params":
  {
    "id":0
  }
}

variables = {
    "instruction": """You are 'Al', a helpful AI Assistant that controls the devices in a house. Create a JSON object representing given command. Strictly output in JSON format.""",
    "json_scheme": "The output JSON should follow the next scheme: " + json.dumps(json_scheme_general),
    "devices": """Cover id=1""",
    "user_prompt": user_prompts[0],
    "example_1": """Devices: Cover id=2
Functions: Name: Cover.Open
Description: Preconditions: Cover will not accept the command if: An overvoltage error is set at the time of the request. An undervoltage error is set at the time of the request. An overtemp error is set at the time of the request. An engaged safety_switch prohibits movement in the requested direction. Cover calibration is running at the time of the request. Properties: [{'name': 'id', 'type': 'number', 'description': 'The numeric ID of the Cover component instance'}, {'name': 'duration', 'type': 'number', 'description': 'If duration is not provided, Cover will fully open, unless it times out because of maxtime_open first. If duration (seconds) is provided, Cover will move in the open direction for the specified time. duration must be in the range [0.1..maxtime_open]Optional'}] Response: null on success; error if the request can not be executed or failed 
Command: Open the cover.
JSON: """ + json.dumps(example_1_json),
    "example_2": """Devices: Cover id=0
Functions: Name: Cover.GetStatus
Description: Properties: [{'name': 'id', 'type': 'number', 'description': 'The numeric ID of the Cover component instance'}] Find more about the status properties in status section 
Command: Get cover status.
JSON: """ + json.dumps(example_2_json),
#     "example_3": """
# Sentence: Please set the humidity level in the bedroom to 60 percents, turn on the fan in the bathroom.
# JSON: {
#   "locations": [
#     {
#       "name": "bedroom",
#       "humidity": 60
#     },
#     {
#       "name": "bathroom",
#       "fan": true
#     }
#   ]
# }
# """
}

In [17]:
prompt_template = """
{instruction}
{json_scheme}

{example_1}

{example_2}
"""

final_prompt = escape_curly_braces(prompt_template.format(**variables)) + '\n\n' + variables['user_prompt']

# final_prompt = variables['user_prompt'] + '{cont}'

print(final_prompt)


You are 'Al', a helpful AI Assistant that controls the devices in a house. Create a JSON object representing given command. Strictly output in JSON format.
The output JSON should follow the next scheme: {{"method": {{"type": "string"}}, "params": {{"type": "object"}}}}

Devices: Cover id=2
Functions: Name: Cover.Open
Description: Preconditions: Cover will not accept the command if: An overvoltage error is set at the time of the request. An undervoltage error is set at the time of the request. An overtemp error is set at the time of the request. An engaged safety_switch prohibits movement in the requested direction. Cover calibration is running at the time of the request. Properties: [{{'name': 'id', 'type': 'number', 'description': 'The numeric ID of the Cover component instance'}}, {{'name': 'duration', 'type': 'number', 'description': 'If duration is not provided, Cover will fully open, unless it times out because of maxtime_open first. If duration (seconds) is provided, Cover will 

In [18]:
from llama_index.core import PromptTemplate

qa_prompt = PromptTemplate(final_prompt)

In [35]:
# configure response synthesizer
response_synthesizer = get_response_synthesizer(text_qa_template=qa_prompt) # service_context=service_context

# assemble query engine
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
    # node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.7)],
)

In [20]:
import logging
import sys

logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
logger = logging.getLogger()
logger.addHandler(logging.StreamHandler(stream=sys.stdout))

In [21]:
# logger.disabled = True

In [22]:
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    load_index_from_storage,
    StorageContext,
)
from IPython.display import Markdown, display

In [23]:
# define prompt viewing function
def display_prompt_dict(prompts_dict):
    for k, p in prompts_dict.items():
        text_md = f"**Prompt Key**: {k}<br>" f"**Text:** <br>"
        display(Markdown(text_md))
        print(p.get_template())
        display(Markdown("<br><br>"))

In [24]:
prompts_dict = query_engine.get_prompts()
display_prompt_dict(prompts_dict)

**Prompt Key**: response_synthesizer:text_qa_template<br>**Text:** <br>


You are 'Al', a helpful AI Assistant that controls the devices in a house. Create a JSON object representing given command. Strictly output in JSON format.
The output JSON should follow the next scheme: {{"method": {{"type": "string"}}, "params": {{"type": "object"}}}}

Devices: Cover id=2
Functions: Name: Cover.Open
Description: Preconditions: Cover will not accept the command if: An overvoltage error is set at the time of the request. An undervoltage error is set at the time of the request. An overtemp error is set at the time of the request. An engaged safety_switch prohibits movement in the requested direction. Cover calibration is running at the time of the request. Properties: [{{'name': 'id', 'type': 'number', 'description': 'The numeric ID of the Cover component instance'}}, {{'name': 'duration', 'type': 'number', 'description': 'If duration is not provided, Cover will fully open, unless it times out because of maxtime_open first. If duration (seconds) is provided, Cover will 

<br><br>

**Prompt Key**: response_synthesizer:refine_template<br>**Text:** <br>

The original query is as follows: {query_str}
We have provided an existing answer: {existing_answer}
We have the opportunity to refine the existing answer (only if needed) with some more context below.
------------
{context_msg}
------------
Given the new context, refine the original answer to better answer the query. If the context isn't useful, return the original answer.
Refined Answer: 


<br><br>

In [25]:
queries = ["Close the cover.",
           "I wanna cover to be closed.",
           "Close cover during 20 seconds.",
           "Calibrate cover, please."]

In [36]:
response = query_engine.query(queries[0])

DEBUG:llama_index.core.indices.utils:> Top 1 nodes:
> [Node 8e07999e-d056-4a79-be39-8c3e284783a2] [Similarity score:             0.716042] Method name: Cover.Close
Method description: Preconditions:
Cover will not accept the command if:...
> Top 1 nodes:
> [Node 8e07999e-d056-4a79-be39-8c3e284783a2] [Similarity score:             0.716042] Method name: Cover.Close
Method description: Preconditions:
Cover will not accept the command if:...
DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='/home/vpo/miniconda3/envs/assistant/lib/python3.10/site-packages/certifi/cacert.pem'
load_verify_locations cafile='/home/vpo/miniconda3/envs/assistant/lib/python3.10/site-packages/certifi/cacert.pem'
DEBUG:httpcore.connection:connect_tcp.started host='localhost' port=11434 local_address=None timeout=180.0 socket_options=None
connect_tcp.started host='localhost' port=11

In [38]:
response.source_nodes[0].metadata['file_name']

[NodeWithScore(node=TextNode(id_='8e07999e-d056-4a79-be39-8c3e284783a2', embedding=None, metadata={'file_path': 'data/docs/Cover.Close.md', 'file_name': 'Cover.Close.md', 'file_type': 'text/markdown', 'file_size': 894, 'creation_date': '2024-02-22', 'last_modified_date': '2024-02-22', 'last_accessed_date': '2024-02-22'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='a36a5ee9-bbed-4cac-bc3b-b00c20ccc068', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'file_path': 'data/docs/Cover.Close.md', 'file_name': 'Cover.Close.md', 'file_type': 'text/markdown', 'file_size': 894, 'creation_date': '2024-02-22', 'last_modified_date': '2024-02-22', 'last_accessed_date': '2024-02-22'}, hash='9b826e739db6c478480d0f511db1

In [26]:
for q in queries:
    response = query_engine.query(q)
    print(response)
    print('---------------------\n')

DEBUG:llama_index.core.indices.utils:> Top 4 nodes:
> [Node 8e07999e-d056-4a79-be39-8c3e284783a2] [Similarity score:             0.716042] Method name: Cover.Close
Method description: Preconditions:
Cover will not accept the command if:...
> [Node 6657c503-fc35-45c9-9efb-065bd453ef2d] [Similarity score:             0.667318] Cover config object description: The configuration of the Cover component contains information ab...
> [Node 49a07e8b-dd3f-4583-969a-a8262b4b2992] [Similarity score:             0.666196] Method name: Cover.Open
Method description: Preconditions:
Cover will not accept the command if:
...
> [Node f86c93bd-5e43-4d15-9ae8-5a202a851691] [Similarity score:             0.650387] Cover status object description: The status of the Cover component contains information about the...
> Top 4 nodes:
> [Node 8e07999e-d056-4a79-be39-8c3e284783a2] [Similarity score:             0.716042] Method name: Cover.Close
Method description: Preconditions:
Cover will not accept the command 

DEBUG:httpcore.http11:receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Content-Type', b'application/json; charset=utf-8'), (b'Date', b'Thu, 22 Feb 2024 08:57:58 GMT'), (b'Content-Length', b'533')])
receive_response_headers.complete return_value=(b'HTTP/1.1', 200, b'OK', [(b'Content-Type', b'application/json; charset=utf-8'), (b'Date', b'Thu, 22 Feb 2024 08:57:58 GMT'), (b'Content-Length', b'533')])
INFO:httpx:HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
DEBUG:httpcore.http11:receive_response_body.started request=<Request [b'POST']>
receive_response_body.started request=<Request [b'POST']>
DEBUG:httpcore.http11:receive_response_body.complete
receive_response_body.complete
DEBUG:httpcore.http11:response_closed.started
response_closed.started
DEBUG:httpcore.http11:response_closed.complete
response_closed.complete
DEBUG:httpcore.connection:close.started
close.started
DEB