In [11]:
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    Settings,
    ServiceContext,
    StorageContext,
    load_index_from_storage,
)
from llama_index.core.embeddings.utils import resolve_embed_model
from llama_index.llms.ollama import Ollama
from llama_index.core import VectorStoreIndex, get_response_synthesizer
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core import Settings
from llama_index.core.node_parser.text.sentence import SentenceSplitter

from transformers import AutoTokenizer

import json
import pandas as pd
import random

from llama_cpp import Llama
from llama_cpp import LlamaGrammar

from pathlib import Path

In [12]:
# Project-relative locations used throughout the notebook.

# Method documentation (manual source and the per-method files that get indexed).
GENERATE_METHODS_DIR = Path('data', 'docs', 'manual')
METHODS_DIR = Path('data', 'docs', 'methods')

# Prompt building blocks and rendered prompts, split by generation / validation.
# NOTE(review): PROMPT_SEEDS_DIR and PROMPT_COMPONENTS_DIR point at the same folder — confirm this is intended.
PROMPT_SEEDS_DIR = Path('data', 'prompts', 'generation', 'components')
PROMPT_COMPONENTS_DIR = Path('data', 'prompts', 'generation', 'components')
VAL_PROMPT_COMPONENTS_DIR = Path('data', 'prompts', 'validation', 'components')
GEN_PROMPTS_DIR = Path('data', 'prompts', 'generation', 'output')
VAL_PROMPTS_DIR = Path('data', 'prompts', 'validation', 'output')

# Persisted vector index and per-run result folders.
PERSIST_DIR = Path('data', 'persist_dir')
OUTPUT_DIR = Path('output')

# index generation & loading

In [13]:
# default tokenizer is for gpt-3.5
# llama_index.core.global_tokenizer

In [14]:
# documents = SimpleDirectoryReader(METHODS_DIR).load_data()

# embed_tokenizer = AutoTokenizer.from_pretrained(
#     "BAAI/bge-base-en-v1.5"
# )
# Settings.text_splitter = SentenceSplitter(chunk_size=678, tokenizer=embed_tokenizer)

# Settings.embed_model = resolve_embed_model("local:BAAI/bge-base-en-v1.5")

# index = VectorStoreIndex.from_documents(
#     documents, show_progress=True
# )

# index.storage_context.persist(persist_dir=PERSIST_DIR)

In [15]:
# Register the local BGE embedding model globally before the index is loaded;
# it is the same model named in the commented-out index-building cell.
Settings.embed_model = resolve_embed_model(
    "local:BAAI/bge-base-en-v1.5"
)

# Point a storage context at the previously persisted index files ...
storage_context = StorageContext.from_defaults(
    persist_dir=PERSIST_DIR,
)

# ... and materialise the index from them.
index = load_index_from_storage(
    storage_context,
    show_progress=True,
)

In [16]:
# Retriever over the loaded index; similarity_top_k=1 asks for a single best match per query.
retriever = VectorIndexRetriever(index=index, similarity_top_k=1)

# check index

In [17]:
# Dump the reference-document table of the loaded `index` to a text file so the
# indexed content can be inspected by hand.
ref_doc_info = index.ref_doc_info

index_dump_path = Path('temp/index.txt')
# Create the dump folder on demand; otherwise open() fails on a fresh checkout.
index_dump_path.parent.mkdir(parents=True, exist_ok=True)

# Explicit UTF-8 so non-ASCII document text does not depend on the platform default.
with open(index_dump_path, 'w', encoding='utf-8') as f:
    # Keys are the ids stored in `ref_doc_info`; the "Node ..." labels are kept
    # as-is so previously generated dumps remain comparable.
    for node_id, node_info in ref_doc_info.items():
        f.write(f"Node ID: {node_id}\n")
        f.write(f"Node Info: {node_info}\n")

In [18]:
# r = retriever.retrieve('Get status of a cover.')
# print(r)

# llm loading

In [20]:
# Load the quantised Mistral-7B-Instruct GGUF model through llama-cpp.
# Other quantisations noted by the author as alternatives:
#   mistral-7b-instruct-v0.2.Q4_0.gguf, mistral-7b-instruct-v0.2.Q4_K_M.gguf
llm = Llama(
    'models/mistral-7b-instruct-v0.2.Q6_K.gguf',
    n_ctx=3000,       # context window requested for this session
    verbose=True,     # keeps the loader / timing logs visible in the cell output
    n_gpu_layers=-1,  # presumably "offload every layer to the GPU" — confirm against llama-cpp-python docs
)

ggml_init_cublas: GGML_CUDA_FORCE_MMQ:   no
ggml_init_cublas: CUDA_USE_TENSOR_CORES: yes
ggml_init_cublas: found 1 CUDA devices:
  Device 0: NVIDIA GeForce RTX 2080, compute capability 7.5, VMM: yes
llama_model_loader: loaded meta data with 24 key-value pairs and 291 tensors from models/mistral-7b-instruct-v0.2.Q6_K.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.2
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:        

In [21]:
# Smoke test: send one free-form question to the model (no grammar, no few-shot
# examples) to check that it loads and answers.
prompt = (
    "\n"
    "Command: Please, close the living room blinds slowly for 20 seconds.\n"
    "\n"
    "What is the function user want to call in smart home?\n"
)
smoke_test_messages = [{'role': 'user', 'content': prompt}]
# The reply is capped at 100 tokens; the call is the cell's last expression so
# the raw completion dict is displayed.
llm.create_chat_completion(messages=smoke_test_messages, max_tokens=100)


llama_print_timings:        load time =     106.52 ms
llama_print_timings:      sample time =      37.16 ms /   100 runs   (    0.37 ms per token,  2691.21 tokens per second)
llama_print_timings: prompt eval time =     106.00 ms /    43 tokens (    2.47 ms per token,   405.68 tokens per second)
llama_print_timings:        eval time =    1698.39 ms /    99 runs   (   17.16 ms per token,    58.29 tokens per second)
llama_print_timings:       total time =    2013.33 ms /   142 tokens


{'id': 'chatcmpl-f786b778-41ef-48f4-8f05-57fb3ed3ef4a',
 'object': 'chat.completion',
 'created': 1710750205,
 'model': 'models/mistral-7b-instruct-v0.2.Q6_K.gguf',
 'choices': [{'index': 0,
   'message': {'role': 'assistant',
    'content': ' The user wants to control the living room blinds to close them gradually over a period of 20 seconds. In a smart home system, this function can typically be achieved by sending a command to the blinds controller or automation hub to close the blinds and specifying a slow speed or duration setting. This could be implemented using various protocols such as Z-Wave, Zigbee, Wi-Fi, or Bluetooth, depending on the specific smart home system being used.'},
   'finish_reason': 'length'}],
 'usage': {'prompt_tokens': 43, 'completion_tokens': 100, 'total_tokens': 143}}

In [22]:
# Shell escape: print the GPU status table (memory in use, utilisation) after the model was loaded.
!nvidia-smi

Mon Mar 18 10:23:29 2024       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:01:00.0 Off |                  N/A |
| 22%   26C    P2    51W / 215W |   7670MiB /  8192MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# prompt preparation

In [23]:
# Shape of the object the model is asked to produce: a method name plus a
# free-form params object. It is serialised into the prompt text.
json_scheme_prompt = {
    "method": {"type": "string"},
    "params": {"type": "object"},
}

# Expected answer for the first few-shot example (open cover 2, no duration).
example_1_json = {"method": "Cover.Open", "params": {"id": 2}}

# Expected answer for the second few-shot example (close cover 0 for 5 seconds).
example_2_json = {"method": "Cover.Close", "params": {"id": 0, "duration": 5}}

# System-style instruction text that opens every prompt, kept in a markdown file.
instruction = (VAL_PROMPT_COMPONENTS_DIR / 'instruction.md').read_text()

# Values substituted into the prompt template via str.format(**variables).
# Each few-shot example is a literal block made of: the device list, one API
# method description (preconditions, properties, response), the user command
# and the expected JSON answer serialised with json.dumps.
variables = {
    "instruction": instruction,
    "json_scheme": "The output JSON should follow the next scheme: " + json.dumps(json_scheme_prompt),
    # NOTE(review): `base_prompt_template` in this notebook has no {devices}
    # placeholder, so this entry looks unused there — confirm before removing.
    "devices": """Cover id=1""",
    # Few-shot example 1: Cover.Open without a duration.
    "example_1": """Devices: Cover id=2
Methods:
API method 1:
Method name: Cover.Open
Method description: Preconditions:
Cover will not accept the command if:
An  overvoltage  error is set at the time of the request.
An  undervoltage  error is set at the time of the request.
An  overtemp  error is set at the time of the request.
An engaged  safety_switch  prohibits movement in the requested direction.
Cover  calibration is running at the time of the request
Properties:
{"id": {"type": "number", "description": "The numeric ID of the Cover component instance"}, "duration": {"type": "number", "description": "If duration is not provided, Cover will fully open, unless it times out because of maxtime_open first. If duration (seconds) is provided, Cover will move in the open direction for the specified time. duration must be in the range [0.1..maxtime_open]Optional"}}
Response:
null on success; error if the request can not be executed or failed

Command: Open the cover.
JSON: """ + json.dumps(example_1_json),

    # Few-shot example 2: Cover.Close with an explicit duration taken from the command.
    "example_2": """Devices: Cover id=0
Methods: 
API method 1:
Method name: Cover.Close
Method description: Preconditions:
Cover will not accept the command if:
An  overvoltage  error is set at the time of the request.
An  undervoltage  error is set at the time of the request.
An  overtemp  error is set at the time of the request.
An engaged  safety_switch  prohibits movement in the requested direction.
Cover  calibration is running at the time of the request
Properties:
{"id": {"type": "number", "description": "The numeric ID of the Cover component instance"}, "duration": {"type": "number", "description": "If duration is not provided, Cover will fully close, unless it times out because of maxtime_close first. If duration (seconds) is provided, Cover will move in the close direction for the specified time. duration must be in the range [0.1..maxtime_open]Optional"}}
Response:
null on success; error if the request can not be executed or failed

Command: Close the kitchen cover quickly (for 5 seconds).
JSON: """ + json.dumps(example_2_json),
}

In [24]:
# Fixed prompt prefix: instruction, JSON scheme sentence, then the two few-shot
# examples, each separated by a blank line.
base_prompt_template = (
    "\n"
    "{instruction}\n"
    "{json_scheme}\n"
    "\n"
    "{example_1}\n"
    "\n"
    "{example_2}\n"
)

# Per-command suffix; it mirrors the layout of the few-shot examples and ends
# right after "JSON:" so the model continues with the JSON object.
user_prompt_template = (
    "Devices: {env}\n"
    "Methods:\n"
    "{methods_description}\n"
    "Command: {user_cmd}\n"
    "JSON:\n"
)

# Render the shared prefix once; it is reused for every dataset row.
base_prompt = base_prompt_template.format(**variables)

print(base_prompt)


You are a helpful AI Assistant that controls the devices in a house. For a given user command create a corresponding JSON object. Don't add properties with null value in output JSON object. Output must be strictly in JSON format.
The output JSON should follow the next scheme: {"method": {"type": "string"}, "params": {"type": "object"}}

Devices: Cover id=2
Methods:
API method 1:
Method name: Cover.Open
Method description: Preconditions:
Cover will not accept the command if:
An  overvoltage  error is set at the time of the request.
An  undervoltage  error is set at the time of the request.
An  overtemp  error is set at the time of the request.
An engaged  safety_switch  prohibits movement in the requested direction.
Cover  calibration is running at the time of the request
Properties:
{"id": {"type": "number", "description": "The numeric ID of the Cover component instance"}, "duration": {"type": "number", "description": "If duration is not provided, Cover will fully open, unless it time

# logging

In [25]:
# import logging
# import sys

# logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
# logger = logging.getLogger()
# logger.addHandler(logging.StreamHandler(stream=sys.stdout))

In [26]:
# logger.disabled = True

# JSON generation

In [27]:
# Read the GBNF grammar for JSON and compile it; it is passed to the model
# during generation to constrain the output.
grammar_str = Path('data/grammars/json.gbnf').read_text()
llama_grammar = LlamaGrammar.from_string(grammar_str)
print(llama_grammar)

<llama_cpp.llama_grammar.LlamaGrammar object at 0x7f8ad49679a0>


from_string grammar:
root ::= object 
object ::= [{] ws object_11 [}] ws 
value ::= object | array | string | number | value_6 ws 
array ::= [[] ws array_15 []] ws 
string ::= ["] string_18 ["] ws 
number ::= number_19 number_25 number_29 ws 
value_6 ::= [t] [r] [u] [e] | [f] [a] [l] [s] [e] | [n] [u] [l] [l] 
ws ::= ws_31 
object_8 ::= string [:] ws value object_10 
object_9 ::= [,] ws string [:] ws value 
object_10 ::= object_9 object_10 | 
object_11 ::= object_8 | 
array_12 ::= value array_14 
array_13 ::= [,] ws value 
array_14 ::= array_13 array_14 | 
array_15 ::= array_12 | 
string_16 ::= [^"\<U+0000>-<U+001F>] | [\] string_17 
string_17 ::= ["\/bfnrt] | [u] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] 
string_18 ::= string_16 string_18 | 
number_19 ::= number_20 number_21 
number_20 ::= [-] | 
number_21 ::= [0-9] | [1-9] number_22 
number_22 ::= [0-9] number_22 | 
number_23 ::= [.] number_24 
number_24 ::= [0-9] number_24 | [0-9] 
number_25 ::= number_23 | 
number_26 ::= [eE

In [28]:
def _next_output_dir(base_dir):
    """Return ``base_dir / 'output<N>'`` with N one past the highest existing run number.

    Only entries named ``output<digits>`` are considered, so stray files
    (e.g. ``.DS_Store``) no longer break the numbering. ``base_dir`` is created
    if missing; with no previous runs the first folder is ``output1``.
    """
    base_dir.mkdir(parents=True, exist_ok=True)
    run_numbers = []
    for entry in base_dir.iterdir():
        suffix = entry.name[len('output'):] if entry.name.startswith('output') else ''
        if suffix.isdigit():
            run_numbers.append(int(suffix))
    return base_dir / f'output{max(run_numbers, default=0) + 1}'


def _strip_md_suffix(file_name):
    """Drop a trailing ``.md`` extension, leaving any other ``.md`` substring untouched."""
    return file_name[:-len('.md')] if file_name.endswith('.md') else file_name


limit_rows = -1 # 20
selected_devices = None # ['Smoke', 'Humidity']
# Fixed seed so the randomly chosen distractor devices are the same on every run.
DISTRACTOR_SEED = 42

df = pd.read_csv('data/datasets/dataset_v2.csv')
devices = list(df['device'].unique())
if selected_devices:
    df = df[df['device'].isin(selected_devices)].sort_index()

# Apply the row limit by position. The previous check compared against the
# index label and was skipped whenever a row failed JSON parsing.
rows_to_process = df.head(limit_rows) if limit_rows > 0 else df
distractor_rng = random.Random(DISTRACTOR_SEED)

# Collect plain records and build the DataFrame once at the end.
output_records = []
for _, row in rows_to_process.iterrows():
    user_cmd = row['user_cmd']
    device = row['device']

    # Environment shown to the model: two other device types plus the target device.
    other_devices = [d for d in devices if d != device]
    distractors = distractor_rng.sample(other_devices, k=2)
    env = f'{distractors[0]} id=1, {distractors[1]} id=2, {device} id=444'

    # The query prefix is prepended to the command before retrieval.
    retrieval_prompt = "Represent this sentence for searching relevant passages: " + user_cmd
    retrieved_nodes = retriever.retrieve(retrieval_prompt)
    if not retrieved_nodes:
        print(f'No method retrieved for command: {user_cmd}')
        continue
    top_node = retrieved_nodes[0]
    methods_description = f'API method 1:\n{top_node.text}'
    # The predicted method name is the retrieved file's name without its extension.
    method_name = _strip_md_suffix(top_node.metadata['file_name'])

    user_prompt = user_prompt_template.format(env=env,
                                              methods_description=methods_description,
                                              user_cmd=user_cmd)
    llm_prompt = base_prompt + '\n\n' + user_prompt

    response = llm.create_chat_completion(
        messages=[
            {'role': 'user', 'content': llm_prompt}
        ],
        grammar=llama_grammar
    )
    response_text = response['choices'][0]['message']['content']

    # Undo markdown-style escaping of underscores; '\\_' is the valid spelling of
    # the former '\_' literal (same value, no invalid-escape warning).
    response_text = response_text.replace('\\_', '_')
    try:
        # Round-trip through json to validate and normalise the formatting.
        json_cmd = json.dumps(json.loads(response_text))
    except ValueError as ex:  # json.JSONDecodeError is a ValueError
        print(response_text)
        print(ex)
        continue

    output_records.append({'id': row['id'], 'device': row['device'], 'user_cmd': user_cmd,
                           'mtd': method_name, 'json_cmd': json_cmd})

output_df = pd.DataFrame(output_records, columns=['id', 'device', 'user_cmd', 'mtd', 'json_cmd'])

# Each run gets its own numbered folder under OUTPUT_DIR.
CUR_OUTPUT_DIR = _next_output_dir(OUTPUT_DIR)
CUR_OUTPUT_DIR.mkdir(exist_ok=True, parents=True)
output_df.to_csv(CUR_OUTPUT_DIR / 'output.csv', index=False, header=True, mode='w')

Llama.generate: prefix-match hit

llama_print_timings:        load time =     106.52 ms
llama_print_timings:      sample time =     231.05 ms /    34 runs   (    6.80 ms per token,   147.15 tokens per second)
llama_print_timings: prompt eval time =     483.96 ms /   857 tokens (    0.56 ms per token,  1770.82 tokens per second)
llama_print_timings:        eval time =     605.48 ms /    33 runs   (   18.35 ms per token,    54.50 tokens per second)
llama_print_timings:       total time =    1412.47 ms /   890 tokens
Llama.generate: prefix-match hit

llama_print_timings:        load time =     106.52 ms
llama_print_timings:      sample time =     230.47 ms /    34 runs   (    6.78 ms per token,   147.52 tokens per second)
llama_print_timings: prompt eval time =     163.45 ms /   215 tokens (    0.76 ms per token,  1315.40 tokens per second)
llama_print_timings:        eval time =     603.69 ms /    33 runs   (   18.29 ms per token,    54.66 tokens per second)
llama_print_timings:       to

In [29]:
# Show the last generated rows as a quick sanity check of the run.
output_df.tail()

Unnamed: 0,id,device,user_cmd,mtd,json_cmd
192,192,Temperature,Could you lower the temperature to 18 degrees ...,Temperature.SetTemperature,"{""method"": ""Temperature.SetTemperature"", ""para..."
193,193,Temperature,Set the temperature to 20 degrees in the bedroom.,Temperature.SetTemperature,"{""method"": ""Temperature.SetTemperature"", ""para..."
194,194,Humidity,Can you increase the humidity in the living ro...,Humidity.SetHumidity,"{""method"": ""Humidity.SetHumidity"", ""params"": {..."
195,195,Humidity,Set the humidity to 50% in the bedroom.,Humidity.SetHumidity,"{""method"": ""Humidity.SetHumidity"", ""params"": {..."
196,196,Humidity,Decrease the humidity in the kitchen to 40 per...,Humidity.SetHumidity,"{""method"": ""Humidity.SetHumidity"", ""params"": {..."


# evaluation

In [38]:
# Compare the generated commands of the current run against the ground truth.
df = pd.read_csv('data/datasets/dataset_v2.csv')
output_df = pd.read_csv(CUR_OUTPUT_DIR / 'output.csv')
# Inner join on 'id': dataset rows with no generated output are dropped here,
# so they are not part of the accuracy denominators below. Compare the two
# lengths printed at the end of the cell to see how many rows are missing.
merged_df = df.merge(output_df, how='inner', on='id', suffixes=("_gt", "_pred"))

report_columns = ['device', 'user_cmd', 'gt_mtd', 'pred_mtd', 'gt_json_cmd', 'pred_json_cmd']
incorrect_records = []
correct_records = []
correct_methods = 0
correct_json_cmds = 0
for _, row in merged_df.iterrows():
    method_ok = row['mtd_gt'] == row['mtd_pred']
    try:
        # Compare parsed objects so key order and whitespace do not matter.
        json_ok = json.loads(row['json_cmd_gt']) == json.loads(row['json_cmd_pred'])
    except (TypeError, ValueError) as ex:
        # Unparseable or missing JSON on either side: report it and count the
        # row as incorrect (previously such rows could land in the "correct" file).
        print(ex)
        print(row['id'])
        print(row['json_cmd_gt'])
        print(row['json_cmd_pred'])
        json_ok = False

    correct_methods += int(method_ok)
    correct_json_cmds += int(json_ok)

    record = {'device': row['device_gt'],
              'user_cmd': row['user_cmd_gt'], 'gt_mtd': row['mtd_gt'],
              'pred_mtd': row['mtd_pred'], 'gt_json_cmd': row['json_cmd_gt'],
              'pred_json_cmd': row['json_cmd_pred']}
    # A row is correct only when both the method and the JSON command match.
    if method_ok and json_ok:
        correct_records.append(record)
    else:
        incorrect_records.append(record)

incorrect_output_df = pd.DataFrame(incorrect_records, columns=report_columns)
correct_output_df = pd.DataFrame(correct_records, columns=report_columns)

# Guard against an empty merge (e.g. no row produced valid JSON).
total_rows = len(merged_df)
acc_methods = round(correct_methods / total_rows, 2) if total_rows else 0.0
acc_json_cmds = round(correct_json_cmds / total_rows, 2) if total_rows else 0.0

incorrect_output_df.to_csv(CUR_OUTPUT_DIR / 'incorrect_output.csv', index=False)
correct_output_df.to_csv(CUR_OUTPUT_DIR / 'correct_output.csv', index=False)

# Append mode: re-running the cell adds another result block to the file.
with open(CUR_OUTPUT_DIR / 'results.txt', 'a') as f:
    f.write(f'Acc of methods: {acc_methods}\n'
            f'Acc of json cmds: {acc_json_cmds}\n\n'
            f'-----------------------------------\n\n')

print(len(df))
print(len(merged_df))

197
197
