In [1]:
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    Settings,
    ServiceContext,
    StorageContext,
    load_index_from_storage,
)
from llama_index.core.embeddings.utils import resolve_embed_model
from llama_index.llms.ollama import Ollama
from llama_index.core import VectorStoreIndex, get_response_synthesizer
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core import Settings
import json
import pandas as pd
import random
from llama_cpp import Llama

In [2]:
# documents = SimpleDirectoryReader("data/docs/methods/").load_data()

# embed_model = resolve_embed_model("local:BAAI/bge-small-en-v1.5")

# service_context = ServiceContext.from_defaults(embed_model=embed_model, llm=None)

# index = VectorStoreIndex.from_documents(
#     documents, show_progress=True, service_context=service_context
# )

# index.storage_context.persist(persist_dir="data/persist_dir")

In [3]:
# Alternative backend kept for reference (remote Ollama server instead of a local GGUF file):
# llm = Ollama(model="mistral", request_timeout=180.0) # , base_url="http://62.80.172.138:11434"

# Local llama.cpp model. Other quantisations referenced in this notebook:
# mistral-7b-instruct-v0.2.Q4_0.gguf mistral-7b-instruct-v0.2.Q4_K_M.gguf
model_file = 'models/mistral-7b-instruct-v0.2.Q5_K_M.gguf'
llm = Llama(
    model_file,
    n_ctx=2048,
    verbose=True,
    n_gpu_layers=-1,
)

ggml_init_cublas: GGML_CUDA_FORCE_MMQ:   no
ggml_init_cublas: CUDA_USE_TENSOR_CORES: yes
ggml_init_cublas: found 1 CUDA devices:
  Device 0: NVIDIA GeForce RTX 2080, compute capability 7.5, VMM: yes
llama_model_loader: loaded meta data with 24 key-value pairs and 291 tensors from models/mistral-7b-instruct-v0.2.Q5_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.2
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:      

In [4]:
# Quick sanity check that the loaded model answers a plain chat request.
sanity_messages = [{'role': 'user', 'content': 'Why is sky blue?'}]
llm.create_chat_completion(
    messages=sanity_messages,
    max_tokens=100,
)


llama_print_timings:        load time =     124.81 ms
llama_print_timings:      sample time =      38.93 ms /   100 runs   (    0.39 ms per token,  2568.78 tokens per second)
llama_print_timings: prompt eval time =     124.69 ms /    14 tokens (    8.91 ms per token,   112.28 tokens per second)
llama_print_timings:        eval time =    1505.38 ms /    99 runs   (   15.21 ms per token,    65.76 tokens per second)
llama_print_timings:       total time =    1837.02 ms /   113 tokens


{'id': 'chatcmpl-b3c5b341-cff1-44ea-9529-6843334ca973',
 'object': 'chat.completion',
 'created': 1710246469,
 'model': 'models/mistral-7b-instruct-v0.2.Q5_K_M.gguf',
 'choices': [{'index': 0,
   'message': {'role': 'assistant',
    'content': " The color of the sky appears blue due to a process called Rayleigh scattering. When sunlight, which is made up of different colors, enters Earth's atmosphere, it interacts with molecules and particles in the air, such as nitrogen and oxygen. These molecules scatter the sunlight in all directions. Blue light is scattered more easily than other colors because it travels in smaller, shorter waves. As a result, when we look up at the sky, we primarily see the blue light that"},
   'finish_reason': 'length'}],
 'usage': {'prompt_tokens': 14, 'completion_tokens': 100, 'total_tokens': 114}}

In [8]:
# Show the GPU status table (memory usage, utilisation) via IPython's `!` shell escape.
!nvidia-smi

Wed Mar  6 16:10:04 2024       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:01:00.0 Off |                  N/A |
| 22%   25C    P2    51W / 215W |    918MiB /  8192MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [7]:
# Embedding model used to embed queries against the stored index.
Settings.embed_model = resolve_embed_model("local:BAAI/bge-small-en-v1.5")

# Restore the previously persisted vector index from disk rather than rebuilding it.
persist_dir = "data/persist_dir/"
storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
index = load_index_from_storage(
    storage_context,
    show_progress=True,
)  # , service_context=service_context

In [9]:
# Retriever over the loaded index that returns only the single best match per query.
retriever = VectorIndexRetriever(index=index, similarity_top_k=1)

In [10]:
# Minimal schema description that is serialised into the prompt text.
json_scheme_prompt = {
    "method": {"type": "string"},
    "params": {"type": "object"},
}

# Expected JSON answers for the two few-shot examples embedded in the prompt.
# Key order matters because these dicts are serialised with json.dumps.
example_1_json = {"method": "Cover.Open", "params": {"id": 2}}
example_2_json = {"method": "Cover.GetStatus", "params": {"id": 0}}

# Read the instruction text that opens every prompt. The encoding is pinned so the
# file decodes the same way regardless of the platform's default locale encoding.
with open('data/prompts/validation/instruction.md', encoding='utf-8') as f:
    instruction = f.read()

# Few-shot example 1: device list, retrieved method description, user command and
# the expected JSON answer. Lines are spelled out with explicit "\n" so that the
# trailing spaces present in the method descriptions stay visible.
example_1_prompt = (
    "Devices: Cover id=2\n"
    "Methods:\n"
    "API method 1: Cover.Open\n"
    "Description: Preconditions: Cover will not accept the command if: "
    "An overvoltage error is set at the time of the request. "
    "An undervoltage error is set at the time of the request. "
    "An overtemp error is set at the time of the request. "
    "An engaged safety_switch prohibits movement in the requested direction. "
    "Cover calibration is running at the time of the request. "
    "Properties: [{'name': 'id', 'type': 'number', 'description': 'The numeric ID of the Cover component instance'}, "
    "{'name': 'duration', 'type': 'number', 'description': 'If duration is not provided, Cover will fully open, unless it times out because of maxtime_open first. "
    "If duration (seconds) is provided, Cover will move in the open direction for the specified time. "
    "duration must be in the range [0.1..maxtime_open]Optional'}] "
    "Response: null on success; error if the request can not be executed or failed \n"
    "Command: Open the cover.\n"
    "JSON: "
) + json.dumps(example_1_json)

# Few-shot example 2, same layout as example 1.
example_2_prompt = (
    "Devices: Cover id=0\n"
    "Methods: \n"
    "API method 1: Cover.GetStatus\n"
    "Description: Properties: [{'name': 'id', 'type': 'number', 'description': 'The numeric ID of the Cover component instance'}] "
    "Find more about the status properties in status section \n"
    "Command: Get cover status.\n"
    "JSON: "
) + json.dumps(example_2_json)

# Values substituted into the base prompt template.
variables = dict(
    instruction=instruction,
    json_scheme="The output JSON should follow the next scheme: " + json.dumps(json_scheme_prompt),
    devices="Cover id=1",
    example_1=example_1_prompt,
    example_2=example_2_prompt,
)

In [11]:
# Static part of the prompt: instruction, schema hint and the two few-shot examples.
base_prompt_template = (
    "\n"
    "{instruction}\n"
    "{json_scheme}\n"
    "\n"
    "{example_1}\n"
    "\n"
    "{example_2}\n"
)

# Per-command part; it mirrors the layout of the few-shot examples and ends on an
# open "JSON:" line for the model to complete.
user_prompt_template = (
    "Devices: {env}\n"
    "Methods:\n"
    "{methods_description}\n"
    "Command: {user_cmd}\n"
    "JSON:\n"
)

# Fill the static template once; keys in `variables` that the template does not
# reference are simply not used.
base_prompt = base_prompt_template.format_map(variables)

print(base_prompt)


You are a helpful AI Assistant that controls the devices in a house. For a given user command create a corresponding JSON object. Don't add properties with null value in output JSON object. Output must be strictly in JSON format.
The output JSON should follow the next scheme: {"method": {"type": "string"}, "params": {"type": "object"}}

Devices: Cover id=2
Methods:
API method 1: Cover.Open
Description: Preconditions: Cover will not accept the command if: An overvoltage error is set at the time of the request. An undervoltage error is set at the time of the request. An overtemp error is set at the time of the request. An engaged safety_switch prohibits movement in the requested direction. Cover calibration is running at the time of the request. Properties: [{'name': 'id', 'type': 'number', 'description': 'The numeric ID of the Cover component instance'}, {'name': 'duration', 'type': 'number', 'description': 'If duration is not provided, Cover will fully open, unless it times out becaus

In [12]:
# import logging
# import sys

# logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
# logger = logging.getLogger()
# logger.addHandler(logging.StreamHandler(stream=sys.stdout))

In [13]:
# logger.disabled = True

The larger Q4_K_M model generates more complex output that includes unnecessary parameters. Possible fix: parameter tuning. \
The Q4_0 model, by contrast, tries to escape the `_` character with `\` inside object property names. Possible fix: a grammar.

In [14]:
from llama_cpp import LlamaGrammar

In [15]:
# json_scheme_general = {
#     "type": "object",
#     "properties": {
#         "method": {
#             "type": "string"
#         },
#         "params": {
#             "type": "object",
#             # doesn't work without properties defined.
#             # however, json schema is valid even without 'properties' field
#             # https://stackoverflow.com/questions/42977208/json-schema-without-properties-keyword
#             # "properties": {
#             #     "config": {
#             #         "type": "string"
#             #     }
#             # }
#         }
#     }
# }
# s = str(json_scheme_general).replace("'", '"')
# print(s)
# gr = LlamaGrammar.from_json_schema(s)
# print(gr)

In [16]:
# Load the GBNF grammar text used to constrain generation. The encoding is pinned so
# the file decodes the same way regardless of the platform's default locale encoding.
with open('data/grammars/json.gbnf', encoding='utf-8') as f:
    grammar_str = f.read()
print(grammar_str)

root   ::= object
value  ::= object | array | string | number | ("true" | "false" | "null") ws

object ::=
  "{" ws (
            string ":" ws value
    ("," ws string ":" ws value)*
  )? "}" ws

array  ::=
  "[" ws (
            value
    ("," ws value)*
  )? "]" ws

string ::=
  "\"" (
    [^"\\\x7F\x00-\x1F] |
    "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
  )* "\"" ws

number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws

# Optional space: by convention, applied in this grammar after literal chars when allowed
# Removed recursive whitespace
ws ::= ([ \t\n])?



In [17]:
# Compile the grammar text into a LlamaGrammar object; it is later passed as
# `grammar=` to `llm.create_chat_completion`.
grammar = LlamaGrammar.from_string(grammar_str)
# NOTE(review): judging by the saved output, print() only shows the object's repr;
# the rule listing under "from_string grammar:" appears to come from from_string itself.
print(grammar)

<llama_cpp.llama_grammar.LlamaGrammar object at 0x7f403d2b3b20>


from_string grammar:
root ::= object 
object ::= [{] ws object_11 [}] ws 
value ::= object | array | string | number | value_6 ws 
array ::= [[] ws array_15 []] ws 
string ::= ["] string_18 ["] ws 
number ::= number_19 number_25 number_29 ws 
value_6 ::= [t] [r] [u] [e] | [f] [a] [l] [s] [e] | [n] [u] [l] [l] 
ws ::= ws_31 
object_8 ::= string [:] ws value object_10 
object_9 ::= [,] ws string [:] ws value 
object_10 ::= object_9 object_10 | 
object_11 ::= object_8 | 
array_12 ::= value array_14 
array_13 ::= [,] ws value 
array_14 ::= array_13 array_14 | 
array_15 ::= array_12 | 
string_16 ::= [^"\<U+0000>-<U+001F>] | [\] string_17 
string_17 ::= ["\/bfnrt] | [u] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] 
string_18 ::= string_16 string_18 | 
number_19 ::= number_20 number_21 
number_20 ::= [-] | 
number_21 ::= [0-9] | [1-9] number_22 
number_22 ::= [0-9] number_22 | 
number_23 ::= [.] number_24 
number_24 ::= [0-9] number_24 | [0-9] 
number_25 ::= number_23 | 
number_26 ::= [eE

In [18]:
# Generate a JSON command for every selected dataset row and append the results
# to output/output.csv.
#
# For each row: build a device environment (two random distractor devices plus the
# target device), retrieve the best-matching method description, prompt the model
# with grammar-constrained decoding and store the normalised JSON answer.
import os  # only needed here, to decide whether the CSV header must be written

limit_rows = -1 # 20; values <= 0 mean "process every selected row"
selected_devices = ['Smoke', 'Humidity']
random_seed = 42  # fixed seed so the distractor devices are reproducible between runs
output_path = 'output/output.csv'
output_columns = ['device', 'user_cmd', 'mtd', 'json_cmd']

df = pd.read_csv('data/datasets/dataset_v1.csv')
# Distractors are drawn from all devices in the dataset, not only the selected ones.
devices = list(df['device'].unique())
if selected_devices:
    df = df[df['device'].isin(selected_devices)].sort_index()

# Limit by number of rows actually processed. The index labels returned by
# iterrows() are the original row numbers, so after filtering they cannot be
# compared against limit_rows.
rows_to_process = df.head(limit_rows) if limit_rows > 0 else df

rng = random.Random(random_seed)
records = []
for _, row in rows_to_process.iterrows():
    user_cmd = row['user_cmd']
    device = row['device']

    # Two other devices act as distractors next to the device the command targets.
    other_devices = [d for d in devices if d != device]
    distractors = rng.sample(other_devices, k=2)
    env = f'{distractors[0]} id=1, {distractors[1]} id=2, {device} id=444'

    # The retriever returns the single best match; its file name (minus ".md")
    # is recorded as the predicted method.
    top_node = retriever.retrieve(user_cmd)[0]
    methods_description = f'API method 1:\n{top_node.text}'
    method = top_node.metadata['file_name'].replace('.md', '')

    user_prompt = user_prompt_template.format(
        env=env,
        methods_description=methods_description,
        user_cmd=user_cmd,
    )
    query = base_prompt + '\n\n' + user_prompt
    response = llm.create_chat_completion(
        messages=[
            {'role': 'user', 'content': query}
        ],
        grammar=grammar
        # response_format={"type": "json_object",
        #                 "schema": json_scheme_general} #
    )
    response_text = response['choices'][0]['message']['content']
    # Undo markdown-style escaping of underscores ("\_" -> "_"). The backslash is
    # written as "\\" because "\_" is not a valid Python escape sequence.
    response_text = response_text.replace('\\_', '_')
    try:
        # Round-trip through json to normalise formatting.
        json_cmd = json.dumps(json.loads(response_text))
    except json.JSONDecodeError as ex:
        # Keep the raw text instead of aborting the whole run (results are only
        # written after the loop); such rows will not parse during evaluation.
        print(f'Invalid JSON for command {user_cmd!r}: {ex}')
        json_cmd = response_text

    records.append({'device': device, 'user_cmd': user_cmd, 'mtd': method, 'json_cmd': json_cmd})

# Build the frame once instead of growing it row by row.
output_df = pd.DataFrame(records, columns=output_columns)

# Results are appended across runs; the header is written only when the file is
# new or empty so that pd.read_csv() can later find the column names.
write_header = not os.path.exists(output_path) or os.path.getsize(output_path) == 0
output_df.to_csv(output_path, index=False, header=write_header, mode='a')

In [18]:
# Preview the first rows of the generated results.
output_df.head()

Unnamed: 0,device,user_cmd,mtd,json_cmd
0,Switch,Set the dining room switch to flip mode.,Switch.SetConfig,"{""method"": ""Switch.SetConfig"", ""params"": {""id""..."
1,Switch,Set the living room switch to cycle mode.,Switch.SetConfig,"{""method"": ""Switch.SetConfig"", ""params"": {""id""..."
2,Switch,Enable Automatic OFF for the kitchen switch wi...,Switch.GetConfig,"{""method"": ""Switch.SetConfig"", ""params"": {""id""..."
3,Switch,Set the bedroom switch name to 'Bed Lamp'.,Light.SetConfig,"{""method"": ""Switch.SetConfig"", ""params"": {""id""..."
4,Switch,Set the hallway switch power limit to 50 Watts.,Switch.SetConfig,"{""method"": ""Switch.SetConfig"", ""params"": {""id""..."


In [21]:
# Evaluate the generated commands against the ground-truth dataset.
#
# Writes the correctly and incorrectly answered rows to separate CSV files and
# appends the method / JSON accuracies to output/results.txt.
import ast

df = pd.read_csv('data/datasets/dataset_v1.csv')
output_df = pd.read_csv('output/output.csv')

report_columns = ['device', 'user_cmd', 'gt_mtd', 'output_mtd', 'gt_json_cmd', 'output_json_cmd']

# Ground truth is looked up by (device, user_cmd) rather than by row position:
# the output file may cover only a subset of devices, so row i of the output is
# not necessarily row i of the dataset. The first occurrence wins on duplicates.
gt_by_key = {}
for _, gt_row in df.iterrows():
    gt_by_key.setdefault((gt_row['device'], gt_row['user_cmd']), gt_row)

correct_methods = 0
correct_json_cmds = 0
evaluated = 0
correct_records = []
incorrect_records = []
for i, output in output_df.iterrows():
    gt = gt_by_key.get((output['device'], output['user_cmd']))
    if gt is None:
        print(f'{i}: no ground-truth row for {output["device"]!r} / {output["user_cmd"]!r}; skipped')
        continue
    evaluated += 1

    method_matches = gt['mtd'] == output['mtd']
    if method_matches:
        correct_methods += 1

    # A row whose JSON cannot be parsed or compared counts as incorrect.
    json_matches = False
    # NOTE(review): the quote replacement assumes the ground-truth strings contain
    # no apostrophes inside values - confirm against the dataset.
    gt_json_text = str(gt['json_cmd']).replace("'", '"')
    try:
        json_matches = ast.literal_eval(gt_json_text) == json.loads(output['json_cmd'])
    except Exception as ex:
        # Deliberately broad: parsing problems on either side are reported and the
        # evaluation continues with the next row.
        print(ex)
        print(gt_json_text)
        print(i, output['json_cmd'])
    if json_matches:
        correct_json_cmds += 1

    record = {'device': gt['device'], 'user_cmd': gt['user_cmd'], 'gt_mtd': gt['mtd'],
              'output_mtd': output['mtd'], 'gt_json_cmd': gt['json_cmd'], 'output_json_cmd': output['json_cmd']}
    if method_matches and json_matches:
        correct_records.append(record)
    else:
        incorrect_records.append(record)

# Build the report frames once instead of growing them row by row.
incorrect_output_df = pd.DataFrame(incorrect_records, columns=report_columns)
correct_output_df = pd.DataFrame(correct_records, columns=report_columns)

# Guard against an empty (or entirely unmatched) output file.
acc_methods = round(correct_methods / evaluated, 2) if evaluated else 0.0
acc_json_cmds = round(correct_json_cmds / evaluated, 2) if evaluated else 0.0

incorrect_output_df.to_csv('output/incorrect_output.csv', index=False)
correct_output_df.to_csv('output/correct_output.csv', index=False)

with open('output/results.txt', 'a', encoding='utf-8') as f:
    f.write(f'Acc of methods: {acc_methods}\n'
            f'Acc of json cmds: {acc_json_cmds}\n\n'
            f'-----------------------------------\n\n')