In [23]:
from llama_index.core import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    Settings,
    ServiceContext,
    StorageContext,
    load_index_from_storage,
)
from llama_index.core.embeddings.utils import resolve_embed_model
from llama_index.llms.ollama import Ollama
from llama_index.core import VectorStoreIndex, get_response_synthesizer
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core import Settings
import json
import pandas as pd
import random
from llama_cpp import Llama
from llama_cpp import LlamaGrammar

from pathlib import Path

In [10]:
# Project directory layout. Every location is derived from a small set of base
# directories so the tree can be relocated by editing a single line.
DATA_DIR = Path('data')
DOCS_DIR = DATA_DIR / 'docs'
GENERATION_PROMPTS_DIR = DATA_DIR / 'prompts' / 'generation'
VALIDATION_PROMPTS_DIR = DATA_DIR / 'prompts' / 'validation'

# Documentation sources.
GENERATE_METHODS_DIR = DOCS_DIR / 'manual'
METHODS_DIR = DOCS_DIR / 'methods'

# Prompt building blocks.
# NOTE(review): PROMPT_SEEDS_DIR and PROMPT_COMPONENTS_DIR resolve to the same
# directory -- confirm whether the seeds were meant to live somewhere else.
PROMPT_SEEDS_DIR = GENERATION_PROMPTS_DIR / 'components'
PROMPT_COMPONENTS_DIR = GENERATION_PROMPTS_DIR / 'components'
VAL_PROMPT_COMPONENTS_DIR = VALIDATION_PROMPTS_DIR / 'components'

# Assembled prompts.
GEN_PROMPTS_DIR = GENERATION_PROMPTS_DIR / 'output'
VAL_PROMPTS_DIR = VALIDATION_PROMPTS_DIR / 'output'

# Location of the persisted vector index; deliberately kept as a plain string.
PERSIST_DIR = "data/persist_dir"

# Index generation

In [3]:
# One-off index build, kept commented out: it reads the documents in
# METHODS_DIR, embeds them with the local BAAI/bge-base-en-v1.5 model and
# persists the resulting vector index to PERSIST_DIR.
# NOTE(review): ServiceContext is deprecated in recent llama-index releases in
# favour of the global Settings object -- confirm before re-enabling this cell.

# documents = SimpleDirectoryReader(METHODS_DIR).load_data()

# embed_model = resolve_embed_model("local:BAAI/bge-base-en-v1.5")

# service_context = ServiceContext.from_defaults(embed_model=embed_model, llm=None)

# index = VectorStoreIndex.from_documents(
#     documents, show_progress=True, service_context=service_context
# )

# index.storage_context.persist(persist_dir=PERSIST_DIR)

In [4]:
# Alternative backend (Ollama server), kept for reference:
# llm = Ollama(model="mistral", request_timeout=180.0) # , base_url="http://62.80.172.138:11434"

# Local GGUF model served through llama-cpp-python.
# Other quantisations tried: mistral-7b-instruct-v0.2.Q4_0.gguf, mistral-7b-instruct-v0.2.Q4_K_M.gguf
llm = Llama(
    model_path='models/mistral-7b-instruct-v0.2.Q6_K.gguf',
    n_ctx=2048,
    n_gpu_layers=-1,
    verbose=True,
)

ggml_init_cublas: GGML_CUDA_FORCE_MMQ:   no
ggml_init_cublas: CUDA_USE_TENSOR_CORES: yes
ggml_init_cublas: found 1 CUDA devices:
  Device 0: NVIDIA GeForce RTX 2080, compute capability 7.5, VMM: yes
llama_model_loader: loaded meta data with 24 key-value pairs and 291 tensors from models/mistral-7b-instruct-v0.2.Q6_K.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.2
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:        

In [4]:
# Smoke test: send one trivial question to check that the model generates text.
prompt = "\nWhy is sky blue?\n"
smoke_test_messages = [{'role': 'user', 'content': prompt}]
llm.create_chat_completion(messages=smoke_test_messages, max_tokens=100)


llama_print_timings:        load time =     152.90 ms
llama_print_timings:      sample time =      38.31 ms /   100 runs   (    0.38 ms per token,  2610.49 tokens per second)
llama_print_timings: prompt eval time =     152.83 ms /    17 tokens (    8.99 ms per token,   111.24 tokens per second)
llama_print_timings:        eval time =    1698.72 ms /    99 runs   (   17.16 ms per token,    58.28 tokens per second)
llama_print_timings:       total time =    2060.35 ms /   116 tokens


{'id': 'chatcmpl-ec3d3e90-400d-4f88-907f-b229d53e201a',
 'object': 'chat.completion',
 'created': 1710604169,
 'model': 'models/mistral-7b-instruct-v0.2.Q6_K.gguf',
 'choices': [{'index': 0,
   'message': {'role': 'assistant',
    'content': " The color of the sky appears blue due to a process called Rayleigh scattering. When sunlight, which is made up of different colors, enters the Earth's atmosphere, it interacts with molecules and particles in the air, such as nitrogen and oxygen. Blue light is scattered more easily than other colors because it travels in smaller, shorter waves. This scattered blue light is what we see when we look up at the sky. Additionally, other factors such as the presence of dust, water"},
   'finish_reason': 'length'}],
 'usage': {'prompt_tokens': 17, 'completion_tokens': 100, 'total_tokens': 117}}

In [8]:
# Show the current GPU state (memory usage, utilisation) with the NVIDIA CLI.
!nvidia-smi

Sat Mar 16 17:52:30 2024       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:01:00.0 Off |                  N/A |
| 23%   30C    P2    52W / 215W |   7236MiB /  8192MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [6]:
# Embedding model used for retrieval queries; it is the same model named in the
# commented-out index-build cell.
EMBED_MODEL_NAME = "local:BAAI/bge-base-en-v1.5"
Settings.embed_model = resolve_embed_model(EMBED_MODEL_NAME)

# Restore the vector index persisted in PERSIST_DIR instead of rebuilding it.
storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
index = load_index_from_storage(storage_context=storage_context, show_progress=True)

In [7]:
# Only the single best-matching document is retrieved for each query.
SIMILARITY_TOP_K = 1
retriever = VectorIndexRetriever(index=index, similarity_top_k=SIMILARITY_TOP_K)

In [13]:
# Shape of the JSON object the model is asked to produce.
json_scheme_prompt = {
    "method": {"type": "string"},
    "params": {"type": "object"},
}

# Expected answers for the two few-shot examples.
example_1_json = {"method": "Cover.Open", "params": {"id": 2}}
example_2_json = {"method": "Cover.Close", "params": {"id": 0, "duration": 5}}

# System-style instruction text shared by every prompt.
instruction = (VAL_PROMPT_COMPONENTS_DIR / 'instruction.md').read_text()

# Few-shot example 1: devices + method documentation + command, followed by the expected JSON.
example_1_prompt = """Devices: Cover id=2
Methods:
API method 1:
Method name: Cover.Open
Method description: Preconditions:
Cover will not accept the command if:
An  overvoltage  error is set at the time of the request.
An  undervoltage  error is set at the time of the request.
An  overtemp  error is set at the time of the request.
An engaged  safety_switch  prohibits movement in the requested direction.
Cover  calibration is running at the time of the request
Properties:
{"id": {"type": "number", "description": "The numeric ID of the Cover component instance"}, "duration": {"type": "number", "description": "If duration is not provided, Cover will fully open, unless it times out because of maxtime_open first. If duration (seconds) is provided, Cover will move in the open direction for the specified time. duration must be in the range [0.1..maxtime_open]Optional"}}
Response:
null on success; error if the request can not be executed or failed

Command: Open the cover.
JSON: """ + json.dumps(example_1_json)

# Few-shot example 2. The first two lines are spelled out with explicit "\n" so
# that the trailing space after "Methods:" (present in the prompt text) cannot
# be lost to editor whitespace trimming.
example_2_prompt = (
    "Devices: Cover id=0\n"
    "Methods: \n"
    """API method 1:
Method name: Cover.Close
Method description: Preconditions:
Cover will not accept the command if:
An  overvoltage  error is set at the time of the request.
An  undervoltage  error is set at the time of the request.
An  overtemp  error is set at the time of the request.
An engaged  safety_switch  prohibits movement in the requested direction.
Cover  calibration is running at the time of the request
Properties:
{"id": {"type": "number", "description": "The numeric ID of the Cover component instance"}, "duration": {"type": "number", "description": "If duration is not provided, Cover will fully close, unless it times out because of maxtime_close first. If duration (seconds) is provided, Cover will move in the close direction for the specified time. duration must be in the range [0.1..maxtime_open]Optional"}}
Response:
null on success; error if the request can not be executed or failed

Command: Close the kitchen cover quickly (for 5 seconds).
JSON: """
    + json.dumps(example_2_json)
)

# Values substituted into the base prompt template.
variables = {
    "instruction": instruction,
    "json_scheme": "The output JSON should follow the next scheme: " + json.dumps(json_scheme_prompt),
    "devices": "Cover id=1",
    "example_1": example_1_prompt,
    "example_2": example_2_prompt,
}

In [14]:
# Static part of every prompt: instruction, JSON scheme and the two few-shot
# examples, separated by blank lines.
base_prompt_template = "\n".join([
    "",
    "{instruction}",
    "{json_scheme}",
    "",
    "{example_1}",
    "",
    "{example_2}",
    "",
])

# Per-command part of the prompt; it ends right after "JSON:" so that the model
# continues with the JSON object.
user_prompt_template = "\n".join([
    "Devices: {env}",
    "Methods:",
    "{methods_description}",
    "Command: {user_cmd}",
    "JSON:",
    "",
])

# Keys of `variables` that the template does not reference are ignored.
base_prompt = base_prompt_template.format_map(variables)

print(base_prompt)


You are a helpful AI Assistant that controls the devices in a house. For a given user command create a corresponding JSON object. Don't add properties with null value in output JSON object. Output must be strictly in JSON format.
The output JSON should follow the next scheme: {"method": {"type": "string"}, "params": {"type": "object"}}

Devices: Cover id=2
Methods:
API method 1:
Method name: Cover.Open
Method description: Preconditions:
Cover will not accept the command if:
An  overvoltage  error is set at the time of the request.
An  undervoltage  error is set at the time of the request.
An  overtemp  error is set at the time of the request.
An engaged  safety_switch  prohibits movement in the requested direction.
Cover  calibration is running at the time of the request
Properties:
{"id": {"type": "number", "description": "The numeric ID of the Cover component instance"}, "duration": {"type": "number", "description": "If duration is not provided, Cover will fully open, unless it time

In [15]:
# Optional debugging aid, kept commented out: routes DEBUG-level log records of
# all libraries to stdout.

# import logging
# import sys

# logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
# logger = logging.getLogger()
# logger.addHandler(logging.StreamHandler(stream=sys.stdout))

In [16]:
# Turns the root logger off again; only meaningful when the commented-out
# logging setup in the previous cell has been enabled.
# logger.disabled = True

In [25]:
# Load the GBNF grammar used to constrain the model output to a JSON object.
grammar_str = Path('data/grammars/json.gbnf').read_text()
llama_grammar = LlamaGrammar.from_string(grammar_str)
print(llama_grammar)

<llama_cpp.llama_grammar.LlamaGrammar object at 0x7f12c07b6620>


from_string grammar:
root ::= object 
object ::= [{] ws object_11 [}] ws 
value ::= object | array | string | number | value_6 ws 
array ::= [[] ws array_15 []] ws 
string ::= ["] string_18 ["] ws 
number ::= number_19 number_25 number_29 ws 
value_6 ::= [t] [r] [u] [e] | [f] [a] [l] [s] [e] | [n] [u] [l] [l] 
ws ::= ws_31 
object_8 ::= string [:] ws value object_10 
object_9 ::= [,] ws string [:] ws value 
object_10 ::= object_9 object_10 | 
object_11 ::= object_8 | 
array_12 ::= value array_14 
array_13 ::= [,] ws value 
array_14 ::= array_13 array_14 | 
array_15 ::= array_12 | 
string_16 ::= [^"\<U+0000>-<U+001F>] | [\] string_17 
string_17 ::= ["\/bfnrt] | [u] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] 
string_18 ::= string_16 string_18 | 
number_19 ::= number_20 number_21 
number_20 ::= [-] | 
number_21 ::= [0-9] | [1-9] number_22 
number_22 ::= [0-9] number_22 | 
number_23 ::= [.] number_24 
number_24 ::= [0-9] number_24 | [0-9] 
number_25 ::= number_23 | 
number_26 ::= [eE

The larger Q4_K_M model generates more complex output that includes unnecessary parameters. Possible fix: parameter tuning. \
The Q4_0 model, by contrast, tries to escape the `_` character with `\` inside object property names. Possible fix: constrain the output with a grammar.

In [26]:
# Run the model over the dataset and store one row per user command in
# output/output.csv with the columns:
#   device   - device the command targets (from the dataset)
#   user_cmd - natural-language command (from the dataset)
#   mtd      - API method chosen by the retriever (file name without '.md')
#   json_cmd - JSON command generated by the model
limit_rows = -1 # 20; values <= 0 process every row
selected_devices = None # ['Smoke', 'Humidity']
random_seed = 42 # set to None to draw different distractor devices on every run

df = pd.read_csv('data/datasets/dataset_v2.csv')
devices = list(df['device'].unique())
if selected_devices:
    df = df[df['device'].isin(selected_devices)].sort_index()
    # print(df)

# Apply the limit by position. Comparing the index label with the limit (as a
# `break` inside the loop would) stops working once the frame has been filtered,
# because the surviving labels are no longer 0..n-1.
rows_to_process = df.head(limit_rows) if limit_rows > 0 else df

# Dedicated generator so the simulated environments are reproducible without
# touching the global `random` state.
rng = random.Random(random_seed)

# Collected as plain dicts and turned into a DataFrame once at the end; growing
# a DataFrame row by row copies it on every insertion.
records = []
for row_label, row in rows_to_process.iterrows():
    user_cmd = row['user_cmd']
    device = row['device']

    # Simulated environment: up to two randomly chosen other devices plus the
    # target device. Fewer distractors are used when the dataset does not
    # contain enough distinct devices.
    other_devices = [name for name in devices if name != device]
    distractors = rng.sample(other_devices, k=min(2, len(other_devices)))
    env = ', '.join(
        [f'{name} id={number}' for number, name in enumerate(distractors, start=1)]
        + [f'{device} id=444']
    )

    # The best-matching method document supplies both the method description
    # for the prompt and the retrieved method name (its file name).
    retrieved_nodes = retriever.retrieve(user_cmd)
    top_node = retrieved_nodes[0]
    methods_description = f'API method 1:\n{top_node.text}'
    method = top_node.metadata['file_name'].replace('.md', '')

    user_prompt = user_prompt_template.format(
        env=env,
        methods_description=methods_description,
        user_cmd=user_cmd,
    )
    query = base_prompt + '\n\n' + user_prompt
    response = llm.create_chat_completion(
        messages=[
            {'role': 'user', 'content': query}
        ],
        grammar=llama_grammar
    )
    response_text = response['choices'][0]['message']['content']
    # Some quantisations escape underscores as "\_", which is not valid JSON.
    response_text = response_text.replace('\\_', '_')
    try:
        # Round-trip through the parser to normalise the formatting.
        json_cmd = json.dumps(json.loads(response_text))
    except json.JSONDecodeError as ex:
        # Keep the raw text rather than aborting the whole run; the row will
        # simply fail the JSON comparison during evaluation.
        print(f'Row {row_label}: model output is not valid JSON ({ex}); storing the raw text.')
        json_cmd = response_text

    records.append({'device': device, 'user_cmd': user_cmd, 'mtd': method, 'json_cmd': json_cmd})

    # print(user_cmd)
    # print(response_text)

output_df = pd.DataFrame(records, columns=['device', 'user_cmd', 'mtd', 'json_cmd'])

output_path = Path('output/output.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
output_df.to_csv(output_path, index=False, header=True, mode='w')

Llama.generate: prefix-match hit

llama_print_timings:        load time =     248.41 ms
llama_print_timings:      sample time =     198.42 ms /    29 runs   (    6.84 ms per token,   146.16 tokens per second)
llama_print_timings: prompt eval time =     223.24 ms /   303 tokens (    0.74 ms per token,  1357.27 tokens per second)
llama_print_timings:        eval time =     515.70 ms /    28 runs   (   18.42 ms per token,    54.30 tokens per second)
llama_print_timings:       total time =    1017.60 ms /   331 tokens
Llama.generate: prefix-match hit

llama_print_timings:        load time =     248.41 ms
llama_print_timings:      sample time =     285.99 ms /    42 runs   (    6.81 ms per token,   146.86 tokens per second)
llama_print_timings: prompt eval time =     547.93 ms /   961 tokens (    0.57 ms per token,  1753.86 tokens per second)
llama_print_timings:        eval time =     793.64 ms /    41 runs   (   19.36 ms per token,    51.66 tokens per second)
llama_print_timings:       to

In [27]:
# Preview the first generated rows.
output_df.head(n=5)

Unnamed: 0,device,user_cmd,mtd,json_cmd
0,Switch,Set the dining room switch to flip mode.,Light.SetConfig,"{""method"": ""Switch.SetMode"", ""params"": {""id"": ..."
1,Switch,Set the living room switch to cycle mode.,Switch.SetConfig,"{""method"": ""Switch.SetConfig"", ""params"": {""id""..."
2,Switch,Enable Automatic OFF for the kitchen switch wi...,Switch.SetConfig,"{""method"": ""Switch.SetConfig"", ""params"": {""id""..."
3,Switch,Set the bedroom switch name to 'Bed Lamp'.,Light.SetConfig,"{""method"": ""Light.SetConfig"", ""params"": {""id"":..."
4,Switch,Set the hallway switch power limit to 50 Watts.,Cover.GetConfig,"{""method"": ""Cover.Configure"", ""params"": {""id"":..."


In [28]:
# Compare the generated output with the ground-truth dataset, split the rows
# into correct / incorrect CSV files and append both accuracies to
# output/results.txt.
import ast  # NOTE(review): not used in this cell; kept in case later cells rely on it

df = pd.read_csv('data/datasets/dataset_v2.csv')
output_df = pd.read_csv('output/output.csv')

result_columns = ['device', 'user_cmd', 'gt_mtd', 'output_mtd', 'gt_json_cmd', 'output_json_cmd']
incorrect_records = []
correct_records = []

correct_methods = 0
correct_json_cmds = 0
misaligned_rows = 0
# Ground truth and output are matched by position: output row i is compared
# with dataset row i.
for i in range(len(output_df)):
    gt = df.iloc[i]
    output = output_df.iloc[i]

    if gt['user_cmd'] != output['user_cmd']:
        misaligned_rows += 1

    method_matches = gt['mtd'] == output['mtd']
    if method_matches:
        correct_methods += 1

    try:
        # Compare parsed objects so key order and whitespace do not matter.
        json_matches = json.loads(gt['json_cmd']) == json.loads(output['json_cmd'])
    except (TypeError, ValueError) as ex:
        # A command that cannot be parsed (invalid JSON, or a missing value that
        # pandas read as NaN) must count as wrong; otherwise a row with a
        # matching method would be filed under the correct outputs.
        json_matches = False
        print(ex)
        print(gt['json_cmd'])
        print(i, output['json_cmd'])
    if json_matches:
        correct_json_cmds += 1

    record = {'device': gt['device'], 'user_cmd': gt['user_cmd'], 'gt_mtd': gt['mtd'],
              'output_mtd': output['mtd'], 'gt_json_cmd': gt['json_cmd'], 'output_json_cmd': output['json_cmd']}
    if method_matches and json_matches:
        correct_records.append(record)
    else:
        incorrect_records.append(record)

incorrect_output_df = pd.DataFrame(incorrect_records, columns=result_columns)
correct_output_df = pd.DataFrame(correct_records, columns=result_columns)

total_rows = len(output_df)
if misaligned_rows:
    print(f'WARNING: {misaligned_rows} of {total_rows} output rows have a different user_cmd than the '
          'dataset row at the same position; the accuracies below are only meaningful when '
          'output.csv was generated from the leading rows of the unfiltered dataset.')

# An empty output file yields 0.0 instead of a ZeroDivisionError.
acc_methods = round(correct_methods / total_rows, 2) if total_rows else 0.0
acc_json_cmds = round(correct_json_cmds / total_rows, 2) if total_rows else 0.0

incorrect_output_df.to_csv('output/incorrect_output.csv', index=False)
correct_output_df.to_csv('output/correct_output.csv', index=False)

# Appended, not overwritten, so results of successive runs accumulate.
with open('output/results.txt', 'a') as f:
    f.write(f'Acc of methods: {acc_methods}\n'
            f'Acc of json cmds: {acc_json_cmds}\n\n'
            f'-----------------------------------\n\n')