In [5]:
from pathlib import Path
import numpy as np
import pandas as pd
import random
import copy
import ast
import pprint
import json
import re
import time

In [6]:
# Device component names, capitalized the way they appear in API method names
# (e.g. "Light" in "Light.GetConfig").
devices = [
    raw_name.capitalize()
    for raw_name in ("cover", "switch", "light", "input", "temperature", "smoke", "humidity")
]

In [7]:
# Method docs (<Device>.<Method>.md) that generation prompts are built from.
GENERATE_METHODS_DIR = Path('data/docs/manual')
# Method docs that generate_seed / generate_advance_seed embed in the few-shot example section.
METHODS_DIR = Path('data/docs/methods')
# Directory that prompt_seeds.csv is read from.
# NOTE(review): identical to PROMPT_COMPONENTS_DIR below - confirm that is intended.
PROMPT_SEEDS_DIR = Path('data/prompts/generation/components')
# Directory holding the instruction templates (instruction_onedevice.md, instruction_advance.md).
PROMPT_COMPONENTS_DIR = Path('data/prompts/generation/components')
# Where the assembled generation prompts (prompts.csv) are written.
GEN_PROMPTS_DIR = Path('data/prompts/generation/output')
# Where parsed model outputs (prompts.csv) are appended.
VAL_PROMPTS_DIR = Path('data/prompts/validation/output')

# seeds generation

In [13]:
# Assemble prompts/prompt.md for one device: instruction, worked example, then up
# to MAX_PROMPT_METHODS API method descriptions for that device.
MAX_PROMPT_METHODS = 5
target_device = devices[2]

with open('prompts/instruction_onedevice.md', "r") as f:
    instruction = f.read()
with open('prompts/seeds_multiapi.md', "r") as f:
    seeds = f.read()

# Build the whole prompt in memory and write it once, instead of truncating the
# file and then reopening it in append mode for every method.
prompt_parts = [
    f'{instruction}\n\n'
    f'Example:\n\n{seeds}\n\n'
    f'For generation: \n\n'
    f'Device: {target_device} id=XXX\n\n'
]

row_i = 0
# iterdir() yields entries in arbitrary order; sort so the same methods are
# selected on every run.
for p in sorted(Path('docs/').iterdir()):
    # Take per-method files of the target device, but not the device overview file itself.
    if target_device in p.name and p.name != f'{target_device}.md':
        with open(p, "r") as f:
            method_str = f.read()
        prompt_parts.append(f'API method {row_i}: {method_str}\n')
        row_i += 1
    if row_i == MAX_PROMPT_METHODS:
        break

with open('prompts/prompt.md', "w") as f:
    f.write(''.join(prompt_parts))

In [49]:
# Raw model response pasted in by hand: a JSON array of
# {"User command N.M": ..., "JSON command N.M": ...} objects.
output = """[{"User command 0.1": "Reset the energy counter for the main light.", "JSON command 0.1": {"method": "Light.ResetCounters", "params": {"id": "id", "type": ["energy"]}}}, {"User command 0.2": "Reset the all counter for the light, please.", "JSON command 0.2": {"method": "Light.ResetCounters", "params": {"id": "id"}}}, {"User command 1.1" : "Could you please fetch the configuration details for the living room light, including its name, night mode settings, and transition duration?", "JSON command 1.1": {"method": "Light.GetConfig", "params": {"id": "id"}}}, {"User command 1.2" : "Tell me, what is transition duration for the main light?", "JSON command 1.2": {"method": "Light.GetConfig", "params": {"id": "id"}}}]"""
# Replace the XXX id placeholder (the prompt uses "id=XXX") with the quoted string "id".
output = output.replace("XXX", '"id"')

In [50]:
# Strip the "<method>.<example>" numbering from the keys so every object uses the
# plain "User command" / "JSON command" keys. \d+ (rather than a single \d) also
# covers indices of 10 and above.
output = re.sub(r'(User|JSON) command \d+\.\d+', r'\1 command', output)

In [51]:
# Show the normalized response text before it is parsed.
print(output)

[{"User command": "Reset the energy counter for the main light.", "JSON command": {"method": "Light.ResetCounters", "params": {"id": "id", "type": ["energy"]}}}, {"User command": "Reset the all counter for the light, please.", "JSON command": {"method": "Light.ResetCounters", "params": {"id": "id"}}}, {"User command" : "Could you please fetch the configuration details for the living room light, including its name, night mode settings, and transition duration?", "JSON command": {"method": "Light.GetConfig", "params": {"id": "id"}}}, {"User command" : "Tell me, what is transition duration for the main light?", "JSON command": {"method": "Light.GetConfig", "params": {"id": "id"}}}]


In [52]:
import json

In [53]:
# Parse the normalized response into a list of command dicts.
cmds = json.loads(output)

In [54]:
# Re-serialize the parsed commands; as the cell's last expression the string is displayed.
json.dumps(cmds)

'[{"User command": "Reset the energy counter for the main light.", "JSON command": {"method": "Light.ResetCounters", "params": {"id": "id", "type": ["energy"]}}}, {"User command": "Reset the all counter for the light, please.", "JSON command": {"method": "Light.ResetCounters", "params": {"id": "id"}}}, {"User command": "Could you please fetch the configuration details for the living room light, including its name, night mode settings, and transition duration?", "JSON command": {"method": "Light.GetConfig", "params": {"id": "id"}}}, {"User command": "Tell me, what is transition duration for the main light?", "JSON command": {"method": "Light.GetConfig", "params": {"id": "id"}}}]'

In [55]:
# Turn the parsed commands into column lists (one entry per command) for a DataFrame.
json_cmds = [entry['JSON command'] for entry in cmds]
df_dict = {
    'device': [target_device for _ in cmds],
    'user_cmd': [entry['User command'] for entry in cmds],
    'used_mtd': [f'{json_cmd["method"]}' for json_cmd in json_cmds],
    'json_cmd': json_cmds,
}
print(df_dict)

{'device': ['Light', 'Light', 'Light', 'Light'], 'user_cmd': ['Reset the energy counter for the main light.', 'Reset the all counter for the light, please.', 'Could you please fetch the configuration details for the living room light, including its name, night mode settings, and transition duration?', 'Tell me, what is transition duration for the main light?'], 'used_mtd': ['Light.ResetCounters', 'Light.ResetCounters', 'Light.GetConfig', 'Light.GetConfig'], 'json_cmd': [{'method': 'Light.ResetCounters', 'params': {'id': 'id', 'type': ['energy']}}, {'method': 'Light.ResetCounters', 'params': {'id': 'id'}}, {'method': 'Light.GetConfig', 'params': {'id': 'id'}}, {'method': 'Light.GetConfig', 'params': {'id': 'id'}}]}


In [56]:
# Directory the seed CSV is appended to by the next cell.
data_path = Path('data/')

In [57]:

df = pd.DataFrame(df_dict)
seeds_csv_path = data_path / 'prompt_seeds.csv'
# Append the new seeds; write the header row only when the file is being created,
# so a fresh file can still be read by column name with pd.read_csv.
# NOTE(review): seeds are later read from PROMPT_SEEDS_DIR / 'prompt_seeds.csv',
# which is a different directory than data_path - confirm how the file gets there.
df.to_csv(seeds_csv_path, index=False, mode='a', header=not seeds_csv_path.exists())

# prompt generation

In [4]:
import pprint

In [7]:
# Group the method doc files by device and by tier. Config/status methods are
# "advance" (one prompt per method); everything else is "basic".
device_methods_dict = {device: {'basic': [], 'advance': []} for device in devices}
advance_methods = ['GetConfig', 'SetConfig', 'GetStatus']
# iterdir() order is arbitrary; sort so the method lists (and therefore the way
# methods are batched into prompts) are reproducible.
for p in sorted(GENERATE_METHODS_DIR.iterdir()):
    stem_parts = p.stem.split('.')
    if len(stem_parts) < 2:
        # No "<Device>.<Method>" structure (e.g. a device overview file):
        # there is no method name to classify, so skip instead of raising IndexError.
        continue
    tier = 'advance' if stem_parts[1] in advance_methods else 'basic'
    for d in devices:
        if d in p.name:
            device_methods_dict[d][tier].append(p.stem)
pprint.pprint(device_methods_dict)

{'Cover': {'advance': [], 'basic': []},
 'Humidity': {'advance': [], 'basic': ['Humidity.SetHumidity']},
 'Input': {'advance': [], 'basic': []},
 'Light': {'advance': [], 'basic': []},
 'Smoke': {'advance': [], 'basic': []},
 'Switch': {'advance': [], 'basic': []},
 'Temperature': {'advance': [], 'basic': ['Temperature.SetTemperature']}}


In [8]:
def generate_seed(seeds_df, num_methods=2, num_examples=2):
    """Build a few-shot example section for one randomly chosen seed device.

    Picks a random device from ``seeds_df``, samples up to ``num_methods`` of its
    methods and, for each, up to ``num_examples`` seed rows using that method.

    Args:
        seeds_df: DataFrame with ``device``, ``used_mtd``, ``user_cmd`` and
            ``json_cmd`` columns; ``json_cmd`` holds Python-literal strings.
        num_methods: Maximum number of methods to include.
        num_examples: Maximum number of example commands per method.

    Returns:
        The example section as text: device line, method descriptions read from
        ``METHODS_DIR / '<method>.md'``, and a JSON ``{"commands": ...}`` object.

    Raises:
        FileNotFoundError: If a sampled method has no description file.
    """
    rand_device = random.choice(list(seeds_df['device'].unique()))
    device_methods = list(seeds_df[seeds_df['device'] == rand_device]['used_mtd'].unique())
    # Clamp the sample sizes (as generate_advance_seed does) so a device with few
    # methods or examples does not make random.sample raise ValueError.
    seed_methods = random.sample(device_methods, min(num_methods, len(device_methods)))
    methods_description = ''
    cmds = {}
    for method_i, method in enumerate(seed_methods):
        with open(METHODS_DIR / f'{method}.md', "r") as f:
            method_str = f.read()
        methods_description += f'API method {method_i+1}: {method_str}\n'
        method_row_ids = list(seeds_df[seeds_df['used_mtd'] == method].index)
        example_ids = random.sample(method_row_ids, min(num_examples, len(method_row_ids)))
        for example_i, row_i in enumerate(example_ids):
            cmds[f'Example {method_i+1}.{example_i+1}'] = {
                'User command': seeds_df.loc[row_i, 'user_cmd'],
                # literal_eval parses the stored dict repr directly; swapping quote
                # characters first would corrupt values that contain an apostrophe.
                'JSON command': ast.literal_eval(seeds_df.loc[row_i, 'json_cmd']),
            }
    seed_description = (
        f'The next device, methods and JSON are example:\n\n'
        f'Device: {rand_device} id=33\n\n'
        f'{methods_description}'
        f'{json.dumps({"commands": cmds})}'
    )
    return seed_description

def generate_advance_seed(seeds_df, num_examples=4):
    """Build a few-shot example section around a single randomly chosen method.

    Picks a random device from ``seeds_df``, then one of its methods, and includes
    up to ``num_examples`` seed rows that use that method.

    Args:
        seeds_df: DataFrame with ``device``, ``used_mtd``, ``user_cmd`` and
            ``json_cmd`` columns; ``json_cmd`` holds Python-literal strings.
        num_examples: Maximum number of example commands to include.

    Returns:
        The example section as text: device line, the method description read
        from ``METHODS_DIR / '<method>.md'``, and a JSON ``{"commands": ...}`` object.

    Raises:
        FileNotFoundError: If the chosen method has no description file.
    """
    rand_device = random.choice(list(seeds_df['device'].unique()))
    method = random.choice(list(seeds_df[seeds_df['device'] == rand_device]['used_mtd'].unique()))
    cmds = {}
    with open(METHODS_DIR / f'{method}.md', "r") as f:
        method_str = f.read()
    method_description = f'API method 1:\n{method_str}\n'
    sample_method_ids = list(seeds_df[seeds_df['used_mtd'] == method].index)
    example_ids = random.sample(sample_method_ids, min(num_examples, len(sample_method_ids)))
    for example_i, row_i in enumerate(example_ids):
        cmds[f'Example {example_i+1}'] = {
            'User command': seeds_df.loc[row_i, 'user_cmd'],
            # literal_eval parses the stored dict repr directly; swapping quote
            # characters first would corrupt values that contain an apostrophe.
            'JSON command': ast.literal_eval(seeds_df.loc[row_i, 'json_cmd']),
        }
    seed_description = (
        f'The next device, methods and JSON are example:\n\n'
        f'Device: {rand_device} id=33\n\n'
        f'{method_description}'
        f'{json.dumps({"commands": cmds})}'
    )
    return seed_description

In [12]:
# Load the seed examples and print one few-shot section built from them as a sanity check.
seeds_df = pd.read_csv(PROMPT_SEEDS_DIR / 'prompt_seeds.csv')
# Earlier experiments for decoding the json_cmd column, kept for reference:
# json.loads(seeds_df.loc[0, 'json_cmd'].replace("'", '"'))['method']
# f"dict: {json.dumps(ast.literal_eval(seeds_df.loc[0, 'json_cmd']))}"
print(generate_seed(seeds_df, num_examples=3))

The next device, methods and JSON are example:

Device: Cover id=33

API method 1: Method name: Cover.Open
Method description: Preconditions:
Cover will not accept the command if:
An  overvoltage  error is set at the time of the request.
An  undervoltage  error is set at the time of the request.
An  overtemp  error is set at the time of the request.
An engaged  safety_switch  prohibits movement in the requested direction.
Cover  calibration is running at the time of the request
Properties:
{"id": {"type": "number", "description": "The numeric ID of the Cover component instance"}, "duration": {"type": "number", "description": "If duration is not provided, Cover will fully open, unless it times out because of maxtime_open first. If duration (seconds) is provided, Cover will move in the open direction for the specified time. duration must be in the range [0.1..maxtime_open]Optional"}}
Response:
null on success; error if the request can not be executed or failed

API method 2: Method name:

In [13]:
# Number of basic methods bundled into one generation prompt before it is emitted.
NUM_METHODS_GEN = 2
# Passed to generate_seed as num_methods: how many example methods the seed section shows.
NUM_METHODS_SEED = 2
# Passed to generate_seed as num_examples: how many example commands per seed method.
NUM_EXAMPLES_SEED = 3
# Substituted for {EXAMPLES_BASIC_METHOD} in the basic instruction; also multiplied by
# the number of methods in the prompt to fill {NUM_EXAMPLE_COMMANDS}.
EXAMPLES_BASIC_METHOD = NUM_EXAMPLES_SEED
# Substituted for {NUM_EXAMPLE_COMMANDS} in the advance instruction.
EXAMPLES_ADVANCE_METHOD = 8

In [19]:
# Build every generation prompt and save them to GEN_PROMPTS_DIR / 'prompts.csv'.
# Basic methods are batched NUM_METHODS_GEN per prompt; each advance method gets
# its own prompt. Every prompt = instruction + random seed example + target methods.
seeds_df = pd.read_csv(PROMPT_SEEDS_DIR / 'prompt_seeds.csv')

# Instruction templates do not change between iterations: read each one at most
# once, and only when a prompt actually needs it.
instruction_templates = {}


def _load_instruction(file_name):
    """Return the text of a prompt-component file, reading it from disk only once."""
    if file_name not in instruction_templates:
        instruction_templates[file_name] = (PROMPT_COMPONENTS_DIR / file_name).read_text()
    return instruction_templates[file_name]


# Collect rows in a list and build the DataFrame once, instead of growing it row by row.
prompt_rows = []
for device, methods in device_methods_dict.items():
    method_i = 0
    methods_description = ''
    last_basic_i = len(methods['basic']) - 1

    for i, method_name in enumerate(methods['basic']):
        method_str = (GENERATE_METHODS_DIR / f'{method_name}.md').read_text()
        methods_description += f'API method {method_i+1}: {method_str}\n'
        method_i += 1

        # Keep accumulating until the batch is full or this is the device's last
        # basic method (method_i is always >= 1 here).
        if method_i % NUM_METHODS_GEN != 0 and i != last_basic_i:
            continue

        instruction = _load_instruction('instruction_onedevice.md')
        instruction = instruction.replace('{NUM_EXAMPLE_COMMANDS}', str(method_i*EXAMPLES_BASIC_METHOD))
        instruction = instruction.replace('{EXAMPLES_BASIC_METHOD}', str(EXAMPLES_BASIC_METHOD))

        seed_1 = generate_seed(seeds_df, NUM_METHODS_SEED, NUM_EXAMPLES_SEED)

        prompt_text = (f'{instruction}\n\n'
                       f'{seed_1}\n\n'
                       f'The next device and methods are for you to generate commands:\n\n'
                       f'Device: {device} id=444\n\n'
                       f'{methods_description}')
        prompt_rows.append({'type': 'basic',
                            'device': device,
                            'last_mtd_i': i,
                            'text': prompt_text})

        # Start a new batch.
        method_i = 0
        methods_description = ''

    for i, method_name in enumerate(methods['advance']):
        method_str = (GENERATE_METHODS_DIR / f'{method_name}.md').read_text()
        method_description = f'API method 1:\n{method_str}\n'

        instruction = _load_instruction('instruction_advance.md')
        instruction = instruction.replace('{NUM_EXAMPLE_COMMANDS}', str(EXAMPLES_ADVANCE_METHOD))

        seed_1 = generate_advance_seed(seeds_df)

        prompt_text = (f'{instruction}\n\n'
                       f'{seed_1}\n\n'
                       f'The next device and method are for you to generate commands:\n\n'
                       f'Device: {device} id=444\n\n'
                       f'{method_description}')
        prompt_rows.append({'type': 'advance',
                            'device': device,
                            'last_mtd_i': i,
                            'text': prompt_text})

gen_prompts_df = pd.DataFrame(prompt_rows, columns=['type', 'device', 'last_mtd_i', 'text'])
# Make sure the output directory exists so a fresh checkout can run this cell.
GEN_PROMPTS_DIR.mkdir(parents=True, exist_ok=True)
gen_prompts_df.to_csv(GEN_PROMPTS_DIR / 'prompts.csv', index=False)

# inspection

In [12]:
def parse_output(output):
    """Parse one model response and append its commands to the validation CSV.

    Args:
        output: JSON text with a top-level ``"commands"`` entry. Its value may be
            a mapping of example name -> entry (the format the prompt shows) or a
            plain list of entries; each entry has ``"User command"`` and
            ``"JSON command"`` keys, the latter containing ``"method"``.

    Returns:
        DataFrame of the parsed rows (columns ``device``, ``user_cmd``, ``mtd``,
        ``json_cmd``); the same rows are appended to
        ``VAL_PROMPTS_DIR / 'prompts.csv'``.

    Raises:
        json.JSONDecodeError: If ``output`` is not valid JSON.
        KeyError: If ``"commands"`` or a required entry key is missing.
    """
    commands = json.loads(output)['commands']
    # The instruction text calls the output a "JSON array", so tolerate a list
    # of entries as well as the name -> entry mapping.
    entries = list(commands.values()) if isinstance(commands, dict) else list(commands)

    records = []
    for entry in entries:
        json_cmd = entry['JSON command']
        method = f'{json_cmd["method"]}'
        records.append({
            # Device is the part of the method name before the first dot.
            'device': method.split('.')[0],
            'user_cmd': entry['User command'],
            'mtd': method,
            'json_cmd': json.dumps(json_cmd),
        })
    df = pd.DataFrame(records, columns=['device', 'user_cmd', 'mtd', 'json_cmd'])

    # Create the output directory on first use; write the header only for a new file.
    VAL_PROMPTS_DIR.mkdir(parents=True, exist_ok=True)
    csv_path = VAL_PROMPTS_DIR / 'prompts.csv'
    df.to_csv(csv_path, index=False, mode='a', header=not csv_path.exists())
    return df

# dataset generation

In [9]:
from openai import OpenAI
# Read the API key from a local file rather than hard-coding it in the notebook.
with open('data/keys/openai.txt') as f:
    # Strip surrounding whitespace: a trailing newline from the file would
    # otherwise become part of the key.
    key = f.read().strip()
client = OpenAI(api_key=key)

In [10]:
# t = """[
#     {"User command": "Set the power limit and voltage limit of the bedroom cover to a custom value.", "JSON command": {"method": "Cover.SetConfig", "params": {"id": "id", "config": {"power_limit": 500, "voltage_limit": 220}}}},
#     {"User command": "Adjust the idle power threshold and confirmation period for the living room cover motor.", "JSON command": {"method": "Cover.SetConfig", "params": {"id": "id", "config": {"motor.idle_power_thr": 5, "motor.idle_confirm_period": 0.5}}}},
#     {"User command": "Change the default open and close timeout for the kitchen cover.", "JSON command": {"method": "Cover.SetConfig", "params": {"id": "id", "config": {"maxtime_open": 90, "maxtime_close": 90}}}},
#     {"User command": "Enable obstruction detection and set the power threshold for the bathroom cover.", "JSON command": {"method": "Cover.SetConfig", "params": {"id": "id", "config": {"obstruction_detection": {"enable": true, "obstruction_detection.power_thr": 1200}}}},
#     {"User command": "Activate the safety switch feature for the balcony cover and define the direction and action to take if engaged.", "JSON command": {"method": "Cover.SetConfig", "params": {"id": "id", "config": {"safety_switch": {"enable": true, "direction": "close", "action": "stop"}}}}]"""
# json.loads(t)

In [16]:
# Send every saved generation prompt to the model, append the parsed commands to
# the validation CSV, and log progress as we go.
gen_prompts_df = pd.read_csv(GEN_PROMPTS_DIR / 'prompts.csv')

separator = '---------------------------------------'
parsed = []
for _, row in gen_prompts_df.iterrows():
    prompt = row['text']
    prompt_tag = f'{row["device"]}:{row["last_mtd_i"]}'

    print(prompt)
    print(separator)

    completion = client.chat.completions.create(
        model="gpt-3.5-turbo-0125",
        response_format={"type": "json_object"},
        messages=[{"role": "user", "content": prompt}],
    )
    reply_text = completion.choices[0].message.content

    # Persist the commands first; the log lines below only run if parsing succeeded.
    parse_output(reply_text)
    print(f'{prompt_tag} parsed!\n\n')
    print(f'{reply_text}\n\n')
    print(separator)

    parsed.append(prompt_tag)

    # Pause between requests.
    time.sleep(10)

You are provided with a device in a smart home, its API methods, their descriptions and required parameters. Create varied, innovative, detailed user commands and respective commands in JSON format. User commands must sound naturally. Command to device must be strictly in JSON format. Generate exactly 3 example commands. For each API method generate 3 commands that use this method. You mustn't hallucinate new API methods. In each pair of user command with JSON command use at least half of possible parametrs. Ask about parameters from reponse output if they exist. Don't add to JSON command parameters that are not mentioned in the user command. Output must be strictly JSON array: {"commands": {"Example 1.1": {"User command": "...", "JSON command": {...}}, "Example 1.2": {"User command": "...", "JSON command": {...}}, "Example 1.3": {"User command": "...", "JSON command": {...}}, ...}}. There is example below.


The next device, methods and JSON are example:

Device: Cover id=33

API meth

# merge dataset

In [20]:
# Append the newly generated commands to the previous dataset version and save
# the result as the next version.
dataset_df = pd.read_csv('data/datasets/dataset_v1.csv')
# Read from the same location parse_output writes to, so the two cannot drift apart.
generated_df = pd.read_csv(VAL_PROMPTS_DIR / 'prompts.csv')
new_df = pd.concat([dataset_df, generated_df], ignore_index=True, axis=0)
# new_df['json_cmd'] = new_df['json_cmd'].apply(lambda x: json.dumps(ast.literal_eval(x)))
new_df.to_csv('data/datasets/dataset_v2.csv', index=False)