In [1]:
import transformers
import torch
from typing import List, Dict
from pprint import pprint
model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"

pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device_map="auto",
)

messages = [
    {"role": "system", "content": "You are a BDD testing expert that can write scenarios using Gherkin language, Python language and behave library"},
    {"role": "user", "content": "Show me a scenario for testing Mario like games"},
]

terminators = [
    pipeline.tokenizer.eos_token_id,
    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

def inference(prompt: str, max_tokens: int ) -> str:
    messages[-1]["content"] = prompt
    outputs = pipeline(
        messages,
        max_new_tokens=max_tokens,
        eos_token_id=terminators,
        do_sample=True,
        temperature=0.1,
        top_p=0.9,
    )
    
    return outputs[0]["generated_text"][-1]


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [51]:
USE_DEBUG = False 
from typing import Union, Tuple

# Takes an input step in natural language and tries to match against a set of available input steps in a Gherkin file.
# Two steps are used in the process:
    # Step 1: try to find the closest in terms of matching
    # Step 2: try to match parameters. Report the error if not succeeded
def _match_input_step_to_set(self, 
                            USER_INPUT_STEP: str, 
                            USER_INPUT_AVAILABLE_STEPS: str,
                            max_generated_tokens : int = 1024) -> Tuple[bool, str]:
    
    match_rewrite_prompt_template= """Given the below Gherkin available steps, check if any can match the input step. 
    
    Use Json syntax for response. Use the following format if any step can be matched:
    {{
      "found": true,
      "step_found":  "the step you found closest"
    }}
    
    If no available option is OK, then use:
    {{
        "found": false,
    }}
    
    Do not provide any other information or examples.
    
    ### Input step: 
    {user_input_step_to_match}
    
    ### Available steps:
    {user_input_available_steps}"""
    
    prompt_matching_params_template = """Can you match the parameters in the input text step with the target step ?
    
    Example:
    ### Input step: The plane has a travel speed of 123 km/h and a lenght of 500 m
    ### Output step: @given(A plane that has a (?P<speed>\d+) km/h, length (?P<size>\d+) m
    Response:
    {{
     "speed" : "123 km/h",
     "size" : "500 m"
    }}
    
    Question:
    ### Input step: {user_input_step}
    ### Output step: {step_str}
    Response:    your response 
    
    Do not write anything else.
    """
    
    match_rewrite_prompt_template.format(user_input_step_to_match=USER_INPUT_STEP, 
                                         user_input_available_steps=USER_INPUT_AVAILABLE_STEPS)
    
    import json 
    res = inference(match_rewrite_prompt, max_generated_tokens)["content"]
    pprint(f"Plain result: {res}")
    
    # Loading in json
    res_json = None
    try:
        dir = json.loads(res2)
        res_json = dir
    except Exception as e:
        pprint(f"exception occured while reading the output: {e}")
    
    
    step_found_str = res_json.get("step_found", None)
    
    if step_found_str:
        prompt_matching_params = prompt_matching_params_template.format(user_input_step=USER_INPUT_STEP, step_str=step_found_str)
        res22 = inference(prompt_matching_params, max_generated_tokens)["content"]
        
        RESPONSE_TAG = "Response:"
        resp_json_begin_index = res22.find(RESPONSE_TAG)
        resp_json_end_index = res22.rfind("}")
        
        if resp_json_begin_index !=-1 and resp_json_end_index!= -1:
            resp_json_str = res22[resp_json_begin_index + len(RESPONSE_TAG) : resp_json_end_index + 1]
            
            try:
                resp_json = json.loads(resp_json_str)
                pprint(resp_json)

                return (True, resp_json)
            except Exception as e:
                msg = f"Error {e} when parsing for parameters:\n{resp_json_str}"
                pprint(msg)
                
                return (False, msg)
    
    
    msg = "The model didn't find any match. The raw output is {temp_resp}".format(temp_resp=res22)
    pprint(msg)
    return (False, msg)

def match_input_step_to_set(self, 
                            USER_INPUT_STEP: str, 
                            max_generated_tokens : int = 1024) -> Tuple[bool, str]:
    
    # TODO: take from file with source code 
    USER_INPUT_AVAILABLE_STEPS = """@given("the car has (?P<engine_power>\d+) kw, weighs (?P<weight>\d+) kg, has a drag coefficient of (?P<drag>[\.\d]+)")
    
    @given("a frontal area of (?P<area>.+) m\^2")
    
    @when("I accelerate to (?P<speed>\d+) km/h")
    
    @then("the time should be within (?P<precision>[\d\.]+)s of (?P<time>[\d\.]+)s")
    
    @given("that the car is moving at (?P<speed>\d+) m/s")
    
    @when("I brake at (?P<brake_force>\d+)% force")
    
    @step("(?P<seconds>\d+) seconds? pass(?:es)?")
    
    @then("I should have traveled less than (?P<distance>\d+) meters")
    
    @given("that the car's heading is (?P<heading>\d+) deg")
    
    @when("I turn (?P<direction>left|right) at a yaw rate of (?P<rate>\d+) deg/sec for (?P<duration>\d+) seconds")
    
    @then("the car's heading should be (?P<heading>\d+) deg")"""
    
    is_matched, resp = _match_input_step_to_set(self, USER_INPUT_STEP, USER_INPUT_AVAILABLE_STEPS, max_generated_tokens=1024)

    return is_matched, resp 


is_matched, resp = match_input_step_to_set(None, USER_INPUT_STEP = "A drag of 123, a mass of 12345 kg, and an engine of 124kw the Yoda's vehicle has!")
pprint(f"The model matched the step: {is_matched}\n \
            Response: {resp}\n")



Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128009 for open-end generation.


('Plain result: {\n'
 '  "found": true,\n'
 '  "step_found":  "the car has (?P<engine_power>\\d+) kw, weighs '
 '(?P<weight>\\d+) kg, has a drag coefficient of (?P<drag>[\\.\\d]+)"\n'
 '}')
("The model didn't find any match. The raw output is {\n"
 '     "engine_power" : "124 kw",\n'
 '     "weight" : "12345 kg",\n'
 '     "drag" : "123"\n'
 '}')
('The model matched the step: False\n'
 "             Response: The model didn't find any match. The raw output is {\n"
 '     "engine_power" : "124 kw",\n'
 '     "weight" : "12345 kg",\n'
 '     "drag" : "123"\n'
 '}\n')
