# Log in to Hugging Face for authentication

In [2]:
# Check requirements
!pip3 install -q -r requirements.txt
from huggingface_hub import notebook_login

notebook_login()

[0m

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [3]:
import os

# Notebooks do not define `__file__`, so the results folder is anchored to the
# kernel's current working directory.
notebook_directory = os.getcwd()
directory = os.path.join(notebook_directory, "generation_result")
# exist_ok=True keeps the cell idempotent and avoids a check-then-create race.
os.makedirs(directory, exist_ok=True)


# Import the required libraries and define the model paths

In [4]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel
import torch, accelerate

# Hugging Face Hub id of the base model
codellama_model = "codellama/CodeLlama-7b-Instruct-hf"

# Hugging Face Hub id of the adapter that is applied on top of the base model
codellama_adapter = "AIRLab-POLIMI/codellama-7b-instruct-hf-btgenbot-adapter"

# Post-processing function

In [5]:
import re
def extract_behavior_tree(result):
    """Extract the last ``<root ...>...</root>`` XML block from generated text.

    Args:
        result: Text to search (typically the decoded model output).

    Returns:
        The last matching behavior tree as a string, or ``None`` when the text
        contains no complete ``<root>`` element. The extracted tree (or a
        warning when nothing is found) is also printed.
    """
    # `\b` accepts `<root>`, `<root ...>` and `<root` followed by a newline,
    # while still rejecting unrelated tags such as `<rooted>`.
    # The lazy `.*?` with DOTALL stops at the first closing tag, so several
    # trees in the same text are returned as separate matches.
    pattern = r'<root\b.*?</root>'
    matches = re.findall(pattern, result, re.DOTALL)

    if not matches:
        print("No start root of behavior tree please check the output")
        return None

    # The last tree is the most recently generated one.
    final_tree = matches[-1]
    print(final_tree)
    return final_tree



# Load the pretrained CodeLlama base model

In [10]:
# Select here the model and the corresponding adapter
# The download cache can be redirected with the BTGENBOT_CACHE_DIR environment
# variable; when it is unset the previous hard-coded location is used.
cache_dir = os.environ.get('BTGENBOT_CACHE_DIR', '/home/arg/BTGenBot/model')
model_id = codellama_model
adapter_id = codellama_adapter

# Load the weights in 8-bit through bitsandbytes
quantization_config = BitsAndBytesConfig(load_in_8bit=True)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path = model_id,
    cache_dir=cache_dir
)

# Load base model
# `trust_remote_code` is intentionally not enabled: CodeLlama uses the Llama
# architecture that ships with transformers, so no code from the Hub repository
# needs to be executed.
base_model = AutoModelForCausalLM.from_pretrained(
    pretrained_model_name_or_path = model_id,
    quantization_config = quantization_config,
    torch_dtype = torch.float16,
    device_map = "auto",
    cache_dir=cache_dir
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

# Load the task description and a similar example task

## Sample task from the paper

In [11]:
# System instruction, wrapped in <<SYS>> ... <</SYS>> markers: the model receives a task summary and must answer with the behavior tree in XML.
context = "<<SYS>> You will be provided a summary of a task performed by a behavior tree, and your objective is to express this behavior tree in XML format.\n <</SYS>>"
# Target task: the description the model is asked to turn into a behavior tree.
task = """The behavior tree represents a robot's navigation system with arm activity. The robot must visit the location "ArucoStand", then follow the arucos given their IDs: 10, 1, and 7. Finally the robot reset the manipulator to the parking position and goes to the location "Parking". The only available actions that must be used in the behavior tree are: "MoveTo", "FollowAruco" and "ResetManipulator"."""

# One-shot example: a simpler task description paired with the XML tree expected for it.
example_task = """The behavior tree represents a robot's navigation system with arm activity. The robot must visit the location "Station A", then follow the aruco with ID=7. The only available actions that must be used in the behavior tree are: "MoveTo", "FollowAruco"."""
# The literal starts and ends with a newline, so the tree sits on its own lines inside the prompt.
example_output = """
<root main_tree_to_execute = "MainTree" >
    <BehaviorTree ID="MainTree">
        <Sequence>
            <MoveTo location="Station A"/>
            <FollowAruco id="7"/>
        </Sequence>
    </BehaviorTree>
</root>
"""

# Inference with the pretrained base model

In [12]:
# Prompt layout: the one-shot example (instruction + expected answer) comes first,
# followed by the actual task as a second instruction left open for the model.
eval_prompt = f"<s>[INST]{context}{example_task}[/INST]</s>{example_output}[INST]{task}[/INST]"

# Tokenize to PyTorch tensors and move them to the GPU.
model_input = tokenizer(
    eval_prompt,
    return_tensors="pt",
).to("cuda")

In [13]:
base_model.eval()
with torch.no_grad():
    # Keep the raw token ids so the prompt and the completion can be decoded separately.
    base_output_ids = base_model.generate(**model_input, max_new_tokens=1000)[0]

# Full text (prompt + completion): this is what gets printed and saved.
result = tokenizer.decode(base_output_ids, skip_special_tokens=True)

# The generated sequence starts with the prompt tokens. The prompt already
# contains the one-shot example tree, so the tree must be searched only in the
# newly generated tokens; otherwise the example would be reported as the
# model's answer whenever the model produces no tree of its own.
prompt_token_count = model_input["input_ids"].shape[-1]
base_completion = tokenizer.decode(base_output_ids[prompt_token_count:], skip_special_tokens=True)

print("Generation result of base model")
print(result)
print("--------------------------------------------------------------------")
# extract_behavior_tree prints the extracted tree (or a warning) itself.
xml_tree_base = extract_behavior_tree(base_completion)
print("--------------------------------------------------------------------")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generation result of base model
[INST]<<SYS>> You will be provided a summary of a task performed by a behavior tree, and your objective is to express this behavior tree in XML format.
 <</SYS>>The behavior tree represents a robot's navigation system with arm activity. The robot must visit the location "Station A", then follow the aruco with ID=7. The only available actions that must be used in the behavior tree are: "MoveTo", "FollowAruco".[/INST] 
<root main_tree_to_execute = "MainTree" >
    <BehaviorTree ID="MainTree">
        <Sequence>
            <MoveTo location="Station A"/>
            <FollowAruco id="7"/>
        </Sequence>
    </BehaviorTree>
</root>
[INST]The behavior tree represents a robot's navigation system with arm activity. The robot must visit the location "ArucoStand", then follow the arucos given their IDs: 10, 1, and 7. Finally the robot reset the manipulator to the parking position and goes to the location "Parking". The only available actions that must be used

In [14]:
# Save the complete decoded generation of the base model.
response_base = os.path.join(directory, "response_base.txt")
with open(response_base, "w", encoding="utf-8") as file:
    file.write(result)

# Save the extracted behavior tree. When no tree was extracted the variable is
# None and writing it directly would raise a TypeError, so an empty file is
# written instead and the situation is reported.
xml_base_filename = os.path.join(directory, "response_base_xml_tree.txt")
if xml_tree_base is None:
    print("No behavior tree extracted from the base model output; writing an empty file")
with open(xml_base_filename, "w", encoding="utf-8") as file:
    file.write(xml_tree_base or "")

# Inference with the fine-tuned model

In [15]:
# Attach the adapter to the base model, then merge the adapter weights into it
# so that inference runs on a plain (non-PEFT) model object.
# NOTE(review): this presumably modifies `base_model` in place — confirm before
# reusing `base_model` as an untouched baseline after this cell.
finetuned_model = PeftModel.from_pretrained(base_model, adapter_id).merge_and_unload()

adapter_config.json:   0%|          | 0.00/578 [00:00<?, ?B/s]

adapter_model.bin:   0%|          | 0.00/80.1M [00:00<?, ?B/s]



In [16]:
finetuned_model.eval()
with torch.no_grad():
    # Keep the raw token ids so the prompt and the completion can be decoded separately.
    finetune_output_ids = finetuned_model.generate(**model_input, max_new_tokens=1000)[0]

# Full text (prompt + completion): this is what gets printed and saved.
fine_tune_result = tokenizer.decode(finetune_output_ids, skip_special_tokens=True)

# The generated sequence starts with the prompt tokens. The prompt already
# contains the one-shot example tree, so the tree must be searched only in the
# newly generated tokens; otherwise the example would be reported as the
# model's answer whenever the model produces no tree of its own.
prompt_token_count = model_input["input_ids"].shape[-1]
finetune_completion = tokenizer.decode(finetune_output_ids[prompt_token_count:], skip_special_tokens=True)

print("Generation result of finetune model")
print(fine_tune_result)
print("--------------------------------------------------------------------")
# extract_behavior_tree prints the extracted tree (or a warning) itself.
xml_tree_finetune = extract_behavior_tree(finetune_completion)
print("--------------------------------------------------------------------")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Generation result of finetune model
[INST]<<SYS>> You will be provided a summary of a task performed by a behavior tree, and your objective is to express this behavior tree in XML format.
 <</SYS>>The behavior tree represents a robot's navigation system with arm activity. The robot must visit the location "Station A", then follow the aruco with ID=7. The only available actions that must be used in the behavior tree are: "MoveTo", "FollowAruco".[/INST] 
<root main_tree_to_execute = "MainTree" >
    <BehaviorTree ID="MainTree">
        <Sequence>
            <MoveTo location="Station A"/>
            <FollowAruco id="7"/>
        </Sequence>
    </BehaviorTree>
</root>
[INST]The behavior tree represents a robot's navigation system with arm activity. The robot must visit the location "ArucoStand", then follow the arucos given their IDs: 10, 1, and 7. Finally the robot reset the manipulator to the parking position and goes to the location "Parking". The only available actions that must be 

In [17]:
# Save the complete decoded generation of the fine-tuned model.
response_finetune = os.path.join(directory, "response_finetune.txt")
with open(response_finetune, "w", encoding="utf-8") as file:
    file.write(fine_tune_result)

# Save the extracted behavior tree. When no tree was extracted the variable is
# None and writing it directly would raise a TypeError, so an empty file is
# written instead and the situation is reported.
xml_fintune_filename = os.path.join(directory, "response_finetune_xml_tree.txt")
if xml_tree_finetune is None:
    print("No behavior tree extracted from the fine-tuned model output; writing an empty file")
with open(xml_fintune_filename, "w", encoding="utf-8") as file:
    file.write(xml_tree_finetune or "")

# Convert the XML behavior tree to the CMU symbolic tree version