# LLM-Supported Natural Language to Bash Translation

## Load Dataset

In [None]:
from datasets import load_dataset

# NOTE: the train/test data is selected by the dataset *config* (``name``),
# NOT by the ``split`` argument, which is "train" in both calls.
train_dataset = load_dataset("westenfelder/NL2SH-ALFA", name="train", split="train")
test_dataset = load_dataset("westenfelder/NL2SH-ALFA", name="test", split="train")

# Report how many rows each dataset holds.
print(
    f"Train dataset size: {len(train_dataset)} rows",
    f"Test dataset size: {len(test_dataset)} rows",
    sep="\n",
)

# Show the first training row: the "nl" field and its paired "bash" field.
print(
    "\nExample Row",
    f"Natural Language Task: {train_dataset[0]['nl']}",
    f"Bash Command: {train_dataset[0]['bash']}",
    sep="\n",
)

## Load Model

In [2]:
import torch
import random
from transformers import AutoTokenizer, AutoModelForCausalLM

# Hugging Face identifier of the checkpoint used for translation.
model_id = "meta-llama/Llama-3.2-1B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_id)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# Load the weights in bfloat16, then move the model to the GPU.
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
model = model.to("cuda")

# For reproducibility: seed torch (including all CUDA devices) and Python's
# ``random`` module with the same value.
seed = 123
torch.manual_seed(seed)
random.seed(seed)
torch.cuda.manual_seed_all(seed)

def translate(prompt, system_prompt="Your task is to translate a natural language instruction to a Bash command. You will receive an instruction in English and output a Bash command that can be run in a Linux terminal."):
    """Generate the model's reply to a natural language instruction.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        prompt: The instruction to translate; sent as the user message.
        system_prompt: The system message placed before the user message.

    Returns:
        The decoded text of the newly generated tokens only (the prompt is
        removed), with special tokens skipped. The text is returned as-is,
        without any markdown stripping.
    """
    conversation = [
        dict(role="system", content=system_prompt),
        dict(role="user", content=f"{prompt}"),
    ]

    # Render the chat template (including the generation prompt) to token ids.
    input_ids = tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        tokenize=True,
        return_tensors="pt",
    )
    input_ids = input_ids.to(model.device)

    # Generation stops at either the tokenizer's EOS token or "<|eot_id|>".
    stop_ids = [
        tokenizer.eos_token_id,
        tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    ]

    # Sampling is disabled (do_sample=False); temperature/top_p are passed as
    # None explicitly. At most 100 new tokens are produced.
    generated = model.generate(
        input_ids,
        attention_mask=torch.ones_like(input_ids),  # every prompt token is attended to
        max_new_tokens=100,
        eos_token_id=stop_ids,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=False,
        temperature=None,
        top_p=None,
    )

    # Keep only the tokens that follow the prompt.
    prompt_length = input_ids.shape[-1]
    completion = generated[0][prompt_length:]
    return tokenizer.decode(completion, skip_special_tokens=True)

In [3]:
import re

# strip markdown formatting

# Language tag (possibly empty) on the opening line of a code fence,
# e.g. the "sh" in "```sh\nls\n```".
_FENCE_TAG = re.compile(r"\A[\w+-]*[ \t]*\n")


def _strip_fence_tag(body):
    """Return the contents of a code fence without its leading language tag."""
    if body.startswith("bash"):
        return body[len("bash"):].strip()
    untagged = _FENCE_TAG.sub("", body, count=1).strip()
    # If dropping the first line leaves nothing, that line was the content.
    return untagged or body.strip()


def parse_bash(text):
    """Extract a Bash command from text that may contain markdown.

    Candidates are tried in this order, and the first hit wins:

    1. a closed code fence tagged ``bash``;
    2. any other closed code fence, with a language tag on its opening line
       (``sh``, ``shell``, ...) removed;
    3. an opening fence that is never closed (e.g. the text was cut off),
       handled like case 2;
    4. inline code between backticks.

    If none of these yields a command, the stripped input is returned.

    Args:
        text: Raw text, typically a model response.

    Returns:
        The extracted command with surrounding whitespace removed.
    """
    tagged = re.search(r"```bash\s*(.*?)\s*```", text, re.DOTALL)
    if tagged:
        return tagged.group(1).strip()

    # Prefer a closed fence; otherwise take everything after a lone opening
    # fence rather than discarding the command.
    fence = (
        re.search(r"```(.*?)```", text, re.DOTALL)
        or re.search(r"```(.*)", text, re.DOTALL)
    )
    if fence:
        command = _strip_fence_tag(fence.group(1))
        if command:
            return command

    # Require at least one non-backtick character so two adjacent backticks
    # are not mistaken for an (empty) inline code span.
    inline = re.search(r"`([^`]+)`", text)
    if inline:
        return inline.group(1).strip()

    return text.strip()

In [None]:
# example usage: run the first training row through the model and show the
# result next to the dataset's reference command
ground_truth_command = train_dataset[0]["bash"]
natural_language_task = train_dataset[0]["nl"]

# raw model text first, then the command extracted from it
model_output = translate(natural_language_task)
model_command = parse_bash(model_output)

print(
    f"Natural Language Task: {natural_language_task}",
    f"Ground Truth Command: {ground_truth_command}",
    f"Model Command: {model_command}",
    sep="\n",
)

## Benchmark Model

In [None]:
from icalfa import submit_command
from tqdm import tqdm

# Running tally of the values returned by submit_command, and the number of
# test rows it is divided by at the end.
total = len(test_dataset)
num_correct = 0

for index, row in enumerate(tqdm(test_dataset, total=total)):
    natural_language_task = row["nl"]
    model_output = translate(natural_language_task)
    model_command = parse_bash(model_output)
    # The row's position in the test set is passed as the submission index.
    num_correct += submit_command(
        index=index,
        command=model_command,
        eval_mode="embed",
        eval_param=0.75,
    )

print(f"Model Accuracy: {num_correct / total:0.2f}")