In [24]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Configuration

In [60]:
datasets = ["rico", "publaynet"]  # choices
tasks = ["gent", "gents", "genr", "completion", "refinement"]
dataset = datasets[0]
task = tasks[0]
input_format = "seq"
output_format = "html"
add_unk_token = False
add_index_token = True
add_sep_token = True
candidate_size = -1  # -1 represents the complete training set
num_prompt = 2

# Process raw data

In [None]:
import os

from preprocess import create_processor
from utils import RAW_DATA_PATH, read_pt, write_pt
from tqdm import tqdm


processor = create_processor(dataset=dataset, task=task)
base_dir = os.path.dirname(os.getcwd())


def get_processed_data(split):
    filename = os.path.join(
        base_dir, "dataset", dataset, "processed", task, f"{split}.pt"
    )
    if os.path.exists(filename):
        processed_data = read_pt(filename)
    else:
        processed_data = []
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        raw_path = os.path.join(RAW_DATA_PATH(dataset), f"{split}.pt")
        raw_data = read_pt(raw_path)
        for rd in tqdm(raw_data, desc=f"{split} data processing..."):
            processed_data.append(processor(rd))
        write_pt(filename, processed_data)
    return processed_data


processed_train_data = get_processed_data("train")
processed_val_data = get_processed_data("val")
processed_test_data = get_processed_data("test")

# Dynamic exemplar selection

In [62]:
from selection import create_selector

selector = create_selector(
    task=task,
    train_data=processed_train_data,
    candidate_size=candidate_size,
    num_prompt=num_prompt,
)

test_idx = 0
exemplars = selector(processed_test_data[test_idx])

# Input-output serialization

In [63]:
from serialization import create_serializer, build_prompt

serializer = create_serializer(
    dataset=dataset,
    task=task,
    input_format=input_format,
    output_format=output_format,
    add_index_token=add_index_token,
    add_sep_token=add_sep_token,
    add_unk_token=add_unk_token
)
prompt = build_prompt(serializer, exemplars, processed_test_data[test_idx], dataset)

In [64]:
print(prompt)

Please generate a layout based on the given Image. The task that finds each element in the corresponding image and makes it into an HTML structure.You need to ensure that the generated layout looks realistic, with elements well aligned and avoiding unnecessary overlap.
Task Description: generation conditioned on given element types
Layout Domain: android layout
Canvas Size: canvas width is 90px, canvas height is 160px

Element Type Constraint: icon 0 | icon 1 | icon 2 | image 3 | image 4 | text 5 | text button 6 | text button 7 | video 8
<html>
<body>
<div class="canvas" style="left: 0px; top: 0px; width: 90px; height: 160px"></div>
<div class="icon" style="index: 0; left: 2px; top: 8px; width: 5px; height: 5px"></div>
<div class="icon" style="index: 1; left: 74px; top: 118px; width: 12px; height: 12px"></div>
<div class="icon" style="index: 2; left: 74px; top: 133px; width: 12px; height: 12px"></div>
<div class="image" style="index: 3; left: 0px; top: 5px; width: 90px; height: 12px"><

# Call GPT

In [None]:
model = "gpt-3.5-turbo-instruct"
temperature = 0.7
max_tokens = 800
top_p = 1
frequency_penalty = 0
presence_penalty = 0
num_return = 3
stop_token = "\n\n"
os.environ["OPENAI_API_KEY"] = ""

In [None]:
from openai import OpenAI


client = OpenAI()

response = client.completions.create(
    model=model,
    prompt=prompt,
    temperature=temperature,
    max_tokens=max_tokens,
    top_p=top_p,
    frequency_penalty=frequency_penalty,
    presence_penalty=presence_penalty,
    n=num_return,
    stop=[stop_token],
)

In [71]:
print(response.choices[0].text)

.


# Parsing

In [32]:
from parsing import Parser


parser = Parser(dataset=dataset, output_format=output_format)
parsed_response = parser(response)
print(f"filter {num_return - len(parsed_response)} invalid response")

filter 0 invalid response


# Layout ranking

In [33]:
from ranker import Ranker

val_path = os.path.join(RAW_DATA_PATH(dataset), "val.pt")
ranker = Ranker(val_path=val_path)
ranked_response = ranker(parsed_response)

ValueError: not enough values to unpack (expected 4, got 0)

In [38]:
parsed_response

[(tensor([]), tensor([])),
 (tensor([ 2,  2,  2,  2, 16,  1,  1,  5,  5]),
  tensor([[0.1333, 0.0312, 0.7333, 0.6562],
          [0.1333, 0.0312, 0.7333, 0.6562],
          [0.1333, 0.0312, 0.7333, 0.6562],
          [0.1333, 0.0312, 0.7333, 0.6562],
          [0.4333, 0.7375, 0.1333, 0.0125],
          [0.0000, 0.0375, 1.0000, 0.0625],
          [0.0000, 0.8625, 1.0000, 0.0625],
          [0.2889, 0.8875, 0.4444, 0.0625],
          [0.5333, 0.8875, 0.4444, 0.0625]])),
 (tensor([ 2,  2,  2,  2, 16,  1,  1,  5,  5]),
  tensor([[0.0000, 0.0000, 0.3333, 0.2500],
          [0.3333, 0.0000, 0.3333, 0.2500],
          [0.6667, 0.0000, 0.3333, 0.2500],
          [0.0000, 0.2500, 1.0000, 0.3750],
          [0.0000, 0.6250, 1.0000, 0.0312],
          [0.1111, 0.6875, 0.7778, 0.0312],
          [0.1111, 0.7500, 0.7778, 0.0312],
          [0.1111, 0.8125, 0.7778, 0.0938],
          [0.1111, 0.9375, 0.7778, 0.0625]])),
 (tensor([]), tensor([])),
 (tensor([ 2,  2,  2,  2, 16,  1,  1,  5,  5]),
  te

# Visualization

In [37]:
from visualization import Visualizer, create_image_grid


visualizer = Visualizer(dataset)
images = visualizer(ranked_response)
create_image_grid(images)

NameError: name 'ranked_response' is not defined