In [1]:
from utils import wholesale_solve, test_primitives, iterative_solve, build_and_eval, is_executable_asp

import os
from llm import LLM

# Pull the OpenAI key from the environment (None when the variable is unset)
# and hand it to the project's LLM wrapper.
api_key = os.environ.get("OPENAI_API_KEY")
llm = LLM(api_key)

# Folder whose numbered sub-directories are the test-case instances.
directory_path = "instances"

# Per-instance reports are written here; creating it is a no-op if it exists.
os.makedirs("test_run", exist_ok=True)


### Define test case instances

In [None]:
# Instance numbers selected for this run.
# NOTE(review): this unexecuted cell is identical to the cell right after it;
# one of the two can probably be dropped.
instances_idxs = [
    1,
    7,
    10,
    16,
]
# How many instances were selected.
n_instances = len(instances_idxs)

In [2]:
# Instance numbers to evaluate, and how many of them there are.
instances_idxs = [1, 7, 10, 16]
n_instances = len(instances_idxs)

### Run test via wholesale solve

In [3]:
# Keep this cell runnable on its own: make sure the report folder is present.
os.makedirs("test_run", exist_ok=True)

# One (instance path, extracted primitives text) pair is collected per instance.
input_primitives = []

for instance in instances_idxs:
    # Instance folders are named by their number inside directory_path.
    instance_to_run = f"{directory_path}/{instance}"

    print(f"--- Instance {instance} ---")
    print(instance_to_run)

    # Finds primitives in the examples (everything except the test input).
    primitives_text = wholesale_solve(instance=instance_to_run)
    input_primitives.append((instance_to_run, primitives_text))

    print(primitives_text)
    print("\n")

--- Instance 1 ---
instances/1
Results for grid_3.lp:
Input grid:
 diagonal_line(input,start(0,1),end(1,2),color(gray),length(2)) diagonal_line(input,start(2,1),end(1,2),color(gray),length(2)) grid_size(input,3,3)

Output grid:
 grid_size(output,3,3) horizontal_line(output,start(0,0),end(0,2),color(yellow),length(3)) horizontal_line(output,start(1,0),end(1,2),color(green),length(3)) horizontal_line(output,start(2,0),end(2,2),color(yellow),length(3))


Results for grid_2.lp:
Input grid:
 diagonal_line(input,start(0,0),end(1,1),color(gray),length(2)) diagonal_line(input,start(2,0),end(1,1),color(gray),length(2)) grid_size(input,3,3)

Output grid:
 grid_size(output,3,3) horizontal_line(output,start(0,0),end(0,2),color(red),length(3)) horizontal_line(output,start(1,0),end(1,2),color(yellow),length(3)) horizontal_line(output,start(2,0),end(2,2),color(red),length(3))


Results for grid_1.lp:
Input grid:
 grid_size(input,3,3) vertical_line(input,start(0,2),end(2,2),color(gray),length(3))

Out

In [4]:
# Number of instances whose predicted test output matched the expected grid.
n_correct = 0

# Run the whole pipeline once per collected instance. Each entry of
# input_primitives is an (instance path, example primitives text) pair.
for instance_primitives in input_primitives:

    instance_path, primitives = instance_primitives

    # total_txt accumulates a step-by-step report that is saved per instance.
    total_txt = "STEP 1: Extracted Primitives\n\n"
    print(f"--- Instance {instance_path} ---")

    total_txt += instance_path + "\n" + primitives + "\n"
    print("\n")

    ######## First LLM call to predict transformation rules from example primitives ########

    rule_explanation = llm.call("wholesale_pass", primitives=primitives, track_usage=False)
    total_txt += "STEP 2: Extracted Transformation Rules\n\n" + rule_explanation + "\n"
    print(f'Transformation rules predicted for examples:{rule_explanation}\n')

    ######## Clingo run to retrieve test grid primitives ########

    test_instance_path = os.path.join(instance_path, "grid_test.lp")
    test_input_primitives = test_primitives(test_instance_path)  # primitives found in the test input

    total_txt += "STEP 3: Retrieved Test Input Primitives\n\n" + test_input_primitives + "\n"
    print('-- Primitives found in test input --')

    ######## Second LLM call to predict test output from test input primitives and predicted rules ########

    test_pass = llm.call(
        "test_pass",
        rule_explanation=rule_explanation,
        test_input_primitives=test_input_primitives,
        track_usage=False,
    )

    total_txt += "STEP 4: Predicted Test Output\n\n" + test_pass + "\n"

    ######## Third LLM call to translate final grid prediction to ASP code primitives ########

    output_grid_primitives = llm.call(
        "translate_to_asp",
        test_input_primitives=test_input_primitives,
        test_output_prediction=test_pass,
        track_usage=False,
    )

    total_txt += "STEP 5: Translated Test Output to ASP Code Primitives\n\n" + output_grid_primitives + "\n"

    ######## Verify ASP code is executable ASP, re-run if not ########

    outcome = is_executable_asp(output_grid_primitives)

    # Any value other than True is forwarded to the LLM as the error message,
    # so compare against True explicitly instead of relying on truthiness
    # (a non-empty error value would otherwise count as success).
    if outcome == True:
        print("The ASP code is executable and produces output.")
    else:
        print("Not executable:")
        # Show the reported error so the label above is not left dangling.
        print(outcome)
        revised_asp = llm.call(
            "fix_asp_code",
            error_message=outcome,
            test_input_primitives=test_input_primitives,
            test_output_prediction=test_pass,
            track_usage=False,
        )

        print(f'Revised ASP code:\n{revised_asp}')
        if is_executable_asp(revised_asp) == True:
            print("The revised ASP code is now executable and produces output.")
        else:
            print("The revised ASP code is still not executable.")
            print(f"Here are the attempted primitives:\n{revised_asp}")

        # Only one repair attempt is made; the revised code is evaluated below
        # whether or not it turned out to be executable.
        output_grid_primitives = revised_asp
    total_txt += "STEP 6: Verified/Corrected ASP Code Primitives\n\n" + output_grid_primitives + "\n"

    ######## Construct complete output grid from primitives, evaluate correctness ########

    # Build the predicted grid and compare it with the expected output stored
    # in the test instance file.
    result = build_and_eval(prediction=output_grid_primitives, instance=test_instance_path)

    if result == 'correct_grid':
        print("The predicted output grid matches the actual output grid. Success!")
        n_correct += 1
    else:
        print("The predicted output grid does not match the actual output grid. Failure.")
        print(f"Here are the attempted primitives:\n{output_grid_primitives}")
        print(f"Here are the errors in the output:\n{result}")

    # Format rather than concatenate so a non-string result cannot raise a
    # TypeError right before the report is saved.
    total_txt += f"STEP 7: Evaluation of Predicted Output Grid\n\n{result}\n"

    # Save the report. UTF-8 is requested explicitly because the LLM text may
    # contain non-ASCII characters that the platform default encoding rejects.
    instance_name = os.path.basename(instance_path)
    output_file = os.path.join("test_run", f"{instance_name}.txt")
    with open(output_file, 'w', encoding="utf-8") as f:
        f.write(total_txt)
    print(f"Saved results to: {output_file}")

# The denominator is the number of instances actually processed by this loop,
# so the score stays right even if the instance list was edited without
# re-running the extraction cell.
print(f"Final Score: {n_correct}/{len(input_primitives)} correct.")

--- Instance instances/1 ---


Transformation rules predicted for examples:Consistent rule (applies to all examples)

- Grid size is preserved (output grid_size = input grid_size).
- For each row r, find the input gray cell in that row (the diagonal/vertical gray cell present in that row). Let c be its column index (0..2).
- Map column -> color: 0 -> red, 1 -> yellow, 2 -> green.
- Create a horizontal_line in the output covering the whole row r (start(r,0) to end(r,2)) colored by the mapped color.
- Special case: if every row maps to the same color (i.e., all gray cells are in the same column), instead of three horizontal lines the output is a filled square of that color (square top_left(0,0) bottom_right(2,2)).

Primitives produced in outputs
- grid_size(output,3,3)
- For general case: horizontal_line(output,start(r,0),end(r,2),color(<mapped>),length(3)) for r=0..2
- If all rows share the same mapped color: square(output,top_left_corner(0,0),bottom_right_corner(2,2),size(3),color(<map