In [1]:
import os
import json

In [2]:
from syncode import Syncode
import warnings
# Silence all Python warnings for the rest of the notebook session.
warnings.filterwarnings('ignore')

model_name = "/data/share/models/hugging_face/models--Qwen--Qwen2.5-Coder-7B/snapshots/4c1c1611f30619a8695cf6d44b492a25c52b6f00/"

# Read the grammar text; the context manager closes the file even if the
# read raises, which the manual open()/close() pair did not guarantee.
with open("invariants.lark", "r") as grammar_file:
    grammar = grammar_file.read()

# Load the Syncode augmented model
syn_llm = Syncode(model = model_name, mode='grammar_mask', grammar=grammar, parse_output_only=True, log_level=2)

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 4/4 [00:00<00:00, 10.88it/s]


Creating DFA mask store for Qwen2TokenizerFast and custom, may take more than 10 minutes. Caching at /home/adharsh/repos/syncode/loopy_expts/cache/mask_stores/Qwen2TokenizerFast/grammar_mask_3902411203_151643.pkl.
Ignore whitespace tokens is False


100%|██████████| 112/112 [04:34<00:00,  2.45s/it]


In [18]:
# Prompt template sent to the model for every benchmark; the single `{code}`
# placeholder is filled in with str.format(). The backslash in `\\at` is
# escaped on purpose: in a regular (non-raw) string literal `\a` is the BEL
# control character, which would corrupt the rule about the \at notation.
prompt = """You are a helpful AI software assistant that reasons about how code behaves. Given a program, you can find loop invariants, which can then be used to verify some property in the program. 
Frama-C is a software verification tool for C programs. The input to Frama-C is a C program file with ACSL (ANSI/ISO C Specification Language) annotations.
For the given program, find the necessary loop invariants of the while loop to help Frama-C verify the post-condition.

Instructions:
- Make a note of the pre-conditions or variable assignments in the program.
- Analyze the loop body and make a note of the loop condition. 
- Output loop invariants that are true 
(i) before the loop execution, 
(ii) in every iteration of the loop and 
(iii) after the loop termination, 
such that the loop invariants imply the post condition.
- If a loop invariant is a conjunction, split it into its parts.
- Output all the loop invariants in one code block. For example:
```
/*@ 
    loop invariant i1;
    loop invariant i2;
*/
```
Rules:
- **Do not use variables or functions that are not declared in the program.** 
- **Do not make any assumptions about functions whose definitions are not given.**
- **All undefined variables contain garbage values. Do not use variables that have garbage values.**
- **Do not use keywords that are not supported in ACSL annotations for loops.**
- **Variables that are not explicitly initialized, could have garbage values. Do not make any assumptions about such values.**
- **Do not use the \\at(x, Pre) notation for any variable x.**
- **Do not use non-deterministic function calls.**

Consider the following C program:
```
{code}
```

You are allowed to use implication to take care of the conditional nature of the code. Use implication (==>) instead of using if-then.

For all variables, add conjunctions that bound the maximum and minimum values that they can take, if such bounds exist.

If a variable is always equal to or smaller or larger than another variable, add a conjunction for their relation.

If the assertion is guarded by a condition, use the guard condition in an implication.

If certain variables are non-deterministic at the beginning or end of the loop, use an implication to make the invariant trivially true at that location. 

Output the loop invariants for the loop in the program above. Let's think step by step.
"""

In [19]:
# Build one record per benchmark program (files 1.c through 25.c), keeping
# the file path next to the program text with its first line removed.
benchmarks = []
for index in range(1, 26):
    source_path = f"dataset/code2inv/{index}.c"
    with open(source_path) as source_file:
        # Everything after the first newline, i.e. the file minus its first line.
        program_text = source_file.read().partition("\n")[2]
        benchmarks.append({"file": source_path, "code": program_text})

In [5]:
# Spot-check the first loaded record: its "file" path and its "code" text.
benchmarks[0]

{'file': 'dataset/code2inv/1.c',
 'code': '#include <stdlib.h>\n#define assume(e) if(!(e)) exit(-1);\n\nint main() {\n  \n  int x;\n  int y;\n  \n  (x = 1);\n  (y = 0);\n  \n  while ((y < 100000)) {\n    {\n    (x  = (x + y));\n    (y  = (y + 1));\n    }\n\n  }\n  \n{;\n//@ assert( (x >= y) );\n}\n\n}'}

In [11]:
# Sanity check: is the code text of the second and third benchmarks identical?
benchmarks[1]["code"] == benchmarks[2]["code"]

False

In [6]:
# Create the output directory if it is missing. exist_ok=True makes this
# idempotent without the separate exists() check, which could race with
# another process creating the directory in between.
# NOTE(review): the final save cell writes under "dataset/results", not
# "results" — confirm which directory is intended.
os.makedirs("results", exist_ok=True)

In [17]:
from copy import deepcopy

In [20]:
# Fill the prompt template with each benchmark's code, run the model on it,
# and store the result on the benchmark record under "invariants".
# Iterating over `benchmarks` directly keeps this loop in step with however
# many programs were loaded instead of repeating the dataset size here.
for i, benchmark in enumerate(benchmarks, start=1):
    print(f"Processing {i}.c")
    # str.format returns a new string and leaves the template untouched, so
    # no copy of the (immutable) template is needed beforehand.
    filled_prompt = prompt.format(code=benchmark["code"])
    output = syn_llm.infer(filled_prompt, stop_words=None)
    benchmark["invariants"] = output

Processing 1.c
Processing 2.c
Processing 3.c
Processing 4.c
Processing 5.c
Processing 6.c
Processing 7.c
Processing 8.c
Processing 9.c
Processing 10.c
Processing 11.c
Processing 12.c
Processing 13.c
Processing 14.c
Processing 15.c
Processing 16.c
Processing 17.c
Processing 18.c
Processing 19.c
Processing 20.c
Processing 21.c
Processing 22.c
Processing 23.c
Processing 24.c
Processing 25.c


In [21]:
import datetime

# Name the output file after the current local time so repeated runs do not
# overwrite each other; the ".json" suffix is part of the format string.
timestamp = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S.json")

# Make sure the target directory exists before writing: nothing else in the
# notebook creates "dataset/results", and open(..., "w") does not create
# missing parent directories, so the results would be lost after inference.
results_dir = "dataset/results"
os.makedirs(results_dir, exist_ok=True)

# Persist every benchmark record as pretty-printed JSON.
with open(f"{results_dir}/{timestamp}", "w") as f:
    json.dump(benchmarks, f, indent=4)