In [None]:
!pip install openai
!pip install slither-analyzer
!solc-select install latest
!solc-select use 0.8.25
!pip install mythril

Collecting eth-hash[pycryptodome]>=0.5.1 (from web3>=6.0.0->slither-analyzer)
  Using cached eth_hash-0.7.0-py3-none-any.whl (8.7 kB)
Installing collected packages: eth-hash
  Attempting uninstall: eth-hash
    Found existing installation: eth-hash 0.3.3
    Uninstalling eth-hash-0.3.3:
      Successfully uninstalled eth-hash-0.3.3
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
eth-bloom 1.0.4 requires eth-hash[pycryptodome]<0.4.0,>=0.3.1, but you have eth-hash 0.7.0 which is incompatible.
mythril 0.24.8 requires eth-hash<0.4.0,>=0.3.1, but you have eth-hash 0.7.0 which is incompatible.[0m[31m
[0mSuccessfully installed eth-hash-0.7.0
Installing solc '0.8.25'...
Version '0.8.25' installed.
Switched global version to 0.8.25
Collecting eth-hash<0.4.0,>=0.3.1 (from mythril)
  Using cached eth_hash-0.3.3-py3-none-any.whl (8.9 kB)
Installing collected packag

In [None]:
# Mount Google Drive at /content/drive; the CSV inputs and the result folders used
# by the later cells live underneath this mount point.
from google.colab import drive

drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import os.path
from getpass import getpass

import pandas as pd
from openai import OpenAI

In [None]:
# Never paste the API key into the notebook: take it from the OPENAI_API_KEY
# environment variable, and fall back to an interactive prompt (input is not echoed
# and is not stored in the notebook) when the variable is unset or empty.
PRIVATE_KEY = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")
client = OpenAI(
    api_key=PRIVATE_KEY,
)

In [None]:
# helper functions to write and read files
def write_to_file(filename, content):
    """Create `filename` (or overwrite it) so that it holds exactly `content`.

    Args:
        filename: path of the file to write.
        content: text to store in the file.
    """
    with open(filename, mode="w") as handle:
        handle.write(content)
def read_file(filename):
    """Return the text of `filename` flattened onto a single line.

    Trailing whitespace (including the newline) is removed from every line and the
    lines are then joined with a single space. The line structure of the file is
    therefore lost, so the result is not a faithful copy of multi-line source code.

    Args:
        filename: path of the file to read.

    Returns:
        The flattened text; an empty string for an empty file.
    """
    pieces = []
    with open(filename, "r") as handle:
        for raw_line in handle:
            pieces.append(raw_line.rstrip())
    return " ".join(pieces)

## Agent Using GPT-3.5

In [None]:
# load zero-shot contracts from GPT-3.5
gpt3_5 = pd.read_csv("/content/drive/MyDrive/ece473-final-project/ECE473 Final Project LLM Responses - GPT-3.5.csv")

total_trials = 10
total_prompts = 1
model = "gpt3_5"

# dictionary to keep track of the number of iterations through Slither and Mythril
num_iterations_slither = {}
num_iterations_mythril = {}

# the following is the prefix for all files generated
# filename = model + "_p" + prompt + "_t" + trial + "_" + iteration

# note: trials and prompts are 1-indexed
# note: zero-shot contracts are identified with the prompt and trial number

for j in range(total_prompts):
    prompt = j + 1
    num_iterations_slither[prompt] = []
    num_iterations_mythril[prompt] = []

    for i in range(total_trials):
        trial = i + 1
        iteration = 1
        prev_code = ""
        curr_code = gpt3_5["Trial Number " + str(trial)][prompt-1]

        # present model with current code
        trial_1_chat = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": "here is the current code: " + curr_code,
                }
            ],

            model="gpt-3.5-turbo",
        )

        # SLITHER
        # break out of the loop if the same code is generated
        while prev_code != curr_code:
            file_pre = model + "_p" + str(prompt) + "_t" + str(trial) + "_" + str(iteration)
            old_filename = file_pre + ".sol"
            slither_filename = "slither_" + file_pre + ".txt"

            # make sure that there is already a file for the current code to feed into Slither
            if (~os.path.isfile(old_filename)):
                write_to_file(old_filename, curr_code)

            # run Slither on current code
            !slither {old_filename} 2> {slither_filename}

            slither_output = read_file(slither_filename)

            # stop if Slither gives the same feedback twice in a row
            if iteration > 1:
                prev_filename = "slither_" + model + "_p" + str(prompt) + "_t" + str(trial) + "_" + str(iteration - 1) + ".txt"
                if (~os.path.isfile(prev_filename)):
                    prev_slither_output = read_file(prev_filename)
                    if slither_output == prev_slither_output:
                        break

            prev_code = curr_code

            # give the code back to GPT with comments to fix
            trial_1_chat = client.chat.completions.create(
                messages=[
                    {
                        "role": "user",
                        "content": "fix the code: " + curr_code + "according to these comments: " + slither_output + " and PLEASE RETURN ONLY THE CODE. Reminder, the code starts with '// SPDX-License-Identifier: MIT'.",
                    }
                ],

                model="gpt-3.5-turbo",
            )

            iteration+=1
            curr_code = trial_1_chat.choices[0].message.content

            # make sure there is code from the new output and trim the output for just the code if necessary
            start_index = curr_code.find("// SPDX")
            pragma_index = curr_code.find("pragma")

            if ((start_index == -1) and (pragma_index != -1)):
                 start_index = pragma_index

            # there is no code
            if (start_index == -1):
                # print("no code output at prompt %d, trial %d, iteration %d" % (prompt, trial, iteration))

                # this iteration does not have code, only count the last iteration (that has code)
                iteration -= 1;
                break
            else:
                # trim code from the front and back if necessary
                if (start_index != 0):
                    curr_code = curr_code[start_index:]

                markdown_index = curr_code.rfind("```")

                if (markdown_index != -1):
                    curr_code = curr_code[:markdown_index]

        # keep track of the number of iterations through Slither for this zero-shot contract
        num_iterations_slither[prompt].append(iteration)

        print("Prompt " + str(prompt) + " (Slither) iterations: " + str(iteration))


        # print("starting mythril on prompt %d, trial %d, iteration %d" % (prompt, trial, iteration))

        # make sure the model has the most recent verison of the contract
        curr_code = read_file(model + "_p" + str(prompt) + "_t" + str(trial) + "_" + str(iteration - 1) + ".sol")

        trial_1_chat = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": "here is the current code: " + curr_code,
                }
            ],

            model="gpt-3.5-turbo",

        )

        # MYTHRIL
        while prev_code != curr_code:
            file_pre = model + "_p" + str(prompt) + "_t" + str(trial) + "_" + str(iteration - 1)
            old_filename = file_pre + ".sol"
            mythril_filename = "mythril_" + file_pre + ".txt"

            # make sure that there is already a file for the current code to feed into Mythril
            if (~os.path.isfile(old_filename)):
                write_to_file(old_filename, curr_code)

            # run Mythril on current code
            !myth analyze {old_filename} 2> {mythril_filename}

            mythril_output = read_file(mythril_filename)

            # early stoppers for Mythril
            if mythril_output == "" or len(mythril_output) < 1:
                # print("no output from mythril at prompt  %d, trial %d, iteration %d" % (prompt, trial, iteration))
                break
            if (mythril_output == "The analysis was completed successfully. No issues were detected."):
                # print("no more errors found by mythril at iteration %d" % iteration);
                break;

            # stop if Mythril gives the same feedback twice in a row
            if iteration > num_iterations_slither[prompt][-1]:
                prev_filename = "mythril_" + model + "_p" + str(prompt) + "_t" + str(trial) + "_" + str(iteration - 1) + ".txt"
                if (~os.path.isfile(prev_filename)):
                    prev_mythril_output = read_file(prev_filename)
                    if mythril_output == prev_mythril_output:
                        break

            prev_code = curr_code

            # give the code back to GPT with comments to fix
            trial_1_chat = client.chat.completions.create(
                messages=[
                    {
                        "role": "user",
                        "content": "fix the code: " + curr_code + "according to these comments: " + mythril_output + " and PLEASE RETURN ONLY THE CODE. Reminder, the code starts with '// SPDX-License-Identifier: MIT'.",
                    }
                ],

                model="gpt-3.5-turbo",
            )

            iteration+=1
            curr_code = trial_1_chat.choices[0].message.content

            # make sure there is code from the new output and trim the output for just the code if necessary
            start_index = curr_code.find("// SPDX")
            pragma_index = curr_code.find("pragma")

            if ((start_index == -1) and (pragma_index != -1)):
                 start_index = pragma_index

            if (start_index == -1):
                # print("no code output at prompt %d, trial %d, iteration %d" % (prompt, trial, iteration))
                iteration -= 1
                break
            else:
                # trim code from front and back if necessary
                if (start_index != 0):
                    curr_code = curr_code[start_index:]

                markdown_index = curr_code.rfind("```")

                if (markdown_index != -1):
                    curr_code = curr_code[:markdown_index]

        print("Prompt " + str(prompt) + " (Slither and Mythril) iterations: " + str(iteration))

        num_iterations_mythril[prompt].append(iteration)
        i+=1
        # print("next trial")


print(num_iterations_slither)
print(num_iterations_mythril)

!cp /content/gpt* drive/MyDrive/ece473-final-project/round2-gpt3_5
!cp /content/slither* drive/MyDrive/ece473-final-project/round2-gpt3_5
!cp /content/mythril* drive/MyDrive/ece473-final-project/round2-gpt3_5


## Agent Using GPT-4

In [None]:
# load zero-shot contracts from GPT4
gpt4 = pd.read_csv("/content/drive/MyDrive/ece473-final-project/ECE473 Final Project LLM Responses - GPT-4.csv")

total_trials = 10
total_prompts = 11
model = "gpt4"

# dictionary to keep track of the number of iterations through Slither and Mythril
num_iterations_slither = {}

# this includes both slither and mythril
num_iterations_mythril = {}

# the following is the prefix for all files generated
# filename = model + "_p" + prompt + "_t" + trial + "_" + iteration

# note: trials and prompts are 1-indexed
# note: zero-shot contracts are identified with the prompt and trial number
# note: it is costly and time consuming to run all 11 trials at once. change the number of prompts if needed

for j in range(total_prompts):
    prompt = j + 1
    num_iterations_slither[prompt] = []
    num_iterations_mythril[prompt] = []

    for i in range(total_trials):
        trial = i + 1
        iteration = 1
        prev_code = ""
        curr_code = gpt4["Trial Number " + str(trial)][prompt-1]

        # present model with the zero-shot code
        trial_1_chat = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": "here is the current code: " + curr_code,
                }
            ],

            model="gpt-4",
        )

        # SLITHER
        while prev_code != curr_code:
            file_pre = model + "_p" + str(prompt) + "_t" + str(trial) + "_" + str(iteration)
            old_filename = file_pre + ".sol"
            slither_filename = "slither_" + file_pre + ".txt"
            if (~os.path.isfile(old_filename)):
                write_to_file(old_filename, curr_code)

            # run slither on current code
            !slither {old_filename} 2> {slither_filename}

            slither_output = read_file(slither_filename)

            # stop if Slither gives the same feedback twice in a row
            if iteration > 1:
                prev_filename = "slither_" + model + "_p" + str(prompt) + "_t" + str(trial) + "_" + str(iteration - 1) + ".txt"
                if (~os.path.isfile(prev_filename)):
                    prev_slither_output = read_file(prev_filename)
                    if slither_output == prev_slither_output:
                        break

            prev_code = curr_code

            # give the code back to GPT with comments to fix
            trial_1_chat = client.chat.completions.create(
                messages=[
                    {
                        "role": "user",
                        "content": "fix the code: " + curr_code + "according to these comments: " + slither_output + " and PLEASE RETURN ONLY THE CODE. Reminder, the code starts with '// SPDX-License-Identifier: MIT'.",
                    }
                ],

                model="gpt-4",
            )
            iteration+=1
            curr_code = trial_1_chat.choices[0].message.content

            # make sure there is code from the new output
            start_index = curr_code.find("// SPDX")
            pragma_index = curr_code.find("pragma")

            if ((start_index == -1) and (pragma_index != -1)):
                 start_index = pragma_index

            if (start_index == -1):
                # print("no code output at prompt %d, trial %d, iteration %d" % (prompt, trial, iteration))

                # this iteration does not have code, only count the last iteration (that has code)
                iteration -= 1;
                break
            else:
                if (start_index != 0):
                    curr_code = curr_code[start_index:]

                markdown_index = curr_code.rfind("```")

                if (markdown_index != -1):
                    curr_code = curr_code[:markdown_index]

                if iteration == 15:
                  # print("iteration upper bound reached for Slither")
                  break

        num_iterations_slither[prompt].append(iteration)
        print("Prompt " + str(prompt) + " (Slither) iterations: " + str(iteration))

        # print("starting mythril on prompt %d, trial %d, iteration %d" % (prompt, trial, iteration))

        curr_code = read_file(model + "_p" + str(prompt) + "_t" + str(trial) + "_" + str(iteration - 1) + ".sol")

        trial_1_chat = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": "here is the current code: " + curr_code,
                }
            ],

            model="gpt-4",

        )

        # MYTHRIL
        while prev_code != curr_code:
            # print("mythril prompt %d, trial %d, iteration %d" % (prompt, trial, iteration))
            file_pre = model + "_p" + str(prompt) + "_t" + str(trial) + "_" + str(iteration - 1)
            old_filename = file_pre + ".sol"
            mythril_filename = "mythril_" + file_pre + ".txt"
            if (~os.path.isfile(old_filename)):
                write_to_file(old_filename, curr_code)

            !myth analyze {old_filename} 2> {mythril_filename}

            mythril_output = read_file(mythril_filename)

            if mythril_output == "" or len(mythril_output) < 1:
                # print("no output from mythril at prompt  %d, trial %d, iteration %d" % (prompt, trial, iteration))
                break
            if (mythril_output == "The analysis was completed successfully. No issues were detected."):
                # print("no more errors found by myrthil at iteration %d" % iteration);
                break;

            # stop if Mythril gives the same feedback twice in a row
            if iteration > num_iterations_slither[prompt][-1]:
                prev_filename = "mythril_" + model + "_p" + str(prompt) + "_t" + str(trial) + "_" + str(iteration - 1) + ".txt"
                if (~os.path.isfile(prev_filename)):
                    prev_mythril_output = read_file(prev_filename)
                    if mythril_output == prev_mythril_output:
                        break

            prev_code = curr_code

            trial_1_chat = client.chat.completions.create(
                messages=[
                    {
                        "role": "user",
                        "content": "fix the code: " + curr_code + "according to these comments: " + mythril_output + " and PLEASE RETURN ONLY THE CODE. Reminder, the code starts with '// SPDX-License-Identifier: MIT'.",
                    }
                ],

                model="gpt-4",
            )

            iteration+=1
            curr_code = trial_1_chat.choices[0].message.content

            # make sure there is code from the new output and trim the output if necessary
            start_index = curr_code.find("// SPDX")
            pragma_index = curr_code.find("pragma")

            if ((start_index == -1) and (pragma_index != -1)):
                 start_index = pragma_index

            if (start_index == -1):
                # print("no code output at prompt %d, trial %d, iteration %d" % (prompt, trial, iteration))

                # this iteration does not have code, only count the last iteration (that has code)
                iteration -= 1
                break
            else:
                # trim from the front and back for the code if necessary
                if (start_index != 0):
                    curr_code = curr_code[start_index:]

                markdown_index = curr_code.rfind("```")

                if (markdown_index != -1):
                    curr_code = curr_code[:markdown_index]

                # if max iterations reached, stop
                if iteration == 15:
                    break

        num_iterations_mythril[prompt].append(iteration)
        print("Prompt " + str(prompt) + " (Slither and Mythril) iterations: " + str(iteration))
        i+=1

print(num_iterations_slither)
print(num_iterations_mythril)

!cp /content/gpt* drive/MyDrive/ece473-final-project/round2-gpt4
!cp /content/slither* drive/MyDrive/ece473-final-project/round2-gpt4
!cp /content/mythril* drive/MyDrive/ece473-final-project/round2-gpt4


Prompt 1 (Slither) iterations: 6
Prompt 1 (Mythril) iterations: 6
Prompt 1 (Slither) iterations: 9
The analysis was completed successfully. No issues were detected.

Prompt 1 (Mythril) iterations: 10
Prompt 1 (Slither) iterations: 14
Prompt 1 (Mythril) iterations: 15
Prompt 1 (Slither) iterations: 6
The analysis was completed successfully. No issues were detected.

Prompt 1 (Mythril) iterations: 8
Prompt 1 (Slither) iterations: 15
The analysis was completed successfully. No issues were detected.

Prompt 1 (Mythril) iterations: 16
Prompt 1 (Slither) iterations: 4
Prompt 1 (Mythril) iterations: 4
Prompt 1 (Slither) iterations: 6
The analysis was completed successfully. No issues were detected.

Prompt 1 (Mythril) iterations: 7
Prompt 1 (Slither) iterations: 6
Prompt 1 (Mythril) iterations: 15
Prompt 1 (Slither) iterations: 7
Prompt 1 (Mythril) iterations: 7
Prompt 1 (Slither) iterations: 15
The analysis was completed successfully. No issues were detected.

Prompt 1 (Mythril) iterations: 