<h1>Step 1: Load the dataset and send requests to GPT</h1>

In [None]:
import os
import re
import json
import time
import requests

# Load the prompt text once; send_to_chat_api sends it as the "system" message.
file_path = 'prompt.md'
with open(file_path, mode='r', encoding='utf-8') as file:
    file_content = file.read()

# Required configuration: define the chat endpoint URL and the request
# headers below before running the cells that call send_to_chat_api.
# chat_api_url = 
# chat_headers = {}

# Risk labels in canonical order: the position of a label here is its index
# in every 0/1 "Real Ans Array" / "GPT Response Array" built below.
risk_types = [
    'fee',
    'disabletrading',
    'blacklist',
    'reflect',
    'maxtx',
    'mint',
    'honeypot',
    'reward',
    'rebase',
    'maxsell',
]

# Function to fetch data from the data source API
def fetch_data(key, timeout=30):
    """Fetch the record for ``key`` and encode its risk labels.

    Args:
        key: Value sent as the ``key`` query parameter of the data source.
        timeout: Seconds ``requests.get`` may wait before the request is
            treated as failed. Without it a stalled server would hang the
            whole run.

    Returns:
        ``(key, risk_types_list, risk_presence_array, source_code)`` where
        ``risk_types_list`` holds the lower-cased known risk labels in the
        order they appear in the record and ``risk_presence_array`` is a
        0/1 list aligned with ``risk_types``. Returns ``None`` when the
        request fails, the body is not valid JSON, or the record has no
        ``sourceCode`` / ``risk`` field.
    """
    url = f"http://192.168.41.45:8081/data/intent?key={key}"
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # response.json() can raise ValueError for a non-JSON body.
        print(f"Request failed for key {key}: {e}\n")
        return None

    if not isinstance(data, dict) or 'sourceCode' not in data or 'risk' not in data:
        return None

    print(f"Key {key}:")
    risk_presence = {rtype: 0 for rtype in risk_types}
    risk_types_list = []
    for risk in data['risk'] or []:
        risk_type = risk.get('type') if isinstance(risk, dict) else None
        if not isinstance(risk_type, str):
            # Skip malformed entries instead of aborting the whole run.
            continue
        risk_type_lower = risk_type.lower()
        if risk_type_lower in risk_presence:
            risk_presence[risk_type_lower] = 1
            risk_types_list.append(risk_type_lower)

    risk_presence_array = [risk_presence[rtype] for rtype in risk_types]
    print("Real Ans:", risk_types_list)
    print("Real Ans Array:", risk_presence_array)
    return key, risk_types_list, risk_presence_array, data['sourceCode']

# Send the request to GPT
def send_to_chat_api(source_code, timeout=120):
    """Ask the chat API which risk types are present in ``source_code``.

    Args:
        source_code: Text sent as the "user" message; the prompt file
            content is sent as the "system" message.
        timeout: Seconds ``requests.post`` may wait before the request is
            treated as failed.

    Returns:
        ``(response_data, intents, risk_types_array)``: the decoded JSON
        body, the intents extracted from the model's answer, and a 0/1 list
        aligned with ``risk_types``. On any failure the error is printed
        and ``({}, [], all-zero list)`` is returned, so the caller can
        recognise the record later by its empty "GPT JSON".
    """
    data = {
        "provider": "openai",
        "model": "gpt-4o-mini",
        "prompts": [
            {
                "role": "system",
                "content": file_content
            },
            {
                "role": "user",
                "content": source_code
            }
        ],
        "stream": False,
        "top": 0.5,
        "maxLength": 4096
    }

    try:
        response = requests.post(chat_api_url, headers=chat_headers, json=data, timeout=timeout)
        response.raise_for_status()  # Raise an HTTPError for 4xx/5xx status codes
        response_data = response.json()
        print(f"GPT JSON: {response_data}")

        if response_data.get('status') != 1:
            error_msg = response_data.get('msg', 'Unknown error')
            raise RuntimeError(f"Error in GPT response: {error_msg}")

        content = response_data['data']['content']
        # Remove ```json\n and ``` so a fenced answer still parses as JSON
        content = re.sub(r'```json\n|```', '', content)
        content_json = json.loads(content)  # Convert string to dictionary or list

        if isinstance(content_json, dict) and 'intents' in content_json:
            intents = content_json['intents']  # Get the 'intents' list
        elif isinstance(content_json, list):
            intents = content_json
        else:
            intents = []

        print(f"GPT Response: {intents}")

        # Compare case-insensitively; ignore non-string entries rather than
        # discarding the whole answer because of one bad element.
        intents_lower = {intent.lower() for intent in intents if isinstance(intent, str)}
        risk_types_array = [1 if rtype in intents_lower else 0 for rtype in risk_types]

        print(f"GPT Response Array: {risk_types_array}")
        return response_data, intents, risk_types_array
    except Exception as e:
        # Deliberately broad: one bad response must not stop a long batch run.
        print(f"Request failed: {e}")
        intents = []
        # Sized from risk_types so the fallback stays aligned if labels change.
        risk_types_array = [0] * len(risk_types)
        print(f"GPT Response: {intents}")
        print(f"GPT Response Array: {risk_types_array}")
        return {}, intents, risk_types_array

# Save the result of a single key to a JSON file
def save_to_json(key, real_ans, real_ans_array, gpt_json, gpt_response, gpt_response_array):
    """Write one key's ground truth and GPT answer to ``data_key_<key>.json``.

    The file is created in the current working directory and overwritten
    if it already exists. Returns the dictionary that was written.
    """
    field_names = (
        "Key",
        "Real Ans",
        "Real Ans Array",
        "GPT JSON",
        "GPT Response",
        "GPT Response Array",
    )
    field_values = (key, real_ans, real_ans_array, gpt_json, gpt_response, gpt_response_array)
    result = dict(zip(field_names, field_values))

    filename = f'data_key_{key}.json'
    with open(filename, 'w', encoding='utf-8') as json_file:
        json.dump(result, json_file, ensure_ascii=False, indent=4)

    print(f"Data for key {key} saved to {filename}\n\n")
    return result

# Progress bar
def print_progress_bar(iteration, total, length=50):
    """Redraw a one-line text progress bar on stdout.

    ``iteration`` out of ``total`` steps are shown as filled cells in a bar
    ``length`` characters wide, followed by the percentage with one decimal.
    The line ends with a carriage return so the next call overwrites it; a
    newline is printed once ``iteration`` equals ``total``.
    """
    pct_text = f"{100 * (iteration / float(total)):.1f}"
    done_cells = int(length * iteration // total)
    gauge = ''.join(('█' * done_cells, '-' * (length - done_cells)))
    print(f'\rProgress: |{gauge}| {pct_text}% Complete', end='\r')
    if iteration == total:
        print()

# Main program
# Walk keys upward from 20000 until target_count records have been saved.
# Keys for which fetch_data returns None (request failure or missing fields)
# are skipped without being retried.
key = 20000
all_data = []
target_count = 10000

# NOTE(review): this loop has no upper bound on key. If the data source is
# unreachable or holds fewer than target_count usable records it never ends.
while len(all_data) < target_count:
    result = fetch_data(key)
    if result is not None:
        key, real_ans, real_ans_array, source_code = result
        # Send sourceCode (JSON-encoded) to the chat API and get the risk type array
        gpt_json, gpt_response, gpt_response_array = send_to_chat_api(json.dumps(source_code))
        record = save_to_json(key, real_ans, real_ans_array, gpt_json, gpt_response, gpt_response_array)
        all_data.append(record)
        # Progress only changes when a record was saved, so redraw only then.
        print_progress_bar(len(all_data), target_count)
    key += 1

<h1>Step 2: Merge all JSON files</h1>

In [None]:
def load_and_aggregate_data(start_key, num_files, output_file, max_missing_streak=1000):
    """Merge per-key ``data_key_<key>.json`` files into one JSON list.

    Keys are scanned upward from ``start_key``. Missing files are skipped
    until ``num_files`` files have been loaded. Records whose "GPT JSON" is
    an empty dict (a failed GPT request) are still merged, but they are
    counted and their keys printed.

    Args:
        start_key: First key to look for.
        num_files: Number of existing files to collect.
        output_file: Path of the merged JSON file to write.
        max_missing_streak: Stop scanning after this many consecutive
            missing files, so the scan terminates when fewer than
            ``num_files`` files exist. ``None`` disables the limit.

    Raises:
        json.JSONDecodeError: If an existing file does not contain valid JSON.
    """
    all_data = []
    key = start_key
    counter = 0
    missing_streak = 0

    while len(all_data) < num_files:
        filename = f'data_key_{key}.json'
        try:
            with open(filename, 'r', encoding='utf-8') as json_file:
                data = json.load(json_file)
        except FileNotFoundError:
            print(f"File {filename} not found, skipping.")
            missing_streak += 1
            if max_missing_streak is not None and missing_streak >= max_missing_streak:
                print(f"Stopping after {missing_streak} consecutive missing files; "
                      f"only {len(all_data)} of {num_files} files were found.")
                break
        else:
            missing_streak = 0
            all_data.append(data)
            if "GPT JSON" in data and data["GPT JSON"] == {}:  # Empty GPT JSON marks a failed request
                counter += 1
                print("Invalid key:", key)

        key += 1

    with open(output_file, 'w', encoding='utf-8') as json_file:
        json.dump(all_data, json_file, ensure_ascii=False, indent=4)
    print(f"Aggregated data saved to {output_file}")
    print("400 occurs", counter, "times!")

# Main program
# Merge the per-key result files, starting at key 20000, into one JSON file.
start_key = 20000
num_files = 10000
output_file = 'gpt_experiment.json'

load_and_aggregate_data(
    start_key=start_key,
    num_files=num_files,
    output_file=output_file,
)

<h1>Step 3: Handle 400 errors</h1>

In [None]:
# Load the prompt file into file_content; send_to_chat_api sends this text
# as the "system" message.
file_path = 'prompt.md'
with open(file_path, mode='r', encoding='utf-8') as file:
    file_content = file.read()

# Function to fetch data from the data source API
def fetch_data(key, timeout=30):
    """Fetch the record for ``key`` and encode its risk labels.

    Args:
        key: Value sent as the ``key`` query parameter of the data source.
        timeout: Seconds ``requests.get`` may wait before the request is
            treated as failed. Without it a stalled server would hang the
            whole run.

    Returns:
        ``(key, risk_types_list, risk_presence_array, source_code)`` where
        ``risk_types_list`` holds the lower-cased known risk labels in the
        order they appear in the record and ``risk_presence_array`` is a
        0/1 list aligned with ``risk_types``. Returns ``None`` when the
        request fails, the body is not valid JSON, or the record has no
        ``sourceCode`` / ``risk`` field.
    """
    url = f"http://192.168.41.45:8081/data/intent?key={key}"
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # response.json() can raise ValueError for a non-JSON body.
        print(f"Request failed for key {key}: {e}\n")
        return None

    if not isinstance(data, dict) or 'sourceCode' not in data or 'risk' not in data:
        return None

    print(f"Key {key}:")
    risk_presence = {rtype: 0 for rtype in risk_types}
    risk_types_list = []
    for risk in data['risk'] or []:
        risk_type = risk.get('type') if isinstance(risk, dict) else None
        if not isinstance(risk_type, str):
            # Skip malformed entries instead of aborting the whole run.
            continue
        risk_type_lower = risk_type.lower()
        if risk_type_lower in risk_presence:
            risk_presence[risk_type_lower] = 1
            risk_types_list.append(risk_type_lower)

    risk_presence_array = [risk_presence[rtype] for rtype in risk_types]
    print("Real Ans:", risk_types_list)
    print("Real Ans Array:", risk_presence_array)
    return key, risk_types_list, risk_presence_array, data['sourceCode']

# Function to send a request to the GPT API
def send_to_chat_api(source_code, max_attempts=5, rate_limit_delay=1, timeout=120):
    """Ask the chat API which risk types are present, retrying on failure.

    Args:
        source_code: Text sent as the "user" message; the prompt file
            content is sent as the "system" message.
        max_attempts: Number of failed attempts (other than rate limiting)
            tolerated before giving up.
        rate_limit_delay: Seconds to sleep before retrying after an error
            whose message contains "429".
        timeout: Seconds ``requests.post`` may wait before the attempt is
            treated as failed.

    Returns:
        ``(response_data, intents, risk_types_array)``: the decoded JSON
        body, the intents extracted from the model's answer, and a 0/1 list
        aligned with ``risk_types``. After ``max_attempts`` failures
        ``({}, [], all-zero list)`` is returned.
    """
    data = {
        "provider": "openai",
        "model": "gpt-4o-mini",
        "prompts": [
            {
                "role": "system",
                "content": file_content
            },
            {
                "role": "user",
                "content": source_code
            }
        ],
        "stream": False,
        "top": 0.5,
        "maxLength": 4096
    }

    failures = 0  # counts only failures that are not rate-limit responses

    while True:
        try:
            response = requests.post(chat_api_url, headers=chat_headers, json=data, timeout=timeout)
            response.raise_for_status()
            response_data = response.json()
            print(f"GPT JSON: {response_data}")

            if response_data.get('status') != 1:
                error_msg = response_data.get('msg', 'Unknown error')
                raise RuntimeError(f"Error in GPT response: {error_msg}")

            content = response_data['data']['content']
            # Remove ```json\n and ``` so a fenced answer still parses as JSON
            content = re.sub(r'```json\n|```', '', content)
            content_json = json.loads(content)

            if isinstance(content_json, dict) and 'intents' in content_json:
                intents = content_json['intents']
            elif isinstance(content_json, list):
                intents = content_json
            else:
                intents = []

            print(f"GPT Response: {intents}")

            # Compare case-insensitively; ignore non-string entries rather
            # than failing the attempt because of one bad element.
            intents_lower = {intent.lower() for intent in intents if isinstance(intent, str)}
            risk_types_array = [1 if rtype in intents_lower else 0 for rtype in risk_types]

            print(f"GPT Response Array: {risk_types_array}")
            return response_data, intents, risk_types_array

        except Exception as e:
            # Deliberately broad: any failure of this attempt leads to a retry.
            print(f"Request failed: {e}")

            if "429" in str(e):
                # Matches the "429 Client Error" text from raise_for_status()
                # and any other error whose message contains 429. These
                # retries are unbounded and do not use up max_attempts.
                time.sleep(rate_limit_delay)
                continue

            failures += 1
            if failures < max_attempts:
                continue

            intents = []
            # Sized from risk_types so the fallback stays aligned if labels change.
            risk_types_array = [0] * len(risk_types)
            print(f"GPT Response: {intents}")
            print(f"GPT Response Array: {risk_types_array}")
            return {}, intents, risk_types_array

# Save the result of a single key to a JSON file
def save_to_json(key, real_ans, real_ans_array, gpt_json, gpt_response, gpt_response_array):
    """Write one key's ground truth and GPT answer to ``data_key_<key>.json``.

    The file is created in the current working directory and overwritten
    if it already exists. Returns the dictionary that was written.
    """
    labelled_values = (
        ("Key", key),
        ("Real Ans", real_ans),
        ("Real Ans Array", real_ans_array),
        ("GPT JSON", gpt_json),
        ("GPT Response", gpt_response),
        ("GPT Response Array", gpt_response_array),
    )
    result = dict(labelled_values)

    filename = f'data_key_{key}.json'
    with open(filename, 'w', encoding='utf-8') as json_file:
        json.dump(result, json_file, ensure_ascii=False, indent=4)

    print(f"Data for key {key} saved to {filename}\n\n")
    return result

# Progress bar function
def print_progress_bar(iteration, total, length=50):
    """Redraw a one-line text progress bar on stdout.

    ``iteration`` out of ``total`` steps are shown as filled cells in a bar
    ``length`` characters wide, followed by the percentage with one decimal.
    The line ends with a carriage return so the next call overwrites it; a
    newline is printed once ``iteration`` equals ``total``.
    """
    pct_text = f"{100 * (iteration / float(total)):.1f}"
    done_cells = int(length * iteration // total)
    gauge = ''.join(('█' * done_cells, '-' * (length - done_cells)))
    print(f'\rProgress: |{gauge}| {pct_text}% Complete', end='\r')
    if iteration == total:
        print()

# Main program
# Re-run the GPT request for every saved record whose "GPT JSON" is empty
# (the marker send_to_chat_api leaves when a request failed).
key = 20000
all_data = []
total_files = 30010 - 20000
processed_files = 0

# Loop through all files from 20000 to 30009
for key in range(key, key + total_files):
    filename = f'data_key_{key}.json'

    if os.path.exists(filename):  # Check if the file exists
        # Read and close the file first, so no read handle is held during
        # the network calls or while save_to_json overwrites the same path.
        with open(filename, 'r', encoding='utf-8') as json_file:
            data = json.load(json_file)

        if "GPT JSON" in data and data["GPT JSON"] == {}:  # If GPT JSON is empty, re-fetch data
            result = fetch_data(key)
            if result is not None:
                # Only the source code is needed; the saved ground truth is
                # reused and the loop variable is left untouched.
                source_code = result[3]
                # NOTE(review): the source code is JSON-encoded twice here but
                # only once in Step 1 — confirm this is intentional.
                gpt_json, gpt_response, gpt_response_array = send_to_chat_api(json.dumps(json.dumps(source_code)))
                # Overwrite the record with the new GPT answer
                save_to_json(key, data['Real Ans'], data['Real Ans Array'], gpt_json, gpt_response, gpt_response_array)
    else:
        print(f"File {filename} does not exist, skipping...\n")  # If the file does not exist, skip this file

    processed_files += 1
    print_progress_bar(processed_files, total_files)  # Update the progress bar

<h1>Step 4: Evaluate the performance of GPT</h1>

In [None]:
import tensorflow as tf
from tensorflow import keras

# Load the aggregated results from a JSON file
def load_data(file_path):
    """Return the parsed JSON content of the UTF-8 file at ``file_path``."""
    with open(file_path, mode='r', encoding='utf-8') as handle:
        return json.load(handle)

# Compute and print evaluation metrics
def evaluate(data):
    """Print accuracy, precision, recall and F1 of the GPT predictions.

    The per-record 0/1 arrays are flattened into one long label vector, so
    every metric is pooled over all labels of all records rather than
    computed per risk type.

    Args:
        data: List of records, each with a "Real Ans Array" and a
            "GPT Response Array" entry.
    """
    y_true = [item["Real Ans Array"] for item in data]
    y_pred = [item["GPT Response Array"] for item in data]

    print(len(y_true), len(y_pred))

    # Flatten the 2D arrays to 1D arrays for use with Keras evaluation functions
    y_true_flat = [label for sublist in y_true for label in sublist]
    y_pred_flat = [label for sublist in y_pred for label in sublist]

    # Convert to TensorFlow tensors
    y_true_tensor = tf.convert_to_tensor(y_true_flat, dtype=tf.float32)
    y_pred_tensor = tf.convert_to_tensor(y_pred_flat, dtype=tf.float32)

    # Use Keras evaluation functions to compute the metrics
    accuracy = tf.keras.metrics.BinaryAccuracy()(y_true_tensor, y_pred_tensor)
    precision = tf.keras.metrics.Precision()(y_true_tensor, y_pred_tensor)
    recall = tf.keras.metrics.Recall()(y_true_tensor, y_pred_tensor)
    # divide_no_nan yields 0 instead of NaN when precision + recall is 0,
    # e.g. when there is not a single true positive.
    f1 = tf.math.divide_no_nan(2 * (precision * recall), precision + recall)

    print("==========================================================")
    print("Total")
    # Print the evaluation metrics
    print("Accuracy:", accuracy.numpy())
    print("Precision:", precision.numpy())
    print("Recall:", recall.numpy())
    print("F1 Score:", f1.numpy())
    print("==========================================================")

# Main program logic
# Score the aggregated results file.
file_path = 'gpt_experiment.json'  # Replace with your file path
data = load_data(file_path=file_path)
evaluate(data=data)