In [1]:
import os
import pandas as pd
import itertools

In [16]:
# Path to the main folder
base_path = "finetuned_single"
print(os.listdir(base_path))

# Columns of results_df that describe a case rather than score a model.
METADATA_COLUMNS = ["vulnerability", "function_name"]
# Each general_determination_auditor.csv ends with aggregate lines starting
# with these labels. pandas parses them as ordinary rows, so they have to be
# skipped or they would be recorded as bogus cases.
SUMMARY_ROW_PREFIXES = ("Top k hit rate", "Top 1 hit rate")

# Dictionary to store case results: dataname -> {column name: value}
case_results = {}

# Traverse subfolders and read each general_determination_auditor.csv
for subfolder in os.listdir(base_path):
    subfolder_path = os.path.join(base_path, subfolder)
    if not os.path.isdir(subfolder_path):  # Only process directories
        continue
    file_path = os.path.join(subfolder_path, "general_determination_auditor.csv")
    if not os.path.exists(file_path):
        continue
    try:
        df = pd.read_csv(file_path)
        for _, row in df.iterrows():
            # Skip the trailing hit-rate summary lines, whichever column the
            # label landed in.
            if any(str(value).startswith(SUMMARY_ROW_PREFIXES) for value in row):
                continue

            dataname = row["dataname"]
            match = row["match"]
            function = row["function_name"]
            vul = row["vulnerability"]

            case = case_results.setdefault(dataname, {})
            case["vulnerability"] = vul
            case["function_name"] = function
            # Accept both a parsed bool and the text "True": which one pandas
            # yields depends on whether the column also holds non-boolean text.
            case[subfolder] = 1 if str(match).strip().lower() == "true" else 0
    except Exception as e:
        # Best effort: a broken file must not abort the remaining models.
        print(f"Error reading file {file_path}: {e}")

# Create a DataFrame from case_results: one row per case, one column per model
results_df = pd.DataFrame(case_results).T

# A model that never reported a case scores 0 for it. The transposed frame is
# object-typed, so convert each score column explicitly instead of calling
# fillna(0) on the whole frame, which relies on deprecated silent downcasting.
# Metadata columns are left exactly as read from the CSV.
for column in results_df.columns:
    if column not in METADATA_COLUMNS:
        results_df[column] = results_df[column].astype(float).fillna(0).astype(int)


['gemma_ft', 'opencodeinterpreter_ft', 'codellama_ft', 'deepseek_ft', 'summary_metrics.csv', 'nxcode_ft', 'case_summary_table.csv']


  results_df = pd.DataFrame(case_results).T.fillna(0)


In [28]:
# CVE identifiers to inspect in results_df, one per line.
# NOTE(review): these look like cases that both the ensemble and the best
# single model missed - confirm where the selection came from.
cve_list = """
CVE-2018-17968
CVE-2018-12702
CVE-2020-17753
CVE-2018-12230
CVE-2018-12703
CVE-2018-14085
CVE-2018-18425
CVE-2018-19833
CVE-2019-15078
""".split()


In [29]:
# Keep only the rows of results_df whose index label is listed in cve_list
is_selected_case = results_df.index.isin(cve_list)
filtered_results_df = results_df[is_selected_case]

In [30]:
# Display the per-model results for the cases selected through cve_list
filtered_results_df

Unnamed: 0,vulnerability,function_name,gemma_ft,opencodeinterpreter_ft,codellama_ft,deepseek_ft,nxcode_ft
CVE-2018-12702,Access Control,approveAndCallcode,0,0,0,0,0
CVE-2018-12230,Wrong Logic,transferFrom,0,0,0,0,0
CVE-2018-12703,Access Control,approveAndCallcode,0,0,0,0,0
CVE-2018-18425,Token Devalue,doAirdrop,0,0,0,0,0
CVE-2018-19833,Access Control,owned,0,0,0,0,0
CVE-2019-15078,Typo Constructor,Not Provided,0,0,0,0,0
CVE-2018-17968,Bad Randomness,random,0,0,0,0,0
CVE-2020-17753,Access Control,addMeByRC,0,0,0,0,0
CVE-2018-14085,Wrong Logic,Not Provided,0,0,0,0,0


In [33]:
# Write the per-case table into the main folder and echo the destination path
output_file = os.path.join(base_path, "case_summary_table.csv")
results_df.to_csv(path_or_buf=output_file)
print(output_file)

finetuned_single/case_summary_table.csv


In [37]:
# Display the per-case results table (one row per case, one column per model)
results_df

Unnamed: 0,nxcode_ft_2,opencodeinterpreter_ft_2,codellama_ft_2,deepseek_ft_2,gemma_ft_2
CVE-2018-13722,0.0,1.0,1.0,1.0,0.0
CVE-2018-13132,0.0,0.0,0.0,1.0,0.0
CVE-2018-13128,1.0,0.0,0.0,1.0,1.0
CVE-2018-13777,1.0,0.0,1.0,1.0,1.0
CVE-2018-17071,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...
CVE-2018-13227,0.0,0.0,0.0,1.0,1.0
CVE-2018-14063,0.0,1.0,0.0,0.0,0.0
CVE-2021-34273,0.0,0.0,0.0,0.0,0.0
CVE-2018-14715,0.0,0.0,0.0,0.0,0.0


In [30]:

# Determine the case difficulty (easy, medium, hard)

# How many example cases to keep per difficulty level.
CASES_PER_LEVEL = 3

# Only the per-model 0/1 columns are votes. results_df may also carry the
# descriptive "vulnerability" / "function_name" columns, which must neither be
# summed nor counted as models.
score_df = results_df.drop(
    columns=["vulnerability", "function_name"], errors="ignore"
).astype(float)
n_models = score_df.shape[1]

easy_cases = []
medium_cases = []
hard_cases = []

for dataname, total_correct in score_df.sum(axis=1).items():
    if total_correct == n_models:  # All correct
        easy_cases.append(dataname)
    elif total_correct == 0:  # All incorrect
        hard_cases.append(dataname)
    else:  # Some correct, some incorrect
        medium_cases.append(dataname)

# Collect the first few cases of every level, labelled "<case>_<level>", and
# build the final table in one go instead of growing it row by row.
selected_rows = []
for level, cases in (("easy", easy_cases), ("medium", medium_cases), ("hard", hard_cases)):
    for case in cases[:CASES_PER_LEVEL]:
        selected_rows.append(results_df.loc[case].rename(f"{case}_{level}"))

final_df = pd.DataFrame(selected_rows, columns=results_df.columns)

# Save the final table to a CSV file
output_file = os.path.join(base_path, "case_difficulty_table.csv")
final_df.to_csv(output_file)

In [31]:
# Display the sampled easy / medium / hard cases
final_df

Unnamed: 0,nxcode_ft_2,opencodeinterpreter_ft_2,codellama_ft_2,deepseek_ft_2,gemma_ft_2
CVE-2018-13129_easy,1.0,1.0,1.0,1.0,1.0
CVE-2018-13079_easy,1.0,1.0,1.0,1.0,1.0
CVE-2018-13127_easy,1.0,1.0,1.0,1.0,1.0
CVE-2018-13722_medium,0.0,1.0,1.0,1.0,0.0
CVE-2018-13132_medium,0.0,0.0,0.0,1.0,0.0
CVE-2018-13128_medium,1.0,0.0,0.0,1.0,1.0
CVE-2018-17071_hard,0.0,0.0,0.0,0.0,0.0
CVE-2018-13325_hard,0.0,0.0,0.0,0.0,0.0
CVE-2020-17753_hard,0.0,0.0,0.0,0.0,0.0


In [3]:
# Folder holding one subfolder per model
base_path = "finetuned_single"

# Long-format records: one entry per (subfolder, metric)
data = []

for subfolder in os.listdir(base_path):
    subfolder_path = os.path.join(base_path, subfolder)
    if not os.path.isdir(subfolder_path):  # plain files are ignored
        continue

    file_path = os.path.join(subfolder_path, "general_determination_auditor.csv")
    print("reading: ",file_path)
    if not os.path.exists(file_path):
        continue

    try:
        with open(file_path, "r") as f:
            lines = f.readlines()

        # The two hit-rate summaries are taken from the last two lines of the
        # file, stored verbatim (whitespace-stripped); "N/A" when the file is
        # too short to contain them.
        line_count = len(lines)
        top_k_hit_rate = "N/A" if line_count < 2 else lines[-2].strip()
        top_1_hit_rate = "N/A" if line_count < 1 else lines[-1].strip()

        metrics = (
            ("Top k hit rate", top_k_hit_rate),
            ("Top 1 hit rate", top_1_hit_rate),
        )
        data.extend(
            {"Subfolder": subfolder, "Metric": metric, "Value": value}
            for metric, value in metrics
        )
    except Exception as e:
        print(f"Error reading file {file_path}: {e}")

# Tabulate the collected records and display them
df = pd.DataFrame(data)
df

reading:  finetuned_single/gemma_ft/general_determination_auditor.csv
reading:  finetuned_single/opencodeinterpreter_ft/general_determination_auditor.csv
reading:  finetuned_single/codellama_ft/general_determination_auditor.csv
reading:  finetuned_single/deepseek_ft/general_determination_auditor.csv
reading:  finetuned_single/nxcode_ft/general_determination_auditor.csv


Unnamed: 0,Subfolder,Metric,Value
0,gemma_ft,Top k hit rate,"Top k hit rate:,0.50,(41/82)"
1,gemma_ft,Top 1 hit rate,"Top 1 hit rate:,0.50,0.47560975609756095,(39/82)"
2,opencodeinterpreter_ft,Top k hit rate,"Top k hit rate:,0.41,(44/107)"
3,opencodeinterpreter_ft,Top 1 hit rate,"Top 1 hit rate:,0.41,0.18691588785046728,(20/107)"
4,codellama_ft,Top k hit rate,"Top k hit rate:,0.44,(38/87)"
5,codellama_ft,Top 1 hit rate,"Top 1 hit rate:,0.44,0.13793103448275862,(12/87)"
6,deepseek_ft,Top k hit rate,"Top k hit rate:,0.56,(60/107)"
7,deepseek_ft,Top 1 hit rate,"Top 1 hit rate:,0.56,0.308411214953271,(33/107)"
8,nxcode_ft,Top k hit rate,"Top k hit rate:,0.35,(37/107)"
9,nxcode_ft,Top 1 hit rate,"Top 1 hit rate:,0.35,0.04672897196261682,(5/107)"


In [None]:
# Write the metrics table (without the row index) into the main folder
output_file = os.path.join(base_path, "summary_metrics.csv")
df.to_csv(path_or_buf=output_file, index=False)

print(f"Summary metrics saved to {output_file}")


In [39]:
    # Names of the directories directly under base_path, in os.listdir order
    subfolders = list(
        filter(
            lambda name: os.path.isdir(os.path.join(base_path, name)),
            os.listdir(base_path),
        )
    )
    print(subfolders)

['gemma_ft', 'opencodeinterpreter_ft', 'codellama_ft', 'deepseek_ft', 'nxcode_ft']


In [11]:

def _is_true_match(value):
    """Return True when a ``match`` cell denotes a hit.

    Non-string values are compared with ``True`` (so ``True`` and ``1`` count
    as hits, NaN does not). Strings count when they spell "true" in any
    letter case, which covers columns pandas could not parse as booleans.
    """
    if isinstance(value, str):
        return value.strip().lower() == "true"
    return bool(value == True)  # noqa: E712 - equality, not identity, is intended


def ensemble_results(folder_indices):
    """Combine the detailed evaluations of several model folders per case.

    Args:
        folder_indices: Positions into the list of directories found under the
            module-level ``base_path`` (in ``os.listdir`` order). Indices that
            are >= the number of directories are ignored. The order of the
            indices matters: it decides which prediction wins when several
            have the same number of votes.

    Returns:
        pandas.DataFrame with one row per ``dataname`` and the columns
        ``dataname``, ``match`` and ``top_one_match``. ``match`` is True when
        any of the five most frequently reported (vulnerability, function_name)
        pairs has a hit; ``top_one_match`` only looks at the most frequent
        pair. The three columns are present even when no rows were collected,
        so callers can index them unconditionally.

    Folders without a ``detailed_evaluation.csv`` and files that fail to load
    are reported on stdout and skipped.
    """
    # NOTE(review): os.listdir gives no ordering guarantee, so an index may
    # refer to a different model on another machine - confirm before sharing.
    subfolders = [f for f in os.listdir(base_path) if os.path.isdir(os.path.join(base_path, f))]
    selected_subfolders = [subfolders[idx] for idx in folder_indices if idx < len(subfolders)]

    # dataname -> {(vulnerability, function_name): [hit flag per report]}
    votes_by_case = {}

    for subfolder in selected_subfolders:
        file_path = os.path.join(base_path, subfolder, "detailed_evaluation.csv")

        # Skip if the file doesn't exist
        if not os.path.exists(file_path):
            print(f"File not found: {file_path}")
            continue

        try:
            df = pd.read_csv(file_path)

            for _, row in df.iterrows():
                # Read every field first so that a missing column cannot leave
                # a half-initialised entry behind.
                dataname = row["dataname"]
                key = (row["vulnerability"], row["function_name"])
                hit = _is_true_match(row["match"])

                votes_by_case.setdefault(dataname, {}).setdefault(key, []).append(hit)
        except Exception as e:
            # Best effort: keep going with the remaining folders.
            print(f"Error reading file {file_path}: {e}")

    final_results = []
    for dataname, vul_func_dict in votes_by_case.items():
        # Rank predictions by how often they were reported. sorted() is
        # stable, so ties keep their first-seen order. Only the top five count.
        ranked_votes = sorted(vul_func_dict.values(), key=len, reverse=True)[:5]

        final_results.append({
            "dataname": dataname,
            "match": any(any(votes) for votes in ranked_votes),
            "top_one_match": any(ranked_votes[0]),
        })

    # Explicit columns keep the schema stable for an empty result.
    return pd.DataFrame(final_results, columns=["dataname", "match", "top_one_match"])





In [12]:

# Folder holding one subfolder per model
base_path = "finetuned_single"

# Indices of the model folders that may take part in an ensemble
ensemble_indices = [0, 1, 2, 3, 4]

# All non-empty subsets of the indices, smallest subsets first
power_set = [
    combo
    for size in range(1, len(ensemble_indices) + 1)
    for combo in itertools.combinations(ensemble_indices, size)
]

# Exhaustive search: every ordering of every subset is scored, and the first
# ordering that reaches the highest hit rate is remembered.
best = 0
best_df = None
best_model_indices = None
best_top1 = None
for subset in power_set:
    for permutation in itertools.permutations(subset):
        ensemble_result_df = ensemble_results(list(permutation))

        match_column = ensemble_result_df["match"]
        total = len(ensemble_result_df)   # number of evaluated cases
        hits = match_column.sum()         # cases with at least one hit
        hit_rate = match_column.mean()
        top_one_hits = ensemble_result_df["top_one_match"].mean()

        if hit_rate > best:
            best, best_df = hit_rate, ensemble_result_df
            best_model_indices, best_top1 = permutation, top_one_hits

        # Report this permutation
        print(
            f"Number of hits: {hits}",
            f"Total entries: {total}",
            f"Hit rate: {hit_rate:.2%}",
            f"top 1 Hit rate: {top_one_hits:.2%}",
            f"Hit Rate for {permutation}: {hit_rate:.2f}",
            sep="\n",
        )

Number of hits: 36
Total entries: 82
Hit rate: 43.90%
top 1 Hit rate: 9.76%
Hit Rate for (0,): 0.44
Number of hits: 44
Total entries: 107
Hit rate: 41.12%
top 1 Hit rate: 20.56%
Hit Rate for (1,): 0.41
Number of hits: 35
Total entries: 87
Hit rate: 40.23%
top 1 Hit rate: 35.63%
Hit Rate for (2,): 0.40
Number of hits: 60
Total entries: 107
Hit rate: 56.07%
top 1 Hit rate: 14.95%
Hit Rate for (3,): 0.56
Number of hits: 37
Total entries: 107
Hit rate: 34.58%
top 1 Hit rate: 34.58%
Hit Rate for (4,): 0.35
Number of hits: 52
Total entries: 107
Hit rate: 48.60%
top 1 Hit rate: 28.04%
Hit Rate for (0, 1): 0.49
Number of hits: 48
Total entries: 107
Hit rate: 44.86%
top 1 Hit rate: 31.78%
Hit Rate for (1, 0): 0.45
Number of hits: 49
Total entries: 104
Hit rate: 47.12%
top 1 Hit rate: 31.73%
Hit Rate for (0, 2): 0.47
Number of hits: 48
Total entries: 104
Hit rate: 46.15%
top 1 Hit rate: 32.69%
Hit Rate for (2, 0): 0.46
Number of hits: 60
Total entries: 107
Hit rate: 56.07%
top 1 Hit rate: 29.91%

In [10]:
# Folder holding one subfolder per model
base_path = "finetuned_single"

# Score a single hand-picked ensemble; the order of the indices is passed on as is
ensemble_indices = [0, 3, 1]
ensemble_result_df = ensemble_results(ensemble_indices)

match_column = ensemble_result_df["match"]
total = len(ensemble_result_df)   # number of evaluated cases
hits = match_column.sum()         # cases with at least one hit
hit_rate = match_column.mean()
top_one_hits = ensemble_result_df["top_one_match"].mean()

print(
    f"Number of hits: {hits}",
    f"Total entries: {total}",
    f"Hit rate: {hit_rate:.2%}",
    f"top 1 Hit rate: {top_one_hits:.2%}",
    sep="\n",
)


Number of hits: 58
Total entries: 108
Hit rate: 53.70%
top 1 Hit rate: 35.19%


In [13]:
# Winning permutation, its top-1 hit rate and its overall hit rate
print(" ".join(str(value) for value in (best_model_indices, best_top1, best)))


(2, 1, 3, 0, 4) 0.37037037037037035 0.5833333333333334


In [14]:

# Indices of the model folders to evaluate
ensemble_indices = [0, 1, 2, 3, 4]

# All non-empty subsets of the indices (rebuilt here; not used by this cell)
power_set = [
    combo
    for size in range(1, len(ensemble_indices) + 1)
    for combo in itertools.combinations(ensemble_indices, size)
]

# Running record of the strongest single model
best_single_hit_rate = 0
best_single_df = None
best_single_model = None
best_index = 0

# Per-model result frames, keyed by folder index
single_model_results = {}

for model_idx in ensemble_indices:
    # An "ensemble" of exactly one folder yields that model's own results
    model_result_df = ensemble_results([model_idx])
    single_model_results[model_idx] = model_result_df

    hit_rate = model_result_df["match"].mean()
    if hit_rate > best_single_hit_rate:
        best_single_hit_rate = hit_rate
        best_single_df = model_result_df
        best_single_model = best_index = model_idx

print(f"Best single model: {best_single_model} with hit rate: {best_single_hit_rate:.2%}")


Best single model: 3 with hit rate: 56.07%


In [27]:

best_ensemble_df = best_df

# Line up the best ensemble with the best single model, case by case, and list
# the cases that BOTH of them got wrong.
# NOTE(review): the result is named success_cases although the filter keeps
# joint failures (match == False on both sides) - confirm which was intended.
if best_ensemble_df is not None and best_single_df is not None:
    best_single_df1 = best_single_df.rename(columns={'match': 'single_model_match'})

    # Left join: every ensemble case is kept; cases the single model never
    # reported get a missing single_model_match and are excluded by the filter.
    comparison_df = best_ensemble_df.merge(
        best_single_df1[['dataname', 'single_model_match']],
        on='dataname',
        how='left',
    )

    ensemble_missed = comparison_df["match"].eq(False)
    single_missed = comparison_df["single_model_match"].eq(False)
    success_cases = comparison_df[ensemble_missed & single_missed]

    # Display results
    print(success_cases)

           dataname  match  top_one_match single_model_match
1    CVE-2018-17968  False          False              False
11   CVE-2018-12702  False          False              False
12   CVE-2020-17753  False          False              False
13   CVE-2018-12230  False          False              False
14   CVE-2018-12703  False          False              False
15   CVE-2018-14085  False          False              False
16   CVE-2018-18425  False          False              False
33   CVE-2018-19833  False          False              False
34   CVE-2019-15078  False          False              False
39   CVE-2019-15079  False          False              False
40   CVE-2018-14087  False          False              False
42   CVE-2021-34272  False          False              False
45    CVE-2021-3004  False          False              False
49   CVE-2018-14084  False          False              False
50   CVE-2018-13327  False          False              False
51   CVE-2018-17987  Fal

In [14]:
# Highest ensemble hit rate found by the permutation search
best

0.5420560747663551

In [57]:
base = 'finetuned_single/'
data = '2018-10666'

# Traverse subfolders and read general_determination.csv
for subfolder in os.listdir(base_path):
    subfolder_path = os.path.join(base_path, subfolder)
    if os.path.isdir(subfolder_path):  # Only process directories
        file_path = os.path.join(subfolder_path, f"{data}/auditor_summary/NTQAI_Nxcode-CQ-7B-orpo_summarized_0.json")