In [1]:
%matplotlib inline
import os
import pickle
import json
import copy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import csv
import dotenv
import tqdm
import subprocess as sp
import shutil
dotenv.load_dotenv()

True

In [2]:
plt.style.use('../style/style-formal.mplstyle')

In [3]:
cwd = os.getcwd()
ROOT_DIR = os.path.dirname(cwd)
SCRIPT_DIR = os.path.join(ROOT_DIR, "scripts")

RESEARCH_DATA = os.environ.get("RESEARCH_DATA")

EL = "attempt_1"
PIDS = [
    "NSFW_c_frw",
    "NSFW_c_timer",
    "NSFW_c_msg",
    "NSFW_cpp_cfg",
    "NSFW_cpp_file",
    "NSFW_cpp_thread"
]

SBFL_FORMULA = [
    "tarantula", "ochiai", "dstar",
    "naish1", "naish2", "gp13"
]

TRANSITION_TYPES = {"type1": "result_transition"}


dot_exp_config_file = os.path.join(ROOT_DIR, "configs/experiment_setup.rq2.json")
EXP_CONFIG = json.load(open(dot_exp_config_file, "r"))

TCS_REDUCTION = EXP_CONFIG["tcs_reduction"]
TCS_EXP_LIST = []

if len(EXP_CONFIG["target_lines"]) > 1:
    EXPERIMENT_TYPE = "lineCnt"
elif len(EXP_CONFIG["mutation_cnt"]) > 1:
    EXPERIMENT_TYPE = "mutCnt"
else:
    EXPERIMENT_TYPE = "tcsReduction"
    TCS_EXP_LIST.append("Reduced")

TOP_N = [1, 3, 5, 10]

# Create output directories for each PID and a combined results directory
PID_OUT_DIRS = {}
for PID in PIDS:
    PID_OUT_DIR = os.path.join(RESEARCH_DATA, EL, PID, "experiment_information_results")
    if not os.path.exists(PID_OUT_DIR):
        os.makedirs(PID_OUT_DIR, exist_ok=True)
    PID_OUT_DIRS[PID] = PID_OUT_DIR

# Create a combined results directory
COMBINED_OUT_DIR = os.path.join(RESEARCH_DATA, EL, "combined_experiment_results")
if not os.path.exists(COMBINED_OUT_DIR):
    os.makedirs(COMBINED_OUT_DIR, exist_ok=True)

print(f"Processing {len(PIDS)} projects: {PIDS}")
print(f"Individual project results will be saved to respective directories")
print(f"Combined results will be saved to: {COMBINED_OUT_DIR}")

Processing 6 projects: ['NSFW_c_frw', 'NSFW_c_timer', 'NSFW_c_msg', 'NSFW_cpp_cfg', 'NSFW_cpp_file', 'NSFW_cpp_thread']
Individual project results will be saved to respective directories
Combined results will be saved to: /ssd_home/yangheechan/cpp_research_data/attempt_1/combined_experiment_results


In [4]:
def set_ST_relevance(total_results, lineIdx2lineData, bid):
    total_results[bid] = []
    for lineIdx, lineData in lineIdx2lineData.items():
        total_results[bid].append({
            "line_idx": lineIdx,
            "is_buggy_line": lineData["is_buggy_line"],
            "st_relevance": lineData["st_relevance"],
            "st_relevance_linear": lineData["st_relevance_linear"],
            "st_distance": lineData["st_distance"],
        })


In [5]:
all_total_results = {}

for PID in PIDS:
    print(f"\nPorcessing project: {PID}")
    total_results = {}

    RID_DIR = "repeat_1"
    lineIdx2lineDataDir = os.path.join(RESEARCH_DATA, EL, "constructed_dataset", PID, RID_DIR)

    if not os.path.exists(lineIdx2lineDataDir):
        print(f"Warning: Directory {lineIdx2lineDataDir} does not exist.")
        raise FileNotFoundError

    for bid_res_file in os.listdir(lineIdx2lineDataDir):
        pck_file = os.path.join(lineIdx2lineDataDir, bid_res_file)
        with open(pck_file, "rb") as f:
            bid = int(bid_res_file.split("--")[0].split("ug")[1])
            lineIdx2lineData = pickle.load(f)
            set_ST_relevance(total_results, lineIdx2lineData, bid)

    all_total_results[PID] = total_results



Porcessing project: NSFW_c_frw

Porcessing project: NSFW_c_timer

Porcessing project: NSFW_c_msg

Porcessing project: NSFW_cpp_cfg

Porcessing project: NSFW_cpp_file

Porcessing project: NSFW_cpp_thread


In [6]:
good_case = []
bad_case = []
for PID in PIDS:
    fault_line_st_score = []
    normal_line_st_score = []
    fault_line_st_score_linear = []
    normal_line_st_score_linear = []
    fault_line_distance = []
    normal_line_distance = []
    fault_line_not_appeared = []
    normal_line_not_appeared = []

    for bid, lineDataList in all_total_results[PID].items():
        best_faulty_line_score = (-1.0, None)
        best_normal_line_score= (-1.0, None)
        for lineData in lineDataList:
            # if lineData["st_relevance"] == 0.0:
            #     continue

            if lineData["is_buggy_line"]:
                fault_line_st_score.append(lineData["st_relevance"])
                fault_line_st_score_linear.append(lineData["st_relevance_linear"])
                if lineData["st_relevance"] > best_faulty_line_score[0]:
                    best_faulty_line_score = (lineData["st_relevance"], lineData["line_idx"], lineData["st_distance"])
            else:
                normal_line_st_score.append(lineData["st_relevance"])
                normal_line_st_score_linear.append(lineData["st_relevance_linear"])
                if lineData["st_relevance"] > best_normal_line_score[0]:
                    best_normal_line_score = (lineData["st_relevance"], lineData["line_idx"], lineData["st_distance"])

            if lineData["st_distance"] is None:
                if lineData["is_buggy_line"]:
                    fault_line_not_appeared.append(lineData["st_distance"])
                else:
                    normal_line_not_appeared.append(lineData["st_distance"])
            else:
                if lineData["is_buggy_line"]:
                    fault_line_distance.append(lineData["st_distance"])
                else:
                    normal_line_distance.append(lineData["st_distance"])

        if best_faulty_line_score > best_normal_line_score:
            good_case.append((PID, bid, best_faulty_line_score, best_normal_line_score))
        else:
            bad_case.append((PID, bid, best_faulty_line_score, best_normal_line_score))

    print(f"PID: {PID}")
    print(f"\t # fault version count: {len(all_total_results[PID])}")
    print(f"\t fault_line cnt: {len(fault_line_st_score)}")
    print(f"\t normal_line cnt: {len(normal_line_st_score)}")
    print(f"\t fault_line not appeared cnt: {len(fault_line_not_appeared)}")
    print(f"\t normal_line not appeared cnt: {len(normal_line_not_appeared)}")
    print(f"\t fault_line distance: mean {np.mean(fault_line_distance):.4f}, std {np.std(fault_line_distance):.4f}, max {np.max(fault_line_distance):.4f}, min {np.min(fault_line_distance):.4f}")
    print(f"\t normal_line distance: mean {np.mean(normal_line_distance):.4f}, std {np.std(normal_line_distance):.4f}, max {np.max(normal_line_distance):.4f}, min {np.min(normal_line_distance):.4f}")
    print(f"\t fault_line st_relevance: mean {np.mean(fault_line_st_score):.4f}, std {np.std(fault_line_st_score):.4f}, max {np.max(fault_line_st_score):.4f}, min {np.min(fault_line_st_score):.4f}")
    print(f"\t normal_line st_relevance: mean {np.mean(normal_line_st_score):.4f}, std {np.std(normal_line_st_score):.4f}, max {np.max(normal_line_st_score):.4f}, min {np.min(normal_line_st_score):.4f}")
    print(f"\t fault_line st_relevance_linear: mean {np.mean(fault_line_st_score_linear):.4f}, std {np.std(fault_line_st_score_linear):.4f}, max {np.max(fault_line_st_score_linear):.4f}, min {np.min(fault_line_st_score_linear):.4f}")
    print(f"\t normal_line st_relevance_linear: mean {np.mean(normal_line_st_score_linear):.4f}, std {np.std(normal_line_st_score_linear):.4f}, max {np.max(normal_line_st_score_linear):.4f}, min {np.min(normal_line_st_score_linear):.4f}")
    print()

PID: NSFW_c_frw
	 # fault version count: 50
	 fault_line cnt: 50
	 normal_line cnt: 66391
	 fault_line not appeared cnt: 0
	 normal_line not appeared cnt: 65207
	 fault_line distance: mean 1.8800, std 2.3121, max 9.0000, min 0.0000
	 normal_line distance: mean 10.6740, std 7.7280, max 27.0000, min 0.0000
	 fault_line st_relevance: mean 0.4811, std 0.4586, max 1.0000, min 0.0000
	 normal_line st_relevance: mean 0.0008, std 0.0220, max 1.0000, min 0.0000
	 fault_line st_relevance_linear: mean 0.5946, std 0.3611, max 1.0000, min 0.1000
	 normal_line st_relevance_linear: mean 0.0023, std 0.0280, max 1.0000, min 0.0000

PID: NSFW_c_timer
	 # fault version count: 50
	 fault_line cnt: 50
	 normal_line cnt: 11404
	 fault_line not appeared cnt: 0
	 normal_line not appeared cnt: 10769
	 fault_line distance: mean 3.0200, std 3.3135, max 11.0000, min 0.0000
	 normal_line distance: mean 10.4079, std 7.3026, max 27.0000, min 0.0000
	 fault_line st_relevance: mean 0.3541, std 0.4400, max 1.0000, min 

In [7]:

print("good case:")
for i, case in enumerate(good_case):
    if i == 4:
        break
    print(case)


print("\nbad case:")
for i, case in enumerate(bad_case):
    # if i == 20:
    #     break
    print(case)

good case:
('NSFW_c_frw', 5091, (1.0, 992, 0), (0.36787944117144233, 991, 1))
('NSFW_c_frw', 42, (1.0, 8, 0), (0.01831563888873418, 9, 2))
('NSFW_c_frw', 1920, (1.0, 396, 0), (1.0, 394, 0))
('NSFW_c_frw', 5380, (1.0, 992, 0), (0.5, 982, 0))

bad case:
('NSFW_c_frw', 6555, (0.00012340980408667956, 1130, 3), (0.01831563888873418, 1131, 2))
('NSFW_c_frw', 5887, (1.1253517471925912e-07, 1133, 4), (0.00012340980408667956, 1134, 3))
('NSFW_c_frw', 6235, (0.36787944117144233, 1133, 1), (1.0, 1132, 0))
('NSFW_c_frw', 4600, (2.3195228302435696e-16, 839, 6), (1.0, 831, 0))
('NSFW_c_frw', 5532, (0.01831563888873418, 989, 2), (0.36787944117144233, 990, 1))
('NSFW_c_frw', 3199, (1.3887943864964021e-11, 546, 5), (1.0, 547, 0))
('NSFW_c_frw', 5246, (0.01831563888873418, 986, 2), (0.01831563888873418, 987, 2))
('NSFW_c_frw', 986, (6.639677199580735e-36, 219, 9), (0.36787944117144233, 215, 1))
('NSFW_c_frw', 5600, (0.01831563888873418, 998, 2), (0.36787944117144233, 999, 1))
('NSFW_c_frw', 6576, (0.367