In [1]:
import os
import pickle
import json
import copy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import dotenv
dotenv.load_dotenv()

True

In [2]:
# Root directory of all experiment data, taken from the environment
# (the imports cell calls dotenv.load_dotenv() so a .env file also works).
RESEARCH_DATA = os.environ.get("RESEARCH_DATA")
if not RESEARCH_DATA:
    # Fail fast with an actionable message; otherwise os.path.join below
    # raises an opaque TypeError about NoneType.
    raise RuntimeError(
        "Environment variable RESEARCH_DATA is not set; "
        "export it or define it in the .env file before running this notebook."
    )

# Sub-directory of RESEARCH_DATA under which every path below is built.
EL = "attempt_1"
# Project IDs to process.
PIDS = ["Lang", "Mockito"]

# SBFL formulas; each one is looked up as "<formula>_rank" in the line data.
SBFL_FORMULA = [
    "tarantula", "ochiai", "dstar",
    "naish1", "naish2", "gp13"
]

# Short transition-type name -> key fragment used inside MBFL rank keys.
TRANSITION_TYPES = {
    "type1": "result_transition",
    "type2": "exception_type_transition",
    "type3": "exception_msg_transition",
    "type4": "stacktrace_transition"
}

# (start, stop) bounds fed to range(): mutant counts 1..10.
MUT_RANGE = (1, 11)

# (start, stop) bounds fed to range(): repeat ids 1..10 ("repeat_<rid>" directories).
RID_RANGE = (1, 11)

# Cut-offs for the Top-N metric.
TOP_N = [1, 3, 5, 10]

# Create output directories for each PID and a combined results directory
PID_OUT_DIRS = {}
for PID in PIDS:
    PID_OUT_DIR = os.path.join(RESEARCH_DATA, EL, PID, "experiment_information_results")
    # exist_ok=True already makes this a no-op when the directory exists.
    os.makedirs(PID_OUT_DIR, exist_ok=True)
    PID_OUT_DIRS[PID] = PID_OUT_DIR

# Create a combined results directory
COMBINED_OUT_DIR = os.path.join(RESEARCH_DATA, EL, "combined_experiment_results")
os.makedirs(COMBINED_OUT_DIR, exist_ok=True)

print(f"Processing {len(PIDS)} projects: {PIDS}")
print("Individual project results will be saved to respective directories")
print(f"Combined results will be saved to: {COMBINED_OUT_DIR}")

Processing 2 projects: ['Lang', 'Mockito']
Individual project results will be saved to respective directories
Combined results will be saved to: /ssd_home/yangheechan/d4j_research_data/attempt_1/combined_experiment_results


In [3]:
# A line is faulty when its 'fault_line' field has the value 1.

# total_results = {bid: {technique_key: [rank of 1st faulty line, rank of 2nd faulty line, ...], ...}, ...}
# technique_key covers every SBFL formula in SBFL_FORMULA and every MBFL (MUSE/METAL) variant
# built from TRANSITION_TYPES and MUT_RANGE.

def set_faulty_line_ranks(total_results, lineIdx2lineData, bid):
    """Store the rank of every faulty line of one bug under every technique key.

    Args:
        total_results: dict that receives the result; ``total_results[bid]``
            is (re)set to a fresh dict on every call.
        lineIdx2lineData: mapping of line index -> per-line dict. A line is
            treated as faulty when its ``"fault_line"`` entry equals 1.
        bid: key under which this bug's ranks are stored.

    The per-bug dict maps each technique key to a list with one entry per
    faulty line (in the mapping's iteration order). A rank that is absent
    from the line data is stored as ``None``. When the bug has no faulty
    line, the per-bug dict stays empty.
    """
    per_bug = {}
    total_results[bid] = per_bug

    # Only lines flagged as faulty contribute ranks.
    faulty = [
        line_data
        for line_data in lineIdx2lineData.values()
        if line_data.get("fault_line") == 1
    ]
    if not faulty:
        return

    # SBFL keys first, then MUSE followed by METAL for each (mutant count, transition).
    rank_keys = [f"{formula}_rank" for formula in SBFL_FORMULA]
    for mut_cnt in range(MUT_RANGE[0], MUT_RANGE[1]):
        for transition_key in TRANSITION_TYPES.values():
            rank_keys.append(f"mutCnt{mut_cnt}_{transition_key}_final_muse_score_rank")
            rank_keys.append(f"mutCnt{mut_cnt}_{transition_key}_final_metal_score_rank")

    for rank_key in rank_keys:
        per_bug.setdefault(rank_key, []).extend(
            line_data.get(rank_key) for line_data in faulty
        )

In [4]:
# Process data for all PIDs
all_total_results = {}  # Structure: {PID: {rid: {bid: {technique: [ranks...]}}}}

for PID in PIDS:
    print(f"\nProcessing project: {PID}")
    total_results = {}

    for rid in range(RID_RANGE[0], RID_RANGE[1]):
        RID_dir = f"repeat_{rid}"
        lineIdx2lineDataDir = os.path.join(RESEARCH_DATA, EL, PID, "experiment_raw_results", RID_dir)

        if not os.path.isdir(lineIdx2lineDataDir):
            print(f"Warning: Directory not found for {PID} - {RID_dir}: {lineIdx2lineDataDir}")
            continue

        # Collect this run separately and register it only when it holds data.
        # An empty run would otherwise be summarised downstream as Top-N = 0 and
        # MFR/MAR = 0 and silently skew the across-run mean and std.
        run_results = {}

        # sorted() makes the bug order (and the dumped JSON) deterministic.
        for bid_res_file in sorted(os.listdir(lineIdx2lineDataDir)):
            # File names start with the bug id, followed by "_".
            bid = int(bid_res_file.split("_")[0])
            pck_file = os.path.join(lineIdx2lineDataDir, bid_res_file)
            # NOTE: pickle.load executes arbitrary code; only load files produced
            # by this project's own pipeline.
            with open(pck_file, 'rb') as f:
                lineIdx2lineData = pickle.load(f)
            set_faulty_line_ranks(run_results, lineIdx2lineData, bid)

        if not run_results:
            print(f"Warning: No result files found for {PID} - {RID_dir}: {lineIdx2lineDataDir}")
            continue

        total_results[RID_dir] = run_results

    if not total_results:
        # Nothing downstream can produce meaningful numbers for this project.
        raise RuntimeError(
            f"No experiment_raw_results data found for project {PID} under "
            f"{os.path.join(RESEARCH_DATA, EL, PID, 'experiment_raw_results')}"
        )

    # Store results for this PID
    all_total_results[PID] = total_results

    # Save individual PID results (integer bug ids become string keys in JSON)
    with open(os.path.join(PID_OUT_DIRS[PID], f"{PID}_faulty_line_ranks.json"), 'w') as f:
        json.dump(total_results, f, indent=4)

    print(f"Completed processing {PID}: {len(total_results)} experimental runs")

print(f"\nCompleted processing all {len(PIDS)} projects")
print("Individual results saved for each project")


Processing project: Lang
Completed processing Lang: 10 experimental runs

Processing project: Mockito
Completed processing Mockito: 10 experimental runs

Completed processing all 2 projects
Individual results saved for each project


In [None]:
# Top-N: number of faults for which at least one faulty statement is ranked within the top N (N = 1, 3, 5, 10).
# MFR (Mean First Rank): average, over all faults, of the rank of the first (best-ranked) faulty statement.
# MAR (Mean Average Rank): average rank of all faulty statements of each fault.

def _summarize_technique(total_results, key):
    """Compute Top-N counts, MFR and MAR of one technique key over all faults."""
    summary = {
        "topN": {n: 0 for n in TOP_N},
        "mfr_list": [],
        "mar_list": []
    }

    for result in total_results.values():
        ranks = result.get(key, [])
        if not ranks:  # fault has no rank for this technique
            continue

        # A fault counts for Top-N when any of its faulty lines is within N.
        for n in TOP_N:
            if any(rank <= n for rank in ranks):
                summary["topN"][n] += 1

        # One sample per fault for both metrics: best rank and average rank.
        # Averaging per fault first keeps a multi-line fault from weighing
        # more in MAR than a single-line fault.
        summary["mfr_list"].append(min(ranks))
        summary["mar_list"].append(np.mean(ranks))

    if summary["mfr_list"]:
        summary["mfr"] = np.mean(summary["mfr_list"])
        summary["mar"] = np.mean(summary["mar_list"])
    else:
        # No fault had ranks for this technique; 0 is a placeholder, not a rank.
        summary["mfr"] = 0
        summary["mar"] = 0

    return summary


def calculate_exp_results(total_results):
    """Compute Top-N, MFR and MAR for every SBFL and MBFL technique of one run.

    Args:
        total_results: ``{bid: {technique_key: [rank, ...]}}`` with one rank
            per faulty line of the bug.

    Returns:
        ``{technique_key: {"topN": {n: count}, "mfr_list": [...],
        "mar_list": [...], "mfr": value, "mar": value}}``.
        ``mfr_list`` holds the best rank of each fault and ``mar_list`` the
        average rank of each fault; ``mfr`` / ``mar`` are their means, or 0
        when no fault has ranks for the technique.
    """
    # SBFL keys first, then MUSE and METAL for each (mutant count, transition).
    technique_keys = [f"{sbfl_form}_rank" for sbfl_form in SBFL_FORMULA]
    for mut_cnt in range(MUT_RANGE[0], MUT_RANGE[1]):
        for transition_key in TRANSITION_TYPES.values():
            for technique in ["muse", "metal"]:
                technique_keys.append(
                    f"mutCnt{mut_cnt}_{transition_key}_final_{technique}_score_rank"
                )

    return {key: _summarize_technique(total_results, key) for key in technique_keys}

In [6]:
# Calculate experimental results for all PIDs
print("Processing experimental results for all projects...")

# Structure: {PID: {rid: {technique: {topN, mfr, mar, ...}}}}
all_exp_results = {}

# Process each PID individually
for PID in PIDS:
    print(f"\nCalculating experimental results for {PID}...")
    
    total_exp_results = {}
    for rid_key, rid_results in all_total_results[PID].items():
        exp_results = calculate_exp_results(rid_results)
        total_exp_results[rid_key] = exp_results
    
    all_exp_results[PID] = total_exp_results
    print(f"Processed {len(total_exp_results)} experimental runs for {PID}")

print(f"\nCompleted experimental results calculation for all {len(PIDS)} projects")

Processing experimental results for all projects...

Calculating experimental results for Lang...
Processed 10 experimental runs for Lang

Calculating experimental results for Mockito...
Processed 10 experimental runs for Mockito

Completed experimental results calculation for all 2 projects


In [None]:
def calculate_average_results_across_runs(total_exp_results):
    """Average each technique's metrics over all experimental runs.

    Args:
        total_exp_results: ``{run_key: {technique_key: {"topN": {n: count},
            "mfr": value, "mar": value, ...}}}``.

    Returns:
        ``{technique_key: {"topN_mean": {n: ...}, "topN_std": {n: ...},
        "mfr_mean", "mfr_std", "mar_mean", "mar_std"}}`` where mean/std are
        ``np.mean`` / ``np.std`` over the runs that contain the technique.
        The technique keys are taken from the first run.
    """
    runs = list(total_exp_results.values())
    # The first run defines which techniques are reported.
    technique_keys = list(runs[0].keys())

    final_results = {}
    for technique_key in technique_keys:
        # Per-run entries of this technique, skipping runs that lack it.
        samples = [run[technique_key] for run in runs if technique_key in run]

        topn_mean = {}
        topn_std = {}
        for n in TOP_N:
            counts = [sample["topN"][n] for sample in samples]
            topn_mean[n] = np.mean(counts)
            topn_std[n] = np.std(counts)

        first_ranks = [sample["mfr"] for sample in samples]
        average_ranks = [sample["mar"] for sample in samples]

        final_results[technique_key] = {
            "topN_mean": topn_mean,
            "topN_std": topn_std,
            "mfr_mean": np.mean(first_ranks),
            "mfr_std": np.std(first_ranks),
            "mar_mean": np.mean(average_ranks),
            "mar_std": np.std(average_ranks),
        }

    return final_results

def save_results_to_csv_for_pid(final_results, pid, out_dir, filename_suffix="comprehensive_results"):
    """Write one project's averaged metrics to ``<out_dir>/<pid>_<filename_suffix>.csv``.

    Args:
        final_results: ``{technique_key: {"topN_mean", "topN_std", "mfr_mean",
            "mfr_std", "mar_mean", "mar_std"}}``.
        pid: project id, written to the ``PID`` column and used in the file name.
        out_dir: directory that receives the CSV file.
        filename_suffix: trailing part of the CSV file name.

    Returns:
        The DataFrame that was written: SBFL rows first, then the remaining
        rows, each group ordered by technique name.
    """
    def _as_row(technique_key, stats):
        # One CSV row per technique; Top-N columns come in mean/std pairs.
        row = {"PID": pid, "Technique": technique_key}
        for n in (1, 3, 5, 10):
            row[f"Top-{n}_mean"] = stats["topN_mean"][n]
            row[f"Top-{n}_std"] = stats["topN_std"][n]
        row["MFR_mean"] = stats["mfr_mean"]
        row["MFR_std"] = stats["mfr_std"]
        row["MAR_mean"] = stats["mar_mean"]
        row["MAR_std"] = stats["mar_std"]
        return row

    def _technique_order(technique):
        # Group 0: name contains an SBFL formula; group 1: everything else.
        is_sbfl = any(formula in technique for formula in SBFL_FORMULA)
        return (0 if is_sbfl else 1, technique)

    table = pd.DataFrame([_as_row(key, stats) for key, stats in final_results.items()])
    table = (
        table.assign(sort_key=table['Technique'].apply(_technique_order))
        .sort_values('sort_key')
        .drop('sort_key', axis=1)
    )

    target = os.path.join(out_dir, f"{pid}_{filename_suffix}.csv")
    table.to_csv(target, index=False)

    print(f"Results for {pid} saved to: {target}")
    return table

def calculate_combined_results_across_pids(all_final_results):
    """Merge the per-project averaged results into one entry per technique.

    - Top-N: the per-project ``topN_mean`` values are summed (they are counts);
      ``topN_std`` is the standard deviation of those per-project values.
    - MFR / MAR: mean and standard deviation of the per-project means.

    Args:
        all_final_results: ``{pid: {technique_key: {"topN_mean": {n: ...},
            "mfr_mean": ..., "mar_mean": ..., ...}}}``.

    Returns:
        ``{technique_key: {"topN_mean", "topN_std", "mfr_mean", "mfr_std",
        "mar_mean", "mar_std"}}`` for the technique keys of the first project.
    """
    per_project = list(all_final_results.values())
    # The first project defines which techniques are combined.
    technique_keys = list(per_project[0].keys())

    def _mean_and_std(values):
        # Empty input yields zeros instead of NaN.
        if values:
            return np.mean(values), np.std(values)
        return 0, 0

    combined_results = {}
    for technique_key in technique_keys:
        # Entries of the projects that actually report this technique.
        entries = [
            results[technique_key]
            for results in per_project
            if technique_key in results
        ]

        mfr_mean, mfr_std = _mean_and_std([entry["mfr_mean"] for entry in entries])
        mar_mean, mar_std = _mean_and_std([entry["mar_mean"] for entry in entries])

        topn_total = {}
        topn_spread = {}
        for n in TOP_N:
            counts = [entry["topN_mean"][n] for entry in entries]
            # Counts add up across projects; the spread is still a std.
            topn_total[n] = np.sum(counts) if counts else 0
            topn_spread[n] = np.std(counts) if counts else 0

        combined_results[technique_key] = {
            "topN_mean": topn_total,
            "topN_std": topn_spread,
            "mfr_mean": mfr_mean,
            "mfr_std": mfr_std,
            "mar_mean": mar_mean,
            "mar_std": mar_std,
        }

    return combined_results

def save_combined_results_to_csv(combined_final_results, all_final_results, out_dir):
    """Write the combined results and the combined-plus-per-project results to CSV.

    Two files are created in ``out_dir``:
    ``combined_comprehensive_results.csv`` (rows with PID ``"COMBINED"`` only)
    and ``all_projects_comprehensive_results.csv`` (combined rows plus one
    block of rows per project).

    Args:
        combined_final_results: ``{technique_key: stats}`` across projects.
        all_final_results: ``{pid: {technique_key: stats}}``.
        out_dir: directory that receives both CSV files.

    Returns:
        The DataFrame written to the all-projects file, sorted by PID and,
        within a PID, SBFL techniques first and then by technique name.
    """
    def _as_row(pid_label, technique_key, stats):
        # Same column layout for combined and per-project rows.
        row = {"PID": pid_label, "Technique": technique_key}
        for n in (1, 3, 5, 10):
            row[f"Top-{n}_mean"] = stats["topN_mean"][n]
            row[f"Top-{n}_std"] = stats["topN_std"][n]
        row["MFR_mean"] = stats["mfr_mean"]
        row["MFR_std"] = stats["mfr_std"]
        row["MAR_mean"] = stats["mar_mean"]
        row["MAR_std"] = stats["mar_std"]
        return row

    def _technique_order(technique):
        # Group 0: name contains an SBFL formula; group 1: everything else.
        is_sbfl = any(formula in technique for formula in SBFL_FORMULA)
        return (0 if is_sbfl else 1, technique)

    combined_rows = [
        _as_row("COMBINED", technique_key, stats)
        for technique_key, stats in combined_final_results.items()
    ]
    # The all-projects table starts with the combined rows.
    all_rows = list(combined_rows)
    for pid, pid_results in all_final_results.items():
        all_rows.extend(
            _as_row(pid, technique_key, stats)
            for technique_key, stats in pid_results.items()
        )

    df_combined = pd.DataFrame(combined_rows)
    df_all = pd.DataFrame(all_rows)

    df_combined = (
        df_combined.assign(sort_key=df_combined['Technique'].apply(_technique_order))
        .sort_values('sort_key')
        .drop('sort_key', axis=1)
    )
    df_all = (
        df_all.assign(sort_key=df_all['Technique'].apply(_technique_order))
        .sort_values(['PID', 'sort_key'])
        .drop('sort_key', axis=1)
    )

    combined_csv_path = os.path.join(out_dir, "combined_comprehensive_results.csv")
    all_csv_path = os.path.join(out_dir, "all_projects_comprehensive_results.csv")
    df_combined.to_csv(combined_csv_path, index=False)
    df_all.to_csv(all_csv_path, index=False)

    print(f"Combined results saved to: {combined_csv_path}")
    print(f"All projects results saved to: {all_csv_path}")

    return df_all

In [8]:
# Calculate final averaged results for each PID individually
print("Calculating final averaged results for each project...")

# Structure: {PID: {technique: {topN_mean, topN_std, mfr_mean, mfr_std, mar_mean, mar_std}}}
all_final_results = {}

# Calculate results for each PID
for PID in PIDS:
    print(f"\nCalculating final results for {PID}...")
    final_results = calculate_average_results_across_runs(all_exp_results[PID])
    all_final_results[PID] = final_results
    
    # Save individual PID comprehensive results to CSV
    print(f"Saving comprehensive results for {PID}...")
    df_results = save_results_to_csv_for_pid(final_results, PID, PID_OUT_DIRS[PID], "comprehensive_results")
    print(f"Saved {len(df_results)} technique results for {PID}")

print(f"\nCompleted individual project results calculation")

# Calculate combined results across all PIDs
print("\nCalculating combined results across all projects...")
combined_final_results = calculate_combined_results_across_pids(all_final_results)

# Save combined comprehensive results to CSV
print("Saving combined comprehensive results...")
df_combined = save_combined_results_to_csv(combined_final_results, all_final_results, COMBINED_OUT_DIR)
print(f"Saved combined results with {len(df_combined)} technique entries")

Calculating final averaged results for each project...

Calculating final results for Lang...
Saving comprehensive results for Lang...
Results for Lang saved to: /ssd_home/yangheechan/d4j_research_data/attempt_1/Lang/experiment_information_results/Lang_comprehensive_results.csv
Saved 86 technique results for Lang

Calculating final results for Mockito...
Saving comprehensive results for Mockito...
Results for Mockito saved to: /ssd_home/yangheechan/d4j_research_data/attempt_1/Mockito/experiment_information_results/Mockito_comprehensive_results.csv
Saved 86 technique results for Mockito

Completed individual project results calculation

Calculating combined results across all projects...
Saving combined comprehensive results...
Combined results saved to: /ssd_home/yangheechan/d4j_research_data/attempt_1/combined_experiment_results/combined_comprehensive_results.csv
All projects results saved to: /ssd_home/yangheechan/d4j_research_data/attempt_1/combined_experiment_results/all_projects_c

In [9]:
def get_mut_cnt_results_multi_runs(total_exp_results, transition_type, technique="muse"):
    """Aggregate one MBFL technique's metrics per mutant count across runs.

    Args:
        total_exp_results: ``{run_key: {technique_key: {"topN", "mfr", "mar"}}}``.
        transition_type: key of ``TRANSITION_TYPES`` selecting the transition.
        technique: ``"muse"`` or ``"metal"`` part of the technique key.

    Returns:
        Dict with ``topN_mean`` / ``topN_std`` (``{n: [value per mutant count]}``)
        and ``mfr_mean``, ``mfr_std``, ``mar_mean``, ``mar_std`` (lists with one
        value per mutant count), each computed over all runs.
    """
    mut_counts = range(MUT_RANGE[0], MUT_RANGE[1])

    # One series per run: entries of the mutant counts present in that run.
    per_run = []
    for exp_results in total_exp_results.values():
        present = []
        for mut_cnt in mut_counts:
            key = f"mutCnt{mut_cnt}_{TRANSITION_TYPES[transition_type]}_final_{technique}_score_rank"
            if key in exp_results:
                present.append(exp_results[key])
        per_run.append({
            "topN": {n: [entry["topN"][n] for entry in present] for n in TOP_N},
            "mfr": [entry["mfr"] for entry in present],
            "mar": [entry["mar"] for entry in present],
        })

    results = {
        "topN_mean": {n: [] for n in TOP_N},
        "topN_std": {n: [] for n in TOP_N},
        "mfr_mean": [],
        "mfr_std": [],
        "mar_mean": [],
        "mar_std": []
    }

    # Position i in every run's series belongs to the i-th mutant count.
    for position in range(len(mut_counts)):
        for n in TOP_N:
            counts = [run["topN"][n][position] for run in per_run]
            results["topN_mean"][n].append(np.mean(counts))
            results["topN_std"][n].append(np.std(counts))

        first_ranks = [run["mfr"][position] for run in per_run]
        results["mfr_mean"].append(np.mean(first_ranks))
        results["mfr_std"].append(np.std(first_ranks))

        average_ranks = [run["mar"][position] for run in per_run]
        results["mar_mean"].append(np.mean(average_ranks))
        results["mar_std"].append(np.std(average_ranks))

    return results

def plot_topN_line_graphs(total_exp_results, save_graphs=True, pid=None, out_dir=None):
    """Plot Top-N against the number of mutants per line, one figure per transition type.

    Each figure has one subplot per entry of ``TOP_N`` showing the MUSE and
    METAL mean across runs with a ±1σ band.

    Args:
        total_exp_results: ``{run_key: {technique_key: {...}}}`` for one project.
        save_graphs: when True, each figure is also saved as a PNG file.
        pid: project id used in the PNG file names. Defaults to the
            notebook-level ``PID`` for backward compatibility; pass it
            explicitly to avoid depending on a leftover loop variable.
        out_dir: directory that receives a ``graphs`` sub-directory. Defaults
            to ``PID_OUT_DIRS[pid]``. (The function used to read a global
            ``OUT_DIR`` that this notebook never defines.)
    """
    graphs_dir = None
    if save_graphs:
        if pid is None:
            pid = PID
        if out_dir is None:
            out_dir = PID_OUT_DIRS[pid]
        graphs_dir = os.path.join(out_dir, "graphs")
        os.makedirs(graphs_dir, exist_ok=True)

    x_values = list(range(MUT_RANGE[0], MUT_RANGE[1]))

    def _plot_with_band(mean, std, label, band_label, marker):
        # Mean line plus a shaded mean ± std band in the line's colour.
        line = plt.plot(x_values, mean, label=label, marker=marker, linewidth=2)
        plt.fill_between(x_values,
                         [m - s for m, s in zip(mean, std)],
                         [m + s for m, s in zip(mean, std)],
                         alpha=0.3, color=line[0].get_color(), label=band_label)

    for transition_type in TRANSITION_TYPES:
        plt.figure(figsize=(15, 10))

        # Get results for both techniques
        muse_res = get_mut_cnt_results_multi_runs(total_exp_results, transition_type, technique="muse")
        metal_res = get_mut_cnt_results_multi_runs(total_exp_results, transition_type, technique="metal")

        # One subplot per Top-N cut-off (2x2 grid)
        for i, top_n in enumerate(TOP_N, 1):
            plt.subplot(2, 2, i)

            _plot_with_band(muse_res["topN_mean"][top_n], muse_res["topN_std"][top_n],
                            'MUSE', 'MUSE ±1σ', 'x')
            _plot_with_band(metal_res["topN_mean"][top_n], metal_res["topN_std"][top_n],
                            'METAL', 'METAL ±1σ', 'o')

            # NOTE(review): fixed y-range; counts above 10 are clipped — confirm this is intended.
            plt.ylim(0, 10)
            plt.title(f'Top-{top_n}', fontsize=14)
            plt.xlabel('# of Mutants per line', fontsize=12)
            plt.ylabel(f'Top-{top_n}', fontsize=12)
            plt.legend(fontsize=9)
            plt.grid(True, alpha=0.3)
            plt.tick_params(axis='both', which='major', labelsize=10)

        plt.tight_layout()

        if save_graphs:
            filename = f"{pid}_topN_{transition_type}_line_graph.png"
            filepath = os.path.join(graphs_dir, filename)
            plt.savefig(filepath, dpi=300, bbox_inches='tight')
            print(f"Line graph saved: {filepath}")

        plt.show()

def plot_mfr_mar_line_graphs(total_exp_results, save_graphs=True, pid=None, out_dir=None):
    """Plot MFR and MAR against the number of mutants per line, one figure per transition type.

    Each figure has an MFR subplot and a MAR subplot showing the MUSE and
    METAL mean across runs with a ±1σ band.

    Args:
        total_exp_results: ``{run_key: {technique_key: {...}}}`` for one project.
        save_graphs: when True, each figure is also saved as a PNG file.
        pid: project id used in the PNG file names. Defaults to the
            notebook-level ``PID`` for backward compatibility; pass it
            explicitly to avoid depending on a leftover loop variable.
        out_dir: directory that receives a ``graphs`` sub-directory. Defaults
            to ``PID_OUT_DIRS[pid]``. (The function used to read a global
            ``OUT_DIR`` that this notebook never defines.)
    """
    graphs_dir = None
    if save_graphs:
        if pid is None:
            pid = PID
        if out_dir is None:
            out_dir = PID_OUT_DIRS[pid]
        graphs_dir = os.path.join(out_dir, "graphs")
        os.makedirs(graphs_dir, exist_ok=True)

    x_values = list(range(MUT_RANGE[0], MUT_RANGE[1]))

    def _plot_with_band(mean, std, label, band_label, marker):
        # Mean line plus a shaded mean ± std band in the line's colour.
        line = plt.plot(x_values, mean, label=label, marker=marker, linewidth=2)
        plt.fill_between(x_values,
                         [m - s for m, s in zip(mean, std)],
                         [m + s for m, s in zip(mean, std)],
                         alpha=0.3, color=line[0].get_color(), label=band_label)

    for transition_type in TRANSITION_TYPES:
        plt.figure(figsize=(12, 4))

        # Get results for both techniques
        muse_res = get_mut_cnt_results_multi_runs(total_exp_results, transition_type, technique="muse")
        metal_res = get_mut_cnt_results_multi_runs(total_exp_results, transition_type, technique="metal")

        # Left subplot: MFR, right subplot: MAR
        for position, (metric, metric_name) in enumerate((("mfr", "MFR"), ("mar", "MAR")), 1):
            plt.subplot(1, 2, position)

            _plot_with_band(muse_res[f"{metric}_mean"], muse_res[f"{metric}_std"],
                            f'{metric_name} MUSE', 'MUSE ±1σ', 'x')
            _plot_with_band(metal_res[f"{metric}_mean"], metal_res[f"{metric}_std"],
                            f'{metric_name} METAL', 'METAL ±1σ', 'o')

            plt.title(f'{metric_name} ({transition_type})', fontsize=14)
            plt.xlabel('# of Mutants per line', fontsize=12)
            plt.ylabel(metric_name, fontsize=12)
            plt.legend(fontsize=9)
            plt.grid(True, alpha=0.3)

        plt.tight_layout()

        if save_graphs:
            filename = f"{pid}_mfr_mar_{transition_type}_line_graph.png"
            filepath = os.path.join(graphs_dir, filename)
            plt.savefig(filepath, dpi=300, bbox_inches='tight')
            print(f"MFR/MAR line graph saved: {filepath}")

        plt.show()

In [10]:
def create_key_techniques_comparison_for_pid(final_results, pid, out_dir):
    """Write a compact "mean ± std" comparison of the key techniques for one project.

    The table covers every SBFL formula plus MUSE and METAL at
    ``mutCnt10`` / ``result_transition`` and is saved to
    ``<out_dir>/<pid>_key_techniques_comparison.csv``. Techniques missing
    from ``final_results`` are left out.

    Args:
        final_results: ``{technique_key: {"topN_mean", "topN_std", "mfr_mean",
            "mfr_std", "mar_mean", "mar_std"}}``.
        pid: project id, written to the ``PID`` column and used in the file name.
        out_dir: directory that receives the CSV file.

    Returns:
        The comparison DataFrame that was written.
    """
    # Technique key -> display name; SBFL formulas first, then the two MBFL picks.
    display_names = {f"{formula}_rank": formula.upper() for formula in SBFL_FORMULA}
    display_names["mutCnt10_result_transition_final_muse_score_rank"] = "MUSE (mutCnt10)"
    display_names["mutCnt10_result_transition_final_metal_score_rank"] = "METAL (mutCnt10)"

    def _mean_pm_std(mean, std):
        # One decimal place for both numbers.
        return f"{mean:.1f} ± {std:.1f}"

    rows = []
    for technique_key, display_name in display_names.items():
        if technique_key not in final_results:
            continue
        stats = final_results[technique_key]
        is_sbfl = any(formula in technique_key for formula in SBFL_FORMULA)

        row = {
            "PID": pid,
            "Category": "SBFL" if is_sbfl else "MBFL",
            "Technique": display_name,
        }
        for n in (1, 3, 5, 10):
            row[f"Top-{n}"] = _mean_pm_std(stats["topN_mean"][n], stats["topN_std"][n])
        row["MFR"] = _mean_pm_std(stats["mfr_mean"], stats["mfr_std"])
        row["MAR"] = _mean_pm_std(stats["mar_mean"], stats["mar_std"])
        rows.append(row)

    comparison_df = pd.DataFrame(rows)

    comparison_path = os.path.join(out_dir, f"{pid}_key_techniques_comparison.csv")
    comparison_df.to_csv(comparison_path, index=False)

    print(f"Key techniques comparison for {pid} saved to: {comparison_path}")
    return comparison_df

def create_combined_key_techniques_comparison(combined_final_results, all_final_results, out_dir):
    """Build and save key-techniques tables for the combined and per-project results.

    Rows are produced for the aggregate (PID ``"COMBINED"``) and for every
    project in ``all_final_results``, restricted to the SBFL formulas and the
    MUSE/METAL configurations at ``mutCnt10`` with ``result_transition``.
    Metric cells are ``mean ± std`` strings with one decimal.

    Two CSV files are written into ``out_dir``:
    ``combined_key_techniques_comparison.csv`` (COMBINED rows only) and
    ``all_projects_key_techniques_comparison.csv`` (all rows).

    Args:
        combined_final_results: Mapping of technique key -> stats dict for the
            aggregate over all projects.
        all_final_results: Mapping of PID -> (technique key -> stats dict).
        out_dir: Directory that receives the two CSV files.

    Returns:
        pandas.DataFrame holding all rows, sorted by PID, Category, Technique.
        When no key technique is present anywhere the frame is empty but still
        carries the full column set.
    """
    # Fixed column set: keeps sorting/filtering valid even when there are no rows
    # (a column-less frame would raise KeyError on 'PID').
    columns = ["PID", "Category", "Technique", "Top-1", "Top-3", "Top-5", "Top-10", "MFR", "MAR"]

    # Define key techniques to compare
    key_techniques = {
        # SBFL techniques
        **{f"{sbfl}_rank": f"{sbfl.upper()}" for sbfl in SBFL_FORMULA},
        # Best MBFL techniques (mutCnt10 with result_transition)
        "mutCnt10_result_transition_final_muse_score_rank": "MUSE (mutCnt10)",
        "mutCnt10_result_transition_final_metal_score_rank": "METAL (mutCnt10)"
    }

    def _rows_for(pid_label, results_by_key):
        """Return one formatted row per key technique present in ``results_by_key``."""
        rows = []
        for technique_key, display_name in key_techniques.items():
            if technique_key not in results_by_key:
                continue
            results = results_by_key[technique_key]
            category = "SBFL" if any(sbfl in technique_key for sbfl in SBFL_FORMULA) else "MBFL"
            rows.append({
                "PID": pid_label,
                "Category": category,
                "Technique": display_name,
                "Top-1": f"{results['topN_mean'][1]:.1f} ± {results['topN_std'][1]:.1f}",
                "Top-3": f"{results['topN_mean'][3]:.1f} ± {results['topN_std'][3]:.1f}",
                "Top-5": f"{results['topN_mean'][5]:.1f} ± {results['topN_std'][5]:.1f}",
                "Top-10": f"{results['topN_mean'][10]:.1f} ± {results['topN_std'][10]:.1f}",
                "MFR": f"{results['mfr_mean']:.1f} ± {results['mfr_std']:.1f}",
                "MAR": f"{results['mar_mean']:.1f} ± {results['mar_std']:.1f}"
            })
        return rows

    # Combined rows first, then each project's rows.
    all_comparison_data = _rows_for("COMBINED", combined_final_results)
    for pid, pid_results in all_final_results.items():
        all_comparison_data.extend(_rows_for(pid, pid_results))

    all_comparison_df = pd.DataFrame(all_comparison_data, columns=columns)

    # Sort by PID and Category
    all_comparison_df = all_comparison_df.sort_values(['PID', 'Category', 'Technique'])

    # Save the comparison tables
    combined_comparison_path = os.path.join(out_dir, "combined_key_techniques_comparison.csv")
    all_comparison_path = os.path.join(out_dir, "all_projects_key_techniques_comparison.csv")

    # Save combined only
    combined_df = all_comparison_df[all_comparison_df['PID'] == 'COMBINED'].copy()
    combined_df.to_csv(combined_comparison_path, index=False)

    # Save all (combined + individual)
    all_comparison_df.to_csv(all_comparison_path, index=False)

    print(f"Combined key techniques comparison saved to: {combined_comparison_path}")
    print(f"All projects key techniques comparison saved to: {all_comparison_path}")

    return all_comparison_df

In [11]:
# Build the key-techniques summary tables: one per project, then the combined one.
print("Creating key techniques comparisons...")

# Per-project tables go into each project's own output directory.
for PID in PIDS:
    print(f"\nCreating key techniques comparison for {PID}...")
    key_comparison_df = create_key_techniques_comparison_for_pid(
        final_results=all_final_results[PID],
        pid=PID,
        out_dir=PID_OUT_DIRS[PID],
    )
    print(f"Key techniques comparison completed for {PID}")

# The combined table (aggregate + every project) is written under COMBINED_OUT_DIR.
print("\nCreating combined key techniques comparison...")
combined_key_comparison_df = create_combined_key_techniques_comparison(
    combined_final_results=combined_final_results,
    all_final_results=all_final_results,
    out_dir=COMBINED_OUT_DIR,
)
print("Combined key techniques comparison completed")

Creating key techniques comparisons...

Creating key techniques comparison for Lang...
Key techniques comparison for Lang saved to: /ssd_home/yangheechan/d4j_research_data/attempt_1/Lang/experiment_information_results/Lang_key_techniques_comparison.csv
Key techniques comparison completed for Lang

Creating key techniques comparison for Mockito...
Key techniques comparison for Mockito saved to: /ssd_home/yangheechan/d4j_research_data/attempt_1/Mockito/experiment_information_results/Mockito_key_techniques_comparison.csv
Key techniques comparison completed for Mockito

Creating combined key techniques comparison...
Combined key techniques comparison saved to: /ssd_home/yangheechan/d4j_research_data/attempt_1/combined_experiment_results/combined_key_techniques_comparison.csv
All projects key techniques comparison saved to: /ssd_home/yangheechan/d4j_research_data/attempt_1/combined_experiment_results/all_projects_key_techniques_comparison.csv
Combined key techniques comparison completed


In [12]:
# Function definitions for visualization
def find_best_transition_type_for_technique(all_final_results, technique_pattern, top_n=1,
                                            mut_range=None, transition_types=None):
    """Find, per project, the MBFL configuration with the highest Top-N mean.

    Every combination of mutation count and transition type is looked up under
    the key ``mutCnt<k>_<transition_key>_final_<technique_pattern>_score_rank``
    and the one with the largest ``topN_mean[top_n]`` wins. Ties keep the
    configuration encountered first (mutation counts ascending, transition
    types in mapping order).

    A project is reported as soon as at least one configuration exists for it,
    even when every score is 0; previously such projects were dropped because
    the search started from a best score of 0 with a strict comparison.

    Args:
        all_final_results: Mapping of PID -> (technique key -> stats dict with
            a ``topN_mean`` mapping).
        technique_pattern: Technique name embedded in the key, e.g. ``"muse"``
            or ``"metal"``.
        top_n: Which Top-N mean to maximise.
        mut_range: Optional ``(start, stop)`` pair of mutation counts (stop
            exclusive). Defaults to the notebook-level ``MUT_RANGE``.
        transition_types: Optional mapping of transition type id -> key
            fragment. Defaults to the notebook-level ``TRANSITION_TYPES``.

    Returns:
        dict mapping PID -> ``{'transition_type', 'transition_key',
        'mut_count', 'score', 'technique_key'}`` for each project that has at
        least one matching configuration with a non-NaN score.
    """
    if mut_range is None:
        mut_range = MUT_RANGE
    if transition_types is None:
        transition_types = TRANSITION_TYPES

    best_results = {}

    for pid, pid_results in all_final_results.items():
        best = None  # (score, transition_type, mut_cnt, technique_key) of the current leader

        # Check all mutation counts and transition types
        for mut_cnt in range(mut_range[0], mut_range[1]):
            for transition_type, transition_key in transition_types.items():
                technique_key = f"mutCnt{mut_cnt}_{transition_key}_final_{technique_pattern}_score_rank"
                if technique_key not in pid_results:
                    continue

                score = pid_results[technique_key]["topN_mean"][top_n]
                if score != score:
                    # NaN never compares greater, so it must not become the leader either.
                    continue
                if best is None or score > best[0]:
                    best = (score, transition_type, mut_cnt, technique_key)

        if best is not None:
            best_score, best_transition, best_mut_count, best_key = best
            best_results[pid] = {
                'transition_type': best_transition,
                'transition_key': transition_types[best_transition],
                'mut_count': best_mut_count,
                'score': best_score,
                'technique_key': best_key
            }

    return best_results

def get_technique_display_name_with_best_transition(technique_key, technique_pattern, best_results, pid):
    """Return the label shown for a technique in the bar charts.

    Args:
        technique_key: Result key of the technique being labelled.
        technique_pattern: MBFL technique name expected inside the key
            (e.g. ``"muse"`` or ``"metal"``).
        best_results: Mapping of PID -> dict with ``'transition_type'`` and
            ``'mut_count'`` describing the best configuration.
        pid: Project whose best configuration should be quoted.

    Returns:
        ``"<PATTERN> (<transition_type>, mutCnt<k>)"`` when the key contains
        the pattern and ``pid`` has a best configuration; ``"<PATTERN>"`` when
        the key contains the pattern but ``pid`` has none; otherwise the key
        with ``"_rank"`` removed, upper-cased.
    """
    # Keys that do not mention the pattern are labelled by their own name.
    if technique_pattern not in technique_key:
        return technique_key.replace("_rank", "").upper()

    # Pattern matches but nothing is known about this project's best setup.
    if pid not in best_results:
        return f"{technique_pattern.upper()}"

    best_for_pid = best_results[pid]
    transition_type = best_for_pid['transition_type']
    mut_count = best_for_pid['mut_count']
    return f"{technique_pattern.upper()} ({transition_type}, mutCnt{mut_count})"

def generate_individual_pid_visualizations(total_exp_results, final_results, pid, graphs_dir):
    """Render every per-project figure for ``pid`` into ``graphs_dir``.

    Line graphs (Top-N, then MFR/MAR) are drawn from ``total_exp_results``;
    bar graphs (Top-N, then MFR/MAR) are drawn from ``final_results``.
    """
    line_graph_plotters = (
        plot_topN_line_graphs_for_pid,
        plot_mfr_mar_line_graphs_for_pid,
    )
    bar_graph_plotters = (
        create_techniques_topn_bar_graphs_for_pid,
        create_techniques_mfr_mar_bar_graphs_for_pid,
    )

    print(f"Generating line graphs for {pid}...")
    for draw_line_graph in line_graph_plotters:
        draw_line_graph(total_exp_results, pid, graphs_dir)

    print(f"Generating bar graphs for {pid}...")
    for draw_bar_graph in bar_graph_plotters:
        draw_bar_graph(final_results, pid, graphs_dir)

def generate_combined_visualizations(combined_final_results, all_final_results, graphs_dir):
    """Render the cross-project figures into ``graphs_dir``.

    First the combined-vs-individual bar graphs, then the per-project
    comparison (heatmap) visualizations.
    """
    print("Generating combined comparison bar graphs...")
    create_combined_comparison_bar_graphs(
        combined_final_results=combined_final_results,
        all_final_results=all_final_results,
        graphs_dir=graphs_dir,
    )

    print("Generating PID comparison visualizations...")
    create_pid_comparison_visualizations(
        all_final_results=all_final_results,
        graphs_dir=graphs_dir,
    )

def plot_topN_line_graphs_for_pid(total_exp_results, pid, graphs_dir):
    """Plot Top-N versus mutants-per-line for MUSE and METAL of one project.

    The transition type is the one whose MUSE configuration has the highest
    Top-1 mean for ``pid`` (looked up in the notebook-level
    ``all_final_results``). Both the MUSE and the METAL curves are drawn with
    that same transition type; no METAL-specific optimum is used. One subplot
    per entry of ``TOP_N`` is placed on a 2x2 grid, each showing the mean
    curve and a shaded ±1σ band.

    Args:
        total_exp_results: Experiment results for ``pid``, passed through to
            ``get_mut_cnt_results_multi_runs``.
        pid: Project identifier used in titles and in the file name.
        graphs_dir: Directory that receives
            ``<pid>_topN_<transition_type>_line_graph.png``.

    Returns:
        None. When no best MUSE configuration exists for ``pid`` a notice is
        printed and nothing is drawn.
    """

    def _plot_mean_with_band(x, means, stds, line_label, band_label, marker):
        """Draw a mean curve and shade its ±1σ band in the curve's colour."""
        line = plt.plot(x, means, label=line_label, marker=marker, linewidth=2)
        plt.fill_between(
            x,
            [m - s for m, s in zip(means, stds)],
            [m + s for m, s in zip(means, stds)],
            alpha=0.3, color=line[0].get_color(), label=band_label,
        )

    # Only the MUSE optimum selects the transition type; the former METAL
    # lookup was computed but never used.
    muse_best = find_best_transition_type_for_technique({pid: all_final_results[pid]}, "muse", top_n=1)
    if pid not in muse_best:
        print(f"No best MUSE configuration found for {pid}; skipping Top-N line graph")
        return

    best_transition_type = muse_best[pid]['transition_type']

    # Gather the data before opening a figure so a failure here leaves no figure behind.
    muse_res = get_mut_cnt_results_multi_runs(total_exp_results, best_transition_type, technique="muse")
    metal_res = get_mut_cnt_results_multi_runs(total_exp_results, best_transition_type, technique="metal")
    x_values = list(range(MUT_RANGE[0], MUT_RANGE[1]))

    plt.figure(figsize=(15, 10))

    # Plot Top-N graphs
    for i, top_n in enumerate(TOP_N, 1):
        plt.subplot(2, 2, i)

        _plot_mean_with_band(
            x_values, muse_res["topN_mean"][top_n], muse_res["topN_std"][top_n],
            f'MUSE ({best_transition_type})', 'MUSE ±1σ', 'x',
        )
        _plot_mean_with_band(
            x_values, metal_res["topN_mean"][top_n], metal_res["topN_std"][top_n],
            f'METAL ({best_transition_type})', 'METAL ±1σ', 'o',
        )

        # NOTE(review): fixed y-range; Top-N values above 10 would be clipped —
        # confirm this bound suits the data.
        plt.ylim(0, 10)
        plt.title(f'Top-{top_n} ({pid})', fontsize=14)
        plt.xlabel('# of Mutants per line', fontsize=12)
        plt.ylabel(f'Top-{top_n}', fontsize=12)
        plt.legend(fontsize=9)
        plt.grid(True, alpha=0.3)
        plt.tick_params(axis='both', which='major', labelsize=10)

    plt.tight_layout()

    filename = f"{pid}_topN_{best_transition_type}_line_graph.png"
    filepath = os.path.join(graphs_dir, filename)
    plt.savefig(filepath, dpi=300, bbox_inches='tight')
    plt.close()  # release the figure; many are produced in a loop over PIDs

    print(f"Line graph saved for {pid} using best transition type: {best_transition_type}")

def plot_mfr_mar_line_graphs_for_pid(total_exp_results, pid, graphs_dir):
    """Plot MFR and MAR versus mutants-per-line for MUSE and METAL of one project.

    The transition type is the one whose MUSE configuration has the highest
    Top-1 mean for ``pid`` (looked up in the notebook-level
    ``all_final_results``). Both techniques are drawn with that same
    transition type; no METAL-specific optimum is used. The figure has two
    panels (MFR left, MAR right), each showing the mean curve and a shaded
    ±1σ band per technique.

    Args:
        total_exp_results: Experiment results for ``pid``, passed through to
            ``get_mut_cnt_results_multi_runs``.
        pid: Project identifier used in titles and in the file name.
        graphs_dir: Directory that receives
            ``<pid>_mfr_mar_<transition_type>_line_graph.png``.

    Returns:
        None. When no best MUSE configuration exists for ``pid`` a notice is
        printed and nothing is drawn.
    """

    def _plot_mean_with_band(x, means, stds, line_label, band_label, marker):
        """Draw a mean curve and shade its ±1σ band in the curve's colour."""
        line = plt.plot(x, means, label=line_label, marker=marker, linewidth=2)
        plt.fill_between(
            x,
            [m - s for m, s in zip(means, stds)],
            [m + s for m, s in zip(means, stds)],
            alpha=0.3, color=line[0].get_color(), label=band_label,
        )

    # Only the MUSE optimum selects the transition type; the former METAL
    # lookup was computed but never used.
    muse_best = find_best_transition_type_for_technique({pid: all_final_results[pid]}, "muse", top_n=1)
    if pid not in muse_best:
        print(f"No best MUSE configuration found for {pid}; skipping MFR/MAR line graph")
        return

    best_transition_type = muse_best[pid]['transition_type']

    # Gather the data before opening a figure so a failure here leaves no figure behind.
    muse_res = get_mut_cnt_results_multi_runs(total_exp_results, best_transition_type, technique="muse")
    metal_res = get_mut_cnt_results_multi_runs(total_exp_results, best_transition_type, technique="metal")
    x_values = list(range(MUT_RANGE[0], MUT_RANGE[1]))

    plt.figure(figsize=(12, 4))

    # Left panel: MFR, right panel: MAR — identical layout, different metric.
    for position, (metric, metric_label) in enumerate((("mfr", "MFR"), ("mar", "MAR")), 1):
        plt.subplot(1, 2, position)

        _plot_mean_with_band(
            x_values, muse_res[f"{metric}_mean"], muse_res[f"{metric}_std"],
            f'{metric_label} MUSE ({best_transition_type})', 'MUSE ±1σ', 'x',
        )
        _plot_mean_with_band(
            x_values, metal_res[f"{metric}_mean"], metal_res[f"{metric}_std"],
            f'{metric_label} METAL ({best_transition_type})', 'METAL ±1σ', 'o',
        )

        plt.title(f'{metric_label} ({pid} - {best_transition_type})', fontsize=14)
        plt.xlabel('# of Mutants per line', fontsize=12)
        plt.ylabel(metric_label, fontsize=12)
        plt.legend(fontsize=9)
        plt.grid(True, alpha=0.3)

    plt.tight_layout()

    filename = f"{pid}_mfr_mar_{best_transition_type}_line_graph.png"
    filepath = os.path.join(graphs_dir, filename)
    plt.savefig(filepath, dpi=300, bbox_inches='tight')
    plt.close()  # release the figure; many are produced in a loop over PIDs

    print(f"MFR/MAR graph saved for {pid} using best transition type: {best_transition_type}")

def create_techniques_topn_bar_graphs_for_pid(final_results, pid, graphs_dir):
    """Create Top-N bar graphs comparing SBFL and the best MBFL techniques for one project.

    All SBFL entries of ``final_results`` are shown, plus the single best MUSE
    and best METAL configuration (highest Top-1 mean) found in that same
    ``final_results``. One panel is drawn per entry of ``TOP_N`` on a 2x2
    grid, with the standard deviation as error bars.

    Args:
        final_results: Mapping of technique key -> stats dict with
            ``topN_mean`` / ``topN_std`` for this project.
        pid: Project identifier used in titles and in the file name.
        graphs_dir: Directory that receives
            ``<pid>_techniques_topN_best_transition_comparison.png``.

    Returns:
        None. When no technique qualifies a notice is printed and nothing is
        drawn.
    """
    from matplotlib.patches import Patch  # only needed for the legend proxies

    # Select the best configurations from the results actually being plotted
    # (not from the notebook-global all_final_results), so selection and data agree.
    muse_best = find_best_transition_type_for_technique({pid: final_results}, "muse", top_n=1)
    metal_best = find_best_transition_type_for_technique({pid: final_results}, "metal", top_n=1)

    # Separate SBFL and MBFL techniques
    sbfl_techniques = {}
    mbfl_techniques = {}

    for technique_key, results in final_results.items():
        if any(sbfl in technique_key for sbfl in SBFL_FORMULA):
            sbfl_techniques[technique_key.replace("_rank", "").upper()] = results
            continue

        # Only show the best performing MUSE and METAL techniques
        for pattern, best in (("muse", muse_best), ("metal", metal_best)):
            if pid in best and technique_key == best[pid]['technique_key']:
                display_name = get_technique_display_name_with_best_transition(technique_key, pattern, best, pid)
                mbfl_techniques[display_name] = results
                break

    # Combine all techniques for comparison
    all_techniques = {**sbfl_techniques, **mbfl_techniques}
    technique_names = list(all_techniques.keys())
    if not technique_names:
        print(f"No techniques to plot for {pid}; skipping Top-N bar graphs")
        return

    colors = ['lightblue' if name in sbfl_techniques else 'lightcoral' for name in technique_names]
    x_pos = np.arange(len(technique_names))

    # Create Top-N bar graphs
    fig, axes = plt.subplots(2, 2, figsize=(20, 12))
    axes = axes.flatten()

    for i, top_n in enumerate(TOP_N):
        ax = axes[i]
        means = [all_techniques[name]["topN_mean"][top_n] for name in technique_names]
        stds = [all_techniques[name]["topN_std"][top_n] for name in technique_names]

        ax.bar(x_pos, means, yerr=stds, capsize=5, alpha=0.8, color=colors)
        ax.set_xlabel('Techniques', fontsize=12)
        ax.set_ylabel(f'Top-{top_n} Count', fontsize=12)
        ax.set_title(f'{pid} - Top-{top_n} Performance (Best Transition Types)', fontsize=14)
        ax.set_xticks(x_pos)
        ax.set_xticklabels(technique_names, rotation=45, ha='right')
        ax.grid(True, alpha=0.3)
        ax.tick_params(axis='both', which='major', labelsize=10)

        # The colour legend is shown once, on the first panel.
        if i == 0:
            legend_elements = [Patch(facecolor='lightblue', label='SBFL'),
                               Patch(facecolor='lightcoral', label='MBFL (Best Transition)')]
            ax.legend(handles=legend_elements, loc='upper right')

    plt.tight_layout()

    filename = f"{pid}_techniques_topN_best_transition_comparison.png"
    filepath = os.path.join(graphs_dir, filename)
    plt.savefig(filepath, dpi=300, bbox_inches='tight')
    plt.close(fig)

def create_techniques_mfr_mar_bar_graphs_for_pid(final_results, pid, graphs_dir):
    """Create MFR and MAR bar graphs comparing SBFL and the best MBFL techniques for one project.

    All SBFL entries of ``final_results`` are shown, plus the single best MUSE
    and best METAL configuration (highest Top-1 mean) found in that same
    ``final_results``. The figure has two panels (MFR left, MAR right) with
    the standard deviation as error bars.

    Args:
        final_results: Mapping of technique key -> stats dict with
            ``mfr_mean`` / ``mfr_std`` / ``mar_mean`` / ``mar_std`` (and
            ``topN_mean`` for the best-configuration lookup).
        pid: Project identifier used in titles and in the file name.
        graphs_dir: Directory that receives
            ``<pid>_techniques_mfr_mar_best_transition_comparison.png``.

    Returns:
        None. When no technique qualifies a notice is printed and nothing is
        drawn.
    """
    from matplotlib.patches import Patch  # only needed for the legend proxies

    # Select the best configurations from the results actually being plotted
    # (not from the notebook-global all_final_results), so selection and data agree.
    muse_best = find_best_transition_type_for_technique({pid: final_results}, "muse", top_n=1)
    metal_best = find_best_transition_type_for_technique({pid: final_results}, "metal", top_n=1)

    # Separate SBFL and MBFL techniques
    sbfl_techniques = {}
    mbfl_techniques = {}

    for technique_key, results in final_results.items():
        if any(sbfl in technique_key for sbfl in SBFL_FORMULA):
            sbfl_techniques[technique_key.replace("_rank", "").upper()] = results
            continue

        # Only show the best performing MUSE and METAL techniques
        for pattern, best in (("muse", muse_best), ("metal", metal_best)):
            if pid in best and technique_key == best[pid]['technique_key']:
                display_name = get_technique_display_name_with_best_transition(technique_key, pattern, best, pid)
                mbfl_techniques[display_name] = results
                break

    # Combine all techniques for comparison
    all_techniques = {**sbfl_techniques, **mbfl_techniques}
    technique_names = list(all_techniques.keys())
    if not technique_names:
        print(f"No techniques to plot for {pid}; skipping MFR/MAR bar graphs")
        return

    colors = ['lightblue' if name in sbfl_techniques else 'lightcoral' for name in technique_names]
    x_pos = np.arange(len(technique_names))

    # Create MFR and MAR bar graphs
    fig, axes = plt.subplots(1, 2, figsize=(16, 6))

    # Left panel: MFR, right panel: MAR — identical layout, different metric.
    for ax, metric in zip(axes, ("mfr", "mar")):
        metric_label = metric.upper()
        means = [all_techniques[name][f"{metric}_mean"] for name in technique_names]
        stds = [all_techniques[name][f"{metric}_std"] for name in technique_names]

        ax.bar(x_pos, means, yerr=stds, capsize=5, alpha=0.8, color=colors)
        ax.set_xlabel('Techniques', fontsize=12)
        ax.set_ylabel(metric_label, fontsize=12)
        ax.set_title(f'{pid} - {metric_label} Performance (Best Transition Types)', fontsize=14)
        ax.set_xticks(x_pos)
        ax.set_xticklabels(technique_names, rotation=45, ha='right')
        ax.grid(True, alpha=0.3)
        ax.tick_params(axis='both', which='major', labelsize=10)

    # The colour legend is shown once, on the MFR panel.
    legend_elements = [Patch(facecolor='lightblue', label='SBFL'),
                       Patch(facecolor='lightcoral', label='MBFL (Best Transition)')]
    axes[0].legend(handles=legend_elements, loc='upper right')

    plt.tight_layout()

    filename = f"{pid}_techniques_mfr_mar_best_transition_comparison.png"
    filepath = os.path.join(graphs_dir, filename)
    plt.savefig(filepath, dpi=300, bbox_inches='tight')
    plt.close(fig)

def create_combined_comparison_bar_graphs(combined_final_results, all_final_results, graphs_dir):
    """Create grouped Top-N bar graphs comparing the combined results with each project.

    For every entry of ``TOP_N`` one panel (2x2 grid) shows, per key
    technique, a "Combined" bar followed by one bar per project in ``PIDS``.
    Key techniques are the SBFL formulas plus MUSE/METAL at ``mutCnt10`` with
    ``result_transition``; only techniques present in
    ``combined_final_results`` are plotted. A technique missing from a
    project's results is drawn as 0 for that project.

    Bar width and offsets are derived from the number of series so that a
    group never spills into the neighbouring technique, however many projects
    are listed. With two projects the layout equals the former fixed layout
    (width 0.25 at offsets -0.25, 0, +0.25).

    Args:
        combined_final_results: Mapping of technique key -> stats dict for the
            aggregate over all projects.
        all_final_results: Mapping of PID -> (technique key -> stats dict);
            must contain every PID listed in ``PIDS``.
        graphs_dir: Directory that receives
            ``combined_vs_individual_topN_comparison.png``.
    """
    # Define key techniques to compare
    key_techniques = {
        **{f"{sbfl}_rank": f"{sbfl.upper()}" for sbfl in SBFL_FORMULA},
        "mutCnt10_result_transition_final_muse_score_rank": "MUSE",
        "mutCnt10_result_transition_final_metal_score_rank": "METAL"
    }

    # One "Combined" series plus one per project; keep each group within 0.8 of a tick.
    n_series = len(PIDS) + 1
    width = min(0.25, 0.8 / n_series)
    offsets = [(k - (n_series - 1) / 2) * width for k in range(n_series)]
    pid_colors = ['lightblue', 'lightcoral', 'lightgreen', 'lightyellow']

    # Create comparison for Top-N metrics
    fig, axes = plt.subplots(2, 2, figsize=(20, 12))
    axes = axes.flatten()

    for i, top_n in enumerate(TOP_N):
        ax = axes[i]

        # Prepare data
        technique_names = []
        combined_means = []
        pid_means = {pid: [] for pid in PIDS}

        for technique_key, display_name in key_techniques.items():
            if technique_key not in combined_final_results:
                continue
            technique_names.append(display_name)
            combined_means.append(combined_final_results[technique_key]["topN_mean"][top_n])

            for pid in PIDS:
                if technique_key in all_final_results[pid]:
                    pid_means[pid].append(all_final_results[pid][technique_key]["topN_mean"][top_n])
                else:
                    pid_means[pid].append(0)  # missing technique is shown as a zero-height bar

        # Create grouped bar chart
        x = np.arange(len(technique_names))

        ax.bar(x + offsets[0], combined_means, width, label='Combined', alpha=0.8, color='darkgreen')
        for j, pid in enumerate(PIDS):
            ax.bar(x + offsets[j + 1], pid_means[pid], width, label=pid, alpha=0.8,
                   color=pid_colors[j % len(pid_colors)])

        ax.set_xlabel('Techniques', fontsize=12)
        ax.set_ylabel(f'Top-{top_n}', fontsize=12)
        ax.set_title(f'Top-{top_n} Comparison: Combined vs Individual PIDs', fontsize=14)
        ax.set_xticks(x)
        ax.set_xticklabels(technique_names, rotation=45, ha='right')
        ax.legend()
        ax.grid(True, alpha=0.3)

    plt.tight_layout()

    filename = "combined_vs_individual_topN_comparison.png"
    filepath = os.path.join(graphs_dir, filename)
    plt.savefig(filepath, dpi=300, bbox_inches='tight')
    plt.close(fig)

def create_pid_comparison_visualizations(all_final_results, graphs_dir):
    """Draw a Top-1 heatmap of key techniques (rows) against projects (columns).

    Key techniques are the SBFL formulas plus MUSE/METAL at ``mutCnt10`` with
    ``result_transition``. A technique missing from a project's results is
    shown as 0. The figure is saved as ``pid_technique_heatmap_top1.png`` in
    ``graphs_dir``.

    Args:
        all_final_results: Mapping of PID -> (technique key -> stats dict with
            ``topN_mean``); must contain every PID listed in ``PIDS``.
        graphs_dir: Directory that receives the PNG.
    """
    # Result key -> row label, SBFL formulas first, then the two MBFL picks.
    labels_by_key = {f"{formula}_rank": f"{formula.upper()}" for formula in SBFL_FORMULA}
    labels_by_key["mutCnt10_result_transition_final_muse_score_rank"] = "MUSE"
    labels_by_key["mutCnt10_result_transition_final_metal_score_rank"] = "METAL"

    fig, ax = plt.subplots(1, 1, figsize=(12, 8))

    # One row per technique, one column per project; absent entries become 0.
    technique_labels = list(labels_by_key.values())
    heatmap_data = np.array([
        [
            all_final_results[pid][key]["topN_mean"][1] if key in all_final_results[pid] else 0
            for pid in PIDS
        ]
        for key in labels_by_key
    ])

    im = ax.imshow(heatmap_data, cmap='viridis', aspect='auto')

    # Tick positions and labels for both axes.
    ax.set_xticks(np.arange(len(PIDS)))
    ax.set_yticks(np.arange(len(technique_labels)))
    ax.set_xticklabels(PIDS)
    ax.set_yticklabels(technique_labels)

    plt.colorbar(im, ax=ax, label='Top-1 Performance')

    # Write each cell's value on top of the cell.
    for row, col in np.ndindex(len(technique_labels), len(PIDS)):
        ax.text(col, row, f'{heatmap_data[row, col]:.1f}',
                ha="center", va="center", color="white", fontsize=10)

    ax.set_title('Top-1 Performance Heatmap: Techniques vs PIDs', fontsize=16)
    ax.set_xlabel('Projects (PIDs)', fontsize=14)
    ax.set_ylabel('Techniques', fontsize=14)

    plt.tight_layout()

    filepath = os.path.join(graphs_dir, "pid_technique_heatmap_top1.png")
    plt.savefig(filepath, dpi=300, bbox_inches='tight')
    plt.close()

# Render every figure: per-project graphs first, then the cross-project ones.
print("Generating visualizations for all projects...")

for PID in PIDS:
    print(f"\nGenerating visualizations for {PID}...")

    # Each project keeps its figures in a "graphs" folder inside its own output directory.
    pid_graphs_dir = os.path.join(PID_OUT_DIRS[PID], "graphs")
    os.makedirs(pid_graphs_dir, exist_ok=True)

    generate_individual_pid_visualizations(
        total_exp_results=all_exp_results[PID],
        final_results=all_final_results[PID],
        pid=PID,
        graphs_dir=pid_graphs_dir,
    )
    print(f"Completed visualizations for {PID}")

# Cross-project figures live in a "graphs" folder under the combined output directory.
print("\nGenerating combined visualizations across all projects...")
combined_graphs_dir = os.path.join(COMBINED_OUT_DIR, "graphs")
os.makedirs(combined_graphs_dir, exist_ok=True)

generate_combined_visualizations(
    combined_final_results=combined_final_results,
    all_final_results=all_final_results,
    graphs_dir=combined_graphs_dir,
)
print("Completed combined visualizations")

Generating visualizations for all projects...

Generating visualizations for Lang...
Generating line graphs for Lang...
Line graph saved for Lang using best transition type: type1
Line graph saved for Lang using best transition type: type1
MFR/MAR graph saved for Lang using best transition type: type1
Generating bar graphs for Lang...
MFR/MAR graph saved for Lang using best transition type: type1
Generating bar graphs for Lang...
Completed visualizations for Lang

Generating visualizations for Mockito...
Generating line graphs for Mockito...
Completed visualizations for Lang

Generating visualizations for Mockito...
Generating line graphs for Mockito...
Line graph saved for Mockito using best transition type: type2
Line graph saved for Mockito using best transition type: type2
MFR/MAR graph saved for Mockito using best transition type: type2
Generating bar graphs for Mockito...
MFR/MAR graph saved for Mockito using best transition type: type2
Generating bar graphs for Mockito...
Comple

# Multi-PID Analysis Summary

This notebook processes and analyzes fault-localization experiment results across multiple projects (PIDs). Results are kept per project and are also aggregated into a combined analysis.

## 🚀 **Enhanced Multi-PID Features**

### 1. **Multi-Project Processing**
- **Individual Analysis**: Each PID (Lang, Mockito) is processed separately with its own results
- **Combined Analysis**: Results are aggregated across all PIDs to show overall performance
- **Preserved Structure**: Individual project results are maintained for detailed analysis

### 2. **Comprehensive Output Structure**
```
Individual Project Results:
$RESEARCH_DATA/attempt_1/Lang/experiment_information_results/
├── Lang_faulty_line_ranks.json
├── Lang_comprehensive_results.csv
├── Lang_key_techniques_comparison.csv
└── graphs/
    ├── Lang_topN_*.png
    ├── Lang_mfr_mar_*.png
    └── Lang_techniques_*.png

$RESEARCH_DATA/attempt_1/Mockito/experiment_information_results/
└── (similar structure for Mockito)

Combined Results:
$RESEARCH_DATA/attempt_1/combined_experiment_results/
├── combined_comprehensive_results.csv
├── all_projects_comprehensive_results.csv
├── combined_key_techniques_comparison.csv
├── all_projects_key_techniques_comparison.csv
└── graphs/
    ├── combined_vs_individual_topN_comparison.png
    ├── pid_technique_heatmap_top1.png
    └── other combined visualizations
```

### 3. **Advanced Visualizations**
- **Individual Project Graphs**: Line graphs and bar charts for each PID
- **Combined Comparison Graphs**: Side-by-side comparisons of combined vs individual results
- **Cross-Project Heatmaps**: Performance comparison matrices across PIDs and techniques
- **Comprehensive Coverage**: Top-N, MFR, MAR metrics for all techniques

### 4. **Data Analysis Capabilities**
- **Statistical Aggregation**: Mean and standard deviation calculations across PIDs
- **Performance Comparison**: Direct comparison between different projects
- **Technique Evaluation**: Comprehensive assessment of SBFL and MBFL techniques
- **Export Flexibility**: Multiple CSV formats for different analysis needs

## 📊 **Generated Outputs**

### Individual Project Outputs (for each PID):
1. **JSON Files**: Raw experimental data with faulty line ranks
2. **CSV Files**: Comprehensive and key techniques comparison tables
3. **Visualizations**: Line graphs (mutation trends) and bar charts (technique comparison)

### Combined Analysis Outputs:
1. **Combined CSV**: Aggregated results across all projects
2. **Comparison CSV**: Side-by-side individual and combined results
3. **Advanced Visualizations**: Cross-project comparisons and heatmaps
4. **Summary Tables**: Key techniques performance across all projects

## ⚡ **Performance & Scalability**
- **Efficient Processing**: Optimized data structures for multi-project analysis
- **Memory Management**: Proper handling of large datasets across multiple PIDs
- **Extensible Design**: Easy to add new PIDs to the analysis
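- **Robust Error Handling**: Graceful handling of missing data for individual projects — a technique missing from a project's results is skipped in the comparison tables and shown as 0 in the combined bar chart and the heatmap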

## 🔍 **Research Benefits**
- **Comprehensive Analysis**: Complete view of technique performance across different projects
- **Statistical Rigor**: Proper aggregation and variance calculation across projects
- **Comparative Insights**: Clear understanding of technique consistency across different codebases
- **Publication Ready**: High-quality visualizations and comprehensive data tables for research papers

In [13]:
# Final Results Summary: a console recap of what was analysed and which outputs are expected.
banner = "=" * 80
print(banner)
print("MULTI-PID EXPERIMENTAL RESULTS SUMMARY")
print(banner)

# Projects covered by this run.
print(f"\n📊 PROJECTS ANALYZED: {len(PIDS)} projects")
for i, pid in enumerate(PIDS, start=1):
    print(f"  {i}. {pid}")

# Experiment dimensions; both ranges are half-open (start, stop) pairs.
run_count = RID_RANGE[1] - RID_RANGE[0]
print(f"\n📈 EXPERIMENTAL RUNS: {run_count} runs per project")
print("📋 TECHNIQUES EVALUATED:")
print(f"  • SBFL Techniques: {len(SBFL_FORMULA)} ({', '.join(SBFL_FORMULA)})")
mutation_count_variants = MUT_RANGE[1] - MUT_RANGE[0]
print(f"  • MBFL Techniques: {len(TRANSITION_TYPES)} transition types × {mutation_count_variants} mutation counts × 2 methods")

print("\n📁 OUTPUT DIRECTORIES:")
print(f"  • Individual Results: {len(PIDS)} directories (one per project)")
print(f"  • Combined Results: {COMBINED_OUT_DIR}")

# Expected data files (names only; this recap does not check that they exist on disk).
print("\n📊 GENERATED FILES:")
individual_suffixes = (
    "_faulty_line_ranks.json",
    "_comprehensive_results.csv",
    "_key_techniques_comparison.csv",
)
total_files = 0
for pid in PIDS:
    individual_files = [f"{pid}{suffix}" for suffix in individual_suffixes]
    total_files += len(individual_files)
    print(f"  • {pid}: {len(individual_files)} data files + graphs")

combined_files = [
    "combined_comprehensive_results.csv",
    "all_projects_comprehensive_results.csv",
    "combined_key_techniques_comparison.csv",
    "all_projects_key_techniques_comparison.csv",
]
total_files += len(combined_files)
print(f"  • Combined: {len(combined_files)} data files + graphs")
print(f"  • Total Data Files: {total_files}")

print("\n🎯 KEY METRICS CALCULATED:")
print(f"  • Top-N Performance: {TOP_N}")
print("  • Mean First Rank (MFR): Average rank of first faulty statement")
print("  • Mean Average Rank (MAR): Average rank of all faulty statements")

print("\n📈 VISUALIZATIONS GENERATED:")
viz_types = [
    "Top-N line graphs (mutation trends)",
    "MFR/MAR line graphs",
    "Top-N bar charts (technique comparison)",
    "MFR/MAR bar charts",
    "Combined vs individual comparison",
    "Cross-project heatmaps",
]
for viz in viz_types:
    print(f"  • {viz}")

print("\n✅ ANALYSIS COMPLETED SUCCESSFULLY!")
print("All results saved and ready for research analysis.")
print(banner)

MULTI-PID EXPERIMENTAL RESULTS SUMMARY

📊 PROJECTS ANALYZED: 2 projects
  1. Lang
  2. Mockito

📈 EXPERIMENTAL RUNS: 10 runs per project
📋 TECHNIQUES EVALUATED:
  • SBFL Techniques: 6 (tarantula, ochiai, dstar, naish1, naish2, gp13)
  • MBFL Techniques: 4 transition types × 10 mutation counts × 2 methods

📁 OUTPUT DIRECTORIES:
  • Individual Results: 2 directories (one per project)
  • Combined Results: /ssd_home/yangheechan/d4j_research_data/attempt_1/combined_experiment_results

📊 GENERATED FILES:
  • Lang: 3 data files + graphs
  • Mockito: 3 data files + graphs
  • Combined: 4 data files + graphs
  • Total Data Files: 10

🎯 KEY METRICS CALCULATED:
  • Top-N Performance: [1, 3, 5, 10]
  • Mean First Rank (MFR): Average rank of first faulty statement
  • Mean Average Rank (MAR): Average rank of all faulty statements

📈 VISUALIZATIONS GENERATED:
  • Top-N line graphs (mutation trends)
  • MFR/MAR line graphs
  • Top-N bar charts (technique comparison)
  • MFR/MAR bar charts
  • Combin