In [1]:
# Change the current working directory to the parent of the notebook's
# directory so that the project-local `lib` package can be imported.
import os

# Compute the root only once per kernel: without this guard, re-running the
# cell would climb one more directory level on every execution.
if "root_path" not in globals():
    curr_path = os.getcwd()
    root_path = os.path.dirname(curr_path)
os.chdir(root_path)

import dotenv
dotenv.load_dotenv()

from lib.database import CRUD


# Connection settings are read from environment variables (load_dotenv above
# can populate them from a .env file). Expected variables:
#   DB_HOST, DB_PORT, DB_USER, DB_PASSWORD, DB, SLACK_CHANNEL, SLACK_TOKEN
# Do not paste real hosts, user names or passwords into this notebook.
# NOTE(review): os.getenv returns strings (or None when unset); confirm that
# CRUD accepts the port as a string.
db = CRUD(
    host=os.getenv("DB_HOST"),
    port=os.getenv("DB_PORT"),
    user=os.getenv("DB_USER"),
    password=os.getenv("DB_PASSWORD"),
    database=os.getenv("DB"),
    slack_channel=os.getenv("SLACK_CHANNEL"),
    slack_token=os.getenv("SLACK_TOKEN")
)


In [11]:
# Merge all coverage bit sequences into a single bit string:
# a position in the result is '1' if any sequence has '1' there, otherwise '0'.

def cov_bit_seq(tcs_results):
    """
    Merge the coverage bit sequences of several test cases with bitwise OR.

    Args:
        tcs_results: sequence of rows whose first element is a string made
            of '0'/'1' characters.

    Returns:
        A '0'/'1' string, zero-padded to the length of the first row's
        sequence, holding '1' wherever at least one row holds '1'.
        Returns an empty string when ``tcs_results`` is empty.
    """
    if not tcs_results:
        # Nothing to merge, and no first row to take the output width from.
        return ""

    merged = 0
    for row in tcs_results:
        # Parse the binary string and fold it into the running union.
        merged |= int(row[0], 2)

    width = len(tcs_results[0][0])
    return format(merged, f"0{width}b")

def _count_line_executions(tcs_results):
    """Map each line index to the number of rows whose bit at that index is '1'."""
    counts = {}
    for row in tcs_results:
        for idx, bit in enumerate(row[0]):
            if bit == '1':
                counts[idx] = counts.get(idx, 0) + 1
    return counts


def _bit_string_with(indices, length):
    """Build a '0'/'1' string of ``length`` characters with ``indices`` set to '1'."""
    bits = ['0'] * length
    for idx in indices:
        bits[idx] = '1'
    return ''.join(bits)


def get_most_executed_from_fail(tcs_results, top_n=8):
    """
    Select the lines covered by the largest number of the given test cases.

    Args:
        tcs_results: sequence of rows whose first element is a '0'/'1'
            coverage string (one character per line index).
        top_n: maximum number of line indices to select (default 8).

    Returns:
        A '0'/'1' string, as long as the first row's sequence, with the
        ``top_n`` most frequently covered line indices set to '1'.
        Returns False when the counts cannot rank the lines: every covered
        line has the same count, or no line is covered at all (including
        an empty ``tcs_results``).
    """
    line_counts = _count_line_executions(tcs_results)

    # Zero distinct counts (nothing covered) or one (all tied) gives no ranking.
    if len(set(line_counts.values())) <= 1:
        return False

    # Stable sort: ties keep the order in which lines were first seen.
    ranked = sorted(line_counts.items(), key=lambda kv: kv[1], reverse=True)[:top_n]
    return _bit_string_with((idx for idx, _ in ranked), len(tcs_results[0][0]))


def get_most_executed_from_pass(tcs_results, top_n=8):
    """
    Select the lines covered by the smallest number of the given test cases.

    Only lines covered by at least one test case are counted, so lines that
    are never covered are never selected.

    Args:
        tcs_results: sequence of rows whose first element is a '0'/'1'
            coverage string (one character per line index).
        top_n: maximum number of line indices to select (default 8).

    Returns:
        A '0'/'1' string, as long as the first row's sequence, with the
        ``top_n`` least frequently covered line indices set to '1'.
        Returns False when the counts cannot rank the lines: every covered
        line has the same count, or no line is covered at all (including
        an empty ``tcs_results``).
    """
    line_counts = _count_line_executions(tcs_results)

    distinct_counts = len(set(line_counts.values()))
    if distinct_counts <= 1:
        return False
    # Diagnostic output: how many different coverage counts were observed.
    print(f"len of set is {distinct_counts}, not 1")

    # Stable ascending sort: ties keep the order in which lines were first seen.
    ranked = sorted(line_counts.items(), key=lambda kv: kv[1])[:top_n]
    return _bit_string_with((idx for idx, _ in ranked), len(tcs_results[0][0]))

# Fault under analysis. FID is the fault index used to filter the d4j tables
# and corresponds to this particular PID:BID pair.
PID = "Time"
FID = 242
BID = 13

# PASSING TEST CASES
passing_tcs = db.read(
    "d4j_tc_info",
    "line_coverage_bit_sequence",
    conditions = {
        "fault_idx": FID, # This is specific to pid:bid
        "result": 0,
    }
)
print(f"Number of passing test cases: {len(passing_tcs)}")
if not passing_tcs:
    # Fail with a clear message instead of an IndexError further down.
    raise ValueError(f"No passing test cases found for fault_idx={FID}.")

# Every coverage string is expected to have this many characters.
bit_length = len(passing_tcs[0][0])

passing_tc_covered_bit_seq_str = cov_bit_seq(passing_tcs)

# FAILING TEST CASES
failing_tcs = db.read(
    "d4j_tc_info",
    "line_coverage_bit_sequence",
    conditions = {
        "fault_idx": FID,
        "result": 1,
    }
)
print(f"Number of failing test cases: {len(failing_tcs)}")
if not failing_tcs:
    # Fail with a clear message instead of an IndexError inside cov_bit_seq.
    raise ValueError(f"No failing test cases found for fault_idx={FID}.")

failing_tc_covered_bit_seq_str = cov_bit_seq(failing_tcs)

# Find the lines covered only by failing test cases.
# NOTE(review): bit_sequences is not used in this cell; remove it if no other
# cell relies on it.
bit_sequences = []
# failing_tc_covered_bit_seq_str marks the lines covered by failing test cases
# (and passing_tc_covered_bit_seq_str those covered by passing test cases).
# Lines unique to the failing test cases are: failing AND (NOT passing).
unique_failing_lines = int(failing_tc_covered_bit_seq_str, 2) & ~int(passing_tc_covered_bit_seq_str, 2)
unique_failing_lines_str = format(unique_failing_lines, f'0{bit_length}b')

# Fallback 1: no line is unique to the failing test cases, so rank the lines
# by how many failing test cases cover them.
if "1" not in unique_failing_lines_str:
    unique_failing_lines_str = get_most_executed_from_fail(failing_tcs)

# Fallback 2: the failing test cases could not rank the lines, so rank them
# by how few passing test cases cover them.
if unique_failing_lines_str is False:
    unique_failing_lines_str = get_most_executed_from_pass(passing_tcs)

if unique_failing_lines_str is False:
    raise ValueError("All lines have the same count, cannot determine unique failing lines.")

# Collect the indices of all selected lines.
ground_truth_line_idx = []
for i, bit in enumerate(unique_failing_lines_str):
    if bit == "1":
        print(f"Bit {i} is set to 1")
        ground_truth_line_idx.append(i)

Number of passing test cases: 850
Number of failing test cases: 1
len of set is 55, not 1
Bit 36 is set to 1
Bit 37 is set to 1
Bit 38 is set to 1
Bit 89 is set to 1
Bit 90 is set to 1
Bit 101 is set to 1
Bit 102 is set to 1
Bit 108 is set to 1


In [12]:
import json
import random

# Upper bound on the number of ground-truth lines to keep, and the seed that
# makes the random selection reproducible across kernel restarts.
MAX_GROUND_TRUTH_LINES = 8
SELECTION_SEED = 0

line_infos = db.read(
    "d4j_line_info",
    "line_idx, file, method, line_num",
    conditions={
        "fault_idx": FID,
    }
)

# Randomly keep at most MAX_GROUND_TRUTH_LINES of the candidate line indices.
# A dedicated seeded generator over a sorted copy gives the same selection on
# every fresh run and does not reorder the list built by the previous cell
# in place.
selection_rng = random.Random(SELECTION_SEED)
ground_truth_line_idx = selection_rng.sample(
    sorted(ground_truth_line_idx),
    k=min(MAX_GROUND_TRUTH_LINES, len(ground_truth_line_idx)),
)
for gt in ground_truth_line_idx:
    print(f"Ground truth line index: {gt}")

# Group the selected lines as {file: {method: [(line_num, line_idx), ...]}}.
selected_line_idx = set(ground_truth_line_idx)
ground_truth_data = {}
for line_info in line_infos:
    line_idx, file, method, line_num = line_info[:4]
    if line_idx in selected_line_idx:
        ground_truth_data.setdefault(file, {}).setdefault(method, []).append((line_num, line_idx))

print(json.dumps(ground_truth_data, indent=2))

Ground truth line index: 38
Ground truth line index: 108
Ground truth line index: 36
Ground truth line index: 102
Ground truth line index: 89
Ground truth line index: 90
Ground truth line index: 37
Ground truth line index: 101
{
  "org/joda/time/Period.java": {
    "withMillis(int)": [
      [
        967,
        36
      ],
      [
        968,
        37
      ],
      [
        969,
        38
      ]
    ]
  },
  "org/joda/time/format/FormatUtils.java": {
    "appendUnpaddedInteger(java.lang.StringBuffer,int)": [
      [
        237,
        89
      ],
      [
        238,
        90
      ]
    ],
    "calculateDigitCount(long)": [
      [
        337,
        101
      ],
      [
        338,
        102
      ],
      [
        348,
        108
      ]
    ]
  }
}


In [13]:
# Target table and column list for the ground-truth rows.
GROUND_TRUTH_TABLE = "d4j_ground_truth_info"
GROUND_TRUTH_COLUMNS = "pid, bid, gid, file, method, line, line_idx, description"
# Every inserted row gets this description; change it here if a different
# label is wanted.
GROUND_TRUTH_DESCRIPTION = "FAULT_OF_OMISSION"

# Running ground-truth id, numbered from 1 across all files and methods.
gid = 0

for file, methods in ground_truth_data.items():
    print(f"File: {file}")
    # Flatten this file's {method: [(line_num, line_idx), ...]} mapping,
    # keeping method order and line order.
    file_lines = [
        (method, line_num, line_idx)
        for method, line_info in methods.items()
        for line_num, line_idx in line_info
    ]
    for method, line_num, line_idx in file_lines:
        gid = gid + 1
        value = [PID, BID, gid, file, method, line_num, line_idx, GROUND_TRUTH_DESCRIPTION]
        print(value)
        db.insert(GROUND_TRUTH_TABLE, GROUND_TRUTH_COLUMNS, value)

File: org/joda/time/Period.java
['Time', 13, 1, 'org/joda/time/Period.java', 'withMillis(int)', 967, 36, 'FAULT_OF_OMISSION']
['Time', 13, 2, 'org/joda/time/Period.java', 'withMillis(int)', 968, 37, 'FAULT_OF_OMISSION']
['Time', 13, 3, 'org/joda/time/Period.java', 'withMillis(int)', 969, 38, 'FAULT_OF_OMISSION']
File: org/joda/time/format/FormatUtils.java
['Time', 13, 4, 'org/joda/time/format/FormatUtils.java', 'appendUnpaddedInteger(java.lang.StringBuffer,int)', 237, 89, 'FAULT_OF_OMISSION']
['Time', 13, 5, 'org/joda/time/format/FormatUtils.java', 'appendUnpaddedInteger(java.lang.StringBuffer,int)', 238, 90, 'FAULT_OF_OMISSION']
['Time', 13, 6, 'org/joda/time/format/FormatUtils.java', 'calculateDigitCount(long)', 337, 101, 'FAULT_OF_OMISSION']
['Time', 13, 7, 'org/joda/time/format/FormatUtils.java', 'calculateDigitCount(long)', 338, 102, 'FAULT_OF_OMISSION']
['Time', 13, 8, 'org/joda/time/format/FormatUtils.java', 'calculateDigitCount(long)', 348, 108, 'FAULT_OF_OMISSION']
