In [1]:
# change current working directory to the root path
# to enable relative imports
import json
import os
# Capture the notebook's start directory only once per kernel. Re-running this
# cell after the chdir below would otherwise read the already-changed working
# directory and climb one more level each time.
if "curr_path" not in globals():
    curr_path = os.getcwd()
root_path = os.path.dirname(curr_path)
os.chdir(root_path)

import dotenv
dotenv.load_dotenv()

from lib.database import CRUD


# Connection settings are read from environment variables (populated by the
# dotenv.load_dotenv() call above). Required names:
#   DB_HOST, DB_PORT, DB_USER, DB_PASSWORD, DB, SLACK_CHANNEL, SLACK_TOKEN
# Do not record real values (passwords, tokens, hosts) in this notebook.
db = CRUD(
    host=os.getenv("DB_HOST"),
    port=os.getenv("DB_PORT"),
    user=os.getenv("DB_USER"),
    password=os.getenv("DB_PASSWORD"),
    database=os.getenv("DB"),
    slack_channel=os.getenv("SLACK_CHANNEL"),
    slack_token=os.getenv("SLACK_TOKEN")
)


In [2]:
# Project id: used as the `pid` filter when reading the measurement rows.
PID = "Lang"
# Experiment label: used as the `experiment_label` filter for the same read.
EL = "timeMeasurement_1"
# Divisor applied to the hour totals to produce the `*Parallel` estimates.
PARALLEL = 8

In [3]:
#              Column             |  Type   | Collation | Nullable | Default 
# --------------------------------+---------+-----------+----------+---------
#  pid                            | text    |           | not null | 
#  bid                            | integer |           | not null | 
#  experiment_label               | text    |           | not null | 
#  instr_duration_sec             | real    |           | not null | 
#  exec_duration_sec              | real    |           | not null | 
#  process_duration_sec           | real    |           | not null | 
#  num_of_failing_tests           | integer |           | not null | 
#  num_of_passing_tests           | integer |           | not null | 
#  num_of_relevant_tests          | integer |           | not null | 
#  relevant_test_total_time_ms    | real    |           | not null | 
#  num_mutants                    | integer |           | not null | 
#  num_lines_executed_by_fail_tcs | integer |           | not null | 

# Read the measurement rows matching the selected project and experiment label.
time_measurement_info = db.read(
    "d4j_time_measurement_info",
    conditions={"pid": PID, "experiment_label": EL},
)

In [4]:
def construct_time_info_dict(time_measurement_info):
    """Index positional measurement rows by their bid.

    Each row is read by position; positions 0..11 are mapped to the field
    names listed below (the column order documented for the
    ``d4j_time_measurement_info`` table). Position 1 (``bid``) is used as the
    key of the returned dict; a later row with the same bid replaces the
    earlier one.

    Args:
        time_measurement_info: iterable of indexable rows with at least
            twelve positions.

    Returns:
        dict mapping bid -> dict of field name -> value for that row.
    """
    field_names = (
        "pid",
        "bid",
        "experiment_label",
        "instr_duration_sec",
        "exec_duration_sec",
        "process_duration_sec",
        "num_of_failing_tests",
        "num_of_passing_tests",
        "num_of_relevant_tests",
        "relevant_test_total_time_ms",
        "num_mutants",
        "num_lines_executed_by_fail_tcs",
    )
    return {
        row[1]: {name: row[pos] for pos, name in enumerate(field_names)}
        for row in time_measurement_info
    }

In [5]:
# Re-shape the fetched rows into a dict keyed by bid for the calculations below.
bid2timeInfo = construct_time_info_dict(time_measurement_info)

In [6]:
def measureTimeDiff(bid2timeInfo, parallel=None):
    """
    Add estimated total run times, with and without coverage, to every entry.

    The three measured durations of an entry are treated as per-mutant costs
    and scaled by its ``num_mutants``:

        with coverage    = (instr + exec + process) * num_mutants
        without coverage = exec * num_mutants

    Both totals are stored in place in seconds, minutes and hours, plus the
    hour values divided by ``parallel``. ``relativeTimeDiff`` is the extra
    time of the with-coverage total, as a percentage of the without-coverage
    total.

    Args:
        bid2timeInfo: dict mapping bid -> time-info dict. Each time-info dict
            must contain "instr_duration_sec", "exec_duration_sec",
            "process_duration_sec" and "num_mutants"; it is modified in place.
        parallel: divisor for the ``*Parallel`` values. When omitted, the
            notebook-level ``PARALLEL`` constant is used.

    Returns:
        None. Results are written into the dicts of ``bid2timeInfo``.
    """
    if parallel is None:
        parallel = PARALLEL

    for timeInfo in bid2timeInfo.values():
        # Per-mutant cost with and without the coverage-related steps.
        withCov_per_mutant = (
            timeInfo["instr_duration_sec"]
            + timeInfo["exec_duration_sec"]
            + timeInfo["process_duration_sec"]
        )
        withoutCov_per_mutant = timeInfo["exec_duration_sec"]

        # Total time for all mutants.
        withCov = withCov_per_mutant * timeInfo["num_mutants"]
        withoutCov = withoutCov_per_mutant * timeInfo["num_mutants"]

        # Overhead percentage. A zero baseline (no execution time or no
        # mutants) has no defined ratio, so store NaN instead of raising
        # ZeroDivisionError and aborting the whole loop.
        if withoutCov:
            relativeTimeDiff = ((withCov - withoutCov) / withoutCov) * 100
        else:
            relativeTimeDiff = float("nan")

        hoursWithCov = withCov / 3600
        hoursWithoutCov = withoutCov / 3600

        timeInfo["relativeTimeDiff"] = relativeTimeDiff
        timeInfo["secWithCov"] = withCov
        timeInfo["secWithoutCov"] = withoutCov
        timeInfo["minWithCov"] = withCov / 60
        timeInfo["minWithoutCov"] = withoutCov / 60
        timeInfo["hourWithCov"] = hoursWithCov
        timeInfo["hourWithoutCov"] = hoursWithoutCov
        timeInfo["hourWithCovParallel"] = hoursWithCov / parallel
        timeInfo["hourWithoutCovParallel"] = hoursWithoutCov / parallel

def measureAvgResults(bid2timeInfo):
    """Collect selected fields across all entries and average each of them.

    Args:
        bid2timeInfo: dict mapping bid -> time-info dict that already holds
            the derived timing keys (``secWithCov`` and friends) alongside
            the count fields read below.

    Returns:
        A ``(results, avgResults)`` tuple. ``results`` maps each reported
        name to the list of values in iteration order of ``bid2timeInfo``;
        ``avgResults`` maps the same names to the arithmetic mean of that
        list, or 0 when the list is empty.
    """
    # Reported name -> key to read from each time-info dict.
    source_of = {
        "fails": "num_of_failing_tests",
        "passes": "num_of_passing_tests",
        "relativeTcs": "num_of_relevant_tests",
        "secWithCov": "secWithCov",
        "secWithoutCov": "secWithoutCov",
        "minWithCov": "minWithCov",
        "minWithoutCov": "minWithoutCov",
        "hourWithCov": "hourWithCov",
        "hourWithoutCov": "hourWithoutCov",
        "hourWithCovParallel": "hourWithCovParallel",
        "hourWithoutCovParallel": "hourWithoutCovParallel",
        "relativeTimeDiff": "relativeTimeDiff",
        "numMutants": "num_mutants",
        "linesExecuted": "num_lines_executed_by_fail_tcs",
    }

    results = {name: [] for name in source_of}
    for timeInfo in bid2timeInfo.values():
        for name, field in source_of.items():
            results[name].append(timeInfo[field])

    avgResults = {
        name: (sum(series) / len(series) if series else 0)
        for name, series in results.items()
    }
    return results, avgResults

def showDataFrame(bid2timeInfo):
    """Build a DataFrame with one row per entry of ``bid2timeInfo``.

    The outer dict keys become the frame's index, which is then reset, so
    they end up in a leading ``index`` column next to the inner-dict fields.

    Args:
        bid2timeInfo: dict mapping a key to a dict of column name -> value.

    Returns:
        pandas.DataFrame with a fresh integer index.
    """
    import pandas as pd

    keyed_frame = pd.DataFrame.from_dict(bid2timeInfo, orient='index')
    return keyed_frame.reset_index()

In [7]:
# Adds the derived timing keys to every entry of bid2timeInfo in place.
measureTimeDiff(bid2timeInfo)

In [8]:
# Per-field value lists and their means; needs the keys added by measureTimeDiff.
results, avgResults = measureAvgResults(bid2timeInfo)

In [9]:
# One row per entry of bid2timeInfo, including the derived timing columns.
df = showDataFrame(bid2timeInfo)

In [10]:
# Write the table into the directory the kernel started in. `curr_path` was
# captured in the first cell before the chdir, so the location follows the
# checkout instead of being tied to one machine's absolute path.
csv_path = os.path.join(curr_path, "time_measurement_results.csv")
df.to_csv(csv_path, index=False)
print(f"CSV file saved to: {csv_path}")

CSV file saved to: /ssd_home/yangheechan/d4j_extractor/notebooks/time_measurement_results.csv


In [11]:
# Print each averaged metric from avgResults on its own line as "<name>: <mean>".
for key, val in avgResults.items():
    print(f"{key}: {val}")

fails: 2.918032786885246
passes: 103.0327868852459
relativeTcs: 105.95081967213115
secWithCov: 29578.277631400157
secWithoutCov: 28071.677689754095
minWithCov: 492.97129385666926
minWithoutCov: 467.86129482923513
hourWithCov: 8.21618823094449
hourWithoutCov: 7.797688247153914
hourWithCovParallel: 1.0270235288680611
hourWithoutCovParallel: 0.9747110308942393
relativeTimeDiff: 5.550907134829642
numMutants: 798.9672131147541
linesExecuted: 63.57377049180328
