In [None]:
# Mount Google Drive at /content/drive (requires the Google Colab runtime).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Copy the `gpt` directory from Drive into the working directory
# (presumably so that `from gpt import preprocessor` resolves later — verify).
!cp -R drive/MyDrive/shared/GPT/gpt gpt

In [None]:
# ---- Experiment settings ----
main_path = "drive/MyDrive/shared/GPT/"
repo_dir = "eclipse_netbeans/"
project_name = "eclipse"
setting = "dynamic"
model = "flute"
filter_threshold = 20  # Default is None (predictions without the "_top_<n>" suffix)

# ---- Locations derived from the settings above ----
tests_path = main_path + "tests/" + repo_dir
data_path = main_path + "data/"
result_path = main_path + "results/" + repo_dir
pred_path = (
    f"{main_path}predictions/{repo_dir}flute_{setting}/"
    if filter_threshold is None
    else f"{main_path}predictions/{repo_dir}flute_{setting}_top_{filter_threshold}/"
)

In [None]:
import json

def readTests(projectName, foldId):
    """Load the argument-recommendation tests of one fold.

    Reads ``{tests_path}{projectName}_ArgRecTests_fold{foldId}.txt``, which
    holds one JSON document per line.

    Args:
        projectName: Project prefix used in the file name.
        foldId: Fold identifier used in the file name.

    Returns:
        A list with one decoded JSON object per non-blank line, in file order.

    Raises:
        FileNotFoundError: If the tests file does not exist.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    oneArgTests = []
    with open(f"{tests_path}{projectName}_ArgRecTests_fold{foldId}.txt") as f:
        # Stream the file and skip blank lines instead of dropping the last
        # split element: a file without a trailing newline must not lose its
        # final record.
        for line in f:
            if line.strip():
                oneArgTests.append(json.loads(line))
    return oneArgTests

In [None]:
def toSingleArgRecTest(this):
    """Wrap one raw argument test record into the single-argument test layout.

    Most fields are copied as-is; ``next_excode`` and ``next_lex`` are wrapped
    in a one-element list, the raw record itself is stored as the only entry
    of ``argRecTestList``, ``test_id`` is exposed as ``id`` and ``numArg`` is
    0 when ``argPos`` is 0 and 1 otherwise.
    """
    return {
        'filePath': this['filePath'],
        'numArg': 0 if this['argPos'] == 0 else 1,
        'lex_context': this['lex_context'],
        'excode_context': this['excode_context'],
        'next_excode': [this['next_excode']],
        'next_lex': [this['next_lex']],
        'expected_excode': this['expected_excode'],
        'expected_lex': this['expected_lex'],
        'ignored': this['ignored'],
        'argRecTestList': [this],
        'id': this['test_id'],
        'methodInvocClassQualifiedName': this['methodInvocClassQualifiedName'],
    }

In [None]:
def allTestsToSingleArgRecTest(oneArgTests):
    """Convert raw test records to single-argument tests.

    Records whose ``argPos`` is not greater than 0 (method invocations with no
    argument passed) are skipped; the remaining ones are converted with
    ``toSingleArgRecTest`` and returned in their original order.
    """
    return [
        toSingleArgRecTest(rawTest)
        for rawTest in oneArgTests
        if rawTest['argPos'] > 0
    ]

In [None]:
import os

def readPredictions(projectName, foldId):
    """Load the model predictions of one fold.

    Reads ``{pred_path}{projectName}_ArgRecs_fold{foldId}.txt``, which holds
    one JSON document per line.

    Args:
        projectName: Project prefix used in the file name.
        foldId: Fold identifier used in the file name.

    Returns:
        A list with one decoded JSON object per non-blank line, in file order.

    Raises:
        FileNotFoundError: If the prediction file does not exist.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    filePath = f"{pred_path}{projectName}_ArgRecs_fold{foldId}.txt"
    # Fail with an explicit error: previously a missing file left `filePath`
    # unassigned and surfaced as an UnboundLocalError.
    if not os.path.isfile(filePath):
        raise FileNotFoundError(f"Prediction file not found: {filePath}")

    predictions = []
    with open(filePath) as f:
        # Skip blank lines rather than dropping the last split element, so a
        # file without a trailing newline keeps its final record.
        for line in f:
            if line.strip():
                predictions.append(json.loads(line))
    return predictions

In [None]:
import os

def readFilterPreds(projectName, foldId):
    """Load the filter-stage predictions of one fold.

    Reads ``{projectName}_prediction_detail_flute_sequence.txt`` from
    ``{main_path}predictions/{repo_dir}filter_{setting}/{projectName}/fold{foldId}/``;
    the file holds one JSON document per line.

    Args:
        projectName: Project name used in the directory and file name.
        foldId: Fold identifier used in the directory name.

    Returns:
        A list with one decoded JSON object per non-blank line, or ``None``
        when the file does not exist.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    filePath = f"{main_path}predictions/{repo_dir}filter_{setting}/{projectName}/fold{foldId}/{projectName}_prediction_detail_flute_sequence.txt"

    if not os.path.isfile(filePath):
        return None

    predictions = []
    with open(filePath) as f:
        # Skip blank lines rather than dropping the last split element, so a
        # file without a trailing newline keeps its final record.
        for line in f:
            if line.strip():
                predictions.append(json.loads(line))
    return predictions

In [None]:
def correctPredsOrder(preds):
    """Return, in order, the predictions whose 'answer' is not ')'."""
    return [entry for entry in preds if entry['answer'] != ')']

def_recentness = creating_distance

In [None]:
import numpy as np

# Read the saved NumPy array from targets_def_recentness.npy under data_path.
def_recentness_file = f'{data_path}targets_def_recentness.npy'
with open(def_recentness_file, 'rb') as npy_stream:
    targets_def_recentness = np.load(npy_stream)

In [None]:
from collections import Counter

# Relative frequency of every value found in targets_def_recentness.
# The result stays a Counter, so values that never occurred look up as 0.
def_recent_total = len(targets_def_recentness)
def_recent_dict = Counter({
    value: occurrences / def_recent_total
    for value, occurrences in Counter(targets_def_recentness.tolist()).items()
})

def_recent_dict

Counter({0: 0.5726471754499647,
         1: 0.2769640318384479,
         2: 0.0968161552075979,
         3: 0.03216727758058745,
         4: 0.013283750979235374,
         5: 0.004726926313142548,
         6: 0.0017916380552627253,
         7: 0.0010445177326228034,
         8: 0.0002587115680338888,
         9: 5.561089780167703e-05,
         10: 0.0002103542655976479,
         11: 4.8357302436240895e-06,
         12: 2.1760786096308403e-05,
         13: 4.8357302436240895e-06,
         15: 2.4178651218120447e-06})

use_recentness = accessing_recentness

In [None]:
import numpy as np

# Read the saved NumPy array from targets_use_recentness.npy under data_path.
use_recentness_file = f'{data_path}targets_use_recentness.npy'
with open(use_recentness_file, 'rb') as npy_stream:
    targets_use_recentness = np.load(npy_stream)

In [None]:
from collections import Counter

# Relative frequency of every value found in targets_use_recentness.
# `.tolist()` converts the NumPy scalars to plain Python numbers first, the
# same way def_recent_dict is built, so the keys are ordinary ints rather
# than NumPy scalar objects.
use_recent_dict = Counter(targets_use_recentness.tolist())

for key in use_recent_dict:
    use_recent_dict[key] = use_recent_dict[key] / len(targets_use_recentness)

use_recent_dict

In [None]:
from gpt import preprocessor

def preprocess(target):
    """Normalise a lexical target.

    Applies ``preprocessor.empty_string_literal`` and then
    ``preprocessor.remove_array_access_index`` to ``target`` and returns the
    result.
    """
    withoutStringContent = preprocessor.empty_string_literal(target)
    return preprocessor.remove_array_access_index(withoutStringContent)

In [None]:
def preprocess_filter(candidate):
    """Normalise one candidate produced by the filter stage.

    Steps, in order: apply ``preprocessor.empty_string_literal``; cut the text
    at the first "{" and strip trailing whitespace; apply
    ``preprocessor.remove_array_access_index`` when a "]" is present; apply
    ``preprocessor.normalize_method_invocation`` when a "(" occurs after the
    first character; replace anything containing "->" by the fixed lambda
    form "x -> {}"; finally drop a leading "this.".
    """
    candidate = preprocessor.empty_string_literal(candidate)

    braceAt = candidate.find("{")
    if braceAt != -1:
        candidate = candidate[:braceAt].rstrip()

    if "]" in candidate:
        candidate = preprocessor.remove_array_access_index(candidate)

    # A "(" at position 0 is left alone.
    if candidate.find("(") > 0:
        candidate = preprocessor.normalize_method_invocation(candidate)

    # Lambda expression
    if "->" in candidate:
        candidate = "x -> {}"

    # Exclude candidates starting with this if they are redundant
    return candidate[5:] if candidate.startswith("this.") else candidate

In [None]:
def preprocess_all_filter_preds(filters_predictions):
    """Run ``preprocess_filter`` over every candidate, in place.

    Each element of ``filters_predictions`` has its ``'predictions'`` list
    overwritten entry by entry; nothing is returned.
    """
    for entry in filters_predictions:
        candidates = entry['predictions']
        for position, rawCandidate in enumerate(candidates):
            candidates[position] = preprocess_filter(rawCandidate)

In [None]:
def matchesArg(expectedLex, result):
    """Tell whether a predicted argument counts as the expected one.

    Returns True when the two strings are equal, when the expected text
    contains "->" and the prediction either contains "->" or is the
    "<LAMBDA>" placeholder, or when they match after removing, on either
    side, the text in front of a ".this" or a leading "this.". Returns False
    otherwise.
    """
    if result == expectedLex:
        return True

    if '->' in expectedLex and ('->' in result or result == "<LAMBDA>"):
        return True

    # Retry with everything before the first ".this" removed (the "this"
    # itself is kept).
    if '.this' in expectedLex:
        unqualifiedExpected = 'this' + expectedLex.split('.this', 1)[1]
        if matchesArg(unqualifiedExpected, result):
            return True

    if '.this' in result:
        unqualifiedResult = 'this' + result.split('.this', 1)[1]
        if matchesArg(expectedLex, unqualifiedResult):
            return True

    # Retry with a leading "this." removed.
    if expectedLex.startswith('this.') and matchesArg(expectedLex[5:], result):
        return True

    return result.startswith('this.') and matchesArg(expectedLex, result[5:])

In [None]:
def canAcceptResult(test, result):
    """Tell whether one predicted argument is acceptable for a test.

    Args:
        test: A single-argument test; only ``test['argRecTestList'][0]`` is
            read.
        result: The predicted argument text.

    Returns:
        True when ``result`` matches (per ``matchesArg``) the test's
        ``expected_lex`` or any of the optional alternative spellings stored
        under ``methodAccessLex``, ``objectCreationLex`` and
        ``staticMemberAccessLex``; False otherwise.
    """
    argTest = test['argRecTestList'][0]

    # Both sides go through `preprocess` and are cut at the first "{".
    expectedLex = preprocess(argTest['expected_lex'])
    if '{' in expectedLex:
        expectedLex = expectedLex[:expectedLex.index('{')].rstrip()

    result = preprocess(result)
    if '{' in result:
        result = result[:result.index('{')].rstrip()
    # Only the prediction is passed through normalize_method_invocation, and
    # only when a "(" occurs after the first character.
    if result.find('(') > 0:
        result = preprocessor.normalize_method_invocation(result)

    if matchesArg(expectedLex, result):
        return True

    # Check every alternative independently. Previously `objectCreationLex`
    # overwrote `methodAccessLex` when both keys were present, so the latter
    # was never compared; missing or null alternatives are skipped.
    for alternateKey in ('methodAccessLex', 'objectCreationLex', 'staticMemberAccessLex'):
        alternateLex = argTest.get(alternateKey)
        if alternateLex is not None and matchesArg(alternateLex, result):
            return True

    return False

In [None]:
# Expression type names, in the order that defines their integer codes.
expressionTypes = [
    'NAME', 'METHOD_INVOC', 'FIELD_ACCESS', 'ARRAY_ACCESS', 'CAST',
    'STRING_LIT', 'NUM_LIT', 'CHAR_LIT', 'TYPE_LIT', 'BOOL_LIT',
    'NULL_LIT', 'OBJ_CREATION', 'ARR_CREATION', 'THIS', 'SUPER',
    'COMPOUND', 'LAMBDA', 'METHOD_REF',
]
# Type name -> position in expressionTypes.
expressionTypeDict = {
    typeName: position for position, typeName in enumerate(expressionTypes)
}

# Cut-offs k used for the top-k metrics.
tops = [1, 3, 5, 10]

In [None]:
import os
import shutil
from collections import defaultdict

# Create the per-project local output folders if they are not there yet.
for outputRoot in ('results/', 'logs/'):
    os.makedirs(outputRoot + project_name, exist_ok=True)

In [None]:
# Metric name -> list of per-test values; filled while tests are evaluated.
dataFrame = defaultdict(list)

def updateTopKResult(test, results, k, adequateGeneratedCandidate, doPrintIncorrectPrediction, projectName):
    """Record the top-k outcome of one test in ``dataFrame``.

    The outcome is 1 when any of the first ``k`` entries of ``results`` is
    accepted by ``canAcceptResult`` and 0 otherwise. It is appended to:

    * ``GPTActualTop{k}`` (always),
    * ``GPTOverallTop{k}`` (only when ``test['ignored']`` is falsy),
    * ``GPTTop{k}`` (only when ``adequateGeneratedCandidate`` is truthy),

    and to the same keys suffixed with the test's ``argType`` (the text
    'null' when the test has no ``argType`` key).

    Args:
        test: Single-argument test; reads ``argRecTestList[0]`` and ``ignored``.
        results: Ranked candidate list.
        k: Number of leading candidates to consider.
        adequateGeneratedCandidate: Whether to update the ``GPTTop{k}`` metrics.
        doPrintIncorrectPrediction: When truthy, incorrect non-ignored tests
            are appended to
            ``logs/{repo_dir}{projectName}_incorrect_ArgRecTests_top_{k}.txt``.
        projectName: Project prefix used in the log file name.
    """
    isOverallCorrectTopK = any(
        canAcceptResult(test, candidate) for candidate in results[:k]
    )
    outcome = 1 if isOverallCorrectTopK else 0

    argTest = test['argRecTestList'][0]
    argType = argTest['argType'] if 'argType' in argTest else 'null'

    dataFrame[f'GPTActualTop{k}'].append(outcome)
    dataFrame[f'GPTActualTop{k}{argType}'].append(outcome)

    if not test['ignored']:
        dataFrame[f'GPTOverallTop{k}'].append(outcome)
        dataFrame[f'GPTOverallTop{k}{argType}'].append(outcome)
        if not isOverallCorrectTopK and doPrintIncorrectPrediction:
            # The log lives under logs/{repo_dir}, which no other cell
            # creates (only logs/{project_name} is), so make sure it exists
            # before appending.
            os.makedirs(f'logs/{repo_dir}', exist_ok=True)
            outputFileName = f"{projectName}_incorrect_ArgRecTests_top_{k}.txt"
            with open(f'logs/{repo_dir}{outputFileName}', "a") as f:
                f.write(json.dumps(argTest) + '\n')
                f.write('Predictions: ' + str(results) + '\n')

    if adequateGeneratedCandidate:
        dataFrame[f'GPTTop{k}'].append(outcome)
        dataFrame[f'GPTTop{k}{argType}'].append(outcome)

In [None]:
import numpy as np

# Weight given to the main score in combine_score; the filter score gets 1 - W_GPT.
W_GPT = 1
# Weight given to the locality score in combine_score_all_feature.
W_LOCALITY = 1

def combine_score(main_score, filter_score, candidate):
    """Blend the two scores linearly using W_GPT.

    ``candidate`` is accepted but not used by the current formula.
    """
    # Alternatives that were tried and are currently disabled:
    #   np.log(np.exp(main_score) * W_GPT + np.exp(filter_score) * (1 - W_GPT))
    #   max(main_score, filter_score)
    #   min(main_score, filter_score)
    #   score_by_type(main_score, filter_score, candidate)
    filter_weight = 1 - W_GPT
    return main_score * W_GPT + filter_score * filter_weight

def combine_score_all_feature(lex_score, locality_score):
    """Weighted average of the lexical score and the locality score (weights 1 and W_LOCALITY)."""
    weighted_total = lex_score + W_LOCALITY * locality_score
    return weighted_total / (1 + W_LOCALITY)

In [None]:
from tqdm.notebook import tqdm as tqdm

import re

# When False, the '<COMPOUND>' placeholder is removed from the GPT results.
COMPOUND_CONSIDERED = False
# When non-empty, only tests whose invoked class name starts with one of
# these prefixes (followed by '.') are evaluated.
TEST_APIS = [
            #  "org.eclipse.swt",
            #  "java.awt",
            #  "javax.swing",
]
# None evaluates every test; True / False restricts the run to tests whose
# expected argument is / is not classified as local by the evaluation loop.
TEST_LOCAL_ARG = None

# Suffix describing this run; it ends up in the names of the report files.
model_suffix = ""
if len(TEST_APIS) > 0:
    model_suffix += "__lib"
if TEST_LOCAL_ARG is not None:
    model_suffix += "__local_arg" if TEST_LOCAL_ARG else "__not_local_arg"
if filter_threshold is not None:
    model_suffix += f"__top_{filter_threshold}"
# Append only once: re-running this cell must not keep growing `model`
# (e.g. "flute__top_20__top_20").
if model_suffix and not model.endswith(model_suffix):
    model += model_suffix

rank_list = []

# Discover folds from the prediction file names. The whole text between
# "fold" and ".txt" is the fold id (so "fold10" is not truncated to "1"),
# and only names of the exact form readPredictions opens are accepted.
# Sorting makes the processing order independent of os.listdir's order.
fold_file_pattern = re.compile(re.escape(f"{project_name}_ArgRecs_fold") + r"(.+)\.txt")
fold_list = sorted(
    match.group(1)
    for match in map(fold_file_pattern.fullmatch, os.listdir(pred_path))
    if match
)

# Evaluate every fold: load tests and both prediction sources, re-rank the
# filter candidates with the combined score, and record rank and top-k
# outcomes per test.
for fold_id in tqdm(fold_list):
    tests = readTests(project_name, fold_id)
    predictions = readPredictions(project_name, fold_id)
    filters_predictions = readFilterPreds(project_name, fold_id)
    if filters_predictions is None:
        # readFilterPreds signals a missing file with None; stop with a clear
        # message instead of an opaque TypeError further down.
        raise FileNotFoundError(
            f"Filter predictions not found for project {project_name}, fold {fold_id}"
        )
    filters_predictions = correctPredsOrder(filters_predictions)
    tests = allTestsToSingleArgRecTest(tests)
    preprocess_all_filter_preds(filters_predictions)

    # Sanity checks: the three sources must be aligned index by index
    # (compared on the first three characters of the expected answer).
    assert len(tests) == len(predictions), "Tests not matched!"
    for i in range(len(tests)):
        assert tests[i]['expected_lex'][:3] == predictions[i]['answer'][:3], "Tests not matched!"

    assert len(tests) == len(filters_predictions), "Tests not matched!"
    for i in range(len(tests)):
        if tests[i]['expected_lex'][:3] != filters_predictions[i]['answer'][:3]:
            print(tests[i]['expected_lex'])
            print(filters_predictions[i]['answer'])
        assert tests[i]['expected_lex'][:3] == filters_predictions[i]['answer'][:3], "Tests not matched!"

    for i in range(len(tests)):
        test = tests[i]
        dataFrame['Tested'].append(1)

        # ---- Decide whether this test takes part in the evaluation ----
        if test['numArg'] == 0:
            continue
        if len(TEST_APIS) > 0:
            is_target = False
            for target_api in TEST_APIS:
                if test['methodInvocClassQualifiedName'].startswith(target_api + '.'):
                    is_target = True
            if not is_target:
                continue
        if TEST_LOCAL_ARG is not None:
            if not predictions[i]['sufficient_candidates']:
                continue
            # The expected argument counts as local when it appears among the
            # candidates with a locality value of at least 4.
            is_local_arg = False
            for j in range(len(test['next_lex'][0])):
                for k in range(len(test['next_lex'][0][j])):
                    candidate = test['next_lex'][0][j][k]
                    if candidate == test['expected_lex']:
                        candidate_locality = test['argRecTestList'][0]['candidates_locality'][j][k]
                        if candidate_locality >= 4:
                            is_local_arg = True
                            break
                if is_local_arg:
                    break
            if TEST_LOCAL_ARG != is_local_arg:
                continue

        dataFrame['Predicted'].append(1)
        if not test['ignored']:
            dataFrame['Predicted supported'].append(1)

        # ---- Locality prior: candidate -> relative frequency of its scope distance ----
        oneArgTest = test['argRecTestList'][0]
        next_lex_locality_dict = {}
        for j in range(len(oneArgTest['next_lex'])):
            for k in range(len(oneArgTest['next_lex'][j])):
                candidate = oneArgTest['next_lex'][j][k]
                scope_distance = oneArgTest['candidates_scope_distance'][j][k]
                lu_distance = oneArgTest['candidates_last_usage_distance'][j][k]
                if scope_distance >= 0:
                    next_lex_locality_dict[candidate] = def_recent_dict[scope_distance]
                    #next_lex_locality_dict[candidate] *= use_recent_dict[lu_distance]

        # ---- Main model output for this test ----
        response = predictions[i]
        gptResults = response['predictions']
        gptScores = response['scores']
        runtime = response['runtime']
        if not COMPOUND_CONSIDERED:
            # Drop the first '<COMPOUND>' placeholder together with its score.
            for k in range(len(gptResults)):
                if gptResults[k] == '<COMPOUND>':
                    gptResults = gptResults[:k] + gptResults[k + 1:]
                    gptScores = gptScores[:k] + gptScores[k + 1:]
                    break

        # ---- Base scores from the filter stage ----
        # Iterating backwards means that when several entries share the same
        # candidate text, the score of the earliest entry is the one kept.
        prediction_dict = {}
        lex_sim_candidate_dict = {}
        for j in range(len(filters_predictions[i]['predictions']) - 1, -1, -1):
            candidate = filters_predictions[i]['predictions'][j]
            prediction_dict[candidate] = filters_predictions[i]['lexModelScores'][j]
            # A positive score is reported as a bug (presumably the scores
            # are log-probabilities — TODO confirm).
            if filters_predictions[i]['lexModelScores'][j] > 0:
                #prediction_dict[candidate] = LOG_ZERO
                print(f"Bug: project {project_name} - fold {fold_id} - test id {i} - prediction id {j}")

            # Read but not used by the current scoring formula.
            lex_sim = filters_predictions[i]['lexSimScores'][j]
            # lex_sim_score = np.log(lex_sim_dict[get_bin(np.exp(lex_sim))])
            # lex_sim_candidate_dict[candidate] = lex_sim_score

        # ---- Combine main score, filter score and locality prior ----
        for j in range(len(gptResults)):
            prediction_dict[gptResults[j]] = combine_score(gptScores[j], prediction_dict[gptResults[j]], gptResults[j])
            # prediction_dict[gptResults[j]] = prediction_dict[gptResults[j]] + lex_sim_candidate_dict[gptResults[j]] * 0.5
            # prediction_dict[gptResults[j]] = prediction_dict[gptResults[j]] + lex_sim * 0.5

            if gptResults[j] in next_lex_locality_dict:
                prediction_dict[gptResults[j]] = combine_score_all_feature(prediction_dict[gptResults[j]], np.log(next_lex_locality_dict[gptResults[j]]))

        # Unique filter candidates, highest combined score first.
        combinedResults = sorted(list(set(filters_predictions[i]['predictions'])), key=lambda x: -prediction_dict[x])

        # Rank = position of the FIRST acceptable candidate within the top 10
        # (-1 when there is none). Stop at the first hit: several candidates
        # can be acceptable, and without the break the worst of them was
        # recorded, which lowers the reciprocal rank.
        rank = -1
        for k in range(min(10, len(combinedResults))):
            if canAcceptResult(test, combinedResults[k]):
                rank = k
                break
        rank_list.append(rank)

        for k in tops:
            updateTopKResult(test, combinedResults, k, response['sufficient_candidates'], False, project_name)

        dataFrame["GPT's runtime"].append(runtime)
        argType = test['argRecTestList'][0]['argType'] if 'argType' in test['argRecTestList'][0] else None
        if argType is not None:
            dataFrame["ArgType"].append(expressionTypeDict[argType])

In [None]:
# Mean reciprocal rank over rank_list: an entry r >= 0 contributes 1/(r + 1),
# a negative entry contributes nothing.
reciprocal_total = 0
for position in rank_list:
    if position >= 0:
        reciprocal_total += 1/(position + 1)
MRR = reciprocal_total / len(rank_list)
print(MRR)

0.7125629791584323


In [None]:
# import pickle

# with open('logs/dataframe.pkl', 'wb') as f:
#     pickle.dump(dataFrame, f)

In [None]:
import numpy as np
import csv

def _meanOrNan(values):
    """Mean of ``values``, or NaN for an empty sequence without NumPy's empty-slice warning."""
    return np.mean(values) if len(values) > 0 else float('nan')


def printTestResult():
    """Write the evaluation report for the current run.

    Produces two files under ``results/{project_name}/``:

    * ``arg_rec_{model}_log.txt`` — test counts, mean runtime, MRR and, for
      every k in ``tops``, the number of tests counted in ``GPTActualTop{k}``.
    * ``arg_rec_{model}.csv`` — one row per expression type plus a final
      "all" row, holding for every k the means of ``GPTTop{k}``,
      ``GPTOverallTop{k}`` and ``GPTActualTop{k}``.

    Metrics with no recorded value are reported as NaN.
    """
    logPath = f'results/{project_name}/arg_rec_{model}_log.txt'
    csvPath = f'results/{project_name}/arg_rec_{model}.csv'

    with open(logPath, 'w') as f:
        f.write(f"Ran {len(dataFrame['Tested'])} tests successfully.\n")
        f.write(f"Predicted {len(dataFrame['Predicted'])} tests.\n")
        f.write(f"Predicted {len(dataFrame['Predicted supported'])} tests that were supported.\n")
        f.write(f"Skipped {len(dataFrame['Tested']) - len(dataFrame['Predicted'])} tests. They were not taken into account during evaluation.\n")
        gptRuntime = _meanOrNan(dataFrame["GPT's runtime"])
        f.write(f"GPT's runtime: {gptRuntime}s\n")
        f.write(f"MRR: {MRR}\n")

    # Header row. NOTE(review): the first column is labelled
    # "Number of params" although the rows below are expression types; the
    # label is kept unchanged in case something downstream relies on it.
    accuracyPerNumArg = []
    row = []
    row.append("Number of params")
    row.append("Percentage of distribution")
    for k in tops:
        row.append(f"GPT's top-{k} accuracy")
    for k in tops:
        row.append(f"Top-{k} precision")
    for k in tops:
        row.append(f"Top-{k} recall")
    accuracyPerNumArg.append(row)

    # Share of each expression type code among the recorded 'ArgType' values.
    unique, counts = np.unique(dataFrame['ArgType'], return_counts=True)
    counts = counts / counts.sum()
    argTypeDict = defaultdict(float)
    for i in range(len(unique)):
        argTypeDict[unique[i]] = counts[i]

    for i in range(len(expressionTypes)):
        argType = expressionTypes[i]
        row = []
        row.append(argType)
        row.append(argTypeDict[i] * 100)
        # Types that never occurred have empty metric lists; _meanOrNan
        # reports them as NaN without triggering a RuntimeWarning.
        for k in tops:
            row.append(_meanOrNan(dataFrame[f"GPTTop{k}{argType}"]))
        for k in tops:
            row.append(_meanOrNan(dataFrame[f"GPTOverallTop{k}{argType}"]))
        for k in tops:
            row.append(_meanOrNan(dataFrame[f"GPTActualTop{k}{argType}"]))
        accuracyPerNumArg.append(row)

    row = []
    row.append("all")
    row.append("100")
    for k in tops:
        row.append(_meanOrNan(dataFrame[f"GPTTop{k}"]))
    for k in tops:
        row.append(_meanOrNan(dataFrame[f"GPTOverallTop{k}"]))
    for k in tops:
        row.append(_meanOrNan(dataFrame[f"GPTActualTop{k}"]))
    accuracyPerNumArg.append(row)

    # newline='' lets the csv module control line endings itself, as its
    # documentation requires (avoids doubled line breaks on some platforms).
    with open(csvPath, 'w', newline='') as f:
        csv_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        csv_writer.writerows(accuracyPerNumArg)

    with open(logPath, 'a') as f:
        for k in tops:
            correctTestsCount = np.sum(dataFrame[f"GPTActualTop{k}"])
            f.write(f"Target showed up in top {k} recommendations in {correctTestsCount} tests.\n")

# Write the log and CSV reports for this run under results/{project_name}/.
printTestResult()

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


In [None]:
# Copy every report file from the local results folder to the matching
# folder under main_path.
local_results_dir = 'results/' + project_name
drive_results_dir = main_path + 'results/' + project_name + '/'
# No other cell creates the destination folder; copyfile would fail without it.
os.makedirs(drive_results_dir, exist_ok=True)
for text_file in os.listdir(local_results_dir):
    source_file = local_results_dir + '/' + text_file
    # copyfile only handles regular files, so skip sub-directories.
    if os.path.isfile(source_file):
        shutil.copyfile(source_file, drive_results_dir + text_file)