In [1]:
%matplotlib inline
from datasets import *
from pyvenn import *
from tqdm import tqdm

# LOAD DATASET

In [2]:
def load_validation_dataset(dataset_name):
    dataset=load_from_disk(f'../defects4j_validation/dataset_validated/{dataset_name}')
    print(f' \n==========\n{dataset_name}\n==========\n',dataset)
    return dataset

In [3]:
codellama_vanilla=load_validation_dataset('codellama_vanilla')
codellama_classinfo=load_validation_dataset('codellama_classinfo')
codellama_classinfo_lora=load_validation_dataset('codellama_classinfo_lora')
codellama_no_classinfo_lora=load_validation_dataset('codellama_no_classinfo_lora')
repairllama=load_validation_dataset('repairllama')
repairllama_classinfo=load_validation_dataset('repairllama_classinfo')
repairllama_classinfo_lora=load_validation_dataset('repairllama_classinfo_lora')
repairllama_paper=load_validation_dataset('repairllama_paper')

 
codellama_vanilla
 Dataset({
    features: ['methodInformation', 'involvedTypesInformation', 'filePath', 'classInformation', 'buggyInfo', 'projectName', 'bug_id', 'start_line', 'end_line', 'path', 'fix_code', 'pre_context', 'post_context', 'buggy_code', 'input', 'gen', 'test_res'],
    num_rows: 479
})
 
codellama_classinfo
 Dataset({
    features: ['methodInformation', 'involvedTypesInformation', 'filePath', 'classInformation', 'buggyInfo', 'projectName', 'bug_id', 'start_line', 'end_line', 'path', 'fix_code', 'pre_context', 'post_context', 'buggy_code', 'input', 'gen', 'test_res'],
    num_rows: 479
})
 
codellama_classinfo_lora
 Dataset({
    features: ['methodInformation', 'involvedTypesInformation', 'filePath', 'classInformation', 'buggyInfo', 'projectName', 'bug_id', 'start_line', 'end_line', 'path', 'fix_code', 'pre_context', 'post_context', 'buggy_code', 'input', 'gen', 'test_res'],
    num_rows: 479
})
 
codellama_no_classinfo_lora
 Dataset({
    features: ['methodInformatio

In [4]:
dataset_dict={
    'codellama_vanilla':codellama_vanilla,
    'codellama_classinfo':codellama_classinfo,
    'codellama_classinfo_lora':codellama_classinfo_lora,
    'codellama_no_classinfo_lora':codellama_no_classinfo_lora,
    'repairllama':repairllama,
    'repairllama_classinfo':repairllama_classinfo,
    'repairllama_classinfo_lora':repairllama_classinfo_lora,
    'repairllama_paper':repairllama_paper,
}

# STATICS ANALYSIS

## correctness

In [5]:
def determine_correctness(correctness_list):
    # 按优先级确定correctness
    if 'plausible' in correctness_list:
        return 'plausible'
    elif 'wrong' in correctness_list:
        return 'wrong'
    elif 'uncompilable' in correctness_list:
        return 'uncompilable'
    else:
        return 'timeout'  # 如果列表中没有已知的correctness值

def statistics_by_correctness(dataset):
    # 初始化一个字典来临时存储每个bug_id的所有correctness值
    temp_result = {}
    # 初始化最终结果字典
    final_result = {}

    # 收集每个bug_id的所有correctness值
    for row in dataset:
        bug_id = row['bug_id']
        test_res_list = row['test_res']  # 这是一个列表，不是单个字典
        if not test_res_list:
            continue
        if bug_id not in temp_result:
            temp_result[bug_id] = []
        # 遍历test_res_list中的每个字典
        for test_res in test_res_list:
            correctness = test_res['correctness']
            temp_result[bug_id].append(correctness)

    # 确定每个bug_id的最终correctness并组织最终结果
    for bug_id, correctness_list in temp_result.items():
        final_correctness = determine_correctness(correctness_list)
        if final_correctness not in final_result:
            final_result[final_correctness] = []
        final_result[final_correctness].append(bug_id)

    return final_result



In [6]:
res_dict={}
for name in dataset_dict:
    print(f'========={name}=========')
    dataset= dataset_dict[name]
    res=statistics_by_correctness(dataset)
    lst=['plausible','wrong','uncompilable']
    s=0
    for i in lst:
        print(f'{i}:', len(res[i]))
        s+=len(res[i])
    print('timeout or error', 479-s)
    res_dict[name]=res

plausible: 116
wrong: 238
uncompilable: 119
timeout or error 6
plausible: 87
wrong: 274
uncompilable: 113
timeout or error 5
plausible: 87
wrong: 284
uncompilable: 102
timeout or error 6
plausible: 86
wrong: 269
uncompilable: 119
timeout or error 5
plausible: 114
wrong: 254
uncompilable: 106
timeout or error 5
plausible: 114
wrong: 255
uncompilable: 104
timeout or error 6
plausible: 97
wrong: 261
uncompilable: 116
timeout or error 5
plausible: 150
wrong: 254
uncompilable: 70
timeout or error 5


## length

In [7]:
def average_patch_length(dataset):
    # 初始化累计变量和计数器
    total_length = 0
    count = 0

    # 遍历数据集
    for row in dataset:
        # 获取每个bug_id对应的test_res列表
        test_res_list = row['test_res']
        if not test_res_list:
            continue
        # 遍历test_res列表中的每个字典
        for test_res in test_res_list:
            # 获取patch并累计其长度
            patch = test_res['patch']
            total_length += len(patch)
            count += 1

    # 计算平均长度，避免除以零的错误
    average_length = total_length / count if count > 0 else 0
    return average_length


# 调用函数并打印结果
for name in dataset_dict:
    print(f'========={name}=========')
    dataset= dataset_dict[name]
    avg_length=average_patch_length(dataset)
    print('average patch length', avg_length)

average patch length 174.16966363444044
average patch length 199.437156157427
average patch length 54.74960594460707
average patch length 81.9032400264492
average patch length 147.90901231899718
average patch length 169.2356182499449
average patch length 49.2953125
average patch length 156.5341963322546


## similarity

In [8]:
def levenshtein_distance(s1, s2):
    s1, s2=s1.strip(), s2.strip()
    if len(s1) < len(s2):
        return levenshtein_distance(s2, s1)

    if len(s2) == 0:
        return len(s1)

    previous_row = range(len(s2) + 1)
    for i, c1 in enumerate(s1):
        current_row = [i + 1]
        for j, c2 in enumerate(s2):
            insertions = previous_row[j + 1] + 1
            deletions = current_row[j] + 1
            substitutions = previous_row[j] + (c1 != c2)
            current_row.append(min(insertions, deletions, substitutions))
        previous_row = current_row
    
    return previous_row[-1]

def similarity_score(distance, max_length):
    if max_length == 0:
        return 1.0  # 假设两个空字符串是完全相似的
    return 1 - (distance / max_length)

def average_patch_similarity(dataset):
    similarity_results = {}
    all_scores = []

    for row in tqdm(dataset):
        bug_id = row['bug_id']
        test_res_list = row['test_res']
        
        if not test_res_list:
            continue
            
        scores = []

        for i in range(len(test_res_list)):
            for j in range(i+1, len(test_res_list)):
                patch1 = test_res_list[i]['patch']
                patch2 = test_res_list[j]['patch']
                max_length = max(len(patch1), len(patch2))
                distance = levenshtein_distance(patch1, patch2)
                score = similarity_score(distance, max_length)
                scores.append(score)

        # 计算并存储平均相似度分数
        if scores:
            average_score = sum(scores) / len(scores)
            similarity_results[bug_id] = average_score
            all_scores.extend(scores)  # 将所有得分添加到总列表中，以计算整个数据集的平均相似度
        else:
            similarity_results[bug_id] = 1.0  # 如果只有一个patch，假设相似度为100%

    # 计算整个数据集的平均相似度
    dataset_average_similarity = sum(all_scores) / len(all_scores) if all_scores else 1.0
    return similarity_results, dataset_average_similarity

In [9]:
# 调用函数并打印结果
for name in dataset_dict:
    print(f'========={name}=========')
    dataset= dataset_dict[name]
    sim_res, avg_sim=average_patch_similarity(dataset)
    print(f"Dataset Average Similarity Score = {avg_sim:.2%}")



100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 479/479 [07:41<00:00,  1.04it/s]


Dataset Average Similarity Score = 55.38%


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 479/479 [10:24<00:00,  1.30s/it]


Dataset Average Similarity Score = 46.35%


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 479/479 [00:28<00:00, 16.78it/s]


Dataset Average Similarity Score = 45.27%


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 479/479 [01:53<00:00,  4.22it/s]


Dataset Average Similarity Score = 48.30%


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 479/479 [06:17<00:00,  1.27it/s]


Dataset Average Similarity Score = 51.49%


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 479/479 [07:09<00:00,  1.12it/s]


Dataset Average Similarity Score = 51.85%


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 479/479 [00:20<00:00, 22.92it/s]


Dataset Average Similarity Score = 42.36%


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 479/479 [08:43<00:00,  1.09s/it]

Dataset Average Similarity Score = 53.60%





In [None]:
def max_patch_list_similarity(listA, listB):
    max_similarity = 0
    for patchA in listA:
        for patchB in listB:
            distance = levenshtein_distance(patchA, patchB)
            max_length = max(len(patchA), len(patchB))
            similarity = similarity_score(distance, max_length)
            max_similarity = max(max_similarity, similarity)
    return max_similarity

def compare_datasets(dataset_dict, base_dataset_name='codellama_vanilla'):
    results = {}
    base_dataset = dataset_dict[base_dataset_name]

    for dataset_name, dataset in dataset_dict.items():
        if dataset_name == base_dataset_name:
            continue  # Skip comparing the dataset with itself
        print(f'{dataset_name} start!')
        
        results[dataset_name] = {}
        for sample in base_dataset:
            bug_id = sample['bug_id']
            test_res = sample['test_res']
            if not test_res:
                continue
                
            sampleB = dataset.filter(lambda x:x['bug_id']==bug_id)[0]
            
            listA = [test_res['patch'] for test_res in test_res]
            test_resB=sampleB['test_res']
            if not test_resB:
                continue
            listB=[]

            for res in test_resB:
                if res['correctness']=='plausible':
                    listB.append(res['patch'])
                    
            if not listB:
                continue
                
            similarity = max_patch_list_similarity(listA, listB)
            results[dataset_name][bug_id] = similarity

    return results


# 调用函数并打印结果
dataset_sim = compare_datasets(dataset_dict)

In [12]:
for dataset, sim_dict in dataset_sim.items():
    sim_list=[]
    for bug_id, sim_score in sim_dict.items():
        sim_list.append(sim_score)
    print(f'{dataset} avg sim score:{sum(sim_list)/len(sim_list)}')

codellama_classinfo avg sim score:0.8457298398404217
codellama_classinfo_lora avg sim score:0.7940045907064501
codellama_no_classinfo_lora avg sim score:0.8247297603699361
repairllama avg sim score:0.8517089833743694
repairllama_classinfo avg sim score:0.8138774788569407
repairllama_classinfo_lora avg sim score:0.7796504763810237
repairllama_paper avg sim score:0.8034998668012293


## same patch analysis

In [18]:
not_sim_bug_ids_dict={}
for dataset_name, similarities in dataset_sim.items():
    sim_thershold=0.5
    for bug_id, similarity_score in similarities.items():
        if similarity_score <= sim_thershold:
            if dataset_name not in not_sim_bug_ids_dict:
                not_sim_bug_ids_dict[dataset_name]=[]
            else:
                not_sim_bug_ids_dict[dataset_name].append(bug_id)

In [None]:
not_sim_bug_ids_dict

In [30]:
check_name='codellama_classinfo_lora'

In [32]:
for name, dataset in dataset_dict.items():
    if name=='codellama_vanilla':
        continue
    if name!=check_name:
        continue    
    
    print(f'======{name}=======')
    for sample in dataset:
        bug_id=sample['bug_id']
        not_sim_lst=not_sim_bug_ids_dict[name]
        if bug_id in not_sim_lst:
            print(f"\n====={bug_id}=====\n")
            print(sample['input'])
            print('fix: ', sample['fix_code'])
            print('model generated patch:\n')
            for i in sample['test_res']:
                if i['correctness']=='plausible':
                    print('------------------')
                    print(i['patch'])
            codellama_sample=codellama_vanilla.filter(lambda x:x['bug_id']==bug_id)[0]
            print('codellama generated patch:\n')
            for i in sample['test_res']:
                print('------------------')
                print(i['patch'])


=====Mockito-38=====

<PRE> // class information of method
public class ArgumentMatchingTool{

    public Integer[] getSuspiciouslyNotMatchingArgsIndexes(List<Matcher> matchers, Object[] arguments)
    private boolean safelyMatches(Matcher m, Object arg)
    private boolean toStringEquals(Matcher m, Object arg)
}

// method to be repaired
    private boolean toStringEquals(Matcher m, Object arg) {
// buggy lines
// return StringDescription.toString(m).equals(arg.toString()); <SUF>    } <MID>
fix:  return StringDescription.toString(m).equals(arg == null? "null" : arg.toString());
model generated patch:

------------------

    return arg != null && StringDescription.toString(m).equals(arg.toString());


Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

codellama generated patch:

------------------

------------------

    return arg instanceof String && StringDescription.toString(m).equals(arg.toString());
------------------

    return arg instanceof String && StringDescription.toString(m).equals(arg);
}
------------------

    return arg != null && StringDescription.toString(m).equals(arg.toString());
}
------------------

    return arg != null && StringDescription.toString(m).equals(arg.toString());
------------------

    return arg instanceof String && StringDescription.toString(m).equals(arg.toString());
}
------------------

    return StringDescription.toString(m).equals(arg.toString());
------------------

return StringDescription.toString(m).equals(arg.toString());
}
------------------

    return StringDescription.toString(m).equals(arg.toString());
}

=====Closure-109=====

<PRE> // class information of method
public class JsDocInfoParser{
    private JsDocTokenStream stream
    private JSDocInfoBuilder jsdocBuilder
   

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

codellama generated patch:

------------------

------------------

return parseTypeName(token);
}
------------------

if (lookAheadForType()) {
    return parseTypeName(token);
} else {
    return null;
}
------------------

return lookAheadForType() ? parseTypeName(token) : parseBasicTypeExpression(token);
------------------

    skipEOLs();
    return parseTypeName(token);
------------------

return lookAheadForType() ? parseTypeName(token) : null;
------------------

return lookAheadForType() ? parseTypeName(token) : parseTypeExpression(token);
------------------

    skipEOLs();
    return parseTypeName(token);
}
------------------

    return parseTypeName(token);
}

=====JacksonDatabind-12=====

<PRE> // class information of method
public class MapDeserializer extends ContainerDeserializerBase implements ContextualDeserializer, ResolvableDeserializer{
    private long serialVersionUID
    JavaType _mapType
    KeyDeserializer _keyDeserializer
    boolean _standardStringKey
    J

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

codellama generated patch:

------------------

------------------
>
------------------
;
------------------

    return (_keyDeserializer == null)
                && (_valueTypeDeserializer == null)

------------------

    return (_keyDeserializer == null)
            && (_valueTypeDeserializer == null)
            && (_valueDeserializer == null)
------------------

    return (_valueTypeDeserializer == null)
                && (_valueInstantiator == null)

------------------

    return (_keyDeserializer == null)
            && (_valueDeserializer == null)
            && (_valueTypeDeserializer == null)
------------------

    return (_keyDeserializer == null)
                && (_valueDeserializer == null)
                && (_valueTypeDeserializer == null)

------------------

    return (_valueTypeDeserializer == null)
                && (_valueDeserializer == null)


=====Lang-57=====

<PRE> // class information of method
public class LocaleUtils{
    private List cAvailableLoca

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

codellama generated patch:

------------------

------------------

    return locale != null && cAvailableLocaleSet.contains(locale);
}
------------------

return cAvailableLocaleSet.contains(locale);
}
------------------

return cAvailableLocaleSet != null && cAvailableLocaleSet.contains(locale);
------------------

    return cAvailableLocaleSet.contains(toLocale(locale));
}
------------------

return cAvailableLocaleSet != null && cAvailableLocaleSet.contains(locale);
}
------------------

    return cAvailableLocaleSet.contains(locale);
}
------------------

    return locale != null && cAvailableLocaleSet.contains(locale);
------------------

return locale != null && cAvailableLocaleSet.contains(locale);

=====Compress-38=====

<PRE> // class information of method
public class TarArchiveEntry implements TarConstants, ArchiveEntry{
    private TarArchiveEntry[] EMPTY_TAR_ARCHIVE_ENTRIES
    private String name
    private boolean preserveLeadingSlashes
    private int mode
    pri

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

codellama generated patch:

------------------

------------------

    if (isGNUSparse() && getName().endsWith("/")) {
------------------
}
------------------

}
------------------

    if (getName() != null && getName().endsWith("/")) {
------------------

    if (name != null && getName().endsWith("/")) {
------------------

    if (preserveLeadingSlashes && getName().endsWith("/")) {
------------------

    if (isSymbolicLink() && getName().endsWith("/")) {

=====Math-70=====

<PRE> // class information of method
public class BisectionSolver extends UnivariateRealSolverImpl{

    public double solve(double min, double max, double initial)
    public double solve(double min, double max)
    public double solve(UnivariateRealFunction f, double min, double max, double initial)
    public double solve(UnivariateRealFunction f, double min, double max)
}

// method to be repaired
    public double solve(final UnivariateRealFunction f, double min, double max, double initial)
        throw

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

codellama generated patch:

------------------

------------------

    return solve(f, min, max);
------------------

if (f.value(initial) == 0) {
    return initial;
} else {
    return solve(min, max);
}
------------------

    return super.solve(f, min, max, initial);
}
------------------

    return solve(f, min, max, initial, getAbsoluteAccuracy());
------------------

    return solve(f, min, max, initial, getAbsoluteAccuracy());
}
------------------

    return solve(f, min, max, initial, DEFAULT_ABSOLUTE_ACCURACY);
------------------

    return super.solve(f, min, max, initial);
------------------

    return solve(f, min, max);
}

=====Csv-4=====

<PRE> // class information of method
public class CSVParser implements Iterable, Closeable{
    private CSVFormat format
    private Map<String, Integer> headerMap
    private Lexer lexer
    private List<String> record
    private long recordNumber
    private Token reusableToken
    public static CSVParser parse(File file, CSVFor

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

codellama generated patch:

------------------

------------------

    if (headerMap != null) {
        return new LinkedHashMap<String, Integer>(this.headerMap);
    } else {
        return null;
    }
}
------------------

    return new LinkedHashMap<>(this.headerMap);
}
------------------

    return new LinkedHashMap<String, Integer>(this.headerMap);
}
------------------

    return this.headerMap == null ? new LinkedHashMap<String, Integer>() : new LinkedHashMap<String, Integer>(this.headerMap);
------------------

return new LinkedHashMap<String, Integer>(this.headerMap);

------------------

    return this.headerMap == null ? null : new LinkedHashMap<String, Integer>(this.headerMap);
------------------

    return this.headerMap == null ? null : new LinkedHashMap<String, Integer>(this.headerMap);
}
------------------

return new LinkedHashMap<String, Integer>(this.headerMap);
}

=====Codec-9=====

<PRE> // class information of method
public class Base64 implements BinaryEncod

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

codellama generated patch:

------------------

------------------

long len = getEncodeLength(binaryData, maxResultSize, CHUNK_SEPARATOR);
------------------

    long len = getEncodeLength(binaryData, MIME_CHUNK_SIZE, CHUNK_SEPARATOR);
------------------

long len = getEncodeLength(binaryData, MIME_CHUNK_SIZE, CHUNK_SEPARATOR);
------------------

long len = Base64.getEncodeLength(binaryData, MIME_CHUNK_SIZE, CHUNK_SEPARATOR);
------------------

long len = getEncodeLength(binaryData, (isChunked ? MIME_CHUNK_SIZE : 0), CHUNK_SEPARATOR);
------------------

long len = getEncodeLength(binaryData, MIME_CHUNK_SIZE, isChunked ? CHUNK_SEPARATOR : null);
------------------

long len = getEncodeLength(binaryData, isChunked ? MIME_CHUNK_SIZE : 0, CHUNK_SEPARATOR);
------------------

long len = getEncodeLength(binaryData, 0, null);

=====Collections-26=====

<PRE> // class information of method
public class MultiKey<K> implements Serializable{
    private long serialVersionUID
    private K[]

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

codellama generated patch:

------------------

------------------

synchronized (this) {
------------------

if (keys != null) {
------------------

public Object readResolve() {
------------------

if (keys == null) {
    return this;
}
------------------

final K[] keys = getKeys();
------------------

Object[] keys = getKeys();
------------------

if (keys == null) {
    return null;
}
------------------

private Object readResolve() {

=====Lang-38=====

<PRE> // class information of method
public class FastDateFormat extends Format{
    private long serialVersionUID
    public int FULL
    public int LONG
    public int MEDIUM
    public int SHORT
    private String cDefaultPattern
    private Map<FastDateFormat, FastDateFormat> cInstanceCache
    private Map<Object, FastDateFormat> cDateInstanceCache
    private Map<Object, FastDateFormat> cTimeInstanceCache
    private Map<Object, FastDateFormat> cDateTimeInstanceCache
    private Map<Object, String> cTimeZoneDisplayCache
    p

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

codellama generated patch:

------------------
if (calendar instanceof GregorianCalendar) {
    ((GregorianCalendar) calendar).setTimeZone(mTimeZone);
}
------------------
if (calendar == null) {
    throw new IllegalArgumentException("The calendar must not be null");
}
------------------
assert calendar != null;
------------------
if (calendar.getTimeZone() != mTimeZone) {
    calendar = (Calendar) calendar.clone();
    calendar.setTimeZone(mTimeZone);
}
------------------
if (calendar instanceof GregorianCalendar) {
    calendar.setTimeZone(mTimeZone);
    return applyRules(calendar, buf);
}
------------------
calendar.setTimeZone(mTimeZone);
------------------
if (calendar instanceof GregorianCalendar) {
    ((GregorianCalendar) calendar).setGregorianChange(new Date(Long.MAX_VALUE));
}
------------------
if (calendar == null) {
    calendar = Calendar.getInstance(mTimeZone, mLocale);
}
------------------
if (calendar instanceof GregorianCalendar) {
    ((GregorianCalendar) calendar)

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

codellama generated patch:

------------------
_tableInfo.set(new TableInfo(_hashArea, _names));
_needRehash = false;
------------------
_parent = null;
_tableInfo.set(null);
------------------
_verifyNeedForRehash();
_needRehash = false;
------------------
_needRehash = false;
_parent = null;
------------------
_needRehash = false;
_count = 0;
------------------
_tableInfo.set(new TableInfo(_hashArea, _names, _count));
_needRehash = false;
------------------
_tableInfo.set(new TableInfo(_hashArea, _hashSize));
_needRehash = false;
------------------
_verifyNeedForRehash();
_needRehash = true;
------------------
_needRehash = false;
_intern = false;
------------------
_needRehash = false;
_verifyNeedForRehash();

=====Math-75=====

<PRE> // class information of method
public class Frequency implements Serializable{
    private long serialVersionUID
    private TreeMap<Comparable<?>, Long> freqTable
    public String toString()
    public void addValue(Object v)
    public void addValue

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

codellama generated patch:

------------------

------------------

    return getPct((Comparable<?>) v);
------------------

return getCumPct((Comparable<?>) v);
}
------------------

    return getCumPct((Comparable<?>) v);
}
------------------

    return v instanceof Comparable<?> ? getCumPct((Comparable<?>) v) : 0;
------------------

    return getPct((Comparable<?>) v);
}
------------------

    return getCumPct((Comparable<?>) v);
------------------

    return v instanceof Comparable ? getCumPct((Comparable<?>) v) : -1;
------------------

    return v instanceof Comparable ? getCumPct((Comparable<?>) v) : 0;
------------------

    return v instanceof Comparable<?> ? getCumPct((Comparable<?>) v) : -1;

=====Math-27=====

<PRE> // class information of method
public class Fraction extends Number implements FieldElement, Comparable, Serializable{
    public Fraction TWO
    public Fraction ONE
    public Fraction ZERO
    public Fraction FOUR_FIFTHS
    public Fraction ONE_FIFTH

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

codellama generated patch:

------------------

------------------

return multiply(100).doubleValue();
}
------------------

    return multiply(100).doubleValue();
}
------------------

return multiply(100).doubleValue() / 100;
------------------

return multiply(100).doubleValue();

------------------

return multiply(100).doubleValue();
------------------

return (double) numerator / denominator * 100;
}
------------------

return (double) numerator / denominator * 100;

------------------

    return multiply(100).doubleValue();

=====Codec-10=====

<PRE> // class information of method
public class Caverphone implements StringEncoder{

    public String caverphone(String txt)
    public Object encode(Object pObject)
    public String encode(String pString)
    public boolean isCaverphoneEqual(String str1, String str2)
}

// method to be repaired
    public String caverphone(String txt) {
        if( txt == null || txt.length() == 0 ) {
            return "1111111111";
        }
  

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

codellama generated patch:

------------------

------------------

txt = txt.replaceAll("mb", "m2");
------------------

    txt = txt.replaceAll("mb", "m2");
------------------

txt = txt.replaceAll("mb", "m2");
txt = txt.replaceAll("mp", "m2");
------------------

txt = txt.replaceAll("mb$", "m2");
txt = txt.replaceAll("mb", "m2");
------------------

txt = txt.replaceAll("^mb$", "m2");
------------------

txt = txt.replaceAll("mb$", "m2");
------------------

txt = txt.replaceAll("^mb", "m2");
------------------

txt = txt.replaceAll("mc", "m2");
------------------

txt = txt.replaceAll("mb", "m2");
txt = txt.replaceAll("ng", "2ng");

=====Chart-8=====

<PRE> // class information of method


// method to be repaired
    public Week(Date time, TimeZone zone) {
// buggy lines
// this(time, RegularTimePeriod.DEFAULT_TIME_ZONE, Locale.getDefault()); <SUF>    } <MID>
fix:  this(time, zone, Locale.getDefault());
model generated patch:

------------------

    this(time, zone, Locale.getD

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

codellama generated patch:

------------------

------------------

this(time, zone, Locale.getDefault());
this.locale = locale;
------------------

this(time, zone, Locale.getDefault());
this.locale = null;
------------------

this(time, zone, Locale.getDefault());
}
------------------

this(time, zone, null);
------------------

    this(time, zone, null);
------------------

    this(time, zone, Locale.getDefault());
}
------------------

    this(time, zone, Locale.getDefault());
------------------

this(time, zone, Locale.getDefault());
------------------

this(time, zone, Locale.getDefault());



In [None]:
for sample in repairllama_paper:
    bug_id=sample['bug_id']
    if bug_id in sim_bug_ids:
        print(f"\n====={bug_id}=====\n")
        print(sample['input'])
        print('fix: ', sample['fix_code'])
        for i in sample['test_res']:
            if i['correctness']=='plausible':
                print('------------------')
                print(i['patch'])

# PATCH analysis

## codellama vanilla pluasible patches

In [None]:
for sample in codellama_vanilla:
    if not sample['test_res']:
        continue
    is_plausible=len([i for i in sample['test_res'] if i['correctness']=='plausible']) > 0
    if is_plausible:
        bug_id=sample['bug_id']
        print(f"\n====={bug_id}=====\n")
        print(sample['input'])
        print('fix: ', sample['fix_code'])

        for i in sample['test_res']:
            if i['correctness']=='plausible':
                print('------------------')
                print(i['patch'])

## unique bug_id

In [14]:
def find_unique_plausible_ids(res_dict):
    # 初始化一个字典来存储最终结果
    unique_plausible_ids = {dataset: [] for dataset in res_dict}

    # 遍历每个数据集，找到每个数据集中的plausible bug_id
    for dataset, correctness_dict in res_dict.items():
        plausible_ids = correctness_dict.get('plausible', [])
        
        # 对于每个plausible的bug_id，检查它是否在其他数据集中出现过
        for bug_id in plausible_ids:
            is_unique = True  # 假设当前bug_id是唯一的，直到证明它在其他数据集中出现过
            
            # 检查其他数据集
            for other_dataset, other_correctness_dict in res_dict.items():
                if dataset == other_dataset:
                    continue  # 跳过当前正在检查的数据集
                # 如果bug_id在其他数据集的任何correctness下出现，则不是唯一的
                if any(bug_id in ids for ids in other_correctness_dict['plausible']):
                    is_unique = False
                    break  # 一旦找到重复项，就停止检查当前bug_id
            
            # 如果当前bug_id是唯一的，则添加到结果字典中
            if is_unique:
                unique_plausible_ids[dataset].append(bug_id)

    return unique_plausible_ids


# 调用函数并打印结果
unique_plausible_ids = find_unique_plausible_ids(res_dict)
print(unique_plausible_ids)


{'codellama_vanilla': ['Csv-14', 'Compress-41', 'Chart-10', 'Closure-78', 'Mockito-22', 'Compress-26', 'Closure-58', 'Lang-37'], 'codellama_classinfo': ['JacksonXml-4', 'Lang-40'], 'codellama_classinfo_lora': ['Closure-109', 'Lang-38'], 'codellama_no_classinfo_lora': ['Jsoup-49'], 'repairllama': ['Closure-65', 'Time-18'], 'repairllama_classinfo': ['JacksonDatabind-24', 'Lang-16', 'JacksonDatabind-39', 'Lang-55'], 'repairllama_classinfo_lora': ['Math-105', 'Codec-2'], 'repairllama_paper': ['Compress-15', 'Time-20', 'Closure-77', 'Jsoup-80', 'Math-69', 'Compress-7', 'Mockito-29', 'JacksonDatabind-93', 'Jsoup-70', 'Jsoup-85']}


In [15]:
print('unique bug_id results')
for name in unique_plausible_ids:
    id_lst=unique_plausible_ids[name]
    dataset=dataset_dict[name]
    
    print(f'========={name}=========')
    for ids in id_lst:
        sample=dataset.filter(lambda x:x['bug_id']==ids)[0]
        print(f'====={ids}=====')
        print(sample['input'])
        print('fix: ', sample['fix_code'])
        for i in sample['test_res']:
            del i['test_message']
            if i['correctness']=='plausible':
                print('-----------------')
                print(i['patch'])
    

unique bug_id results


Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

=====Csv-14=====
<PRE>     private void printAndQuote(final Object object, final CharSequence value, final int offset, final int len,
            final Appendable out, final boolean newRecord) throws IOException {
        boolean quote = false;
        int start = offset;
        int pos = offset;
        final int end = offset + len;
        final char delimChar = getDelimiter();
        final char quoteChar = getQuoteCharacter().charValue();
        QuoteMode quoteModePolicy = getQuoteMode();
        if (quoteModePolicy == null) {
            quoteModePolicy = QuoteMode.MINIMAL;
        }
        switch (quoteModePolicy) {
        case ALL:
            quote = true;
            break;
        case NON_NUMERIC:
            quote = !(object instanceof Number);
            break;
        case NONE:
            printAndEscape(value, offset, len, out);
            return;
        case MINIMAL:
            if (len <= 0) {
                if (newRecord) {
                    quote = true;
 

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

=====Compress-41=====
<PRE>     public ZipArchiveEntry getNextZipEntry() throws IOException {
        boolean firstEntry = true;
        if (closed || hitCentralDirectory) {
            return null;
        }
        if (current != null) {
            closeEntry();
            firstEntry = false;
        }
        try {
            if (firstEntry) {
                readFirstLocalFileHeader(LFH_BUF);
            } else {
                readFully(LFH_BUF);
            }
        } catch (final EOFException e) {
            return null;
        }
        final ZipLong sig = new ZipLong(LFH_BUF);
        if (sig.equals(ZipLong.CFH_SIG) || sig.equals(ZipLong.AED_SIG)) {
            hitCentralDirectory = true;
            skipRemainderOfArchive();
 <SUF>        }
        int off = WORD;
        current = new CurrentEntry();
        final int versionMadeBy = ZipShort.getValue(LFH_BUF, off);
        off += SHORT;
        current.entry.setPlatform((versionMadeBy >> ZipFile.BYTE_SHIFT) & ZipFile

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

=====Chart-10=====
<PRE>     public String generateToolTipFragment(String toolTipText) {
 <SUF>            + "\" alt=\"\"";
    }
 <MID>
fix:  return " title=\"" + ImageMapUtilities.htmlEscape(toolTipText)
-----------------
       if (toolTipText == null) {
            return "";
        }
        return " title=\"" + toolTipText.replaceAll("\"", "&quot;")
    
-----------------
       if ((toolTipText == null) || (toolTipText.length() == 0)) {
            return "";
        }
        return " title=\"" + toolTipText.replaceAll("\"", "&quot;")
    
-----------------
       if (toolTipText == null || toolTipText.length() == 0) {
            return "";
        }
        return " title=\"" + toolTipText.replaceAll("\"", "&quot;").replaceAll("\n", " ")
    
-----------------
       if (toolTipText == null || toolTipText.length() == 0) {
            return "";
        }
        return " title=\"" + toolTipText.replaceAll("\"", "&quot;")
    
-----------------
       if (toolTipText == null 

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

=====Closure-78=====
<PRE>   private Node performArithmeticOp(int opType, Node left, Node right) {
    if (opType == Token.ADD
        && (NodeUtil.mayBeString(left, false)
            || NodeUtil.mayBeString(right, false))) {
      return null;
    }
    double result;
    Double lValObj = NodeUtil.getNumberValue(left);
    if (lValObj == null) {
      return null;
    }
    Double rValObj = NodeUtil.getNumberValue(right);
    if (rValObj == null) {
      return null;
    }
    double lval = lValObj;
    double rval = rValObj;
    switch (opType) {
      case Token.BITAND:
        result = ScriptRuntime.toInt32(lval) & ScriptRuntime.toInt32(rval);
        break;
      case Token.BITOR:
        result = ScriptRuntime.toInt32(lval) | ScriptRuntime.toInt32(rval);
        break;
      case Token.BITXOR:
        result = ScriptRuntime.toInt32(lval) ^ ScriptRuntime.toInt32(rval);
        break;
      case Token.ADD:
        result = lval + rval;
        break;
      case Token.SUB:
        

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

=====Mockito-22=====
<PRE>     public static boolean areEqual(Object o1, Object o2) {
 <SUF>            return o1 == null && o2 == null;
        } else if (isArray(o1)) {
            return isArray(o2) && areArraysEqual(o1, o2);
        } else {
            return o1.equals(o2);
        }
    }
 <MID>
fix:  if (o1 == o2 ) {
             return true;
 	} else if (o1 == null || o2 == null) {
-----------------
       if (o1 == o2) {
            return true;
        } else if (o1 == null) {



Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

=====Compress-26=====
<PRE>     public static long skip(InputStream input, long numToSkip) throws IOException {
        long available = numToSkip;
        while (numToSkip > 0) {
            long skipped = input.skip(numToSkip);
            if (skipped == 0) {
                break;
            }
            numToSkip -= skipped;
        }
 <SUF>        return available - numToSkip;
    }
 <MID>
fix:  if (numToSkip > 0) {
             byte[] skipBuf = new byte[SKIP_BUF_SIZE];
             while (numToSkip > 0) {
                 int read = readFully(input, skipBuf, 0,
                                      (int) Math.min(numToSkip, SKIP_BUF_SIZE));
                 if (read < 1) {
                     break;
                 }
                 numToSkip -= read;
             }
         }
-----------------
       if (numToSkip > 0) {
            while (numToSkip > 0) {
                int skipped = input.read();
                if (skipped < 0) {
                    break;
             

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

=====Closure-58=====
<PRE>   private void computeGenKill(Node n, BitSet gen, BitSet kill,
      boolean conditional) {
    switch (n.getType()) {
      case Token.SCRIPT:
      case Token.BLOCK:
      case Token.FUNCTION:
        return;
      case Token.WHILE:
      case Token.DO:
      case Token.IF:
        computeGenKill(NodeUtil.getConditionExpression(n), gen, kill,
            conditional);
        return;
      case Token.FOR:
        if (!NodeUtil.isForIn(n)) {
          computeGenKill(NodeUtil.getConditionExpression(n), gen, kill,
              conditional);
        } else {
          Node lhs = n.getFirstChild();
          Node rhs = lhs.getNext();
          if (NodeUtil.isVar(lhs)) {
            lhs = lhs.getLastChild();
          }
 <SUF>          computeGenKill(rhs, gen, kill, conditional);
        }
        return;
      case Token.VAR:
        for (Node c = n.getFirstChild(); c != null; c = c.getNext()) {
          if (c.hasChildren()) {
            computeGenKill(c.getF

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

=====Lang-37=====
<PRE>     public static <T> T[] addAll(T[] array1, T... array2) {
        if (array1 == null) {
            return clone(array2);
        } else if (array2 == null) {
            return clone(array1);
        }
        final Class<?> type1 = array1.getClass().getComponentType();
        T[] joinedArray = (T[]) Array.newInstance(type1, array1.length + array2.length);
        System.arraycopy(array1, 0, joinedArray, 0, array1.length);
 <SUF>        return joinedArray;
    }
 <MID>
fix:  try {
             System.arraycopy(array2, 0, joinedArray, array1.length, array2.length);
         } catch (ArrayStoreException ase) {
             final Class<?> type2 = array2.getClass().getComponentType();
             if (!type1.isAssignableFrom(type2)){
                 throw new IllegalArgumentException("Cannot store "+type2.getName()+" in an array of "+type1.getName());
             }
             throw ase; // No, so rethrow original
         }
-----------------
       try {
   

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

=====JacksonXml-4=====
<PRE> // class information of method
public class XmlSerializerProvider extends DefaultSerializerProvider{
    private long serialVersionUID
    QName ROOT_NAME_FOR_NULL
    XmlRootNameLookup _rootNameLookup
    public DefaultSerializerProvider createInstance(SerializationConfig config, SerializerFactory jsf)
    public void serializeValue(JsonGenerator gen, Object value)
    public void serializeValue(JsonGenerator gen, Object value, JavaType rootType)
    public void serializeValue(JsonGenerator gen, Object value, JavaType rootType, JsonSerializer<Object> ser)
    protected void _serializeXmlNull(JsonGenerator jgen)
    protected void _startRootArray(ToXmlGenerator xgen, QName rootName)
    protected void _initWithRootName(ToXmlGenerator xgen, QName rootName)
    protected QName _rootNameFromConfig()
    protected ToXmlGenerator _asXmlGenerator(JsonGenerator gen)
}

// method to be repaired
    protected void _serializeXmlNull(JsonGenerator jgen) throws IOExcep

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

=====Lang-40=====
<PRE> // class information of method
public class StringUtils{
    public String EMPTY
    public int INDEX_NOT_FOUND
    private int PAD_LIMIT
    public static boolean isEmpty(CharSequence str)
    public static boolean isNotEmpty(CharSequence str)
    public static boolean isBlank(CharSequence str)
    public static boolean isNotBlank(CharSequence str)
    public static String trim(String str)
    public static String trimToNull(String str)
    public static String trimToEmpty(String str)
    public static String strip(String str)
    public static String stripToNull(String str)
    public static String stripToEmpty(String str)
    public static String strip(String str, String stripChars)
    public static String stripStart(String str, String stripChars)
    public static String stripEnd(String str, String stripChars)
    public static String[] stripAll(String[] strs)
    public static String[] stripAll(String[] strs, String stripChars)
    public static boolean eq

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

=====Closure-109=====
<PRE> // class information of method
public class JsDocInfoParser{
    private JsDocTokenStream stream
    private JSDocInfoBuilder jsdocBuilder
    private StaticSourceFile sourceFile
    private Node associatedNode
    private ErrorReporter errorReporter
    private ErrorReporterParser parser
    private Node templateNode
    private JSDocInfo fileOverviewJSDocInfo
    private State state
    private Map<String, Annotation> annotationNames
    private Set<String> suppressionNames
    private Set<String> modifiesAnnotationKeywords
    private Set<String> idGeneratorAnnotationKeywords
    private Node.FileLevelJsDocBuilder fileLevelJsDocBuilder
    private JsDocToken NO_UNREAD_TOKEN
    private JsDocToken unreadToken
    void setFileLevelJsDocBuilder(Node.FileLevelJsDocBuilder fileLevelJsDocBuilder)
    void setFileOverviewJSDocInfo(JSDocInfo fileOverviewJSDocInfo)
    private String getSourceName()
    public JSDocInfo parseInlineTypeDoc()
    private void record

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

=====Lang-38=====
<PRE> // class information of method
public class FastDateFormat extends Format{
    private long serialVersionUID
    public int FULL
    public int LONG
    public int MEDIUM
    public int SHORT
    private String cDefaultPattern
    private Map<FastDateFormat, FastDateFormat> cInstanceCache
    private Map<Object, FastDateFormat> cDateInstanceCache
    private Map<Object, FastDateFormat> cTimeInstanceCache
    private Map<Object, FastDateFormat> cDateTimeInstanceCache
    private Map<Object, String> cTimeZoneDisplayCache
    private String mPattern
    private TimeZone mTimeZone
    private boolean mTimeZoneForced
    private Locale mLocale
    private boolean mLocaleForced
    private Rule[] mRules
    private int mMaxLengthEstimate
    public static FastDateFormat getInstance()
    public static FastDateFormat getInstance(String pattern)
    public static FastDateFormat getInstance(String pattern, TimeZone timeZone)
    public static FastDateFormat getInstance(S

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

=====Jsoup-49=====
<PRE> protected void addChildren(int index, Node... children) {
        Validate.noNullElements(children);
        ensureChildNodes();
        for (int i = children.length - 1; i >= 0; i--) {
            Node in = children[i];
            reparentChild(in);
            childNodes.add(index, in);
// buggy lines
// }
//        reindexChildren(index); <SUF>} <MID>
fix:  reindexChildren(index);
         
     }
-----------------

reindexChildren(index);
}


Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

=====Closure-65=====
  static String strEscape(String s, char quote,
                          String doublequoteEscape,
                          String singlequoteEscape,
                          String backslashEscape,
                          CharsetEncoder outputCharsetEncoder) {
    StringBuilder sb = new StringBuilder(s.length() + 2);
    sb.append(quote);
    for (int i = 0; i < s.length(); i++) {
      char c = s.charAt(i);
      switch (c) {
// buggy code
//        case '\0': sb.append("\\0"); break;
<FILL_ME>
        case '\n': sb.append("\\n"); break;
        case '\r': sb.append("\\r"); break;
        case '\t': sb.append("\\t"); break;
        case '\\': sb.append(backslashEscape); break;
        case '\"': sb.append(doublequoteEscape); break;
        case '\'': sb.append(singlequoteEscape); break;
        case '>':                       
          if (i >= 2 &&
              ((s.charAt(i - 1) == '-' && s.charAt(i - 2) == '-') ||
               (s.charAt(i - 1) == ']' &

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

=====Time-18=====
    public long getDateTimeMillis(int year, int monthOfYear, int dayOfMonth,
                                  int hourOfDay, int minuteOfHour,
                                  int secondOfMinute, int millisOfSecond)
        throws IllegalArgumentException
    {
        Chronology base;
        if ((base = getBase()) != null) {
            return base.getDateTimeMillis
                (year, monthOfYear, dayOfMonth,
                 hourOfDay, minuteOfHour, secondOfMinute, millisOfSecond);
        }
        long instant;
// buggy code
//            instant = iGregorianChronology.getDateTimeMillis
//                (year, monthOfYear, dayOfMonth,
//                 hourOfDay, minuteOfHour, secondOfMinute, millisOfSecond);
<FILL_ME>
        if (instant < iCutoverMillis) {
            instant = iJulianChronology.getDateTimeMillis
                (year, monthOfYear, dayOfMonth,
                 hourOfDay, minuteOfHour, secondOfMinute, millisOfSecond);
            if (ins

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

=====JacksonDatabind-24=====
<PRE> // class information of method
public class BaseSettings implements Serializable{
    private long serialVersionUID
    ClassIntrospector _classIntrospector
    AnnotationIntrospector _annotationIntrospector
    VisibilityChecker<?> _visibilityChecker
    PropertyNamingStrategy _propertyNamingStrategy
    TypeFactory _typeFactory
    TypeResolverBuilder<?> _typeResolverBuilder
    DateFormat _dateFormat
    HandlerInstantiator _handlerInstantiator
    Locale _locale
    TimeZone _timeZone
    Base64Variant _defaultBase64
    public BaseSettings withClassIntrospector(ClassIntrospector ci)
    public BaseSettings withAnnotationIntrospector(AnnotationIntrospector ai)
    public BaseSettings withInsertedAnnotationIntrospector(AnnotationIntrospector ai)
    public BaseSettings withAppendedAnnotationIntrospector(AnnotationIntrospector ai)
    public BaseSettings withVisibilityChecker(VisibilityChecker<?> vc)
    public BaseSettings withVisibility(PropertyAc

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

=====Lang-16=====
<PRE> // class information of method
public class NumberUtils{
    public Long LONG_ZERO
    public Long LONG_ONE
    public Long LONG_MINUS_ONE
    public Integer INTEGER_ZERO
    public Integer INTEGER_ONE
    public Integer INTEGER_MINUS_ONE
    public Short SHORT_ZERO
    public Short SHORT_ONE
    public Short SHORT_MINUS_ONE
    public Byte BYTE_ZERO
    public Byte BYTE_ONE
    public Byte BYTE_MINUS_ONE
    public Double DOUBLE_ZERO
    public Double DOUBLE_ONE
    public Double DOUBLE_MINUS_ONE
    public Float FLOAT_ZERO
    public Float FLOAT_ONE
    public Float FLOAT_MINUS_ONE
    public static int toInt(String str)
    public static int toInt(String str, int defaultValue)
    public static long toLong(String str)
    public static long toLong(String str, long defaultValue)
    public static float toFloat(String str)
    public static float toFloat(String str, float defaultValue)
    public static double toDouble(String str)
    public static double toDou

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

=====JacksonDatabind-39=====
<PRE> // class information of method
public class NullifyingDeserializer extends StdDeserializer{
    private long serialVersionUID
    public NullifyingDeserializer instance
    public Object deserialize(JsonParser p, DeserializationContext ctxt)
    public Object deserializeWithType(JsonParser p, DeserializationContext ctxt, TypeDeserializer typeDeserializer)
}

// method to be repaired
    public Object deserialize(JsonParser p, DeserializationContext ctxt) throws IOException
    {
// buggy lines
// p.skipChildren(); <SUF>        return null;
    } <MID>
fix:  if (p.hasToken(JsonToken.FIELD_NAME)) {
             while (true) {
                 JsonToken t = p.nextToken();
                 if ((t == null) || (t == JsonToken.END_OBJECT)) {
                     break;
                 }
                 p.skipChildren();
             }
         } else {
             p.skipChildren();
         }
-----------------

        p.readValueAs(Object.class);

------

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

=====Lang-55=====
<PRE> // class information of method
public class StopWatch{
    private int STATE_UNSTARTED
    private int STATE_RUNNING
    private int STATE_STOPPED
    private int STATE_SUSPENDED
    private int STATE_UNSPLIT
    private int STATE_SPLIT
    private int runningState
    private int splitState
    private long startTime
    private long stopTime
    public void start()
    public void stop()
    public void reset()
    public void split()
    public void unsplit()
    public void suspend()
    public void resume()
    public long getTime()
    public long getSplitTime()
    public String toString()
    public String toSplitString()
}

// method to be repaired
    public void stop() {
        if(this.runningState != STATE_RUNNING && this.runningState != STATE_SUSPENDED) {
            throw new IllegalStateException("Stopwatch is not running. ");
        }
// buggy lines
// stopTime = System.currentTimeMillis(); <SUF>        this.runningState = STATE_STOPPED;
    } 

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

=====Math-105=====
<PRE> // class information of method
public class SimpleRegression implements Serializable{
    private long serialVersionUID
    private double sumX
    private double sumXX
    private double sumY
    private double sumYY
    private double sumXY
    private long n
    private double xbar
    private double ybar
    public void addData(double x, double y)
    public void addData(double[][] data)
    public void clear()
    public long getN()
    public double predict(double x)
    public double getIntercept()
    public double getSlope()
    public double getSumSquaredErrors()
    public double getTotalSumSquares()
    public double getRegressionSumSquares()
    public double getMeanSquareError()
    public double getR()
    public double getRSquare()
    public double getInterceptStdErr()
    public double getSlopeStdErr()
    public double getSlopeConfidenceInterval()
    public double getSlopeConfidenceInterval(double alpha)
    public double getSignificance()
 

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

=====Codec-2=====
<PRE> // class information of method
public class Base64 implements BinaryEncoder, BinaryDecoder{
    int CHUNK_SIZE
    byte[] CHUNK_SEPARATOR
    private byte[] STANDARD_ENCODE_TABLE
    private byte[] URL_SAFE_ENCODE_TABLE
    private byte PAD
    private byte[] DECODE_TABLE
    private int MASK_6BITS
    private int MASK_8BITS
    private byte[] encodeTable
    private int lineLength
    private byte[] lineSeparator
    private int decodeSize
    private int encodeSize
    private byte[] buf
    private int pos
    private int readPos
    private int currentLinePos
    private int modulus
    private boolean eof
    private int x
    public boolean isUrlSafe()
    boolean hasData()
    int avail()
    private void resizeBuf()
    int readResults(byte[] b, int bPos, int bAvail)
    void setInitialBuffer(byte[] out, int outPos, int outAvail)
    void encode(byte[] in, int inPos, int inAvail)
    void decode(byte[] in, int inPos, int inAvail)
    public static boolea

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

=====Compress-15=====
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        ZipArchiveEntry other = (ZipArchiveEntry) obj;
        String myName = getName();
        String otherName = other.getName();
        if (myName == null) {
            if (otherName != null) {
                return false;
            }
        } else if (!myName.equals(otherName)) {
            return false;
        }
        String myComment = getComment();
        String otherComment = other.getComment();
        if (myComment == null) {
// buggy code
//            if (otherComment != null) {
//                return false;
//            }
//        } else if (!myComment.equals(otherComment)) {
//            return false;
//        }
//        return getTime() == other.getTime()
<FILL_ME>
            && getInternalAttributes() == other.getInternalAttributes()


Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

=====Time-20=====
        public int parseInto(DateTimeParserBucket bucket, String text, int position) {
            String str = text.substring(position);
// buggy code
//            for (String id : ALL_IDS) {
//                if (str.startsWith(id)) {
//                    bucket.setZone(DateTimeZone.forID(id));
//                    return position + id.length();
//                }
<FILL_ME>
            }
            return ~position;
        }

fix:  String best = null;
             for (String id : ALL_IDS) {
                 if (str.startsWith(id)) {
                 	if (best == null || id.length() > best.length()) {
                 		best = id;
                 	}
                 }
             }
             if (best != null) {
                 bucket.setZone(DateTimeZone.forID(best));
                 return position + best.length();
-----------------
for (int i = str.length() - 1; i >= 0; i--) {
                String id = str.substring(0, i + 1);
                if (AL

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

=====Closure-77=====
  static String strEscape(String s, char quote,
                          String doublequoteEscape,
                          String singlequoteEscape,
                          String backslashEscape,
                          CharsetEncoder outputCharsetEncoder) {
    StringBuilder sb = new StringBuilder(s.length() + 2);
    sb.append(quote);
    for (int i = 0; i < s.length(); i++) {
      char c = s.charAt(i);
      switch (c) {
// buggy code
<FILL_ME>
        case '\n': sb.append("\\n"); break;
        case '\r': sb.append("\\r"); break;
        case '\t': sb.append("\\t"); break;
        case '\\': sb.append(backslashEscape); break;
        case '\"': sb.append(doublequoteEscape); break;
        case '\'': sb.append(singlequoteEscape); break;
        case '>':                       
          if (i >= 2 &&
              ((s.charAt(i - 1) == '-' && s.charAt(i - 2) == '-') ||
               (s.charAt(i - 1) == ']' && s.charAt(i - 2) == ']'))) {
            sb.a

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

=====Jsoup-80=====
    void insert(Token.Comment commentToken) {
        Comment comment = new Comment(commentToken.getData());
        Node insert = comment;
        if (commentToken.bogus) { 
            String data = comment.getData();
            if (data.length() > 1 && (data.startsWith("!") || data.startsWith("?"))) {
                Document doc = Jsoup.parse("<" + data.substring(1, data.length() -1) + ">", baseUri, Parser.xmlParser());
// buggy code
//                    Element el = doc.child(0);
//                    insert = new XmlDeclaration(settings.normalizeTag(el.tagName()), data.startsWith("!"));
//                    insert.attributes().addAll(el.attributes());
<FILL_ME>
            }
        }
        insertNode(insert);
    }

fix:  id insert(Token.Comment commentToken) {
         Comment comment = new Comment(commentToken.getData());
         Node insert = comment;
         if (commentToken.bogus) { // xml declarations are emitted as bogus comments (which is right 

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

=====Math-69=====
    public RealMatrix getCorrelationPValues() throws MathException {
        TDistribution tDistribution = new TDistributionImpl(nObs - 2);
        int nVars = correlationMatrix.getColumnDimension();
        double[][] out = new double[nVars][nVars];
        for (int i = 0; i < nVars; i++) {
            for (int j = 0; j < nVars; j++) {
                if (i == j) {
                    out[i][j] = 0d;
                } else {
                    double r = correlationMatrix.getEntry(i, j);
                    double t = Math.abs(r * Math.sqrt((nObs - 2)/(1 - r * r)));
// buggy code
//                    out[i][j] = 2 * (1 - tDistribution.cumulativeProbability(t));
<FILL_ME>
                }
            }
        }
        return new BlockRealMatrix(out);
    }

fix:  out[i][j] = 2 * tDistribution.cumulativeProbability(-t);
-----------------
out[i][j] = 2 * tDistribution.cumulativeProbability(-t);


Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

=====Compress-7=====
    public static String parseName(byte[] buffer, final int offset, final int length) {
        StringBuffer result = new StringBuffer(length);
        int          end = offset + length;
        for (int i = offset; i < end; ++i) {
// buggy code
//            if (buffer[i] == 0) {
//                break;
//            }
//            result.append((char) buffer[i]);
<FILL_ME>
        }
        return result.toString();
    }

fix:  byte b = buffer[i];
             if (b == 0) { // Trailing null
                 break;
             }
             result.append((char) (b & 0xFF)); // Allow for sign-extension
-----------------
if (buffer[i] == 0) {
                break;
            }
            result.append((char) (buffer[i] & 0xff));
-----------------
if (buffer[i] == 0) {
                break;
            }
            result.append((char) (buffer[i] & 0xFF));


Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

=====Mockito-29=====
    public void describeTo(Description description) {
        description.appendText("same(");
        appendQuoting(description);
// buggy code
//        description.appendText(wanted.toString());
<FILL_ME>
        appendQuoting(description);
        description.appendText(")");
    }

fix:  description.appendText(wanted == null ? "null" : wanted.toString());
-----------------
description.appendText(String.valueOf(wanted));
-----------------
description.appendText(wanted == null ? "null" : wanted.toString());


Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

=====JacksonDatabind-93=====
    public void validateSubType(DeserializationContext ctxt, JavaType type) throws JsonMappingException
    {
        final Class<?> raw = type.getRawClass();
        String full = raw.getName();
        main_check:
        do {
            if (_cfgIllegalClassNames.contains(full)) {
                break;
            }
// buggy code
//            if (full.startsWith(PREFIX_STRING)) {
//                for (Class<?> cls = raw; cls != Object.class; cls = cls.getSuperclass()) {
<FILL_ME>
                    String name = cls.getSimpleName();
                    if ("AbstractPointcutAdvisor".equals(name)
                            || "AbstractApplicationContext".equals(name)) {
                        break main_check;
                    }
                }
            }
            return;
        } while (false);
        throw JsonMappingException.from(ctxt,
                String.format("Illegal type (%s) to deserialize: prevented for security reasons", f

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

=====Jsoup-70=====
    static boolean preserveWhitespace(Node node) {
        if (node != null && node instanceof Element) {
            Element el = (Element) node;
// buggy code
//                if (el.tag.preserveWhitespace())
//                    return true;
//                else
//                    return el.parent() != null && el.parent().tag.preserveWhitespace();
<FILL_ME>
        }
        return false;
    }

fix:  int i = 0;
             do {
                 if (el.tag.preserveWhitespace())
                     return true;
                 el = el.parent();
                 i++;
             } while (i < 6 && el != null);
-----------------
while (el != null) {
                if (el.tag.preserveWhitespace())
                    return true;
                el = el.parent();
            }
-----------------
while (el != null) {
                if (el.tag.preserveWhitespace())
                    return true;
                else
                    el = el.parent();
   

Filter:   0%|          | 0/479 [00:00<?, ? examples/s]

=====Jsoup-85=====
    public Attribute(String key, String val, Attributes parent) {
        Validate.notNull(key);
// buggy code
//        this.key = key.trim();
//        Validate.notEmpty(key); 
<FILL_ME>
        this.val = val;
        this.parent = parent;
    }

fix:  key = key.trim();
         Validate.notEmpty(key); // trimming could potentially make empty, so validate here
         this.key = key;
-----------------
this.key = key.trim();
        Validate.notEmpty(this.key);


# repairllama check

In [54]:
repairllama_result=load_dataset('json', data_files='/Users/17988/PycharmProjects/repairllama/results/defects4j/repairllama/lora/RepairLLaMA_defects4j_f2f_bugs_results_ir4_or2.jsonl', split='all')

Found cached dataset json (C:/Users/17988/.cache/huggingface/datasets/json/default-bfbb90bc52752341/0.0.0/e347ab1c932092252e717ff3f949105a4dd28b27e842dd53157d2f72e276c2e4)


In [60]:
plausible_id_list=[]
for sample in repairllama_result:
    bug_id, res=sample['bug_id'], sample['test_results']
    if 'Line match' in res or 'Plausible' in res or 'AST match' in res:
        plausible_id_list.append(bug_id) 

In [61]:
len(plausible_id_list)

196

In [None]:
for sample in repairllama_paper:
    bug_id, res=sample['bug_id'], sample['test_res']
    if bug_id in plausible_id_list:
        different=True
        for patch in res:
            if patch['correctness']=='plausible':
                different=False
                break
        if different:
            print(sample['input'])
            print('fix: ', sample['fix_code'])
            for i in res:
                del i['test_message']
                print(i)