In [1]:
import ast

import pandas as pd
# Load the patches table; later cells read its patch, agent, instance_id,
# resolved and tests_status columns.
df = pd.read_csv(filepath_or_buffer='./data/all_patches.csv')

In [2]:
# Keep only the first row for every distinct patch text.
df = df[~df.duplicated(subset=["patch"], keep="first")]

In [3]:
# Summary of the agent column: row count, distinct agents, most frequent agent.
df.loc[:, "agent"].describe()

count                                 46238
unique                                   92
top       20240620_sweagent_claude3.5sonnet
freq                                   2218
Name: agent, dtype: object

In [5]:
# Annotation table; only its instance_id column is used in this cell.
labeled_df = pd.read_csv(filepath_or_buffer='./data/ensembled_annotations_public.csv')

# Keep only the patches whose instance_id also appears in labeled_df.
df = df.loc[df['instance_id'].isin(labeled_df['instance_id'])]

In [6]:
# Number of patches left after restricting to annotated instances.
df.shape[0]

40270

In [7]:
# Patches available per instance, most frequent first.
df.loc[:, 'instance_id'].value_counts(sort=True, ascending=False)

instance_id
django__django-11848                90
astropy__astropy-14995              89
sympy__sympy-17655                  88
django__django-14999                88
sympy__sympy-15345                  88
                                    ..
sphinx-doc__sphinx-8579              3
sphinx-doc__sphinx-8611              3
scikit-learn__scikit-learn-13933     3
scikit-learn__scikit-learn-12656     3
pytest-dev__pytest-8250              2
Name: count, Length: 1699, dtype: int64

In [8]:
# Split of patches by their resolved flag.
df.loc[:, "resolved"].value_counts()

resolved
False    25432
True     14838
Name: count, dtype: int64

In [9]:
# Per-instance table; later cells read problem_statement, repo and base_commit from it.
input_bouncer_df = pd.read_csv(filepath_or_buffer='./dataset/input_bouncer.csv')

# An instance is evaluatable unless false_negative is 2.0 or 3.0, or
# other_major_issues is 1.0. Rows with missing scores are kept, since a NaN
# comparison never matches.
# NOTE(review): presumably these are annotation severity scores — confirm the scale.
evaluatable_df = input_bouncer_df.loc[
    ~(
        input_bouncer_df['false_negative'].eq(2.0)
        | input_bouncer_df['false_negative'].eq(3.0)
    )
    & input_bouncer_df['other_major_issues'].ne(1.0)
]
evaluatable_df.shape[0]

648

In [10]:
# Evaluatable instances whose underspecified score is 2.0 or 3.0.
input_bounced = evaluatable_df.loc[
    evaluatable_df['underspecified'].isin([2.0, 3.0])
]
input_bounced.shape[0]

109

Random Sample

In [11]:
# Draw one patch per evaluatable instance, with a fixed seed so the sample is
# reproducible.
#   * Instances listed in input_bounced may only contribute a patch with
#     resolved == False; if none exists the instance is skipped.
#   * All other instances contribute any one of their patches.
# The one-row samples are collected in a list and concatenated once, instead of
# growing a DataFrame inside the loop (quadratic, and it concatenates with an
# empty frame on the first iteration).
_SAMPLE_SEED = 42
# Built once: membership tests against a set avoid recomputing .unique() per instance.
_bounced_ids = set(input_bounced['instance_id'].unique())
_samples = []
for _instance_id in evaluatable_df['instance_id'].unique():
    _options = df[df['instance_id'] == _instance_id]
    if _options.empty:
        # Without this guard .sample(1) raises on an empty frame (or, for a
        # bounced instance, the message below would misreport the reason).
        print(f"Skipping instance_id {_instance_id} because it has no patches in df")
        continue
    if _instance_id in _bounced_ids:
        _unresolved = _options[_options['resolved'] == False]
        if _unresolved.empty:
            print(f"Skipping instance_id {_instance_id} because all options {len(_options)} are resolved")
            continue
        _samples.append(_unresolved.sample(1, random_state=_SAMPLE_SEED))
    else:
        _samples.append(_options.sample(1, random_state=_SAMPLE_SEED))
# ignore_index=True already yields a fresh 0..n-1 index; pd.concat([]) raises,
# hence the explicit empty fallback.
new_dataset = pd.concat(_samples, ignore_index=True) if _samples else pd.DataFrame()

Skipping instance_id django__django-12231 because all options 6 are resolved
Skipping instance_id django__django-16588 because all options 7 are resolved
Skipping instance_id django__django-9871 because all options 6 are resolved
Skipping instance_id psf__requests-1537 because all options 7 are resolved
Skipping instance_id scikit-learn__scikit-learn-11333 because all options 7 are resolved
Skipping instance_id sympy__sympy-24455 because all options 7 are resolved


In [12]:
def _lookup_instance_field(instance_id, field, source_df=None):
    """Return `field` from the first row of `source_df` whose instance_id matches.

    Args:
        instance_id: Value to match against the 'instance_id' column.
        field: Name of the column to read.
        source_df: Frame to search; defaults to the notebook-level
            `input_bouncer_df` when omitted.

    Returns:
        The value of `field` in the first matching row.

    Raises:
        IndexError: If no row has the given instance_id.
        KeyError: If `field` (or 'instance_id') is not a column of the frame.
    """
    if source_df is None:
        source_df = input_bouncer_df
    matches = source_df.loc[source_df['instance_id'] == instance_id, field]
    if matches.empty:
        # Same exception type as indexing an empty array, but with a usable message.
        raise IndexError(f"instance_id {instance_id!r} not found when looking up {field!r}")
    return matches.values[0]


def get_problem_statement(instance_id, source_df=None):
    """Return the problem_statement recorded for `instance_id`."""
    return _lookup_instance_field(instance_id, 'problem_statement', source_df)


def get_repo(instance_id, source_df=None):
    """Return the repo recorded for `instance_id`."""
    return _lookup_instance_field(instance_id, 'repo', source_df)


def get_base_commit(instance_id, source_df=None):
    """Return the base_commit recorded for `instance_id`."""
    return _lookup_instance_field(instance_id, 'base_commit', source_df)
# Attach the per-instance metadata columns, each looked up by instance_id.
for _column, _getter in (
    ('problem_statement', get_problem_statement),
    ('repo', get_repo),
    ('base_commit', get_base_commit),
):
    new_dataset[_column] = new_dataset['instance_id'].apply(_getter)

In [13]:
def parse_frac(ts):
    """Return the fraction of tests recorded as successful in a tests_status string.

    Args:
        ts: String holding a Python-literal dict with "FAIL_TO_PASS" and
            "PASS_TO_PASS" entries, each mapping "success" and "failure" to
            sized collections of tests.

    Returns:
        successes / (successes + failures) as a float, or 0.0 when no tests
        are listed at all.

    Raises:
        ValueError, SyntaxError: If `ts` is not a valid Python literal.
        KeyError: If an expected key is missing.
    """
    # SECURITY: this used to call eval(), which executes arbitrary code embedded
    # in the CSV. literal_eval only accepts plain literals (dict/list/str/...).
    d = ast.literal_eval(ts)
    succ = len(d["FAIL_TO_PASS"]["success"]) + len(d["PASS_TO_PASS"]["success"])
    tot = succ + len(d["FAIL_TO_PASS"]["failure"]) + len(d["PASS_TO_PASS"]["failure"])
    # No tests at all counts as 0.0 rather than dividing by zero.
    return succ / tot if tot > 0 else 0.0


# output_quality is the per-patch pass fraction; output_bounce flags every
# patch whose fraction is below 1.0.
pass_frac = new_dataset["tests_status"].apply(parse_frac)
new_dataset["output_quality"] = pass_frac
new_dataset["output_bounce"] = pass_frac.lt(1.0)

In [14]:
# Persist the sampled dataset without writing the row index.
new_dataset.to_csv(path_or_buf='./dataset/random_sample_bouncer.csv', index=False)

In [15]:
# Class balance of the output_bounce label in the sampled dataset.
new_dataset.loc[:, "output_bounce"].value_counts()

output_bounce
True     405
False    237
Name: count, dtype: int64