<a href="https://colab.research.google.com/github/yiw008/nondet-project/blob/main/Go_Through_IFixFlakies.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import os
import requests
import csv
import random
from copy import deepcopy
import json

In [None]:
# URL of the iPFlakies Test_Status.csv dataset (hosted on Zenodo).
url = "https://zenodo.org/records/6176417/files/Test_Status.csv"

In [None]:
# Fetch the CSV at `url` and parse it into a pandas DataFrame.
data = pd.read_csv(url)

In [None]:
# Print the first few rows of the DataFrame as a quick sanity check.
print(data.head())

  Project_Name                               Project_URL  \
0   BT-Tracker  https://github.com/nordwind80/BT-Tracker   
1      Breathe         https://github.com/mrob95/Breathe   
2      Breathe         https://github.com/mrob95/Breathe   
3      Breathe         https://github.com/mrob95/Breathe   
4      Breathe         https://github.com/mrob95/Breathe   

                               Project_Hash  \
0  558c15b399871c1ca11d0c4ae1eb598e3060931e   
1  4600818e24f4156cd7bb8cc0f43886b27323968e   
2  4600818e24f4156cd7bb8cc0f43886b27323968e   
3  4600818e24f4156cd7bb8cc0f43886b27323968e   
4  4600818e24f4156cd7bb8cc0f43886b27323968e   

                                             Test_id  Detected  Have_Patch  \
0  Tracker/tests/test_event.py::TestEvent::test_o...      True        True   
1  tests/test_command_context.py::test_manual_con...      True       False   
2                tests/test_loading.py::test_loading      True        True   
3        tests/test_loading.py::test_loading

In [None]:
def match_class(content, class_name):
  """Return the part of `content` that starts at the definition of `class_name`.

  An exact match on the class name is preferred, so that looking for
  `TestFoo` does not stop at an earlier `class TestFooBar`. If no exact
  match exists, the first occurrence of the text "class <class_name>" is
  used instead (prefix match).

  Args:
    content: Source text of a Python file.
    class_name: Name of the class to locate.

  Returns:
    The text from the start of the class statement to the end of `content`
    (the end of the class body is not detected), or None if the class
    cannot be found.
  """
  import re  # local import: keeps this cell runnable on its own

  exact = re.search(rf"\bclass {re.escape(class_name)}\b", content)
  if exact is not None:
    return content[exact.start():]

  # Fallback: plain prefix search.
  start_index = content.find(f"class {class_name}")
  if start_index == -1:
    return None
  return content[start_index:]

In [None]:
def match_method(content, test_name):
  start_index = content.find(f"def {test_name}")
  if start_index == -1:
    return None

  end_index = -1
  next_def_index = content.find("def ", start_index + len(test_name))
  main_block_index = content.find("if __name__ == '__main__':", start_index)

  if next_def_index != -1:
    end_index = next_def_index
  elif main_block_index != -1:
    end_index = main_block_index
  else:
    end_index = len(content)

  return content[start_index:end_index]

In [None]:
def remove_square_brackets(test_name):
  """Drop everything from the first '[' onwards.

  The name is only shortened when it contains both a '[' and a ']';
  otherwise it is returned unchanged.
  """
  bracket_pos = test_name.find('[')
  if bracket_pos == -1 or ']' not in test_name:
    return test_name
  return test_name[:bracket_pos]

In [None]:
def read_python_file(url, timeout=30):
  """Download `url` and return the response body as text.

  Args:
    url: Address of the file to fetch.
    timeout: Seconds to wait for the server before giving up. Without a
      timeout a stalled connection would block the notebook indefinitely.

  Returns:
    The response text, or None if the request failed (connection error,
    timeout or HTTP error status). Failures are reported with print().
  """
  try:
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()  # Turn HTTP error statuses into exceptions
  except requests.exceptions.RequestException as e:
    print(f"Error fetching the file: {e}")
    return None
  # The content of the file is in response.text
  return response.text

In [None]:
def special_cases(project_name, repo_url, commit_hash, test_file_path, class_name, test_name):
  """Apply hand-written per-project overrides to a test's lookup coordinates.

  For the projects listed below, the file path, class name, test name or
  repository URL taken from the dataset is replaced by hard-coded values.
  A trailing '.git' is also stripped from every repository URL.

  Args:
    project_name: Project name from the dataset.
    repo_url: Repository URL from the dataset.
    commit_hash: Commit hash from the dataset.
    test_file_path: Path of the test file inside the repository.
    class_name: Test class name, or '' when the test is a plain function.
    test_name: Test function name.

  Returns:
    The tuple (project_name, repo_url, commit_hash, test_file_path,
    class_name, test_name) with any overrides applied.
  """
  if project_name == 'Butter.MAS.PythonAPI' and commit_hash == 'f86ebe75df3826f62a268645cdbe4400b43fab07' and test_file_path.startswith('butter/mas/tests/clients/'):
    test_file_path = 'butter/mas/tests/clients/client_test.py'
    class_name = ''

  if project_name == 'SNData' and commit_hash == 'e4854f0dc357484b437b15f9dac15f7c589eff58' and class_name in ('Sako18Parsing', 'DR1Parsing'):
    test_file_path = 'tests/data_parsing_template_tests.py'
    class_name = 'PhotometricDataParsing' if test_name == 'test_standard_column_names' else ''

  if project_name == 'bootstrap_env' and commit_hash == 'ab68025d8f6b9a17d8feeed83e8aae26e3f28769' and test_file_path.startswith('bootstrap_env/tests/'):
    test_file_path = 'bootstrap_env/tests/base.py'

  if repo_url.endswith('.git'):
    repo_url = repo_url[:-4]

  if project_name == 'data-pypes' and test_name == 'pypes.logsetup.get_logconfig':
    test_name = 'get_logconfig'

  if project_name == 'elife-tools':
    end_index = test_name.find('_1_elife_02833_v2_xml')
    # Only truncate when the suffix is present: find() returns -1 otherwise,
    # and slicing with [:-1] would silently drop the last character.
    if end_index != -1:
      test_name = test_name[:end_index]

  if project_name == 'noipy':
    repo_url = 'https://github.com/pv8/noipy'

  if project_name == 'pymq' and commit_hash == '101857bca2b705c328d3bda3b26797b51e8ffb70':
    if class_name == 'SimplePubSubTest':
      test_file_path = 'tests/base/pubsub.py'
      class_name = 'AbstractPubSubTest'
    elif class_name == 'IpcQueueTest':
      test_file_path = 'tests/base/queue.py'
      class_name = 'AbstractQueueTest'
    elif class_name in ('IpcRpcTest', 'SimpleRpcTest'):
      test_file_path = 'tests/base/rpc.py'
      class_name = 'AbstractRpcTest'

  if project_name == 'pyswarms' and commit_hash == '08756526f39699eef28e515cac2ead17cef55710' and class_name == 'TestLocalBestOptimizer' and test_name == 'test_obj_with_kwargs':
    test_file_path = 'tests/optimizers/abc_test_optimizer.py'
    class_name = 'ABCTestOptimizer'

  if project_name == 'python-openflow' and commit_hash == 'a3387a7b28d529a3605aa1506a028e03394e4526' and class_name == 'TestFlowMod' and test_name == 'test_minimum_size':
    test_file_path = 'tests/unit/test_struct.py'
    class_name = 'TestStruct'

  return project_name, repo_url, commit_hash, test_file_path, class_name, test_name

In [None]:
# For every dataset row: download the test file at the recorded commit,
# narrow it down to the named class (if any) and test function, and keep the
# extracted source text together with the row's 'Detected' label.
all_test_methods = []
print('The indices below corresponds to the row number in the csv file shown on https://sites.google.com/view/ipflakies.')

for index, row in data.iterrows():
  # NOTE(review): the +2 presumably accounts for the CSV header line and
  # 1-based row numbering in the published sheet -- confirm.
  row_in_csv = index + 2
  project_name = row['Project_Name']
  repo_url = row['Project_URL']
  commit_hash = row['Project_Hash']

  # Test ids are split on '::' into "path::Class::test" or "path::test".
  id_parts = row['Test_id'].split('::')
  test_file_path = id_parts[0]
  if len(id_parts) > 2:
    class_name = id_parts[1]
    test_name = id_parts[2]
  else:
    class_name = ''
    test_name = id_parts[1]
  test_name = remove_square_brackets(test_name)
  detected = row['Detected']

  # Keep the identifiers as listed in the dataset; special_cases() may
  # redirect the lookup to a different file, class or function name.
  test_file_path_rec, class_name_rec, test_name_rec = test_file_path, class_name, test_name

  project_name, repo_url, commit_hash, test_file_path, class_name, test_name = special_cases(project_name, repo_url, commit_hash, test_file_path, class_name, test_name)

  # Raw-content URL of the test file at the recorded commit.
  new_url = repo_url.replace('github.com', 'raw.githubusercontent.com') + '/' + commit_hash + '/' + test_file_path

  content = read_python_file(new_url)
  if content is None:
    print(f"Row {row_in_csv}: {repo_url}/blob/{commit_hash}/{test_file_path}, Error fetching the file\n")
    continue

  # Narrow to the class first, when the test id names one.
  if class_name != '':
    content = match_class(content, class_name)
    if content is None:
      print(f"Row {row_in_csv}: In {repo_url}/blob/{commit_hash}/{test_file_path}, Class '{class_name}' not found.")
      continue

  # Then narrow to the test function itself.
  content = match_method(content, test_name)
  if content is None:
    print(f"Row {row_in_csv}: In {repo_url}/blob/{commit_hash}/{test_file_path}, Class '{class_name}' , Test function '{test_name}' not found.")
    continue

  test_method = {
      'Row': row_in_csv,
      'URL': repo_url + '/blob/' + commit_hash + '/' + test_file_path_rec,
      'Class': class_name_rec,
      'Test': test_name_rec,
      'Content': content,
      'Detected': detected,
  }
  all_test_methods.append(test_method)

The indices below corresponds to the row number in the csv file shown on https://sites.google.com/view/ipflakies.
Error fetching the file: 404 Client Error: Not Found for url: https://raw.githubusercontent.com/AshtonUPS/Py-MI-PS/2d22327c75bac1b58a4804a61e7a703ecc5ba978/src/PyMIPS/tests/register_test.py
Row 135: https://github.com/AshtonUPS/Py-MI-PS/blob/2d22327c75bac1b58a4804a61e7a703ecc5ba978/src/PyMIPS/tests/register_test.py, Error fetching the file

Error fetching the file: 404 Client Error: Not Found for url: https://raw.githubusercontent.com/cryptowatch/cw-sdk-python/92bd90db16dfc116c0708d19d27208d9bfc990c1/tests/test_api.py
Row 516: https://github.com/cryptowatch/cw-sdk-python/blob/92bd90db16dfc116c0708d19d27208d9bfc990c1/tests/test_api.py, Error fetching the file

Error fetching the file: 404 Client Error: Not Found for url: https://raw.githubusercontent.com/cryptowatch/cw-sdk-python/92bd90db16dfc116c0708d19d27208d9bfc990c1/tests/test_api.py
Row 517: https://github.com/cryptowat

In [None]:
# Save the collected records to a CSV file; the columns are the keys of the
# first record.
filename = "all_test_methods.csv"
with open(filename, mode="w", newline="") as file:
  column_names = list(all_test_methods[0])
  writer = csv.DictWriter(file, fieldnames=column_names)
  writer.writeheader()
  for record in all_test_methods:
    writer.writerow(record)

In [None]:
# Only use this code block if you are using Google Colab.
# If you are using Jupyter Notebook, please ignore this code block. You can directly upload the file to your Jupyter Notebook file systems.
from google.colab import files
import pandas as pd
import os
import requests
import csv
import random
from copy import deepcopy
import json

if not os.path.exists('all_test_methods.csv'):
  ## It will prompt you to select a local file. Click on “Choose Files” then select and upload the file.
  ## Wait for the file to be 100% uploaded. You should see the name of the file once Colab has uploaded it.
  uploaded = files.upload()

# Always (re)load the records from the CSV, whether it was just uploaded or
# was already present. The cells below compare test_method['Detected'] with
# the string 'True', so they need the string values csv.DictReader yields.
all_test_methods = []
# newline='' lets the csv module handle line endings inside quoted fields.
with open('all_test_methods.csv', 'r', newline='') as f:
  reader = csv.DictReader(f)
  for row in reader:
    all_test_methods.append(row)

Saving all_test_methods.csv to all_test_methods.csv


In [None]:
# Show the first loaded record to check its fields.
all_test_methods[0]

{'Row': '2',
 'URL': 'https://github.com/nordwind80/BT-Tracker/blob/558c15b399871c1ca11d0c4ae1eb598e3060931e/Tracker/tests/test_event.py',
 'Class': 'TestEvent',
 'Test': 'test_object',
 'Content': 'def test_object(self):\n        assert id(status.state) == id(status.state)\n\n    ',
 'Detected': 'True'}

In [None]:
# Number of test methods loaded.
len(all_test_methods)

1932

In [None]:
# Partition all records by label. 'Detected' is compared with the string
# 'True'; every other value counts as False.
true_methods = [method for method in all_test_methods if method['Detected'] == 'True']
false_methods = [method for method in all_test_methods if not method['Detected'] == 'True']
true_size = len(true_methods)
false_size = len(false_methods)

In [None]:
# Split sizes: 90% of all records form the training+validation pool and the
# remainder is the test set; 20% of that pool is held out for validation.
# int() truncates, so fractional parts are dropped from the pool/validation sizes.
total_size = len(all_test_methods)
training_and_validation_size = int(total_size * 0.9)
validation_size = int(training_and_validation_size * 0.2)
test_size = total_size - training_and_validation_size

In [None]:
# Randomly partition the records into training / validation / test sets.
# One shuffled copy is sliced into disjoint parts. This replaces the previous
# `dict not in list` filtering, which compared whole records against each
# other (quadratic) and relied on no two records being equal.
shuffled_methods = random.sample(all_test_methods, len(all_test_methods))
training_and_validation_set = shuffled_methods[:training_and_validation_size]
validation_set = training_and_validation_set[:validation_size]
training_set = training_and_validation_set[validation_size:]
test_set = shuffled_methods[training_and_validation_size:]
# Each slice is already in random order, so no further shuffling is needed.

In [None]:
# Separate the training and validation sets by label, and count the labels in
# the test set. 'Detected' is compared with the string 'True'; every other
# value counts as False.
true_training_set = [method for method in training_set if method['Detected'] == 'True']
false_training_set = [method for method in training_set if not method['Detected'] == 'True']

true_validation_set = [method for method in validation_set if method['Detected'] == 'True']
false_validation_set = [method for method in validation_set if not method['Detected'] == 'True']

true_test_size = sum(1 for method in test_set if method['Detected'] == 'True')
false_test_size = len(test_set) - true_test_size

In [None]:
# Report the size and label balance of every split, then the overall totals.
split_summary = [
    ('Total in Training set', len(training_set)),
    ('True in Training set', len(true_training_set)),
    ('False in Training set', len(false_training_set)),
    ('Total in Validation set', len(validation_set)),
    ('True in Validation set', len(true_validation_set)),
    ('False in Validation set', len(false_validation_set)),
    ('Total in Test set', len(test_set)),
    ('True in Test set', true_test_size),
    ('False in Test set', false_test_size),
    ('Total', total_size),
    ('Total True', len(true_methods)),
    ('Total False', len(false_methods)),
]
for label, count in split_summary:
  print(f'{label}: {count}')

Total in Training set: 1391
True in Training set: 941
False in Training set: 450
Total in Validation set: 347
True in Validation set: 240
False in Validation set: 107
Total in Test set: 194
True in Test set: 129
False in Test set: 65
Total: 1932
Total True: 1310
Total False: 622


In [None]:
# Balance the training set by repeating False examples until there are as
# many of them as True examples. Despite its name,
# `false_training_undersampled` holds an oversampled list.
num_true_training = len(true_training_set)
if not false_training_set:
  # Appending an empty list never grows the sample, so the loop below would
  # never terminate.
  raise ValueError('Cannot balance the training set: it contains no False examples.')

false_training_undersampled = deepcopy(false_training_set)
# Add whole copies of the False examples while a full copy still fits. The
# earlier condition (num_true >= 2 * current length) stopped too early once
# the list had doubled, leaving a remainder larger than the population that
# random.sample can draw from.
while num_true_training - len(false_training_undersampled) >= len(false_training_set):
  false_training_undersampled += false_training_set
# Top up with a random subset to reach exactly num_true_training. This raises
# ValueError if there are more False than True examples.
false_training_undersampled += random.sample(false_training_set, num_true_training - len(false_training_undersampled))

balanced_training_set = true_training_set + false_training_undersampled
random.shuffle(balanced_training_set)

print(f'True in Balanced Training set: {len(true_training_set)}')
print(f'False in Balanced Training set: {len(false_training_undersampled)}')
print(f'Total in Balanced Training set: {len(balanced_training_set)}')

True in Balanced Training set: 941
False in Balanced Training set: 941
Total in Balanced Training set: 1882


In [None]:
# Balance the validation set by repeating False examples until there are as
# many of them as True examples. Despite its name,
# `false_validation_undersampled` holds an oversampled list.
num_true_validation = len(true_validation_set)
if not false_validation_set:
  # Appending an empty list never grows the sample, so the loop below would
  # never terminate.
  raise ValueError('Cannot balance the validation set: it contains no False examples.')

false_validation_undersampled = deepcopy(false_validation_set)
# Add whole copies of the False examples while a full copy still fits. The
# earlier condition (num_true >= 2 * current length) stopped too early once
# the list had doubled, leaving a remainder larger than the population that
# random.sample can draw from.
while num_true_validation - len(false_validation_undersampled) >= len(false_validation_set):
  false_validation_undersampled += false_validation_set
# Top up with a random subset to reach exactly num_true_validation. This
# raises ValueError if there are more False than True examples.
false_validation_undersampled += random.sample(false_validation_set, num_true_validation - len(false_validation_undersampled))

balanced_validation_set = true_validation_set + false_validation_undersampled
random.shuffle(balanced_validation_set)

print(f'True in Balanced Validation set: {len(true_validation_set)}')
print(f'False in Balanced Validation set: {len(false_validation_undersampled)}')
print(f'Total in Balanced Validation set: {len(balanced_validation_set)}')

True in Balanced Validation set: 240
False in Balanced Validation set: 240
Total in Balanced Validation set: 480


In [None]:
# Turn every training example into a system/user/assistant conversation and
# write one JSON object per line to training_set.jsonl.
training_messages_list = [
    {
        "messages": [
            {"role": "system", "content": "You need to identify flaky tests."},
            {"role": "user", "content": f"Is this a flaky test? Only answer True or False.\n{test_method['Content']}"},
            {"role": "assistant", "content": test_method['Detected']},
        ]
    }
    for test_method in training_set
]

with open('training_set.jsonl', 'w') as jsonl_file:
  jsonl_file.writelines(json.dumps(entry) + "\n" for entry in training_messages_list)

In [None]:
# Turn every balanced-training example into a system/user/assistant
# conversation and write one JSON object per line to
# balanced_training_set.jsonl.
training_messages_list = [
    {
        "messages": [
            {"role": "system", "content": "You need to identify flaky tests."},
            {"role": "user", "content": f"Is this a flaky test? Only answer True or False.\n{test_method['Content']}"},
            {"role": "assistant", "content": test_method['Detected']},
        ]
    }
    for test_method in balanced_training_set
]

with open('balanced_training_set.jsonl', 'w') as jsonl_file:
  jsonl_file.writelines(json.dumps(entry) + "\n" for entry in training_messages_list)

In [None]:
# Turn every validation example into a system/user/assistant conversation
# and write one JSON object per line to validation_set.jsonl.
validation_messages_list = [
    {
        "messages": [
            {"role": "system", "content": "You need to identify flaky tests."},
            {"role": "user", "content": f"Is this a flaky test? Only answer True or False.\n{test_method['Content']}"},
            {"role": "assistant", "content": test_method['Detected']},
        ]
    }
    for test_method in validation_set
]

with open('validation_set.jsonl', 'w') as jsonl_file:
  jsonl_file.writelines(json.dumps(entry) + "\n" for entry in validation_messages_list)

In [None]:
# Turn every balanced-validation example into a system/user/assistant
# conversation and write one JSON object per line to
# balanced_validation_set.jsonl.
validation_messages_list = [
    {
        "messages": [
            {"role": "system", "content": "You need to identify flaky tests."},
            {"role": "user", "content": f"Is this a flaky test? Only answer True or False.\n{test_method['Content']}"},
            {"role": "assistant", "content": test_method['Detected']},
        ]
    }
    for test_method in balanced_validation_set
]

with open('balanced_validation_set.jsonl', 'w') as jsonl_file:
  jsonl_file.writelines(json.dumps(entry) + "\n" for entry in validation_messages_list)

In [None]:
# Turn every test example into a system/user/assistant conversation and
# write one JSON object per line to test_set.jsonl.
test_messages_list = [
    {
        "messages": [
            {"role": "system", "content": "You need to identify flaky tests."},
            {"role": "user", "content": f"Is this a flaky test? Only answer True or False.\n{test_method['Content']}"},
            {"role": "assistant", "content": test_method['Detected']},
        ]
    }
    for test_method in test_set
]

with open('test_set.jsonl', 'w') as jsonl_file:
  jsonl_file.writelines(json.dumps(entry) + "\n" for entry in test_messages_list)