# Helper functions

In [2]:
# Load the test dataset
import json
import os

def load_json_from_folder(folder_path):
  """
  Loads all JSON files from a specified folder.

  Files are read in sorted filename order so the resulting list (and any
  index into it, such as element 0) is the same on every run; os.listdir()
  on its own returns entries in arbitrary order.

  Args:
    folder_path: The path to the folder containing the JSON files.

  Returns:
    A list containing all JSON objects from the files. A file whose top-level
    value is a list contributes each of its elements; any other top-level
    value (e.g. a single object) is added as one element. Files that cannot
    be decoded as JSON are reported on stdout and skipped.
  """

  all_data = []
  for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith(".json"):
      continue
    filepath = os.path.join(folder_path, filename)
    try:
      # Read as UTF-8 explicitly instead of relying on the platform default
      # encoding, which differs between systems.
      with open(filepath, 'r', encoding='utf-8') as f:
        data = json.load(f)
    except json.JSONDecodeError as e:
      print(f"Error decoding JSON in file {filename}: {e}")
      continue
    if isinstance(data, list):
      all_data.extend(data)
    else:
      # extend() on a dict would add its keys rather than the object itself.
      all_data.append(data)
  return all_data

Total number of objects: 37639
First object: {'progid': '3f75c5788ea80f1ea8de77ed565a3281', 'orig_err_obj': {'msg': 'unbalanced (){}[]'}, 'anonymize_dict': {'<STRING>': [], '<unk>': ['test_delitem_keyerror', 'EntryBase', 'req_'], '<COMMENT>': []}, 'src': {'tok_format': 'def <unk> ( self ) : <NEWLINE> <INDENT> e = <unk> ( <unk> ( ) <NEWLINE> del e [ <STRING> ] <NEWLINE> <DEDENT>', 'string_format': 'def test_delitem_keyerror ( self ) :\n    e = EntryBase ( req_ ( )\n    del e [ "str" ]\n'}, 'pred': [{'tok_format': 'def <unk> ( self ) : <NEWLINE> <INDENT> e = <unk> ( <unk> ( ) ) <NEWLINE> del e [ <STRING> ] <NEWLINE> <DEDENT>', 'string_format': 'def test_delitem_keyerror ( self ) :\n    e = EntryBase ( req_ ( ) )\n    del e [ "str" ]\n', 'err_obj': 0, 'diff_metric': 1}, {'tok_format': 'def <unk> ( self ) : <NEWLINE> <INDENT> e = <unk> ( <unk> ) <NEWLINE> del e [ <STRING> ] <NEWLINE> <DEDENT>', 'string_format': 'def test_delitem_keyerror ( self ) :\n    e = EntryBase ( req_ )\n    del e [ 

In [4]:
import ast

def validate_code(code_str):
  """
  Validates Python code and returns error information if any.

  The code is only parsed with ast.parse; it is never executed.

  Args:
    code_str: The Python code as a string.

  Returns:
    None if the code parses, otherwise a string describing why it could not
    be parsed (the message of the raised exception).
  """
  try:
    ast.parse(code_str)
  except (SyntaxError, ValueError) as e:
    # SyntaxError covers ordinary syntax and indentation errors. ValueError is
    # what compile()/ast.parse is documented to raise for source containing
    # null bytes; report it as an error string too rather than letting one
    # bad snippet abort the caller.
    return str(e)
  return None  # No error

# Evaluate GitHub Python

## Load the dataset

In [None]:
# Folder holding the test-set JSON files; a relative path, so it is resolved
# against the current working directory.
github_python_dataset = "../github-python-test"
# Read the .json files in that folder into one combined list.
all_data = load_json_from_folder(github_python_dataset)

In [7]:
# Sanity-check the loaded data: count the records and inspect the first one.
total_records = len(all_data)
# Source snippet of the first record; raises IndexError if nothing was loaded.
first_code = all_data[0]['src']['string_format']

print(f"Total number of objects: {total_records}")
# The space after "\n" indents the first line of the printed snippet by one column.
print(f"First code:\n {first_code}")
# validate_code returns None when the snippet parses, otherwise the error message.
print(f"Error (if any): {validate_code(first_code)}")

Total number of objects: 37639
First code:
 def test_delitem_keyerror ( self ) :
    e = EntryBase ( req_ ( )
    del e [ "str" ]

Error (if any): '(' was never closed (<unknown>, line 2)
