In [2]:
from tree_sitter_language_pack import get_binding, get_language, get_parser

# Three handles onto the Python grammar:
#   python_binding -> an int pointing to the C binding
#   python_lang    -> an instance of tree_sitter.Language
#   python_parser  -> an instance of tree_sitter.Parser
python_binding = get_binding("python")
python_lang = get_language("python")
python_parser = get_parser("python")

In [3]:
# Parse a tiny two-method class; the parser consumes bytes, so the sample
# text is encoded before being handed over.
python_node = python_parser.parse(
    """
class Node:
    def __init__(self):
        self.x = 10
    def return_type(self):
        return self.x
""".encode("utf8")
)

# Displayed as the cell result: the type of the tree's root node.
python_node.root_node.type

'module'

In [4]:
# Keep a handle on the root of the parsed sample and show its node type
# followed by the raw source bytes it spans (one per line).
root_node = python_node.root_node
print(root_node.type, root_node.text, sep="\n")

module
b'class Node:\n    def __init__(self):\n        self.x = 10\n    def return_type(self):\n        return self.x\n'


In [4]:
# list the direct children of the root node (for this sample: the single top-level class definition)
root_node.children

[<Node type=class_definition, start_point=(1, 0), end_point=(5, 21)>]

In [5]:
# TODO(review): unfinished comment ("# ge") — complete this cell or delete it

In [5]:
# Same three handles as for Python, this time for the JavaScript grammar:
#   javascript_binding -> an int pointing to the C binding
#   javascript_lang    -> an instance of tree_sitter.Language
#   javascript_parser  -> an instance of tree_sitter.Parser
javascript_binding = get_binding("javascript")
javascript_lang = get_language("javascript")
javascript_parser = get_parser("javascript")

In [6]:
# Parse a one-function JavaScript sample; the text is encoded to bytes
# before it is passed to the parser.
js_node = javascript_parser.parse(
    """
function hello() {
    return "Hello, world!";
}
""".encode("utf8")
)

In [8]:
# show the type of the root node of the parsed JavaScript sample
js_node.root_node.type

'program'

In [29]:
from pathlib import Path

def extract_nodes(tree, source_code, source_code_path):
    """Collect every class and function definition found in a parsed tree.

    The tree is walked in pre-order (document order). Each node whose type is
    ``class_definition`` or ``function_definition`` produces one record.

    Args:
        tree: Parse result; only ``tree.root_node`` is used. Nodes must expose
            ``type``, ``children``, ``start_byte``, ``end_byte``,
            ``start_point``, ``end_point`` and ``child_by_field_name``.
        source_code: The bytes that were parsed. Node byte offsets are used to
            slice it, and the slices are decoded as UTF-8.
        source_code_path: Path of the source file; used only to fill the
            ``module``, ``file_path`` and ``file_name`` context fields.

    Returns:
        A list of dicts with the keys ``name``, ``signature``, ``code_type``,
        ``docstring``, ``line``, ``line_from``, ``line_to`` and ``context``.
        Line numbers are 1-based. ``context["struct_name"]`` is the class's own
        name for a class record, the name of the nearest enclosing class for a
        function record, and ``None`` for functions outside any class.
    """
    allowed_node_types = ("class_definition", "function_definition")

    # File-level metadata is the same for every record, so resolve it once
    # instead of once per matched node.
    file_path = Path(source_code_path).resolve()
    file_name = file_path.name
    module = file_path.stem

    def get_node_text(node):
        """Return the source text spanned by ``node``, or None if it is missing."""
        if node is None:
            return None
        return source_code[node.start_byte:node.end_byte].decode('utf-8')

    results = []

    # Explicit stack instead of recursion so deeply nested sources cannot hit
    # the interpreter's recursion limit. Each entry carries the name of the
    # nearest enclosing class so methods can report which class they belong to.
    pending = [(tree.root_node, None)]
    while pending:
        node, enclosing_class = pending.pop()

        if node.type in allowed_node_types:
            # Read the name from the grammar's `name` field rather than by
            # splitting the source text: whitespace splitting turned
            # "class Node:" into the struct name "Node:".
            name = get_node_text(node.child_by_field_name('name'))
            if node.type == 'class_definition':
                # A class is its own struct and the enclosing struct of
                # everything nested inside it.
                enclosing_class = name

            snippet = get_node_text(node)
            start_line = node.start_point[0] + 1  # points are 0-based (row, column)
            results.append({
                "name": name,
                "signature": snippet,
                "code_type": node.type,
                # Placeholder: docstring extraction is not implemented; the key
                # is kept so the record shape stays stable.
                "docstring": None,
                "line": start_line,
                "line_from": start_line,
                "line_to": node.end_point[0] + 1,
                "context": {
                    "module": module,
                    "file_path": str(file_path),
                    "file_name": file_name,
                    "struct_name": enclosing_class,
                    "snippet": snippet,
                },
            })

        # Push children reversed so they are popped left-to-right, which keeps
        # the output in the same pre-order as a recursive walk.
        pending.extend((child, enclosing_class) for child in reversed(node.children))

    return results

In [28]:
# Load the example module as raw bytes, parse it, and list every class and
# function definition that extract_nodes finds in it.
source_code_path = "example.py"
source_code = Path(source_code_path).read_bytes()

python_node = python_parser.parse(source_code)

extract_nodes(python_node, source_code, source_code_path)

[{'name': <Node type=identifier, start_point=(11, 4), end_point=(11, 18)>,
  'signature': 'def calculate_area(radius):\n    """Calculate the area of a circle given its radius."""\n    if radius <= 0:\n        raise ValueError("Radius must be positive")\n    return PI * (radius ** 2)',
  'code_type': 'function_definition',
  'docstring': None,
  'line': 12,
  'line_from': 12,
  'line_to': 16,
  'context': {'module': 'example',
   'file_path': '/home/devblin/desktop/project/reviewturtl/cookbooks/example.py',
   'file_name': 'example.py',
   'struct_name': None,
   'snippet': 'def calculate_area(radius):\n    """Calculate the area of a circle given its radius."""\n    if radius <= 0:\n        raise ValueError("Radius must be positive")\n    return PI * (radius ** 2)'}},
 {'name': <Node type=identifier, start_point=(18, 6), end_point=(18, 12)>,
  'signature': 'class Circle:\n    def __init__(self, radius):\n        self.radius = radius\n\n    def area(self):\n        return calculate_area(