In [776]:
from tree_sitter_language_pack import get_binding, get_language, get_parser

# Fetch the three Python-grammar handles in one go (evaluated left to right).
python_binding, python_lang, python_parser = (
    get_binding('python'),   # an int pointing to the C binding
    get_language('python'),  # an instance of tree_sitter.Language
    get_parser('python'),    # an instance of tree_sitter.Parser
)

In [777]:
# Parse a small sample class; the parser is handed the source as UTF-8 bytes.
python_node = python_parser.parse(b"""
class Node:
    def __init__(self):
        self.x = 10
    def return_type(self):
        return self.x
""")

# Displayed as the cell output: the type of the parse result's root node.
python_node.root_node.type

'module'

In [778]:
# Keep a handle on the root node, then show its type and the raw bytes it spans.
root_node = python_node.root_node
print(root_node.type, root_node.text, sep="\n")

module
b'class Node:\n    def __init__(self):\n        self.x = 10\n    def return_type(self):\n        return self.x\n'


In [779]:
# List the root node's direct children. For the sample above this is the single
# top-level class_definition; the methods are nested inside it, not listed here.
root_node.children

[<Node type=class_definition, start_point=(1, 0), end_point=(5, 21)>]

In [780]:
# TODO: unfinished cell - the original note was cut off after "ge"; complete it or delete this cell.

In [781]:
# Same three handles as for Python, this time for the JavaScript grammar.
javascript_binding, javascript_lang, javascript_parser = (
    get_binding('javascript'),   # an int pointing to the C binding
    get_language('javascript'),  # an instance of tree_sitter.Language
    get_parser('javascript'),    # an instance of tree_sitter.Parser
)

In [782]:
# Parse a minimal JavaScript function, handed to the parser as UTF-8 bytes.
js_node = javascript_parser.parse(b"""
function hello() {
    return "Hello, world!";
}
""")

In [783]:
# Show the root node type of the JavaScript parse result (the Python sample's root was 'module').
js_node.root_node.type

'program'

In [784]:
from pathlib import Path

def extract_nodes(tree, source_code, source_code_path):
    """Collect every class and function definition found in a parsed Python tree.

    Args:
        tree: Parse result whose ``root_node`` is walked, e.g. the value
            returned by ``python_parser.parse(source_code)``.
        source_code: The exact source that produced ``tree``. Node byte offsets
            index into it, so ``bytes`` is expected; a ``str`` is encoded as
            UTF-8 first so the offsets still line up.
        source_code_path: Path of the parsed file. It is only used to fill the
            ``context`` metadata; the file is not opened here.

    Returns:
        A list of dicts in document (pre-order) order, one per
        ``class_definition`` / ``function_definition`` node, with the keys:

        * ``name``       - identifier of the definition (``None`` if the node
          has no ``name`` field, e.g. after a parse error).
        * ``signature``  - full source text of the node (header and body).
        * ``code_type``  - the node type string.
        * ``docstring``  - the definition's own docstring, or ``""``.
        * ``line`` / ``line_from`` - 1-based first line of the node.
        * ``line_to``    - 1-based last line of the node.
        * ``context``    - dict with ``module``, ``file_path``, ``file_name``,
          ``struct_name`` (class name for class nodes, otherwise ``None``)
          and ``snippet`` (same text as ``signature``).
        * ``node``       - the tree node itself.
    """
    allowed_node_types = ("class_definition", "function_definition")

    # Node offsets are byte offsets, so always slice bytes, never str.
    if isinstance(source_code, str):
        source_code = source_code.encode('utf-8')

    # The file metadata is identical for every node, so resolve it only once.
    file_path = Path(source_code_path).resolve()
    module = file_path.stem
    file_name = file_path.name

    def get_node_text(node):
        """Return the UTF-8 decoded source text covered by ``node``."""
        return source_code[node.start_byte:node.end_byte].decode('utf-8')

    def get_name(node):
        """Return the text of the node's ``name`` field, or None when absent."""
        name_node = node.child_by_field_name('name')
        return get_node_text(name_node) if name_node is not None else None

    def get_struct_name(node):
        """Return the class name for class nodes and None for anything else."""
        # Read the grammar's ``name`` field instead of splitting the source
        # text, which left a trailing ':' on classes without a base list.
        if node.type == 'class_definition':
            return get_name(node)
        return None

    def get_docstring(node):
        """Return the docstring that belongs to ``node`` itself, or ``""``.

        Only the first statement of the body is considered, so docstrings of
        nested methods/functions are no longer merged into the result.
        """
        body = node.child_by_field_name('body')
        if body is None:
            return ""

        for statement in body.children:
            if statement.type == "comment":
                # Comments ahead of the first statement do not displace a docstring.
                continue
            if statement.type != "expression_statement" or not statement.children:
                return ""
            expression = statement.children[0]
            if expression.type != "string":
                return ""
            # The text sits in string_content children, between the quote tokens.
            return "".join(
                get_node_text(part)
                for part in expression.children
                if part.type == "string_content"
            )

        return ""

    def get_context(node):
        """Build the per-node context dict (file metadata plus node text)."""
        return {
            "module": module,
            "file_path": str(file_path),
            "file_name": file_name,
            "struct_name": get_struct_name(node),
            "snippet": get_node_text(node)
        }

    results = []

    # Explicit stack instead of recursion: same pre-order visiting order, but
    # deeply nested trees cannot exhaust Python's recursion limit.
    pending = [tree.root_node]
    while pending:
        node = pending.pop()

        if node.type in allowed_node_types:
            first_line = node.start_point[0] + 1  # tree rows are 0-based
            results.append({
                "name": get_name(node),
                "signature": get_node_text(node),
                "code_type": node.type,
                "docstring": get_docstring(node),
                "line": first_line,
                "line_from": first_line,
                "line_to": node.end_point[0] + 1,
                "context": get_context(node),
                "node": node
            })

        # Push children reversed so the leftmost child is popped first.
        pending.extend(reversed(node.children))

    return results

In [785]:
source_code_path = "example.py"

# Load the file as raw bytes: the parser consumes bytes and extract_nodes
# slices the same buffer by node byte offsets.
source_code = Path(source_code_path).read_bytes()
python_node = python_parser.parse(source_code)

extract_nodes(python_node, source_code, source_code_path)

[{'name': 'calculate_area',
  'signature': 'def calculate_area(radius):\n    """Calculate the area of a circle given its radius."""\n    if radius <= 0:\n        raise ValueError("Radius must be positive")\n    return PI * (radius ** 2)',
  'code_type': 'function_definition',
  'docstring': 'Calculate the area of a circle given its radius.',
  'line': 12,
  'line_from': 12,
  'line_to': 16,
  'context': {'module': 'example',
   'file_path': '/home/devblin/desktop/project/reviewturtl/cookbooks/example.py',
   'file_name': 'example.py',
   'struct_name': None,
   'snippet': 'def calculate_area(radius):\n    """Calculate the area of a circle given its radius."""\n    if radius <= 0:\n        raise ValueError("Radius must be positive")\n    return PI * (radius ** 2)'},
  'node': <Node type=function_definition, start_point=(11, 0), end_point=(15, 29)>},
 {'name': 'Circle',
  'signature': 'class Circle:\n    """This is circle class."""\n\n    def __init__(self, radius):\n        self.radius 