In [None]:
class CodeExtractor:

> Initialize code extractor with filtering options.

In [None]:
    def __init__(self, include_classes, include_methods, exclude_classes, exclude_methods):
        self.include_classes = include_classes or set()
        self.include_methods = include_methods or set()
        self.exclude_classes = exclude_classes or set()
        self.exclude_methods = exclude_methods or set()
        self.docstrings = []
        self.code_blocks = []
        self.docstring_blocks = []
        self.parent = None


> Check if a class should be included based on filtering options.

In [None]:
    def is_class_included(self, class_name):
        return (not self.include_classes or class_name in self.include_classes) and class_name not in self.exclude_classes


> Check if a method should be included based on filtering options.

In [None]:
    def is_method_included(self, method_name):
        return (not self.include_methods or method_name in self.include_methods) and method_name not in self.exclude_methods


> Remove docstrings from the body of a method or class.

In [None]:
    def remove_docstrings(self, body):
        if isinstance(body[0], ast.Expr) and isinstance(body[0].value, ast.Str):
            self.docstrings.append(body[0].value.s)
            body.pop(0)
        else:
            self.docstrings.append('')


> Store the docstring of a class or method.

In [None]:
    def store_docstring(self, node):
        docstring = ast.get_docstring(node)
        if isinstance(node, ast.ClassDef):
            name = node.name
        elif isinstance(node, ast.FunctionDef):
            name = node.name
        else:
            name = None
        if docstring:
            self.docstrings.append(docstring)
            self.docstring_blocks.append((name, docstring))
        else:
            self.docstrings.append(None)
            self.docstring_blocks.append((name, None))


> Extract method name and arguments from method definition.

In [None]:
    @staticmethod
    def extract_method_definitions(method):
        method_name = method.name
        method_args = []
        for arg in method.args.args:
            arg_name = arg.arg
            arg_type = ast.unparse(arg.annotation).strip() if arg.annotation else None
            method_args.append(f'{arg_name}: {arg_type}' if arg_type else arg_name)
        method_args_str = ', '.join(method_args)
        return (method_name, method_args_str)


In [None]:
    @staticmethod
    def extract_decorators(decorator_list, indent_level):
        decorators = '\n'.join([f"{' ' * 4 * indent_level}@{ast.unparse(d).strip()}" for d in decorator_list])
        return f'{decorators}\n' if decorators else ''


In [None]:
    @staticmethod
    def extract_code_block(body):
        code_block = ast.unparse(body)
        return '\n'.join(['    ' + line if line.strip() else line for line in code_block.splitlines()])


In [None]:
    @staticmethod
    def indent_code_block(code, level):
        return '\n'.join(['    ' * level + line if line.strip() else line for line in code.splitlines()])


> Visit class definition and extract relevant code and docstrings.

In [None]:
    def visit_ClassDef(self, node):
        if self.is_class_included(node.name):
            self.store_docstring(node)
            class_decorators = self.extract_decorators(node.decorator_list)
            self.remove_docstrings(node.body)
            class_code_block = f'{class_decorators}class {node.name}:'
            self.code_blocks.append(class_code_block)
            for method in node.body:
                if isinstance(method, ast.FunctionDef):
                    if self.is_method_included(method.name):
                        if method.name == '__init__' and len(method.body) == 1 and isinstance(method.body[0], ast.Pass):
                            continue
                        self.store_docstring(method)
                        decorators = self.extract_decorators(method.decorator_list, indent_level=1)
                        self.remove_docstrings(method.body)
                        (method_name, method_args_str) = self.extract_method_definitions(method)
                        method_body = self.indent_code_block(self.extract_code_block(method.body), 1)
                        method_code_block = f'{decorators}    def {method_name}({method_args_str}):\n{method_body}\n'
                        self.code_blocks.append(method_code_block)
        self.generic_visit(node)


> Visit function definition and extract relevant code and docstrings.

In [None]:
    def visit_FunctionDef(self, node):
        if isinstance(node.parent, ast.ClassDef):
            return
        if self.is_method_included(node.name):
            self.store_docstring(node)
            decorators = self.extract_decorators(node.decorator_list)
            self.remove_docstrings(node.body)
            (function_name, function_args_str) = self.extract_method_definitions(node)
            function_body = self.extract_code_block(node.body)
            function_code_block = f'{decorators}def {function_name}({function_args_str}):\n{function_body}\n'
            self.code_blocks.append(function_code_block)
        self.generic_visit(node)


> Visit all nodes in the AST and set the parent attribute.

In [None]:
    def generic_visit(self, node):
        for (field, value) in ast.iter_fields(node):
            if isinstance(value, list):
                for item in value:
                    if isinstance(item, ast.AST):
                        item.parent = node
                        self.visit(item)
            elif isinstance(value, ast.AST):
                value.parent = node
                self.visit(value)


In [None]:
def create_notebook(docstring_blocks, code_blocks, output_file):
    """Write extracted code and docstrings to a Jupyter notebook file.

    Args:
        docstring_blocks: One list per source file of ``(name, docstring)``
            pairs; a falsy docstring produces no markdown cell.
        code_blocks: One list per source file of code strings, paired
            positionally with ``docstring_blocks``.
        output_file: Path of the notebook file to write.
    """
    nb = nbf.v4.new_notebook()
    for (file_docstring_blocks, file_code_blocks) in zip(docstring_blocks, code_blocks):
        for ((name, docstring), code) in zip(file_docstring_blocks, file_code_blocks):
            if docstring:
                # Render the docstring as a markdown block quote above its code.
                formatted_docstring = '> ' + docstring.replace('\n', '\n> ')
                nb.cells.append(nbf.v4.new_markdown_cell(formatted_docstring))
            nb.cells.append(nbf.v4.new_code_cell(code))
    # Notebook files are UTF-8 JSON; do not depend on the platform default.
    with open(output_file, 'w', encoding='utf-8') as f:
        nbf.write(nb, f)


In [None]:
def extract_code(filenames, output_file, include_classes=None, include_methods=None, exclude_classes=None, exclude_methods=None):
    """Extract classes and functions from source files into one summary file.

    Each file is parsed and walked with a single ``CodeExtractor``. The
    regenerated code of all files is written to ``output_file``, each file's
    section introduced by a ``# <basename>:`` comment line.

    Args:
        filenames: Paths of the Python files to process.
        output_file: Path of the summary file to write.
        include_classes: Forwarded to ``CodeExtractor``.
        include_methods: Forwarded to ``CodeExtractor``.
        exclude_classes: Forwarded to ``CodeExtractor``.
        exclude_methods: Forwarded to ``CodeExtractor``.

    Returns:
        The extractor, with ``code_blocks`` and ``docstring_blocks`` replaced
        by lists holding one sub-list per input file.
    """
    extractor = CodeExtractor(include_classes, include_methods, exclude_classes, exclude_methods)
    desired_code_blocks = []
    file_code_blocks = []
    file_docstring_blocks = []
    for filename in filenames:
        # Python source defaults to UTF-8; do not rely on the locale encoding.
        with open(filename, 'r', encoding='utf-8') as f:
            code = f.read()
        ast_tree = ast.parse(code, filename=filename)
        ast.fix_missing_locations(ast_tree)
        extractor.visit(ast_tree)
        desired_code = '\n'.join(extractor.code_blocks)
        if desired_code:
            file_name_only = os.path.basename(filename)
            desired_code_blocks.append(f'# {file_name_only}:\n{desired_code}\n')
        # Snapshot this file's results, then reset the buffers for the next one.
        file_code_blocks.append(extractor.code_blocks[:])
        file_docstring_blocks.append(extractor.docstring_blocks[:])
        extractor.code_blocks.clear()
        extractor.docstring_blocks.clear()
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write('\n'.join(desired_code_blocks))
    extractor.code_blocks = file_code_blocks
    extractor.docstring_blocks = file_docstring_blocks
    return extractor


In [None]:
def filter_files(path, include=None, exclude=None, ignore_dirs=None, ignore_files=None):
    """Collect file paths under ``path`` that pass the name filters.

    Args:
        path: Root directory to walk.
        include: fnmatch patterns; when non-empty a file name must match at
            least one of them.
        exclude: fnmatch patterns; a file name matching any is dropped.
        ignore_dirs: Directory base names to skip entirely; ``None`` selects
            the built-in list.
        ignore_files: fnmatch patterns of file names to skip; ``None``
            selects the built-in list.

    Returns:
        List of matching file paths (``root`` joined with the file name), in
        ``os.walk`` order.
    """
    if ignore_dirs is None:
        ignore_dirs = ['__pycache__', '.pytest_cache', 'v', 'cache', '.git', '.idea', 'objects', 'info', 'logs', 'refs', 'hooks', 'inspectionProfiles', 'b2', 'pack', '00', '6c', 'e6', 'heads', 'tags', 'remotes', 'origin', 'bb', 'f3', '4b', '72', '88', '46', '08']
    if ignore_files is None:
        ignore_files = ['*.pyc', '__init__.py', '.DS_Store', '*.yaml', '*.json', '*.txt', '*.md', '*.csv', '*.png', '*.jpg', '.idea', '*.git', '*.gitignore', '*.pylintrc', '.ipynb', '*.ipynb', '*.pkl', '*.pickle', '*code_summary.py*', '*scratch*', '*test*']
    filtered_files = []
    for (root, dirs, files) in os.walk(path):
        # Prune in place so os.walk never descends into an ignored directory;
        # merely skipping the directory's own files would still visit its
        # subdirectories.
        dirs[:] = [d for d in dirs if d not in ignore_dirs]
        # After pruning only the starting directory itself can still match.
        if os.path.basename(root) in ignore_dirs:
            continue
        for file in files:
            if include and (not any((fnmatch.fnmatch(file, pattern) for pattern in include))):
                continue
            if exclude and any((fnmatch.fnmatch(file, pattern) for pattern in exclude)):
                continue
            if any((fnmatch.fnmatch(file, pattern) for pattern in ignore_files)):
                continue
            filtered_files.append(os.path.join(root, file))
    return filtered_files


In [None]:
def copy_directory_tree(path):
    """Build an indented text tree of ``path`` and copy it to the clipboard.

    Hidden directories (leading ``.``) and the cache directories listed in
    ``ignore_dirs`` are not descended into; files matching ``ignore_files``
    are left out. Each directory is written as ``name/`` and every nesting
    level adds four spaces of indentation.

    Args:
        path: Root directory to render.

    Returns:
        The tree as a single string (also passed to ``pyperclip.copy``).
    """
    ignore_dirs = {'__pycache__', '.pytest_cache'}
    ignore_files = ['*.pyc', '__init__.py', '.DS_Store', '*.yaml', '*.json', '*.txt', '*.md', '*.csv', '*.png', '*.jpg', '.idea', '*.git', '*.gitignore', '*.pylintrc', '.ipynb', '*.ipynb', '*.pkl', '*.pickle', '*code_summary.py*', '*scratch*', '*test*']
    lines = []
    for (root, dirs, files) in os.walk(path):
        dirs[:] = [d for d in dirs if not d.startswith('.') and d not in ignore_dirs]
        # Derive the depth from the relative path: plain string replacement
        # miscounts when ``path`` ends with a separator or recurs inside root.
        rel = os.path.relpath(root, path)
        level = 0 if rel == os.curdir else rel.count(os.sep) + 1
        indent = ' ' * 4 * level
        # normpath drops a trailing separator so the root keeps its name.
        dir_name = os.path.basename(os.path.normpath(root))
        lines.append(f'{indent}{dir_name}/\n')
        subindent = ' ' * 4 * (level + 1)
        for file in files:
            if any((fnmatch.fnmatch(file, pat) for pat in ignore_files)):
                continue
            lines.append(f'{subindent}{file}\n')
    tree = ''.join(lines)
    pyperclip.copy(tree)
    return tree


In [None]:
def extract_py_files(tree_str):
    """Return the ``.py`` entries found in a directory-tree string.

    The text is stripped and split on newlines; every line that ends with
    ``.py`` is returned with its surrounding whitespace removed.
    """
    rows = tree_str.strip().split('\n')
    return [row.strip() for row in rows if row.endswith('.py')]


In [None]:
def read_multiline_input(prompt):
    """Print ``prompt`` and read lines from stdin until an empty line.

    Returns:
        The lines entered before the first empty one, joined with newlines.
    """
    print(prompt)
    # iter() with a sentinel keeps calling input() until it returns ''.
    entered = list(iter(input, ''))
    return '\n'.join(entered)


In [None]:
def main():
    """Interactive entry point: choose files, extract code, write outputs.

    Prompts for a source directory, copies its tree to the clipboard, lets
    the user paste back an edited tree to restrict the selection, then
    writes the extracted summary file and the matching notebook.
    """
    output_file = '/Users/mhodges/code_summarizer/output/extracted_code_summary.py'
    output_notebook = '/Users/mhodges/code_summarizer/output/extracted_code_summary.ipynb'
    source_directory = input('Enter the source directory: ')
    tree_string = copy_directory_tree(source_directory)
    tree_string_input = read_multiline_input('Paste the directory tree string to filter directory code (Press enter twice to finish):')
    # An empty paste means "use the whole generated tree".
    if not tree_string_input:
        tree_string_input = tree_string
    include = extract_py_files(tree_string_input)
    print('Extracting:', include)
    # Pass every filter explicitly: None selects the built-in ignore lists
    # and disables the class/method name filters.
    filtered_files = filter_files(source_directory, include=include, exclude=None, ignore_dirs=None, ignore_files=None)
    extractor = extract_code(filtered_files, output_file, None, None, None, None)
    create_notebook(extractor.docstring_blocks, extractor.code_blocks, output_notebook)
