# Class Inventory

In [1]:
import os
import pandas as pd
import re
from typing import List, Tuple

In [2]:
def get_class_inventory(base_dir: str, layer: str) -> List[Tuple[str, str]]:
    """
    Return the classes defined in the Python files under ``base_dir/layer``.

    The files are scanned line by line with a regular expression rather than
    imported, so no module code is executed. Files whose names start with a
    double underscore (e.g. ``__init__.py``) are skipped.

    A line counts as a class definition when, after stripping surrounding
    whitespace, it starts with ``class <Name>`` followed by ``:``, ``(`` or
    ``[``. This covers classes without bases, base lists that continue on the
    following lines, generic classes, and nested (indented) classes. Because
    the scan is purely textual, a matching line inside a docstring or
    multi-line string is reported as well.

    Args:
        base_dir (str): The base directory; module paths are made relative to it.
        layer (str): The subdirectory of ``base_dir`` to search recursively.

    Returns:
        List[Tuple[str, str]]: A list of (class_name, module_path) tuples, where
        module_path is the dotted path of the file relative to ``base_dir``.
        Directories and files are visited in sorted order, so the result is
        deterministic for a given tree.
    """
    class_inventory = []

    # Full path to the directory where we will start the search
    search_path = os.path.join(base_dir, layer)

    # "class Name" followed by the start of a base list "(", a type-parameter
    # list "[", or the colon of a class without bases.
    class_pattern = re.compile(r"^class\s+(\w+)\s*[:(\[]")

    for root, dirs, files in os.walk(search_path):
        # Sorting in place makes os.walk descend in a stable order.
        dirs.sort()
        for file in sorted(files):
            if not file.endswith(".py") or file.startswith("__"):
                continue

            file_path = os.path.join(root, file)
            relative_path = os.path.relpath(file_path, base_dir)
            # Drop the ".py" suffix and turn path separators into dots.
            module_path = os.path.splitext(relative_path)[0].replace(os.sep, ".")

            try:
                with open(file_path, "r", encoding="utf-8") as f:
                    for line in f:
                        match = class_pattern.match(line.strip())
                        if match:
                            class_inventory.append((match.group(1), module_path))
            except (OSError, UnicodeDecodeError) as e:
                # Best effort: report unreadable files and keep scanning.
                print(f"Error reading file {file_path}: {e}")

    return class_inventory


# Example usage:
# base_dir = '/path/to/base/directory'
# layer = 'your_layer'
# class_inventory = get_class_inventory(base_dir, layer)
# for class_name, module_path in class_inventory:
#     print(f"Class: {class_name}, Module: {module_path}")

In [3]:
# Build the class inventory for the "infra" layer of the "discover" package
# and save it as a CSV file.
base_dir = "discover"
layer = "infra"
filepath = "notes/class_inventory_infra.csv"
class_inventory = get_class_inventory(base_dir, layer)
# Each inventory entry is a (class_name, module_path) tuple.
df = pd.DataFrame(data=class_inventory, columns=["Class", "Module"])
df.to_csv(filepath)