# Make Excel Data Dictionary from Data Model

This notebook inspects the Pydantic models defined in the `../data_model/` directory and renders them as a data dictionary in an Excel workbook.

In [2]:
import sys
import os
import importlib
# Put the sibling ../data_model/ directory at the front of the module search
# path; the `data_model` and `enums` imports further down are presumably
# resolved from there.
sys.path.insert(0, os.path.abspath("../data_model/"))

In [3]:
import inspect
from pydantic import BaseModel, Field
from openpyxl import Workbook
from typing import Optional, get_origin, get_args
import data_model
import enums as e
from enum import Enum

## Input/Output

In [4]:
# Path the generated Excel workbook is saved to (relative to the current
# working directory).
output_file = "../reports/data_dictionary.xlsx"

## Methods

In [5]:
def _resolve_field_class(annotation):
    """Reduce a field annotation to the single type that names its data type.

    ``Optional``/``Union`` wrappers collapse to their first non-``None``
    member, ``List[...]``/``list[...]`` to the element type and
    ``Annotated[...]`` to the annotated type; this repeats until nothing is
    left to unwrap.  Any other parameterised generic (for example
    ``Dict[str, int]``) is reduced to its runtime origin (``dict``).

    Args:
        annotation: The annotation object stored on a model field.

    Returns:
        The unwrapped type.  This is usually a class, but may be a non-class
        object (e.g. ``Literal``) when the annotation has no class behind it.
    """
    # Imported locally so the helper does not depend on extra file-level imports.
    import types
    import typing

    union_type = getattr(types, "UnionType", None)  # ``X | None`` syntax (3.10+)
    annotated = getattr(typing, "Annotated", None)  # available from 3.9
    none_type = type(None)

    current = annotation
    while True:
        origin = get_origin(current)
        if origin is None:
            # Plain class (or something that is not a generic alias at all).
            return current
        args = get_args(current)
        if not args:
            # Bare alias such as ``typing.List`` with no parameters.
            return origin
        if origin is typing.Union or (union_type is not None and origin is union_type):
            # Prefer the first real member so ``Union[None, X]`` documents X.
            current = next((arg for arg in args if arg is not none_type), args[0])
        elif origin is list or (annotated is not None and origin is annotated):
            current = args[0]
        else:
            return origin


def document_pydantic_model(model_cls, ws, enum_ws, enums_set):
    """Append one model's documentation rows to the data-dictionary sheets.

    A header row ``[class name, "Data Model", class docstring]`` is written to
    ``ws``, followed by one ``[field, data type, description]`` row per field
    and an empty separator row.  The first time an ``Enum`` type is met, its
    members are written to ``enum_ws`` as ``[enum name, value, member name]``
    rows followed by an empty row.

    Args:
        model_cls: Model class to document; its field mapping must provide
            ``annotation`` and ``description`` on every field.
        ws: Sheet-like object with ``append(row)`` receiving the model rows.
        enum_ws: Sheet-like object with ``append(row)`` receiving enum rows.
        enums_set: Set of enum classes already written to ``enum_ws``; it is
            updated in place so each enum is listed only once across calls.
    """
    class_name = model_cls.__name__
    class_docstring = model_cls.__doc__.strip() if model_cls.__doc__ else "No description available."

    # pydantic v2 names the field mapping ``model_fields`` and deprecates
    # ``__fields__``; fall back to the older attribute when it is absent.
    fields = getattr(model_cls, "model_fields", None)
    if fields is None:
        fields = model_cls.__fields__

    # Collect every row first so ``ws`` stays untouched if a field fails.
    field_rows = []
    for field_name, field_info in fields.items():
        description = field_info.description or "No description available."
        field_class = _resolve_field_class(field_info.annotation)
        # Not every annotation object carries ``__name__``; use its repr then.
        datatype = getattr(field_class, "__name__", None) or str(field_class)

        # ``issubclass`` raises TypeError for non-classes, hence the guard.
        is_enum = isinstance(field_class, type) and issubclass(field_class, Enum)
        if is_enum and field_class not in enums_set:
            enums_set.add(field_class)
            for e_value in field_class:
                enum_ws.append([field_class.__name__, e_value.value, e_value.name])
            enum_ws.append([])

        field_rows.append([field_name, datatype, description])

    # Add class name and class docstring
    ws.append([class_name, "Data Model", class_docstring])

    # Add field details (name, type, description)
    for row in field_rows:
        ws.append(row)
    ws.append([])  # Add an empty row for separation


In [6]:
def document_all_models(module, output_file):
    """Build the data-dictionary workbook for *module* and save it.

    The workbook's default sheet is titled "Pydantic Class Documentation" and
    receives the rows for every ``BaseModel`` subclass defined in ``module``
    itself; a second sheet, "Enum Mappings", receives the enum rows.

    Args:
        module: Imported module whose classes are scanned.
        output_file: Destination handed to the workbook's ``save`` method.
    """
    workbook = Workbook()

    # The sheet created with the workbook holds the per-model rows.
    model_sheet = workbook.active
    model_sheet.title = "Pydantic Class Documentation"
    model_sheet.append(["Field", "Data Type", "Description"])

    # Enum members go on their own sheet.
    enum_sheet = workbook.create_sheet(title="Enum Mappings")
    enum_sheet.append(["Enum", "Codes", "Labels"])

    # Shared across models so each enum is written only once.
    documented_enums = set()

    for _, candidate in inspect.getmembers(module, inspect.isclass):
        if not issubclass(candidate, BaseModel):
            continue
        # Skip classes that are merely imported into the module.
        if candidate.__module__ != module.__name__:
            continue
        document_pydantic_model(candidate, model_sheet, enum_sheet, documented_enums)

    workbook.save(output_file)

### Implementation

In [7]:
# Document every model defined in the imported `data_model` module and save
# the resulting workbook to `output_file`.
document_all_models(data_model, output_file)