# Make Excel Data Dictionary from Data Model

This notebook inspects the Pydantic data model in the `../data_model/` directory and renders a data dictionary as an Excel workbook (`../reports/data_dictionary.xlsx`).

In [1]:
import sys
import os
import importlib
# Put ../data_model/ (resolved against the current working directory) at the
# front of the import path so the `data_model` and `enums` modules imported in
# the next cell can be found.
sys.path.insert(0, os.path.abspath("../data_model/"))

In [2]:
import inspect
from pydantic import BaseModel, Field
from openpyxl import Workbook
from typing import Optional, get_origin, get_args
import data_model
import enums as e
from enum import Enum

## Input/Output

In [3]:
# Path of the Excel workbook to write; passed to document_all_models() in the
# final cell. A relative path is resolved against the current working directory.
output_file = "../reports/data_dictionary.xlsx"

## Methods

In [4]:
# Names of fields that already have a row on the "Variables" sheet. Shared
# across calls so a field declared by several models is only listed once.
done_fields = []


def _unwrap_optional(annotation):
    """Return the first non-None member of an Optional/Union annotation.

    Annotations that are not unions are returned unchanged.
    """
    # Imported locally: the notebook's import cell only brings Optional,
    # get_origin and get_args into scope.
    import types
    from typing import Union

    origin = get_origin(annotation)
    pep604_union = getattr(types, "UnionType", None)  # `X | None`, Python 3.10+
    is_union = origin is Union or (
        pep604_union is not None and origin is pep604_union
    )
    if not is_union:
        return annotation

    members = [arg for arg in get_args(annotation) if arg is not type(None)]
    return members[0] if members else annotation


def _is_plain_class(tp):
    """Return True when *tp* is an ordinary class rather than a generic alias."""
    # The get_origin() check rules out parameterised generics such as
    # list[int], which some Python versions also report as instances of type.
    return get_origin(tp) is None and isinstance(tp, type)


def _type_label(tp):
    """Return a short, human-readable name for a field type."""
    if _is_plain_class(tp):
        return tp.__name__
    # Generic aliases and special forms: fall back to their repr without the
    # "typing." prefix, e.g. "List[str]".
    return str(tp).replace("typing.", "")


def document_pydantic_model(model_cls, class_name, ws, enum_ws, enums_set):
    """Append data-dictionary rows for one model to the worksheets.

    Args:
        model_cls: Model class to document. Its fields are read from
            ``model_fields`` (falling back to ``__fields__``); each field
            info must expose ``description`` and ``annotation``.
        class_name: Label written to the last column of every row added for
            this model.
        ws: Worksheet (any object with ``append``) receiving one row per
            field: name, data type, response option, description, label.
        enum_ws: Worksheet receiving one row per member of every Enum used
            as a field type, followed by an empty row per Enum.
        enums_set: Set of Enum classes already written to ``enum_ws``;
            updated in place.

    Fields whose names are already in the module-level ``done_fields`` list
    are skipped, and newly written names are added to it.
    """
    # Prefer the pydantic v2 attribute; `__fields__` is the legacy spelling.
    fields = getattr(model_cls, "model_fields", None)
    if fields is None:
        fields = model_cls.__fields__

    field_details = {}
    for field_name, field_info in fields.items():
        description = field_info.description or "No description available."
        field_class = _unwrap_optional(field_info.annotation)

        datatype = _type_label(field_class)
        response_option = "Actual Value"
        # Guard issubclass(): it raises TypeError for non-class annotations
        # such as List[str].
        if _is_plain_class(field_class) and issubclass(field_class, Enum):
            response_option = field_class.__name__
            datatype = "int"
            if field_class not in enums_set:
                enums_set.add(field_class)
                for member in field_class:
                    enum_ws.append(
                        [field_class.__name__, member.value, member.name]
                    )
                enum_ws.append([])  # separator row between enums

        field_details[field_name] = (
            datatype, response_option, description, class_name,
        )

    # Computed variables: properties on the class that are not declared
    # fields. The ignored names are properties on the pydantic base model.
    ignored = {"__fields_set__", "model_extra", "model_fields_set"}
    for attr in dir(model_cls):
        if attr in fields or attr in ignored:
            continue
        member = getattr(model_cls, attr, None)
        if not isinstance(member, property):
            continue
        description = member.__doc__ or "Computed property."
        field_details[attr] = ("int", "Computed Value", description, class_name)

    # Write one row per field that has not been documented yet.
    for field, details in field_details.items():
        if field in done_fields:
            continue
        datatype, response_option, description, segment = details
        ws.append([field, datatype, response_option, description, segment])
        done_fields.append(field)

In [5]:
def document_all_models(module, output_file):
    """Write an Excel data dictionary for the model classes found in *module*.

    The workbook has two sheets: "Variables" (one row per field) and
    "Response Options (Enums)" (one row per enum member). The classes
    ``Respondent`` and ``Trip`` are documented under the label
    "All Respondents"; ``AirPassenger`` and ``Employee`` are documented under
    their own class names. Classes missing from *module* are skipped.

    Args:
        module: Imported module to scan for the model classes.
        output_file: Path the workbook is saved to.
    """
    # Each workbook starts from scratch: forget the fields recorded by any
    # earlier run, otherwise re-running this cell would produce a "Variables"
    # sheet containing only the header row.
    done_fields.clear()

    # Create a workbook and use its active worksheet for the variables
    wb = Workbook()
    ws = wb.active
    ws.title = "Variables"
    ws.append(["Field", "Data Type", "Response Option", "Description", "Market Segment"])

    # Second worksheet holds the enum code/label mappings
    enum_ws = wb.create_sheet(title="Response Options (Enums)")
    enum_ws.append(["Enum", "Codes", "Labels"])

    enums_set = set()

    # Scan the module once, then document the classes in a fixed order:
    # shared classes first, so fields they share with the segment-specific
    # classes are listed under "All Respondents".
    classes = dict(inspect.getmembers(module, inspect.isclass))
    segments = [
        ("Respondent", "All Respondents"),
        ("Trip", "All Respondents"),
        ("AirPassenger", "AirPassenger"),
        ("Employee", "Employee"),
    ]
    for model_name, segment in segments:
        model_cls = classes.get(model_name)
        if model_cls is not None:
            document_pydantic_model(model_cls, segment, ws, enum_ws, enums_set)

    # Save the workbook
    wb.save(output_file)

### Implementation

In [6]:
# Build the data dictionary for the imported `data_model` module and save it
# to `output_file`.
document_all_models(data_model, output_file)