### This code converts the Excel data dictionary into a Word document

In [1]:
import pandas as pd
from docx import Document
from docx.shared import Pt

### Inputs/Outputs

In [2]:
# Source workbook: the Excel data dictionary this notebook reads.
input_data_dictionary_excel = "../reports/data_dictionary.xlsx"
# Destination: the Word document this notebook writes.
# NOTE: the "dicitionary" spelling is kept because the save cell uses this exact name.
output_data_dicitionary_word = "../reports/data_dictionary.docx"

### Data Loading

In [3]:
# Load the two Excel sheets into pandas DataFrames.
# Passing a list of sheet names makes read_excel return a dict of
# DataFrames keyed by sheet name, so the workbook is read in a single call.
_sheets = pd.read_excel(
    input_data_dictionary_excel,
    sheet_name=['Variables', 'Response Options (Enums)'],
)
df_fields = _sheets['Variables']
df_enums = _sheets['Response Options (Enums)']

### Script/Method

In [4]:
# Create a new Word document with a title heading.
doc = Document()
doc.add_heading('Data Dictionary', 0)

# Ordered tuple rather than a set: iterating a set of strings has no
# guaranteed order (it can differ from one interpreter run to the next),
# which would shuffle the sections of the generated document between runs.
market_segments = ("All Respondents", "AirPassenger", "Employee")

# Emit one section per market segment, in the order listed above.
for segment in market_segments:
    doc.add_heading(f'Market Segment: {segment}', level=2)

    # Keep only the rows whose 'Market Segment' cell equals this segment.
    segment_fields = df_fields[df_fields['Market Segment'] == segment]

    for _, row in segment_fields.iterrows():
        field_name = row['Field']
        data_type = row['Data Type']
        response_option = row['Response Option']
        description = row['Description']

        # Heading plus three descriptive paragraphs for every field.
        doc.add_heading(f'{field_name}', level=3)
        doc.add_paragraph(f'Description: {description}')
        doc.add_paragraph(f'Data Type: {data_type}')
        doc.add_paragraph(f'Response Type: {response_option}')

        # 'Actual Value' marks a field without enumerated response options.
        if response_option == 'Actual Value':
            continue

        doc.add_paragraph('Response Options:')

        # Enum rows whose 'Enum' value matches this field's response option.
        enum_rows = df_enums[df_enums['Enum'] == response_option]
        if enum_rows.empty:
            # No code/label rows for this option: no table is added.
            continue

        # Two-column table: header row first, then one row per enum entry.
        table = doc.add_table(rows=1, cols=2)
        table.style = 'Table Grid'

        hdr_cells = table.rows[0].cells
        hdr_cells[0].text = 'Code'
        hdr_cells[1].text = 'Label'

        for _, enum_row in enum_rows.iterrows():
            row_cells = table.add_row().cells
            # Cell text must be a string; codes may be numeric in the sheet.
            row_cells[0].text = str(enum_row['Codes'])
            row_cells[1].text = str(enum_row['Labels'])

### Saving the document

In [5]:
# Save the assembled document to the output path set in the Inputs/Outputs
# cell (the variable name keeps its original spelling).
doc.save(output_data_dicitionary_word)