In [1]:
## ✅ User-Friendly: Non-coders can easily generate structured JSON with just a Jupyter Notebook.
## ✅ Flexible: Works for both fine-tuning and retrieval-augmented generation (RAG).
## ✅ Sanitized & Robust: Handles special characters, tags, and formatting issues correctly.
## ✅ Extensible: Others can build on it—e.g., adding an export to CSV, batch processing, 
##     or integrating directly into a chatbot training loop.



import ipywidgets as widgets
from IPython.display import display, clear_output, FileLink
import json
import datetime

# Rebind the name `datetime` to the class (shadowing the module imported above) so `datetime.now()` can be called directly
from datetime import datetime

# Input form: one widget per field collected from the user
id_widget = widgets.Text(
    placeholder="Optional: Unique ID (leave blank for auto-generated)",
    description="ID:",
    layout=widgets.Layout(width="50%"),
)

date_widget = widgets.DatePicker(
    disabled=False,
    description="Date:",
)

# Single-line field for the question
question_widget = widgets.Text(
    placeholder="Enter your question here",
    description="Question:",
    layout=widgets.Layout(width="90%"),
)

# Multi-line field for the answer
answer_widget = widgets.Textarea(
    placeholder="Enter your answer here",
    description="Answer:",
    layout=widgets.Layout(height="150px", width="90%"),
)

tags_widget = widgets.Text(
    placeholder="Comma-separated tags",
    description="Tags:",
    layout=widgets.Layout(width="90%"),
)

submit_button = widgets.Button(
    button_style="success",
    description="Submit",
)

# Area that receives the messages printed by the submit handler
output = widgets.Output()

# Function to sanitize text input
def sanitize_text(text):
    """Flatten user-entered text onto a single trimmed line.

    Leading and trailing whitespace is stripped, and every line break
    (CRLF, lone CR, or LF) is replaced by a single space so that words on
    adjacent lines are never glued together.  Quotes and other special
    characters are left untouched; escaping them is the JSON serializer's
    job, not this function's.

    Args:
        text: Raw string taken from an input widget.

    Returns:
        The trimmed, single-line string.
    """
    # Normalize CRLF and lone CR to LF first, so a bare "\r" acts as a
    # separator instead of being deleted (which would merge two words).
    unified = text.strip().replace("\r\n", "\n").replace("\r", "\n")
    return unified.replace("\n", " ")

def process_tags(tags_string):
    """Split a comma-separated tag string into a clean list of tags.

    Each tag is trimmed and its internal whitespace runs are collapsed to a
    single space.  A tag is kept when it contains at least one alphanumeric
    character and consists only of alphanumerics, spaces, hyphens and
    underscores, so multi-word or hyphenated tags such as "fine-tuning" or
    "machine learning" survive while markup-like junk is dropped.

    Args:
        tags_string: Raw contents of the tags field.

    Returns:
        A list of cleaned tags in input order; an empty list when the input
        is empty or not a string.
    """
    if not tags_string or not isinstance(tags_string, str):
        return []  # Nothing usable was entered

    allowed_extra = "-_ "
    cleaned_tags = []
    for raw_tag in tags_string.split(","):
        # split()/join trims the ends and collapses inner whitespace runs
        tag = " ".join(raw_tag.split())
        has_content = any(ch.isalnum() for ch in tag)
        only_safe_chars = all(ch.isalnum() or ch in allowed_extra for ch in tag)
        if has_content and only_safe_chars:
            cleaned_tags.append(tag)

    return cleaned_tags
    
def on_submit(b):
    """Validate the form, build a JSON record and save it to a ``.json`` file.

    Reads the module-level input widgets, prints status messages into
    ``output`` and, on success, writes the record to the current working
    directory and displays a link to the file.

    The record keeps the ID exactly as entered; only the file name is
    derived from a sanitized copy of it.

    Args:
        b: The argument supplied by the button's click callback (unused).
    """
    # Local import: only this handler needs the UTC tzinfo object.
    from datetime import timezone

    with output:
        clear_output()  # Clear previous messages

        # Generate an ID if not provided
        rec_id = id_widget.value.strip() or "record_" + datetime.now().strftime("%Y%m%d%H%M%S")

        # Use provided date or default to the current UTC time.  A
        # timezone-aware "now" replaces the deprecated utcnow(); tzinfo is
        # dropped again so the emitted text keeps the "...Z" suffix format.
        if date_widget.value:
            date_value = date_widget.value.isoformat()
        else:
            utc_now = datetime.now(timezone.utc).replace(tzinfo=None)
            date_value = utc_now.isoformat() + "Z"

        # Sanitize and extract question & answer
        question_value = sanitize_text(question_widget.value)
        answer_value = sanitize_text(answer_widget.value)

        # Prevent saving empty questions/answers
        if not question_value or not answer_value:
            print("Error: Question and Answer fields cannot be empty!")
            return

        # Process tags properly
        tags_list = process_tags(tags_widget.value)

        # Create the structured JSON record
        record = {
            "id": rec_id,
            "date": date_value,
            "question": question_value,
            "answer": answer_value,
            "tags": tags_list
        }

        # ensure_ascii=False keeps non-ASCII characters readable in the file
        record_json = json.dumps(record, indent=2, ensure_ascii=False)

        # The ID is free text, so never use it verbatim as a path: anything
        # other than letters, digits, "-", "_" and "." becomes "_", and
        # leading dots are removed so the stem cannot be ".." or hidden.
        safe_stem = "".join(
            ch if ch.isalnum() or ch in "-_." else "_" for ch in rec_id
        ).lstrip(".")
        filename = (safe_stem or "record") + ".json"

        # Save JSON file; report I/O problems as a message, not a traceback
        try:
            with open(filename, "w", encoding="utf-8") as f:
                f.write(record_json)
        except OSError as exc:
            print("Error: could not save record:", exc)
            return

        print("✅ Record saved to:", filename)
        display(FileLink(filename))

# Register the click handler before the form is shown
submit_button.on_click(on_submit)

# Render the form top to bottom; the output area comes last so that
# messages appear underneath the Submit button
_form_rows = (
    id_widget,
    date_widget,
    question_widget,
    answer_widget,
    tags_widget,
    submit_button,
    output,
)
display(*_form_rows)

Text(value='', description='ID:', layout=Layout(width='50%'), placeholder='Optional: Unique ID (leave blank fo…

DatePicker(value=None, description='Date:', step=1)

Text(value='', description='Question:', layout=Layout(width='90%'), placeholder='Enter your question here')

Textarea(value='', description='Answer:', layout=Layout(height='150px', width='90%'), placeholder='Enter your …

Text(value='', description='Tags:', layout=Layout(width='90%'), placeholder='Comma-separated tags')

Button(button_style='success', description='Submit', style=ButtonStyle())

Output()