In [None]:
import pandas as pd
import json
import os
from IPython.display import display, clear_output
import ipywidgets as widgets

# === Configuration ===
input_csv = "../data/test.csv"
output_csv = "../data/annotated_test_wip.csv"
json_column = "filters"
desc_column = "queries"


# === Load data ===
df = pd.read_csv(input_csv)

# Always begin with a blank description column: anything the input file
# already stores under this column name is deliberately discarded.
df[desc_column] = ""

# Resume an earlier session if a work-in-progress file is present. Saved
# descriptions take precedence; rows without one keep the blank value above.
if os.path.exists(output_csv):
    existing = pd.read_csv(output_csv)
    if desc_column in existing.columns:
        df[desc_column] = existing[desc_column].combine_first(df[desc_column])

# Begin at the first row whose description is missing or empty. When no such
# row exists the lookup yields NaN, in which case start from row 0.
unannotated = df[desc_column].isna() | df[desc_column].eq("")
start_index = df.loc[unannotated].index.min()
start_index = 0 if pd.isna(start_index) else start_index

# === Widgets ===
# Output area that receives the printed JSON of the entry being shown.
out = widgets.Output()

# Free-text box in which the description of the current entry is typed.
text_area = widgets.Textarea(
    layout=widgets.Layout(width="100%", height="100px"),
    description="Description:",
    placeholder="Type a description here...",
    value="",
)

# The three action buttons, built from (label, style) pairs.
save_button, skip_button, back_button = (
    widgets.Button(description=label, button_style=style)
    for label, style in (
        ("Save & Next", "success"),
        ("Skip", "warning"),
        ("Back", "info"),
    )
)

progress_label = widgets.Label()

# Tracks the row currently being annotated; its value starts at start_index
# and is bounded by the first and last row positions of df.
index_slider = widgets.IntSlider(
    description="Index:",
    value=start_index,
    min=0,
    max=len(df) - 1,
    step=1,
    continuous_update=False,
)

# === Functions ===
def display_entry(idx):
    """Render the entry at row ``idx``: its JSON plus the description editor.

    The cell output is cleared, the JSON stored in ``json_column`` is printed
    into ``out`` (pretty-printed when it parses, raw otherwise), the text area
    is loaded with the description already stored for the row, and the
    progress label, output area, text area and button row are displayed.

    Args:
        idx: Row label in ``df`` of the entry to show.
    """
    # Remove what the cell currently shows; wait=True postpones the clearing
    # until the replacement output is available.
    clear_output(wait=True)
    with out:
        # Inside the context this clears the Output widget's own content.
        clear_output(wait=True)
        progress_label.value = f"Entry {idx+1} / {len(df)}"
        raw_json = df.loc[idx, json_column]
        try:
            pretty_json = json.dumps(json.loads(raw_json), indent=2)
        except (TypeError, ValueError):
            # TypeError: the cell is not a string (e.g. a missing value).
            # ValueError: the string is not valid JSON (JSONDecodeError).
            # Either way, fall back to showing the raw cell content.
            pretty_json = str(raw_json)
        print(pretty_json)
    # A missing description (NaN) is truthy, so ``value or ""`` would hand NaN
    # to the text area; treat it explicitly as "no description yet".
    current = df.loc[idx, desc_column]
    text_area.value = "" if pd.isna(current) else str(current)
    buttons = widgets.HBox([back_button, save_button, skip_button])
    display(progress_label, out, text_area, buttons)

def save_progress():
    """Write the full annotation table to ``output_csv``, omitting the index."""
    df.to_csv(path_or_buf=output_csv, index=False)

def move_to_index(idx):
    """Show the entry at ``idx`` after pulling it into the valid row range."""
    last = len(df) - 1
    # Cap at the last row first, then floor at zero.
    if idx > last:
        idx = last
    if idx < 0:
        idx = 0
    index_slider.value = idx
    display_entry(idx)

def on_save_clicked(b):
    """Store the typed description, save to disk, and advance.

    The stripped content of the text area is written to the current row and
    the table is saved. The view then moves to the next later row that has no
    description. If there is none, the cell output is cleared and a closing
    message is printed; that message reports how many rows are still without
    a description (for example rows that were skipped earlier) instead of
    unconditionally claiming that everything is annotated.

    Args:
        b: The button that was clicked (unused).
    """
    idx = index_slider.value
    df.loc[idx, desc_column] = text_area.value.strip()
    save_progress()

    # Missing (NaN) and empty descriptions both count as unannotated. A plain
    # truthiness test would treat NaN as annotated because NaN is truthy.
    descriptions = df[desc_column]
    pending = descriptions.isna() | (descriptions == "")

    next_idx = idx + 1
    while next_idx < len(df) and not pending.loc[next_idx]:
        next_idx += 1
    if next_idx < len(df):
        move_to_index(next_idx)
        return

    # Nothing left after the current row. The table was already saved above
    # and has not changed since, so no second save is needed.
    clear_output()
    remaining = int(pending.sum())
    if remaining == 0:
        print("\u2705 Annotation complete. All entries are annotated.")
    else:
        print(
            "Reached the last entry. "
            f"Entries still without a description: {remaining}"
        )

def on_skip_clicked(b):
    """Go to the following entry without storing anything for this one."""
    following = index_slider.value + 1
    move_to_index(following)

def on_back_clicked(b):
    """Go to the preceding entry without storing anything for this one."""
    preceding = index_slider.value - 1
    move_to_index(preceding)

# Wire each button to its click handler
for _button, _handler in (
    (save_button, on_save_clicked),
    (skip_button, on_skip_clicked),
    (back_button, on_back_clicked),
):
    _button.on_click(_handler)

# === Start annotation ===
# Show the entry chosen as the starting point when the data was loaded.
display_entry(start_index)


In [None]:
import pandas as pd

# Export only the rows that received a description. dropna is restricted to
# the annotation column ("queries") so that an annotated row is not discarded
# merely because some unrelated column has a missing value.
# NOTE(review): descriptions saved as empty strings are expected to come back
# from read_csv as NaN and therefore be dropped here - confirm.
temp = pd.read_csv("../data/annotated_test_wip.csv")
annotated = temp.dropna(subset=["queries"])
annotated.to_csv("../data/annotated_test.csv", index=False)