In [2]:
import ipywidgets as widgets
from IPython.display import display, HTML
from backend_search_engine import load_data, preprocess_query, boolean_search, tfidf_ranking, bm25_ranking

# Load the corpus once at start-up. load_data() returns three values that the
# functions below share: `data` is indexed by document position to build
# result links, `processed_data` is passed to the TF-IDF and BM25 rankers,
# and `inverted_index` is passed to the Boolean search.
data, processed_data, inverted_index = load_data()

def format_results(results, data):
    """Render search results as an HTML fragment of links, one per line.

    The shape of ``results`` is detected from its first element:

    * Ranked results: a non-empty list of ``(doc_id, score)`` tuples.
      ``doc_id`` indexes ``data`` directly and each link is followed by the
      score formatted to four decimal places. Input order is preserved.
    * Boolean results: a non-empty list or set of integer ids. Each id is
      treated as 1-based (``data[doc_id - 1]``) and links are emitted in
      ascending id order.

    Ids that fall outside ``data`` are skipped. Titles and URLs are
    HTML-escaped so that characters such as ``<``, ``&`` or ``"`` in a record
    cannot break the generated markup.

    Args:
        results: Ranked or Boolean search results as described above.
        data: Sequence of dict-like records; ``"article_url"`` and
            ``"title"`` are read with ``.get`` and default to ``"#"`` and
            ``"Untitled Document"``.

    Returns:
        The links joined with ``"<br>\\n"``, or
        ``"No matching documents found."`` when ``results`` is empty, has an
        unrecognised shape, or contains no id that maps to a record.
    """
    from html import escape

    def link(record):
        # Escape both attribute and text content; str() keeps non-string
        # values (e.g. None) rendering the way the f-string used to.
        url = escape(str(record.get("article_url", "#")), quote=True)
        title = escape(str(record.get("title", "Untitled Document")))
        return f'<a href="{url}" target="_blank">{title}</a>'

    lines = []
    if isinstance(results, list) and results and isinstance(results[0], tuple):
        # Ranked results: (doc_id, score) pairs, doc_id used as a 0-based index.
        lines = [
            f"{link(data[doc_id])} (Score: {score:.4f})"
            for doc_id, score in results
            if 0 <= doc_id < len(data)
        ]
    elif (
        isinstance(results, (list, set))
        and results
        and isinstance(next(iter(results)), int)
    ):
        # Boolean results: ids are shifted by one before indexing.
        # NOTE(review): ranked ids are used 0-based above while these are
        # treated as 1-based -- confirm this matches the backend's id scheme.
        lines = [
            link(data[doc_id - 1])
            for doc_id in sorted(results)
            if 0 <= doc_id - 1 < len(data)
        ]

    if not lines:
        # Covers empty/unknown input and the case where every id was
        # filtered out (previously an empty string was returned).
        return "No matching documents found."
    return "<br>\n".join(lines)


def search_interface(query, operator, algorithm):
    """Run a search with the chosen algorithm and return the result as HTML.

    Args:
        query: Raw query text; passed through ``preprocess_query`` first.
        operator: Operator forwarded to ``boolean_search``; only used when
            ``algorithm`` is ``"Boolean"``.
        algorithm: One of ``"Boolean"``, ``"TF-IDF"`` or ``"BM25"``.

    Returns:
        An HTML string produced by ``format_results``, or a plain message
        when the data is not loaded, the algorithm is not recognised, or the
        search yields nothing.
    """
    if not data or not processed_data or not inverted_index:
        return "Data files not loaded correctly."

    query_terms = preprocess_query(query)

    if algorithm == "Boolean":
        results = boolean_search(query_terms, inverted_index, operator)
    elif algorithm == "TF-IDF":
        results = tfidf_ranking(query_terms, processed_data)
    elif algorithm == "BM25":
        results = bm25_ranking(query_terms, processed_data)
    else:
        # Previously fell through and returned None, which the caller then
        # handed to HTML(); always return a displayable string instead.
        return "Unsupported search algorithm."

    if not results:
        return "No matching documents found."
    return format_results(results, data)


# Free-text box for the user's query.
query_input = widgets.Text(
    placeholder="Enter your query",
    description="Query:",
)

# Operator choice that is handed to the search function.
operator_input = widgets.Dropdown(
    description="Operator:",
    options=["AND", "OR", "NOT"],
)

# Which retrieval method to run.
algorithm_input = widgets.Dropdown(
    description="Algorithm:",
    options=["Boolean", "TF-IDF", "BM25"],
)

# Area in which search results are rendered.
output = widgets.Output()

def on_search_click(_):
    """Handle a click on the search button.

    The argument supplied by the button is ignored. The output area is
    cleared, the search is run with the current widget values, and the
    returned markup is displayed inside the output area.
    """
    with output:
        output.clear_output()
        rendered = search_interface(
            query=query_input.value,
            operator=operator_input.value,
            algorithm=algorithm_input.value,
        )
        display(HTML(rendered))

# Create the search button and run on_search_click whenever it is clicked.
search_button = widgets.Button(description="Search")
search_button.on_click(on_search_click)

# Show the input controls, the button, and the output area for the results.
display(query_input, operator_input, algorithm_input, search_button, output)


Text(value='', description='Query:', placeholder='Enter your query')

Dropdown(description='Operator:', options=('AND', 'OR', 'NOT'), value='AND')

Dropdown(description='Algorithm:', options=('Boolean', 'TF-IDF', 'BM25'), value='Boolean')

Button(description='Search', style=ButtonStyle())

Output()