# In [None]:
# Install necessary packages if not already installed
# !pip install PyPDF2
# !pip install ipywidgets
# !pip install ollama
# !pip install pandas
# !pip install python-docx
# !pip install textract

# Enable ipywidgets extension (Run in a separate cell if not already enabled)
# !jupyter nbextension enable --py widgetsnbextension

# Import necessary libraries
import os
from PyPDF2 import PdfReader
import ipywidgets as widgets
from IPython.display import display, clear_output
import pandas as pd
import ollama
from docx import Document
import textract  # For handling .doc files

# Ask the user for the root folder to scan. Despite the name, this folder is
# searched recursively for .pdf, .txt, .doc and .docx files, not only PDFs.
PDF_DIR = input('Please enter the folder path')  # Read once, when this cell runs

def extract_text_from_pdf(pdf_path):
    """
    Extracts the text of every page of a PDF file.

    Parameters:
    - pdf_path (str): Path to the PDF file.

    Returns:
    - tuple: (text, num_pages). Every page that yields text contributes that
      text followed by a newline; pages without text only trigger a warning.
      If anything goes wrong the error is printed and ("", 0) is returned.
    """
    try:
        pdf = PdfReader(pdf_path)
        page_count = len(pdf.pages)
        chunks = []
        for number, page in enumerate(pdf.pages, 1):
            content = page.extract_text()
            if not content:
                # Pages with no extractable text are reported but still counted
                print(f"⚠️ Warning: No text found on page {number} of {os.path.basename(pdf_path)}.")
                continue
            chunks.append(content + "\n")
        return "".join(chunks), page_count
    except Exception as e:
        print(f"❌ Error reading {pdf_path}: {e}")
        return "", 0

def extract_text_from_txt(txt_path):
    """
    Reads a UTF-8 text file and counts its lines.

    Parameters:
    - txt_path (str): Path to the text file.

    Returns:
    - tuple: (text, num_lines), where num_lines is the number of lines in the
      text. If the file cannot be read or decoded the error is printed and
      ("", 0) is returned.
    """
    try:
        # 'utf-8-sig' reads plain UTF-8 unchanged and additionally strips a
        # leading byte-order mark, which would otherwise end up as a stray
        # '\ufeff' character at the start of the extracted text.
        with open(txt_path, 'r', encoding='utf-8-sig') as file:
            text = file.read()
        num_lines = len(text.splitlines())
        return text, num_lines
    except Exception as e:
        print(f"❌ Error reading {txt_path}: {e}")
        return "", 0

def extract_text_from_docx(docx_path):
    """
    Extracts the paragraph text of a .docx file.

    Parameters:
    - docx_path (str): Path to the .docx file.

    Returns:
    - tuple: (text, num_paragraphs), where text is the paragraphs joined by
      newlines. If anything goes wrong the error is printed and ("", 0) is
      returned.
    """
    try:
        document = Document(docx_path)
        paragraph_texts = [paragraph.text for paragraph in document.paragraphs]
        paragraph_count = len(document.paragraphs)
        return '\n'.join(paragraph_texts), paragraph_count
    except Exception as e:
        print(f"❌ Error reading {docx_path}: {e}")
        return "", 0

def extract_text_from_doc(doc_path):
    """
    Extracts the text of a legacy .doc file via textract.

    Parameters:
    - doc_path (str): Path to the .doc file.

    Returns:
    - tuple: (text, num_paragraphs), where num_paragraphs is the number of
      pieces obtained by splitting the text on blank lines. If anything goes
      wrong the error is printed and ("", 0) is returned.
    """
    try:
        raw_bytes = textract.process(doc_path)
        text = raw_bytes.decode('utf-8')
        # Paragraphs are approximated as runs of text separated by a blank line
        blocks = text.split('\n\n')
        return text, len(blocks)
    except Exception as e:
        print(f"❌ Error reading {doc_path}: {e}")
        return "", 0

def extract_text_from_document(file_path):
    """
    Dispatches to the extractor that matches the file's extension.

    The extension check is case-insensitive and covers .pdf, .txt, .docx and .doc.

    Parameters:
    - file_path (str): Path to the document.

    Returns:
    - tuple: Whatever the chosen extractor returns, i.e. (text, count). For an
      unsupported extension a message is printed and ("", 0) is returned.
    """
    lowered = file_path.lower()
    if lowered.endswith('.pdf'):
        return extract_text_from_pdf(file_path)
    if lowered.endswith('.txt'):
        return extract_text_from_txt(file_path)
    if lowered.endswith('.docx'):
        return extract_text_from_docx(file_path)
    if lowered.endswith('.doc'):
        return extract_text_from_doc(file_path)

    print(f"❌ Unsupported file type: {file_path}")
    return "", 0

def load_documents_recursive(path):
    """
    Walks a directory tree and collects metadata for every supported document.

    Supported files are those ending in .pdf, .txt, .doc or .docx (any case).
    Each one is run through extract_text_from_document to obtain its count;
    the extracted text itself is not kept.

    Parameters:
    - path (str): Root directory to search.

    Returns:
    - list: One dict per document with the keys 'Name', 'Relative Path',
      'Full Path' and 'Number of Pages/Lines'.
    """
    supported_suffixes = ('.pdf', '.txt', '.doc', '.docx')
    records = []
    for folder, _subfolders, filenames in os.walk(path):
        for filename in filenames:
            if not filename.lower().endswith(supported_suffixes):
                continue
            absolute = os.path.join(folder, filename)
            relative = os.path.relpath(absolute, path)
            print(f"📄 Processing: {relative}")
            # Only the count is needed here; the text is extracted again on demand
            _text, count = extract_text_from_document(absolute)
            records.append({
                'Name': filename,
                'Relative Path': relative,
                'Full Path': absolute,
                'Number of Pages/Lines': count,
            })
    return records

def construct_system_prompt():
    """
    Builds the system-level instructions sent to the model.

    Returns:
    - str: The fixed prompt text, with a leading and a trailing newline.
    """
    return (
        "\n"
        "You are a Document Management Assistant. "
        "Your task is to help me by answering questions related to the provided documents. "
        "Provide clear and concise answers based solely on the content of the documents."
        "\n"
    )

def query_ollama(system_prompt, conversation_history, document_context, model="llama3.2:3b"):
    """
    Asks an Ollama chat model a question about the supplied document text.

    The request consists of the system prompt, one user message carrying the
    document text, and then the conversation so far, in that order.

    Parameters:
    - system_prompt (str): The system-level instructions for the model.
    - conversation_history (list): A list of conversation messages.
    - document_context (str): The combined text from the selected documents.
    - model (str): The name of the Ollama model to use.

    Returns:
    - str or None: The content of the model's reply, or None if an error occurred.
    """
    try:
        instructions = {'role': 'system', 'content': system_prompt}
        context_message = {
            'role': 'user',
            'content': f"Here is the context from the documents:\n\n{document_context}",
        }
        # A new list is built, so the caller's history is left untouched
        messages = [instructions, context_message] + conversation_history

        reply = ollama.chat(model=model, messages=messages)
        content = reply['message']['content']
        print("✅ Received response from Ollama.")
        return content
    except Exception as e:
        print(f"❌ Error communicating with Ollama: {e}")
        return None

def interactive_pdf_qa():
    """
    Initializes and displays the interactive Document Q&A widgets in the Jupyter Notebook.

    Loads the supported documents found under PDF_DIR, shows them in a table,
    and renders a multi-select list, a question box, a submit button and an
    output area. Each submitted question is sent to Ollama together with the
    text of the selected documents and the conversation so far. Changing the
    selection resets the conversation and the cached document text.

    Returns:
    - None. If no supported documents are found, a warning is printed and
      nothing is displayed.
    """
    # Load documents and create DataFrame
    document_data = load_documents_recursive(PDF_DIR)
    if not document_data:
        print("⚠️ No supported document files found in the specified directory.")
        return
    df = pd.DataFrame(document_data)

    # Display the DataFrame
    display(df[['Name', 'Relative Path', 'Number of Pages/Lines']])

    # Build the option labels and a label -> full path lookup. Labels include
    # the relative path, which is unique per file, so the mapping is one-to-one.
    options = [f"{doc['Name']} ({doc['Relative Path']})" for doc in document_data]
    path_by_option = {
        option: doc['Full Path'] for option, doc in zip(options, document_data)
    }

    # Create widgets
    selection_widget = widgets.SelectMultiple(
        options=options,
        description='Select Documents:',
        disabled=False,
        layout=widgets.Layout(width='80%'),
        style={'description_width': 'initial'}
    )
    question_widget = widgets.Text(
        description='Question:',
        placeholder='Enter your question here',
        layout=widgets.Layout(width='80%'),
        disabled=False
    )
    submit_button = widgets.Button(
        description='Ask Question',
        button_style='primary',
        tooltip='Click to send your question and receive an answer.'
    )
    output = widgets.Output()

    # Initialize conversation history and document context
    conversation_history = []
    document_context = ''

    def on_submit(b):
        """Handles a click on the submit button: validates input, queries Ollama, prints the conversation."""
        nonlocal document_context
        with output:
            # Clear the output to prevent duplication
            clear_output()
            selected_options = list(selection_widget.value)
            if not selected_options:
                print("❌ No documents selected.")
                return
            question = question_widget.value.strip()
            if not question:
                print("❌ Please enter a question.")
                return

            # Process the documents only once per selection; the cached text is
            # cleared again by on_selection_change
            if not document_context:
                print("📄 Selected Document(s):")
                selected_paths = [path_by_option[option] for option in selected_options]
                for path in selected_paths:
                    print(f"- {path}")

                # Combine text from selected documents
                combined_text = ""
                for path in selected_paths:
                    text, num_pages = extract_text_from_document(path)
                    if text:
                        combined_text += f"\n\n--- Content from {os.path.basename(path)} ---\n\n{text}"
                    else:
                        print(f"⚠️ No text extracted from {os.path.basename(path)}.")

                if not combined_text.strip():
                    print("⚠️ No text extracted from the selected files.")
                    return

                # Set the document context
                document_context = combined_text

            # Append the user's question to the conversation_history
            conversation_history.append({
                'role': 'user',
                'content': question
            })

            # Construct the system prompt
            system_prompt = construct_system_prompt()

            # Query Ollama
            answer = query_ollama(system_prompt, conversation_history, document_context)
            if not answer:
                # Remove the unanswered question again. The question box is not
                # cleared on failure, so without this every retry would send the
                # same question as an additional consecutive user message.
                conversation_history.pop()
                print("❌ Failed to get a response from Ollama.")
                return

            # Append the assistant's response to the conversation_history
            conversation_history.append({
                'role': 'assistant',
                'content': answer
            })

            # Display the entire conversation history (excluding the document content)
            print("\n--- Conversation ---\n")
            for message in conversation_history:
                if message['role'] == 'user':
                    print(f"✅ {message['content']}\n")
                elif message['role'] == 'assistant':
                    print(f"**Assistant:** {message['content']}\n")

            # Clear the question widget after submission
            question_widget.value = ''

    # Reset conversation history and document context when selection changes
    def on_selection_change(change):
        """Drops the conversation and the cached document text when the selection changes."""
        nonlocal document_context
        if change['type'] == 'change' and change['name'] == 'value':
            conversation_history.clear()
            document_context = ''
            with output:
                clear_output()
                print("🔄 Conversation history has been reset due to document selection change.")

    selection_widget.observe(on_selection_change, names='value')

    # Link the submit button to the on_submit function
    submit_button.on_click(on_submit)

    # Display the widgets
    display(widgets.VBox([
        selection_widget,
        question_widget,
        submit_button,
        output
    ]))

# Initialize the interactive tool: loads the documents under PDF_DIR and
# displays the selection, question and output widgets in this cell
interactive_pdf_qa()
