# Semantic Kernel OpenAI Assistant Agent File Search

## Prepare the files

In [None]:
import os

# Folder (relative to the working directory) holding the documents to index.
file_directory = "Data/nasabooks"

# List all files in the directory.
# `filenames` is bound on both paths so later cells that read it do not fail
# with a NameError when the directory is missing.
try:
    filenames = os.listdir(file_directory)
except FileNotFoundError:
    filenames = []
    print(f"Directory '{file_directory}' not found.")
else:
    print(filenames)

# Get the full path of a file
def get_filepath_for_filename(filename: str) -> str:
    """Join ``filename`` onto the module-level ``file_directory`` and return the result."""
    return os.path.join(file_directory, filename)



## Reformat citations with the proper filenames

In [2]:
from semantic_kernel.contents.annotation_content import AnnotationContent

async def reformat_citations(agent, response):
    """Replace citation markers in a response with the names of the cited files.

    Args:
        agent: Object whose ``client.files.retrieve(file_id)`` coroutine returns
            a file record exposing a ``filename`` attribute.
        response: Message exposing ``items`` (scanned for ``AnnotationContent``
            entries) and ``content`` (the text to rewrite).

    Returns:
        ``response.content`` with every annotated quote replaced by
        ``" Source: "`` followed by the comma-separated cited filenames.
        The content is returned unchanged when there is nothing to replace.
    """
    # Extract the annotations
    annotations = [item for item in response.items if isinstance(item, AnnotationContent)]

    # Original response
    paragraph = response.content
    if not annotations:
        return paragraph

    # file_id -> filename, so each file is looked up at most once per response
    filename_by_file_id = {}
    # quote text -> ordered, de-duplicated list of filenames cited for it
    filenames_by_quote = {}

    for annotation in annotations:
        file_id = annotation.file_id
        quote = annotation.quote
        # Skip incomplete annotations: a None quote would make str.replace raise,
        # and an empty quote would insert the source text between every character.
        if not file_id or not quote:
            continue

        if file_id not in filename_by_file_id:
            cited_file = await agent.client.files.retrieve(file_id)
            filename_by_file_id[file_id] = cited_file.filename
        filename = filename_by_file_id[file_id]

        cited_names = filenames_by_quote.setdefault(quote, [])
        if filename not in cited_names:
            cited_names.append(filename)

    # Replace the citation texts with their corresponding filenames prefixed with " Source: "
    for quote, cited_names in filenames_by_quote.items():
        paragraph = paragraph.replace(quote, " Source: " + ", ".join(cited_names))

    return paragraph

## Step 1-2: Create an Agent and Thread

In [3]:
from semantic_kernel.agents.open_ai.azure_assistant_agent import AzureAssistantAgent
from semantic_kernel.contents.chat_message_content import ChatMessageContent
from semantic_kernel.contents.utils.author_role import AuthorRole
from semantic_kernel.kernel import Kernel

# Step 1: Create an assistant agent
agent = await AzureAssistantAgent.create(
        kernel=Kernel(),
        service_id="agent",
        name="SK_OpenAI_Assistant_Agent_File_Search",
        instructions="""
            The document store contains pages from a Nasa book.
            Always analyze the document store to provide an answer to the user's question.
            Never rely on your knowledge of information not included in the document store.
            Always format response using markdown.
            """,
        enable_file_search=True,
        vector_store_filenames=[get_filepath_for_filename(filename) for filename in filenames],
    )

# Step 2: Create a thread
thread_id = await agent.create_thread()

## Steps 3-5: Helper Function
3. Add a message to the thread
4. Run the Assistant
5. Display the Assistant's Response

In [4]:
async def run_agent(user_question):
    """Post ``user_question`` to the thread and print each response the agent yields.

    Uses the module-level ``agent`` and ``thread_id``. Responses that carry
    ``AnnotationContent`` items are passed through ``reformat_citations``
    before printing; all others are printed as-is.

    Args:
        user_question: Text added to the thread as a user message.
    """
    # STEP 3: Add a user question to the thread
    await agent.add_chat_message(
        thread_id=thread_id,
        message=ChatMessageContent(role=AuthorRole.USER, content=user_question),
    )

    # STEP 4: Invoke the agent to get a response
    async for response in agent.invoke(thread_id=thread_id):
        # A filtered list is never None, so test for the presence of
        # annotations rather than comparing against None.
        has_citations = any(isinstance(item, AnnotationContent) for item in response.items)

        # STEP 5: Print the Assistant response
        if has_citations:
            text = await reformat_citations(agent, response)
        else:
            text = response.content
        print(f"{text}", end="", flush=True)

In [None]:
# First question: run_agent adds it to the thread and prints the agent's reply.
user_question = "How did the wide floodplains in Queensland originate?"
await run_agent(user_question)

## Appending Messages to the Thread

In [None]:
# Follow-up question: run_agent posts to the same thread_id, so this message
# is added after the earlier exchange on that thread.
user_question = "What forms the Lower Amazon River?"
await run_agent(user_question)

## Display Chat History

In [None]:
# Print every message returned for the thread as "<role> : <content>".
async for message in agent.get_thread_messages(thread_id):
    print(f"{message.role} : {message.content}")

## Deleting Files, Thread, Agent

In [8]:
if agent is not None:
    [await agent.delete_file(file_id) for file_id in agent.file_search_file_ids]
    await agent.delete_thread(thread_id)
    await agent.delete()
