In [1]:
import os
from io import BytesIO
from urllib.parse import urlparse

import requests
import rich
from dotenv import load_dotenv
from openai import OpenAI

In [2]:
# Pull settings from a local .env file (if one exists) into the environment.
load_dotenv()

api_key = os.getenv('OPENAI_API_KEY')
MODEL = "gpt-4o-mini"

# Hand the key to the client explicitly so the value read above is actually
# the one in use, instead of leaving `api_key` as an unused variable.
openai = OpenAI(api_key=api_key)

## File Search is available only in the Responses API

https://platform.openai.com/docs/guides/tools-file-search

File Search works with a vector store, which we need to create within OpenAI. The entire process, including storing the file, generating embeddings, and searching within the vector store, is managed by OpenAI.

The `create_file` function uploads a file to the OpenAI API and supports both remote and local files.
If the file path is a URL, it downloads the file content, keeps it in memory, and then uploads it.

In [3]:
# Helper that uploads a remote or local file to OpenAI
def create_file(openai, file_path, timeout=30):
    """Upload a file to OpenAI and return the ID of the created file.

    Args:
        openai: Client object exposing ``files.create``.
        file_path: Either an ``http://`` / ``https://`` URL or a local path.
        timeout: Seconds to wait for the download; only used for URLs.

    Returns:
        The ``id`` attribute of the object returned by ``openai.files.create``.

    Raises:
        ValueError: If the URL path does not end in a usable file name.
        requests.HTTPError: If the download answers with an error status.
    """
    if file_path.startswith(("http://", "https://")):
        # Take the name from the URL *path* only, so a query string or
        # fragment ("report.pdf?version=2") does not leak into the file name.
        file_name = urlparse(file_path).path.rstrip("/").rsplit("/", 1)[-1]
        if not file_name:
            raise ValueError(f"Cannot derive a file name from URL: {file_path}")

        # A timeout keeps the notebook from hanging on an unresponsive host.
        response = requests.get(file_path, timeout=timeout)
        # Stop here on 4xx/5xx instead of uploading an error page as the file.
        response.raise_for_status()

        result = openai.files.create(
            file=(file_name, BytesIO(response.content)),
            purpose="assistants",
        )
    else:
        # Local path: stream the file straight from disk.
        with open(file_path, "rb") as file_content:
            result = openai.files.create(
                file=file_content,
                purpose="assistants",
            )
    print(result.id)
    return result.id

##### Call function to upload a file

In [None]:
# Upload the sample PDF from its public URL and keep the returned file ID.
# To upload a local file instead:
#   create_file(openai, "docs/Panaversity-Certified-Agentic-and-Robotic-AI-Engineer.pdf")
file_id = create_file(
    openai,
    "https://cdn.openai.com/API/docs/deep_research_blog.pdf",
)

file-4GoREtVZ14bPdCKHXsva8k


### Create a vector store

In [5]:
# Create the vector store that will hold the uploaded file, then show its ID.
vector_store = openai.vector_stores.create(name="knowledge_base_2")
print(vector_store.id)

vs_67e389ea42008191964bfc8f0d0e76ea


### Add a file to a vector store

In [None]:
# Attach the uploaded file and block until OpenAI reaches a terminal
# processing state. A plain `create` returns while the file is still
# "in_progress", so on Run All the later search cells could query an empty store.
result = openai.vector_stores.files.create_and_poll(
    vector_store_id=vector_store.id,
    file_id=file_id,
)
rich.print(result)

### Check the status of files in vector store

In [7]:
# List the files attached to the vector store and print the listing.
result = openai.vector_stores.files.list(vector_store_id=vector_store.id)
rich.print(result)

Now, in the Responses API call, we can provide the `file_search` tool together with `vector_store_ids`.

The annotation object in the response provides information about the cited file:
```
AnnotationFileCitation(
    file_id='file-4GoREtVZ14bPdCKHXsva8k',
    index=1939,
    type='file_citation',
    filename='deep_research_blog.pdf'
)
```

In [8]:
# Ask a question and let the model search the vector store for supporting text.
response = openai.responses.create(
    model=MODEL,
    input="What is deep research by OpenAI?",
    tools=[{
        "type": "file_search",
        "vector_store_ids": [vector_store.id],
    }],
)

# `output_text` collects the answer text wherever the message sits in
# `response.output`, so this does not break if the model skips the tool call
# (indexing `output[1]` would raise IndexError in that case).
rich.print(response.output_text)

# Full list of output items: the file search call plus the annotated message.
rich.print(response.output)

You can customize the number of results retrieved from the vector store with `max_num_results`.

The effect is only visible in the output if you also pass `include=["file_search_call.results"]`, which adds the retrieved text chunks to the response so you can compare them.

In [9]:
# Same question, but cap retrieval at two chunks and ask the API to return them.
response = openai.responses.create(
    model=MODEL,
    input="What is deep research by OpenAI?",
    tools=[{
        "type": "file_search",
        "vector_store_ids": [vector_store.id],
        "max_num_results": 2,
    }],
    include=["file_search_call.results"],  # return the retrieved text chunks too
)

# `output_text` does not depend on where the message sits in `response.output`.
rich.print(response.output_text)

# Select output items by type rather than by position, so the cell keeps
# working if the order or number of output items changes.
for item in response.output:
    if item.type == "file_search_call":
        rich.print(item.results)
    elif item.type == "message":
        rich.print(item)