Automatically retrieve the specified company's annual 10-K SEC filing report for the given year.

The assistant can decide when to call predefined functions, enhancing flexibility and autonomy.

Functionality
Web Search:
Uses the Google Custom Search API to find relevant web pages or PDFs.
Web Page Retrieval:
Fetches HTML content, allowing the assistant to analyze pages for links.
PDF Downloading:
Downloads PDFs and saves them locally, completing the information retrieval process.
Assistant Reasoning:
The assistant can interpret the query, decide on actions, and handle complex tasks like navigating web pages.

In [5]:
import os
import requests
import json
import openai
from dotenv import load_dotenv

# Load environment variables
load_dotenv()
OPENAI_API_KEY, GOOGLE_API_KEY, GOOGLE_CSE_ID = (
    os.environ.get(name)
    for name in ("OPENAI_API_KEY", "GOOGLE_API_KEY", "GOOGLE_CSE_ID")
)

# The openai module itself is used as the client object: alias it, then
# attach the API key read above.
client = openai
client.api_key = OPENAI_API_KEY

# Function to perform a web search using Google Custom Search API
def web_search(query, num_results=5, timeout=30):
    """Search the web through the Google Custom Search API.

    Args:
        query: The search query string.
        num_results: Number of results to request; sent as the API's
            ``num`` parameter. Defaults to 5, the previously hard-coded value.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A JSON string. On success ``{"urls": [...]}`` with the ``link`` of
        every returned item; on any failure ``{"error": "<reason>"}``.
        The function never raises for network or HTTP problems, so the
        caller can always forward the result as a tool response.
    """
    if not GOOGLE_API_KEY or not GOOGLE_CSE_ID:
        # Without both values the request cannot succeed; fail with a clear
        # message instead of an opaque HTTP error.
        return json.dumps({
            "error": "Web search is not configured: GOOGLE_API_KEY and GOOGLE_CSE_ID must be set."
        })

    params = {
        "key": GOOGLE_API_KEY,
        "cx": GOOGLE_CSE_ID,
        "q": query,
        "num": num_results,
    }
    try:
        response = requests.get(
            "https://www.googleapis.com/customsearch/v1",
            params=params,
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Report only the exception type: the exception text can contain the
        # full request URL, which carries the API key as a query parameter.
        return json.dumps({"error": f"Web search request failed ({type(exc).__name__})"})

    if response.status_code != 200:
        return json.dumps({"error": f"Web search failed with status code {response.status_code}"})

    try:
        results = response.json()
    except ValueError:
        return json.dumps({"error": "Web search returned a response that is not valid JSON."})

    # A response without "items" means no hits; items lacking "link" are skipped.
    urls = [item["link"] for item in results.get("items", []) if "link" in item]
    return json.dumps({"urls": urls})

# Function to fetch web page content
def get_web_page(url, timeout=30):
    """Fetch a web page and return its body text wrapped in JSON.

    Args:
        url: The URL of the page to fetch.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A JSON string. On success ``{"content": "<response text>"}``; on any
        failure (network error, malformed URL, non-200 status)
        ``{"error": "<reason>"}``. The function never raises for request
        problems, so the caller can always forward the result.
    """
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        # Covers connection errors and timeouts as well as invalid URLs,
        # which are likely because the URL is chosen by the model.
        return json.dumps({"error": f"Failed to fetch web page: {exc}"})

    if response.status_code != 200:
        return json.dumps({"error": f"Failed to fetch web page with status code {response.status_code}"})

    # NOTE(review): the full page text is returned untruncated; very large
    # pages may not fit in the model's context window.
    return json.dumps({"content": response.text})

# Function to download a PDF file
def download_pdf(url, file_name, timeout=60):
    """Download a PDF from ``url`` and write it to ``file_name``.

    Args:
        url: The URL of the PDF file.
        file_name: Path to save the downloaded bytes to.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A JSON string. On success ``{"message": "..."}``; on any failure
        (network error, non-200 status, body that is not a PDF, file write
        error) ``{"error": "<reason>"}``. Nothing is written to disk unless
        the response body looks like a PDF.
    """
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        return json.dumps({"error": f"Failed to download PDF: {exc}"})

    if response.status_code != 200:
        return json.dumps({"error": f"Failed to download PDF with status code {response.status_code}"})

    # A 200 response can still be an HTML landing or error page. Require the
    # PDF header marker near the start so success is reported only for an
    # actual PDF; otherwise the caller would believe the task is complete.
    if b"%PDF-" not in response.content[:1024]:
        return json.dumps({"error": "Failed to download PDF: the response is not a PDF document."})

    # NOTE(review): file_name is supplied by the model and is used as-is, so
    # it may contain directory components; restrict it if that is a concern.
    try:
        with open(file_name, 'wb') as f:
            f.write(response.content)
    except OSError as exc:
        return json.dumps({"error": f"Failed to save PDF as {file_name}: {exc}"})

    return json.dumps({"message": f"PDF downloaded successfully as {file_name}."})

# Define function metadata for OpenAI API
# JSON-schema descriptions of the three callable tools, in the shape passed
# as ``functions=`` to the chat completion request.
function_definitions = [
    dict(
        name="web_search",
        description="Performs a web search using the Google Custom Search API.",
        parameters=dict(
            type="object",
            properties=dict(
                query=dict(type="string", description="The search query."),
            ),
            required=["query"],
        ),
    ),
    dict(
        name="get_web_page",
        description="Fetches the HTML content of a web page.",
        parameters=dict(
            type="object",
            properties=dict(
                url=dict(type="string", description="The URL of the web page."),
            ),
            required=["url"],
        ),
    ),
    dict(
        name="download_pdf",
        description="Downloads a PDF file from the given URL and saves it locally.",
        parameters=dict(
            type="object",
            properties=dict(
                url=dict(type="string", description="The URL of the PDF file."),
                file_name=dict(type="string", description="The name to save the PDF file as."),
            ),
            required=["url", "file_name"],
        ),
    ),
]

# Assistant function to handle the query
def assistant_query(query, max_steps=25):
    """Run the function-calling loop until the model gives a final answer.

    Each round sends the conversation to the chat completions endpoint. If
    the model requests one of the local functions, that function is run and
    its JSON result is appended to the conversation; otherwise the model's
    text is returned.

    Args:
        query: The user's request, sent as the first user message.
        max_steps: Upper bound on model round-trips. Stops a model that keeps
            requesting function calls from looping (and billing) forever.

    Returns:
        The content of the model's final message, or an explanatory string
        if ``max_steps`` round-trips pass without a final answer.

    Note:
        OpenAI's API reference marks the ``functions`` / ``function_call``
        request parameters as deprecated in favour of ``tools`` /
        ``tool_choice``; they are kept here to leave the request unchanged.
    """
    # Name -> callable table for the functions advertised to the model.
    available_functions = {
        "web_search": web_search,
        "get_web_page": get_web_page,
        "download_pdf": download_pdf,
    }

    messages = [
        {
            "role": "system",
            "content": "You are a helpful assistant that can perform web searches, fetch web pages, and download PDFs using provided functions."
        },
        {"role": "user", "content": query}
    ]

    for _ in range(max_steps):
        response = client.chat.completions.create(
            model='gpt-4o-mini',  # Update the model as needed
            messages=messages,
            functions=function_definitions,
            function_call="auto",
            max_tokens=500,
            temperature=0
        )

        response_message = response.choices[0].message
        function_call = response_message.function_call

        if not function_call:
            # Assistant has provided the final answer
            return response_message.content

        function_name = function_call.name
        handler = available_functions.get(function_name)
        if handler is None:
            function_response = json.dumps({"error": "Function not recognized."})
        else:
            try:
                # The arguments are model-generated text: they may be
                # truncated by max_tokens, not valid JSON, or carry
                # unexpected parameter names. Report that back to the model
                # instead of crashing the loop.
                arguments = json.loads(function_call.arguments)
                function_response = handler(**arguments)
            except (json.JSONDecodeError, TypeError) as exc:
                function_response = json.dumps(
                    {"error": f"Invalid arguments for {function_name}: {exc}"}
                )

        # Add the assistant's message and function response to the messages
        messages.append(response_message)
        messages.append({
            "role": "function",
            "name": function_name,
            "content": function_response
        })

    return f"Stopped after {max_steps} function calls without reaching a final answer."

# Main execution
if __name__ == "__main__":
    # Both inputs are mandatory; surrounding whitespace is ignored.
    company = input("Enter the company name: ").strip()
    year = input("Enter the year: ").strip()
    if not company or not year:
        print("Company name and year are required.")
    else:
        # Instructions for the assistant, one per line; the trailing empty
        # entry makes the joined prompt end with a newline.
        prompt_lines = [
            f"Find the {year} annual 10-K SEC filings report of the company '{company}' from the official website.",
            "If necessary, get the content of the web page and find the actual link to the PDF report.",
            "If the web page has a search, try to use it by passing the search parameters in the URL when you try to get a web page.",
            "If the web page has pagination, try to search in multiple pages.",
            "Once you have the link to the PDF file, use the download_pdf functionality to download the PDF locally.",
            "If you fail to download the report, try to run a different web search or to get the content of a different web page.",
            "",
        ]
        query = "\n".join(prompt_lines)
        result = assistant_query(query)
        print("Assistant's Response:")
        print(result)


Assistant's Response:
I have successfully downloaded the 2022 annual 10-K SEC filing report for IBM. The file is saved as **IBM_2022_10-K.pdf**. If you need any further assistance, feel free to ask!


Improve by:
1. First check the SEC EDGAR database for 10-K and 10-Q filings.
2. Download the filings for the past 5 years.
3. Add an option to download quarterly reports (10-Q).
4. Use async requests for more speed.

2. Add to Vector store

3. Extract financial statements

4. Use Code Interpreter to combine the statements across the years, without losing any line items.