In [13]:
from typing_extensions import override
from openai import AssistantEventHandler
from openai import OpenAI
from pydantic import BaseModel
from typing import Optional
import json
import inspect
import sys

# Shared OpenAI client used by the helpers in this file; it is created with
# no explicit arguments.
client = OpenAI()

# Threshold that EventHandler.submit_tool_outputs compares its size estimate
# against before deciding whether to split the tool outputs into chunks.
MAX_SIZE = 512 * 1024  # 512KB in bytes

# Function to truncate outputs
def truncate_outputs(outputs, max_size):
    """Pop items off the end of ``outputs`` until it fits within ``max_size``.

    The size is estimated with ``sys.getsizeof`` applied to the list and to
    each item, which is a shallow measure. The list is shortened in place and
    the same list object is returned.
    """
    def footprint():
        # Container size plus the shallow size of every element.
        return sys.getsizeof(outputs) + sum(map(sys.getsizeof, outputs))

    while outputs and footprint() > max_size:
        outputs.pop()
    return outputs

# Function to split outputs into chunks
def split_outputs(outputs, max_size):
    """Partition ``outputs`` into consecutive chunks bounded by ``max_size``.

    Sizes are estimated with ``sys.getsizeof`` (shallow): an empty list's
    size plus the size of each item placed in the chunk. Items keep their
    original order. An item that is larger than ``max_size`` on its own is
    placed in a chunk by itself.

    Args:
        outputs: iterable of items to distribute.
        max_size: size budget per chunk, in bytes as reported by getsizeof.

    Returns:
        A list of non-empty lists; empty when ``outputs`` is empty.
    """
    chunks = []
    current_chunk = []
    empty_size = sys.getsizeof(current_chunk)
    current_size = empty_size

    for item in outputs:
        item_size = sys.getsizeof(item)
        # Close the running chunk only when it already holds something;
        # otherwise an oversized first item would leave an empty chunk behind.
        if current_chunk and current_size + item_size > max_size:
            chunks.append(current_chunk)
            current_chunk = []
            current_size = empty_size
        current_chunk.append(item)
        current_size += item_size

    if current_chunk:
        chunks.append(current_chunk)
    return chunks

def function_to_schema(func) -> dict:
    """Describe a Python callable as a function-tool schema dictionary.

    Each parameter's annotation is translated to a JSON-schema type name;
    missing or unrecognised annotations fall back to ``"string"``. Parameters
    without a default value are listed as required. The description is the
    callable's stripped docstring (empty string when there is none).

    Args:
        func: the callable to describe.

    Returns:
        A dict with ``"type": "function"`` and a ``"function"`` entry holding
        the name, description and parameter schema.

    Raises:
        ValueError: if ``inspect.signature`` cannot produce a signature.
    """
    type_map = {
        str: "string",
        int: "integer",
        float: "number",
        bool: "boolean",
        list: "array",
        dict: "object",
        type(None): "null",
    }

    try:
        signature = inspect.signature(func)
    except ValueError as e:
        func_name = getattr(func, "__name__", repr(func))
        raise ValueError(
            f"Failed to get signature for function {func_name}: {str(e)}"
        ) from e

    parameters = {}
    required = []
    for param in signature.parameters.values():
        try:
            param_type = type_map.get(param.annotation, "string")
        except TypeError:
            # An unhashable annotation cannot be a dict key; treat it like
            # any other unknown annotation.
            param_type = "string"
        parameters[param.name] = {"type": param_type}

        # Identity check against the public sentinel: a default whose __eq__
        # is unusual must not be mistaken for "no default".
        if param.default is inspect.Parameter.empty:
            required.append(param.name)

    return {
        "type": "function",
        "function": {
            "name": func.__name__,
            "description": (func.__doc__ or "").strip(),
            "parameters": {
                "type": "object",
                "properties": parameters,
                "required": required,
            },
        },
    }

class Agent(BaseModel):
    # Configuration record for one agent; instances are handed to
    # EventHandler and run_full_turn.
    name: str = "Agent"  # human-readable label (not read by the visible code)
    model: str = "gpt-4o-mini"  # passed as `model=` when run_full_turn starts a run
    assistant: str = ""  # assistant ID given to client.beta.assistants.retrieve
    instructions: str = "You are a helpful Agent"  # passed as `instructions=` for the run
    tools: list = []  # Python callables; function_to_schema builds their tool schemas
    
class Response(BaseModel):
    # Container pairing an agent with a list of messages. Nothing in the
    # visible code constructs it; the only mention is a commented-out return
    # at the end of run_full_turn.
    agent: Optional[Agent]  # declared without a default value
    messages: list

def execute_tool_call(tool_call, tools):
    """Run the tool requested by ``tool_call`` if it is one of ``tools``.

    Args:
        tool_call: object exposing ``function.name`` and ``function.arguments``
            (a JSON object string of keyword arguments).
        tools: iterable of callables the agent is allowed to use; a tool is
            selected by comparing its ``__name__`` with the requested name.

    Returns:
        Whatever the matched tool returns, or ``""`` when no tool matches.

    Raises:
        json.JSONDecodeError: if the arguments are not valid JSON.
    """
    name = tool_call.function.name
    # An empty argument string is treated as "no arguments".
    args = json.loads(tool_call.function.arguments or "{}")

    for tool in tools:
        if tool.__name__ == name:
            # Call the matched callable itself rather than looking the name up
            # in globals(): the allow-list is `tools`, and a tool need not be
            # a module-level global.
            return tool(**args)

    return ""
    
def execute_call(tool_call):
    """Invoke the module-level callable named by ``tool_call``.

    The name is taken from ``tool_call.function.name`` and the keyword
    arguments are decoded from the JSON string in
    ``tool_call.function.arguments``. The callable's return value is passed
    straight back; an unknown name raises ``KeyError``.
    """
    requested = tool_call.function
    target_name = requested.name
    kwargs = json.loads(requested.arguments)

    # NOTE(review): the name is looked up in globals() without an allow-list,
    # so any module-level callable can be reached this way.
    handler = globals()[target_name]
    return handler(**kwargs)
    

# First, we create an EventHandler class to define
# how we want to handle the events in the response stream.

class EventHandler(AssistantEventHandler):
    """Stream handler that prints run progress and answers tool calls.

    Each callback prints a line prefixed with the assistant's name, which is
    fetched from the API on every call. When the stream reports
    ``thread.run.requires_action`` the requested tools are executed through
    ``execute_tool_call`` and their results are streamed back to the run.
    """
    agent: Optional[Agent]

    def __init__(self, agent):
        # Initialize the parent class, then remember the agent whose
        # assistant ID and tools the callbacks use.
        super().__init__()
        self.agent = agent

    def _assistant_name(self):
        """Return the name of this agent's assistant as reported by the API."""
        return client.beta.assistants.retrieve(self.agent.assistant).name

    @override
    def on_text_created(self, text) -> None:
        print(f"\n{self._assistant_name()} > on_text_created\n", end="", flush=True)

    @override
    def on_tool_call_created(self, tool_call):
        print(
            f"\n{self._assistant_name()} >on_tool_call {tool_call.type} created\n",
            flush=True,
        )

    @override
    def on_message_done(self, message) -> None:
        """Print the finished message with file citations replaced by [n] markers."""
        # Only the first content part is rendered, and only when it carries
        # text; an empty message or a non-text first part prints the header
        # line alone instead of raising.
        first_part = message.content[0] if message.content else None
        message_content = getattr(first_part, "text", None)

        citations = []
        if message_content is not None:
            for index, annotation in enumerate(message_content.annotations):
                message_content.value = message_content.value.replace(
                    annotation.text, f"[{index}]"
                )
                if file_citation := getattr(annotation, "file_citation", None):
                    cited_file = client.files.retrieve(file_citation.file_id)
                    citations.append(f"[{index}] {cited_file.filename}")

        print(f"\n{self._assistant_name()} > on_message_done\n", end="", flush=True)
        if message_content is not None:
            print(message_content.value)
            print("\n".join(citations))

    @override
    def on_event(self, event):
        # Events denoted with 'requires_action' carry the tool calls.
        if event.event == 'thread.run.requires_action':
            run_id = event.data.id  # Retrieve the run ID from the event data
            self.handle_requires_action(event.data, run_id)

    def handle_requires_action(self, data, run_id):
        """Execute every requested tool call and submit the collected outputs."""
        tool_outputs = []

        for tool in data.required_action.submit_tool_outputs.tool_calls:
            result = execute_tool_call(tool, self.agent.tools)
            # Tool outputs are submitted as strings, so other results are
            # converted with str().
            output = result if isinstance(result, str) else str(result)
            tool_outputs.append({"tool_call_id": tool.id, "output": output})

        # Submit all tool_outputs at the same time
        self.submit_tool_outputs(tool_outputs, run_id)

    def submit_tool_outputs(self, tool_outputs, run_id):
        """Stream ``tool_outputs`` back to the current run.

        ``run_id`` is accepted for the caller's convenience; the thread and
        run IDs actually used come from ``self.current_run``.
        """
        # NOTE(review): sys.getsizeof is shallow, so for a list of dicts this
        # estimate does not include the output strings themselves.
        estimated_size = sys.getsizeof(tool_outputs) + sum(
            sys.getsizeof(entry) for entry in tool_outputs
        )

        if estimated_size > MAX_SIZE:
            # NOTE(review): the Assistants API reference describes tool
            # outputs as being submitted in a single request; confirm that
            # chunked submission is accepted before relying on this branch.
            for chunk in split_outputs(tool_outputs, MAX_SIZE):
                with client.beta.threads.runs.submit_tool_outputs_stream(
                    thread_id=self.current_run.thread_id,
                    run_id=self.current_run.id,
                    tool_outputs=chunk,
                    event_handler=EventHandler(self.agent),
                ) as stream:
                    stream.until_done()
                    print(f"\n---End Function call for run {self.current_run.id}---\n")
        else:
            with client.beta.threads.runs.submit_tool_outputs_stream(
                thread_id=self.current_run.thread_id,
                run_id=self.current_run.id,
                tool_outputs=tool_outputs,
                event_handler=EventHandler(self.agent),
            ) as stream:
                stream.until_done()
                print(f"\n---End Function call for run {self.current_run.id}---\n")
                for text in stream.text_deltas:
                    print(text, end="", flush=True)
                print()

def run_full_turn(agent, thread):
    """Stream one assistant run for ``agent`` on ``thread`` until it finishes.

    The agent's Python tools are converted to schemas with
    ``function_to_schema`` and a ``file_search`` tool entry is always added.
    Events are handled by a fresh ``EventHandler``. Nothing is returned.
    """
    current_agent = agent
    assistant = client.beta.assistants.retrieve(agent.assistant)

    # Function tools first, then the file_search tool.
    tool_schemas = list(map(function_to_schema, current_agent.tools))
    tool_schemas.append({"type": "file_search"})

    with client.beta.threads.runs.stream(
        thread_id=thread.id,
        assistant_id=assistant.id,
        model=agent.model,
        instructions=current_agent.instructions,
        tools=tool_schemas,
        event_handler=EventHandler(current_agent),
    ) as run_stream:
        run_stream.until_done()




In [14]:
from sec_api import QueryApi
from sec_api import ExtractorApi
from sec_api import XbrlApi
from sec_api import SubsidiaryApi
from sec_api import ExecCompApi
from sec_api import RenderApi, PdfGeneratorApi, MappingApi
import pandas as pd
import numpy as np
import json
from datetime import datetime
from IPython.display import display, HTML

import os

# The sec-api key is read from the environment instead of being embedded in
# the source. NOTE(review): the key that was previously committed here should
# be treated as exposed and rotated.
SEC_API_KEY = os.environ.get("SEC_API_KEY")
if not SEC_API_KEY:
    raise RuntimeError(
        "SEC_API_KEY environment variable is not set; "
        "export it before running this cell."
    )

# One client per sec-api endpoint, all sharing the same key.
queryApi = QueryApi(api_key=SEC_API_KEY)
extractorApi = ExtractorApi(SEC_API_KEY)
xbrlApi = XbrlApi(SEC_API_KEY)
subsidiary_api = SubsidiaryApi(SEC_API_KEY)
execCompApi = ExecCompApi(SEC_API_KEY)
renderApi = RenderApi(SEC_API_KEY)
pdfGeneratorApi = PdfGeneratorApi(SEC_API_KEY)
mappingApi = MappingApi(api_key=SEC_API_KEY)


# map to 'ticker' or 'cik' etc. by name
# may return multiple results as a list   
def sec_map_by_name(name,key='ticker'):
    """Resolve a company name through the mapping API and collect one field.

    Returns a list with the value of ``key`` from every match that contains
    that key; the list may be empty or hold several entries.
    """
    matches = mappingApi.resolve('name', name)
    return [entry[key] for entry in matches if key in entry]

# map by ticker
def sec_map_by_ticker(ticker,key='name'):
    """Resolve a ticker through the mapping API and collect one field.

    Returns a list with the value of ``key`` from every match that contains
    that key; an empty list means nothing usable was found.
    """
    matches = mappingApi.resolve('ticker', ticker)
    return [entry[key] for entry in matches if key in entry]

# merge two income statements into one statement.
# row indices of both statements have to be the same.
# statement_b represents the most recent statement.
def merge_income_statements(statement_a, statement_b):
    """Outer-merge two statement frames, keyed on ``statement_b``'s index.

    Columns that exist in both frames keep their plain name for
    ``statement_b`` and get a ``_left`` suffix for ``statement_a``.
    """
    merge_options = {
        "how": "outer",
        "on": statement_b.index,
        "suffixes": ('_left', ''),
    }
    return statement_a.merge(statement_b, **merge_options)
# clean income statement.
# drop duplicate columns (= column name ends with "_left"), drop key_0 column, drop columns with +5 NaNs
def clean_income_statement(statement):
    """Return ``statement`` without merge leftovers, with columns sorted.

    A column is removed when its name ends with ``_left``, when it is named
    ``key_0``, or when it holds more than five NaN values. The remaining
    columns are returned in ascending name order. The input frame itself is
    not modified.
    """
    for column in list(statement.columns):
        too_sparse = statement[column].isna().sum() > 5
        if not (column.endswith('_left') or column == 'key_0' or too_sparse):
            continue
        statement = statement.drop(columns=column)

    # Ascending order puts the earliest period first (2018, 2019, 2020).
    ordered = list(statement.columns.values)
    ordered.sort()
    return statement[ordered]

def extract_filing_items(filing_url,items,ex_type="text"):
    """Fetch the requested sections of one filing through the extractor API.

    Each entry of ``items`` is requested separately with ``ex_type`` as the
    return type. When ``ex_type`` is ``"html"`` the section is also rendered
    with ``display``. A section that fails is reported with ``print`` and
    left out, so the returned list can be shorter than ``items``.
    """
    render_html = ex_type == "html"
    sections = []
    for item in items:
        try:
            section = extractorApi.get_section(
                filing_url=filing_url, section=item, return_type=ex_type
            )
            if render_html:
                display(HTML(section))
            sections.append(section)
        except Exception as err:
            print(err)
    return sections

def get_filing_items(company_name,start_date='2024-01-01', end_date='2024-12-31', items=["1"], form="10-K", ex_type="text"):
    """Search filings for a company and extract the given items from each.

    ``company_name`` is used as a ticker when the mapping API recognises it
    as one, otherwise as a company name. Up to 50 filings of type ``form``
    filed between ``start_date`` and ``end_date`` are requested, newest
    first. Returns one list of extracted sections per filing.
    """
    field = 'ticker:' if sec_map_by_ticker(company_name) else 'companyName:'
    name_query = field + company_name

    search_query = ' AND '.join([
        name_query,
        'formType:"' + form + '"',
        'filedAt:[' + start_date + ' TO ' + end_date + ']',
    ])

    response = queryApi.get_filings({
        "query": search_query,
        "from": "0",
        "size": "50",
        "sort": [{"filedAt": {"order": "desc"}}],
    })

    # The filing detail URL is what the extractor works from.
    filing_urls = [filing["linkToFilingDetails"] for filing in response["filings"]]

    return [extract_filing_items(url, items, ex_type) for url in filing_urls]


def json_to_table(xbrl_json,table='BalanceSheets'):
    """Convert one statement of an XBRL-JSON document into a DataFrame.

    Args:
        xbrl_json: mapping that contains ``table``; that entry maps each item
            name to a list of fact dicts with a ``period`` and, usually, a
            ``value``.
        table: key of the statement to convert.

    Returns:
        A DataFrame with one row per item and one column per period. Instant
        periods are labelled by their date, ranges by
        ``startDate-endDate``. Only the first fact per period is kept and a
        missing value becomes ``''``.
    """
    statement_store = {}

    for item_name, facts in xbrl_json[table].items():
        values = []
        indices = []

        for fact in facts:
            try:
                period = fact['period']
                if 'instant' in period:
                    index = period['instant']
                else:
                    index = period['startDate'] + '-' + period['endDate']
                # ensure no index duplicates are created
                if index not in indices:
                    values.append(fact['value'] if 'value' in fact else '')
                    indices.append(index)
            except (KeyError, TypeError):
                # Malformed facts are reported and skipped; only the lookup
                # errors a bad fact can cause are caught, so interrupts and
                # unrelated failures still propagate.
                print("❌ failed unpack element {fact} ".format(fact=fact))
        statement_store[item_name] = pd.Series(values, index=indices)

    statement = pd.DataFrame(statement_store)
    # switch columns and rows so that items are rows and each column header
    # represents a date or date range
    return statement.T


def get_filing_financials(company_name,start_date='2024-01-01', end_date='2024-12-31', table='BalanceSheets'):
    """Load one financial statement table from the newest matching filing.

    The search covers 10-K and 10-Q filings that carry an XBRL instance
    document and were filed between ``start_date`` and ``end_date``; only
    the most recent hit is requested. ``company_name`` is used as a ticker
    when the mapping API recognises it as one, otherwise as a company name.
    Returns a list of DataFrames produced by ``json_to_table``.
    """
    form_clause = '(formType:"10-K" OR formType:"10-Q") AND dataFiles.description:"XBRL INSTANCE DOCUMENT"'
    field = 'ticker:' if sec_map_by_ticker(company_name) else 'companyName:'
    name_query = field + company_name

    search_query = ' AND '.join([
        name_query,
        form_clause,
        'filedAt:[' + start_date + ' TO ' + end_date + ']',
    ])

    response = queryApi.get_filings({
        "query": search_query,
        "from": "0",
        "size": "1",
        "sort": [{"filedAt": {"order": "desc"}}],
    })

    filing_urls = [filing["linkToFilingDetails"] for filing in response["filings"]]

    # Each filing's XBRL is converted to JSON and then to a table.
    return [json_to_table(xbrlApi.xbrl_to_json(url), table) for url in filing_urls]

def sec_get_subsidaries(company_name, start_date='2024-01-01',end_date='2024-12-31'):
    """Return a DataFrame of subsidiaries reported for a company.

    ``company_name`` is used as a ticker when the mapping API recognises it
    as one, otherwise as a company name. ``start_date`` and ``end_date`` are
    accepted but not used in the query. Each subsidiary becomes one row with
    ``name`` and ``jurisdiction`` columns; if that expansion fails the error
    is printed and the columns are left out.
    """
    field = 'ticker:' if sec_map_by_ticker(company_name) else 'companyName:'
    search_query = field + company_name

    payload = subsidiary_api.get_data({
        "query": search_query,
        "from": "0",
        "size": "50",
        "sort": [{"filedAt": {"order": "desc"}}],
    })

    # Drop the id/companyName columns, then give each subsidiary its own row.
    frame = (
        pd.DataFrame(payload['data'])
        .drop(['id', 'companyName'], axis=1)
        .explode('subsidiaries')
    )

    try:
        # Spread each subsidiary record into "name" and "jurisdiction".
        frame[['name', 'jurisdiction']] = frame['subsidiaries'].apply(pd.Series)
    except Exception as err:
        print(err)

    return frame.drop(['subsidiaries'], axis=1)
    

def sec_get_exec_comp(company_name,start_year='2023',end_year='2024'):
    """Return executive compensation records for a company as a DataFrame.

    ``company_name`` is used directly when the mapping API recognises it as
    a ticker; otherwise the first ticker mapped from the name is used. Up to
    50 records between ``start_year`` and ``end_year`` are requested, sorted
    by total compensation, and monetary fields are formatted as
    dollar strings.
    """
    ticker = (
        company_name
        if sec_map_by_ticker(company_name)
        else sec_map_by_name(company_name)[0]
    )

    search_query = 'ticker:' + ticker + ' AND year:[' + start_year + ' TO ' + end_year + ']'

    records = execCompApi.get_data({
        "query": search_query,
        "from": "0",
        "size": "50",
        "sort": [{"total": {"order": "desc"}}],
    })

    usd = "${:,.0f}".format
    money_columns = (
        ("Salary", "salary"),
        ("Stocks", "stockAwards"),
        ("Options", "optionAwards"),
        ("Non-equity", "nonEquityIncentiveCompensation"),
        ("Deferred", "changeInPensionValueAndDeferredEarnings"),
        ("Other", "otherCompensation"),
        ("Total", "total"),
    )

    def simplify(record):
        row = {
            "Ticker": record["ticker"],
            "Name": record["name"],
            "Position": record["position"],
        }
        for label, field in money_columns:
            row[label] = usd(record[field])
        row["Year"] = record["year"]
        return row

    return pd.DataFrame([simplify(record) for record in records])
    
def sec_get_new_releases(company_name,start_date,end_date):
    """Collect the exhibit-99 documents attached to a company's 8-K filings.

    The search is limited to 8-K filings with items 9.01 and 2.02 filed
    between ``start_date`` and ``end_date``. Results are paged 50 at a time
    until an empty page is returned. Every exhibit found is rendered with
    ``display``; a document that cannot be downloaded is reported and
    skipped.

    Args:
        company_name: ticker if the mapping API recognises it as one,
            otherwise a company name.
        start_date: first filing date, as used in the ``filedAt`` range.
        end_date: last filing date, as used in the ``filedAt`` range.

    Returns:
        A DataFrame with one row per exhibit-99 document, or an empty
        DataFrame when none were found.
    """
    if sec_map_by_ticker(company_name):
        name_query = 'ticker:' + company_name
    else:
        name_query = 'companyName:' + company_name

    # The query text is the same for every page, so build it once.
    query = " AND ".join([
        name_query,
        'formType:"8-K"',
        "documentFormatFiles.type:(99, 99*, *99, *99*)",
        'items:("9.01" AND "2.02")',
        'filedAt:[' + start_date + " TO " + end_date + ']',
    ])

    frames = []
    for from_index in range(0, 9950, 50):
        response = queryApi.get_filings({
            "query": query,
            "from": from_index,
            "size": "50",
            "sort": [{"filedAt": {"order": "desc"}}],
        })

        filings = response["filings"]
        if not filings:
            break

        documents = [
            doc
            for filing in filings
            for doc in (filing.get("documentFormatFiles") or [])
        ]
        exhibit_99s = [doc for doc in documents if "99" in (doc.get("type") or "")]
        if not exhibit_99s:
            # A page without exhibit-99 documents has no "documentUrl"
            # column to read; move on to the next page.
            continue

        exhibit_99s_df = pd.DataFrame.from_records(exhibit_99s)
        frames.append(exhibit_99s_df)

        for url in exhibit_99s_df["documentUrl"]:
            try:
                file_content = renderApi.get_filing(url)
                # NOTE(review): the generated PDF is not used afterwards; the
                # call is kept because its failure also skips the display.
                pdfGeneratorApi.get_pdf(url)
                display(HTML(file_content))
            except Exception:
                print("❌ download failed: {url}".format(url=url))

    if frames:
        return pd.concat(frames)
    return pd.DataFrame()

In [15]:
def escalate_to_human(summary):
    """Only call this if explicitly asked to."""
    # The docstring above is kept verbatim: function_to_schema() uses a
    # tool's docstring as its description.
    report_lines = (
        "Escalating to human agent...",
        "\n=== Escalation Report ===",
        f"Summary: {summary}",
        "=========================\n",
    )
    for line in report_lines:
        print(line)
    # Ends the session once the report has been printed.
    exit()

# Front-line agent: greets the customer and gathers enough information to
# route them to an analyst. Its only tool is escalate_to_human.
triage_agent = Agent(
    name="Triage Agent",
    instructions=(
        # Adjacent literals are concatenated, so each sentence except the
        # last ends with a space.
        "You are an Investment Advisor for Blue Hills Research. "
        "Introduce yourself. Always be very brief. "
        "Gather information to direct the customer to the right analyst. "
        "But make your questions subtle and natural."
    ),
    tools=[escalate_to_human],
    assistant="asst_rJSLLZ8SpDIh8Lhw3slQIecf",
)

def limit_tokens(text, max_tokens):
    """Keep at most ``max_tokens`` whitespace-separated words of ``text``.

    Words are found with ``str.split()`` and rejoined with single spaces, so
    runs of whitespace (including newlines) collapse to one space.
    """
    return " ".join(text.split()[:max_tokens])

def map_by_name(name,key='ticker'):
    """Use to map a name of company to a list of ticker(s) or other attributes
    here name is a name of any public company.
    key can be a list of the key items of
        -'name' for the company name,
        -'ticker' for the Ticker name,
        -'cik' for the CIK code.
        -'cusip' for company's CUSIP,
        -'exchange' for the exchange name, 
        -'sector': for the sector name,
        -'industry' for the industry name,
    default key value is 'ticker'."""
    # The docstring is kept verbatim: function_to_schema() uses a tool's
    # docstring as its description.
    matches = sec_map_by_name(name, key)
    fallback = "Failed to locate " + key
    # Only the first match is reported; the fallback text covers no match.
    return matches[0] if matches else fallback

def map_by_ticker(ticker,key='name'):     
    """Use to map a ticker to a list of company name(s) or other attributes
    here ticker is the ticker of a listed company.
    key can be a list of the key items of
        -'name' for the company name,
        -'ticker' for the Ticker name,
        -'cik' for the CIK code.
        -'cusip' for company's CUSIP,
        -'exchange' for the exchange name, 
        -'sector': for the sector name,
        -'industry' for the industry name,
    default key value is 'name'."""
    # The docstring is kept verbatim: function_to_schema() uses a tool's
    # docstring as its description.
    matches = sec_map_by_ticker(ticker, key)
    fallback = "Failed to locate " + key
    # Only the first match is reported; the fallback text covers no match.
    return matches[0] if matches else fallback

def get_exec_comp(company_name,start_year='2023',end_year='2023'):
    """Use to find execative compensations form the company 
    company is the name of the company.
    start_year and end_year are start year and end year.
    if not secified by user, the default start_year and end_year are current year.
    """
    # The docstring is kept verbatim: function_to_schema() uses a tool's
    # docstring as its description.
    # The compensation table is handed back as plain text.
    compensation = sec_get_exec_comp(company_name, start_year, end_year)
    return compensation.to_string()

def get_new_releases(company_name,start_date='2024-01-01',end_date='2024-12-31'):
    """Use to get new releases from the company 
    company can be a name of company.
    start_date and end_date are both date in the form of year-month-date.
    if not secified by user, the default start_date is the start of the year, and
    the default end_date is today. """
    # The docstring is kept verbatim: function_to_schema() uses a tool's
    # docstring as its description.
    print("New releases:", company_name, start_date, end_date)

    # The exhibit table is handed back as plain text.
    releases = sec_get_new_releases(company_name, start_date, end_date)
    return releases.to_string()

def get_subsidaries(company_name, start_date='2024-01-01',end_date='2024-12-31'):
    """Use to get subsidaries from the company 
    company can be a name of company.
    start_date and end_date are both date in the form of year-month-date.
    if not secified by user, the default start_date is the start of the year, and
    the default end_date is today. """
    # The docstring is kept verbatim: function_to_schema() uses a tool's
    # docstring as its description.
    print("Subsidaries:", company_name, start_date, end_date)

    subsidiaries = sec_get_subsidaries(company_name, start_date, end_date)
    # The text form is cut off after 128000 characters.
    as_text = subsidiaries.to_string()
    return as_text[:128000]


def study_filing_items (company, item, form ,start_date='2024-01-01',end_date='2024-12-31'):
    """Use to study specific sections of company's 10-K file.  
    company is a name of company.
    form can be string of "10-K", "10-Q" or "8-K" and the default value is "10-K".
    item is a literal string without spaces, representing a section for extracting. 
    start_date and end_date are both date in the form of year-month-date.
    if not secified by user, default start_date is the beginging of this year.
    default end_date is today. """
    # The docstring is kept verbatim: function_to_schema() uses a tool's
    # docstring as its description.
    print("Study filing items:", company)

    # The section is requested as HTML, which extract_filing_items renders
    # with display(); the extracted content itself is not returned.
    get_filing_items(company, start_date, end_date, [item], form, ex_type="html")

    return "success"

def study_business (company,start_date='2024-01-01',end_date='2024-12-31'):
    """Use to study further business quality
    company can be a name of company.
    start_date and end_date are both date in the form of year-month-date.
    if not specified by user, start_date defaults to 2024-01-01 and
    end_date defaults to 2024-12-31.

    Returns the text of item "1" from the matching 10-K filings, limited to
    125000 whitespace-separated words.
    """
    print("Study business:", company)

    filings = get_filing_items(company, start_date, end_date, items=["1"], form="10-K", ex_type="text")

    # get_filing_items returns one list of section strings per filing. Join
    # whole sections: iterating one level deeper would walk the characters
    # of each string.
    message = "\n\n".join(section for sections in filings for section in sections)
    # keep return message within token limit
    return limit_tokens(message, 125000)


def study_financial_performance (company,start_date='2024-01-01',end_date='2024-12-31'):
    """Use to study recent financial performance
    company is the name of a company.
    start_date and end_date are both date in the form of year-month-date.
    if not specified by user, start_date defaults to 2024-01-01 and
    end_date defaults to 2024-12-31.

    Returns the text of items "part1item1" and "part1item2" from the
    matching 10-Q filings followed by item "7" from the matching 10-K
    filings, limited to 125000 whitespace-separated words.
    """
    print("Study financial performance:", company)

    result_type = "text"
    filings = get_filing_items(company, start_date, end_date, items=["part1item1", "part1item2"], form="10-Q", ex_type=result_type)
    # extend, not append: both calls return a list with one entry per filing,
    # and the two lists must stay at the same nesting level.
    filings.extend(get_filing_items(company, start_date, end_date, items=["7"], form="10-K", ex_type=result_type))

    # Each filing entry is a list of section strings. Join whole sections:
    # iterating one level deeper would walk the characters of each string.
    message = "\n\n".join(section for sections in filings for section in sections)
    # keep return message within token limit
    return limit_tokens(message, 125000)


def study_management (company,start_date='2024-01-01',end_date='2024-12-31'):
    """Use to study further management quality
    company can be a name of company.
    start_date and end_date are both date in the form of year-month-date,
    if not specified by user, start_date defaults to 2024-01-01 and
    end_date defaults to 2024-12-31.

    Returns the text of items "10" and "11" from the matching 10-K filings,
    limited to 125000 whitespace-separated words.
    """
    print("Study management:", company)

    filings = get_filing_items(company, start_date, end_date, items=["10", "11"], form="10-K", ex_type="text")

    # get_filing_items returns one list of section strings per filing. Join
    # whole sections: iterating one level deeper would walk the characters
    # of each string.
    message = "\n\n".join(section for sections in filings for section in sections)
    # keep return message within token limit
    return limit_tokens(message, 125000)

def study_risks (company,start_date='2024-01-01',end_date='2024-12-31'):
    """Use to study Risk factors associated with the company
    company is the name of a company.
    start_date and end_date are the dates in the form of yyyy-mm-dd, defining the period to study.
    if not specified, start_date defaults to 2024-01-01 and
    end_date defaults to 2024-12-31.

    Returns the text of item "1A" from the matching 10-K filings, limited to
    120000 whitespace-separated words.
    """
    print("Study Risk Factors:", company)

    filings = get_filing_items(company, start_date, end_date, items=["1A"], form="10-K", ex_type="text")

    # get_filing_items returns one list of section strings per filing. Join
    # whole sections: iterating one level deeper would walk the characters
    # of each string.
    message = "\n\n".join(section for sections in filings for section in sections)
    # keep return message within token limit
    return limit_tokens(message, 120000)

def study_capital_allocation (company,start_date='2024-01-01',end_date='2024-12-31'):
    """Use to study further capital allocation quality
    company is a name of a company.
    start_date and end_date are both date in the form of year-month-date,
    if not specified by user, start_date defaults to 2024-01-01 and
    end_date defaults to 2024-12-31.

    Returns the text of item "8" from the matching 10-K filings, limited to
    125000 whitespace-separated words.
    """
    print("Study capital allocation:", company)

    filings = get_filing_items(company, start_date, end_date, items=["8"], form="10-K", ex_type="text")

    # get_filing_items returns one list of section strings per filing. Join
    # whole sections: iterating one level deeper would walk the characters
    # of each string.
    message = "\n\n".join(section for sections in filings for section in sections)
    # keep return message within token limit
    return limit_tokens(message, 125000)

def get_balance_sheet(company,start_date='2024-01-01',end_date='2024-12-31'):
    """Use to get a company's recent year balance sheet.
    company here is a name of a company.
    start_date and end_date are both date in the form of year-month-date,
    if not specified by user, start_date defaults to 2024-01-01 and
    end_date defaults to 2024-12-31.

    Each table is rendered with display() and the text form of all tables is
    returned, separated by blank lines. Missing values are shown as empty
    strings and a number of long item names are shortened.
    """
    print("Balance Sheet :", company)

    # Applied in order as substring replacements on every row label.
    label_replacements = (
        ("CashAndCashEquivalentsAtCarryingValue", "Cash & Equivalent"),
        ("ShortTermInvestments", "ST Investments"),
        ("AccountsReceivableNetCurrent", "Account Receivable"),
        ("OtherReceivablesNetCurrent", "Other Receivables"),
        ("AarpAssetsUnderManagement", "AARP AUM"),
        ("PrepaidExpenseAndOtherAssetsCurrent", "Prepaid Expense"),
        ("AssetsCurrent", "Current Assets"),
        ("LongTermInvestments", "LT Investments"),
        ("PropertyPlantAndEquipmentNet", "PP&E"),
        ("IntangibleAssetsNetExcludingGoodwill", "Intangibles"),
        # The source item is non-current, so the label says so.
        ("OtherAssetsNoncurrent", "Other Non-current Assets"),
        ("LiabilityForClaimsAndClaimsAdjustmentExpense", "Liability for CACAE"),
        ("AccountsPayableAndAccruedLiabilitiesCurrent", "Payables & Accrued"),
        ("DeferredIncomeTaxLiabilitiesNet", "Deferred Taxliability"),
        ("RedeemableNoncontrollingInterestEquityCarryingAmount", "Redeemable non-controlling Interest"),
        ("AccumulatedOtherComprehensiveIncomeLossNetOfTax", "AOCI"),
        ("PreferredStockParOrStatedValuePerShare", "Preferred Stock Pershare"),
        ("CommonStockParOrStatedValuePerShare", "Common Stock Pershare"),
        ("StockholdersEquityIncludingPortionAttributableToNoncontrollingInterest", "Stockholders Equity"),
    )

    def relabel(label):
        for old, new in label_replacements:
            label = label.replace(old, new)
        return label

    list_financials = get_filing_financials(company, start_date, end_date, table='BalanceSheets')
    for financial in list_financials:
        # Blank out missing values in place so the rendered table and the
        # returned text show empty cells instead of NaN.
        financial.fillna("", inplace=True)
        financial.index = [relabel(index) for index in financial.index]
        display(HTML(financial.to_html()))

    return "\n\n".join(item.to_string() for item in list_financials)

def get_shareholders_equity(company,start_date='2024-01-01',end_date='2024-12-31'):
    """Use to get a company's statement of shareholder's equity.
    company here is a name of a company.
    start_date and end_date are both date in the form of year-month-date,
    if not specified by user, start_date defaults to 2024-01-01 and
    end_date defaults to 2024-12-31.

    Each table is rendered with display() and the text form of all tables is
    returned, separated by blank lines. Missing values are shown as empty
    strings.
    """
    print("Statement of shareholders equity :", company)

    list_financials = get_filing_financials(company, start_date, end_date, table='StatementsOfShareholdersEquity')
    for financial in list_financials:
        # Blank out missing values in place so the rendered table and the
        # returned text show empty cells instead of NaN.
        financial.fillna("", inplace=True)
        display(HTML(financial.to_html()))

    return "\n\n".join(item.to_string() for item in list_financials)


def get_income_statement(company,start_date='2024-01-01',end_date='2024-12-31'):
    """Use to get a company's recent year Income Statement
    company can be a name of company.
    start_date and end_date are both date in the form of year-month-date,
    if not specified by user, start_date defaults to 2024-01-01 and
    end_date defaults to 2024-12-31.

    Each statement is cleaned with clean_income_statement and rendered with
    display(). When several statements are found they are merged into one.
    Returns the text form of the cleaned (and merged) statement, or an empty
    string when no statement was found.
    """
    print("Income Statement :", company)

    list_financials = get_filing_financials(company, start_date, end_date, table='StatementsOfIncome')

    income_statement_final = None
    for financial in list_financials:
        income_statement_cleaned = clean_income_statement(financial)

        # show each cleaned statement as it is processed
        display(HTML(income_statement_cleaned.to_html()))

        if income_statement_final is None:
            income_statement_final = income_statement_cleaned
        else:
            # merge the new statement into the running result and clean up
            # the duplicate columns the merge leaves behind
            income_statement_final = clean_income_statement(
                merge_income_statements(income_statement_final, income_statement_cleaned)
            )

    if income_statement_final is None:
        return ""
    # Return the cleaned/merged statement that was built above rather than
    # the raw tables.
    return income_statement_final.to_string()

def get_cashflow_statement(company,start_date='2024-01-01',end_date='2024-12-31'):
    """Use to get a company's recent year Cashflow Statement.
    company can be a name of company.
    start_date and end_date are both dates in the form of year-month-date;
    if not specified by user, default start_date is the beginning of this year,
    default end_date is today.

    Returns the statements of all matching filings as text, separated by
    blank lines ("" when no filing is found)."""

    print("Cashflow Statement :", company) # lazy summary

    list_financials = get_filing_financials(company,start_date,end_date, table='StatementsOfCashFlows')
    for financial in list_financials:
        # The former bare `financial.dropna()` call discarded its result and
        # therefore never removed anything; it has been dropped. Missing cells
        # are blanked in place so both the HTML view and the returned text
        # show empty cells instead of NaN.
        financial.fillna("", inplace=True)
        display(HTML(financial.to_html()))

    return "\n\n".join(item.to_string() for item in list_financials)


In [16]:
# utilities to show assistants, vector stores and vector store files
# get a dict of assistant
def list_assistants():
    """Return a dict mapping assistant name -> assistant id.

    The assistants are requested with order="desc". When several assistants
    share a name, the one iterated last overwrites the earlier entries.
    """
    name_to_id = {}
    for assistant in client.beta.assistants.list(order="desc"):
        name_to_id[assistant.name] = assistant.id
    return name_to_id

# get a dict of vectors
def list_vectors():
    """Return a dict mapping vector store name -> vector store id.

    The vector stores are requested with order="desc". When several stores
    share a name, the one iterated last overwrites the earlier entries.
    """
    name_to_id = {}
    for store in client.beta.vector_stores.list(order="desc"):
        name_to_id[store.name] = store.id
    return name_to_id

# get store files from a vector store
def list_vector_files(vec_id):
    """Return a dict mapping filename -> file id for the vector store `vec_id`.

    Each file listed in the store is looked up individually with
    client.files.retrieve to obtain its filename.
    """
    filename_to_id = {}
    for store_file in client.beta.vector_stores.files.list(vector_store_id=vec_id):
        file_info = client.files.retrieve(store_file.id)
        filename_to_id[file_info.filename] = store_file.id
    return filename_to_id

# print a list of vectors with associated files
def print_vec_list(list_files=False):
    """Print the name and id of every vector store.

    When `list_files` is true, the filenames held by each store are printed
    underneath it, one per line.
    """
    for vec_name, vec_id in list_vectors().items():
        print(f"name {vec_name}, Id {vec_id} :")
        if not list_files:
            continue
        for filename in list_vector_files(vec_id):
            print(f"------{filename}")

# Define the equity analyst agent. `assistant` starts out empty; the widget
# callbacks later in this file assign the selected assistant id to it.
#
equity_analyst = Agent(
    name="Equity Analyst",
    assistant="",
    # The adjacent string literals below are concatenated by Python with no
    # separator. NOTE(review): several pieces end without a trailing space or
    # newline, so those sentences run together in the final prompt text.
    instructions=(
        "You are an Equity Analyst from Blue Hills Research."
        "Your client is a fund portfolio manager who rely on your deep knowledge on indivudal company and industry sector."
        "You should help him analyse a company, and you should ways carry analytical and critical reasoning."
        "If not specified the defaulted period always starts at the beginning of the year,"
        " and ends today."
        # NOTE(review): the date is hard-coded and will go stale — confirm
        # whether it should be generated at run time.
        "Today is December 14, 2024.\n"
        "Follow the following routine with the client:"
        "1. First, ask probing questions and understand the what he really like to understand.\n"
        " Your knowledge base is the files supplied, and you should use your knowledge base first to answer questions.\n"
        "2. Only if he specified, or you can't find an answer, or answer does not satisfy him, then\n"
        " decide further if you should invoke following actions:\n"
        " studies of business qualities,\n"
        " study financial performance,\n"
        " management quality, risks, company subsidaries, company capital allocation strategy,new_releases\n"
        " or simply get company's financials like Balance Sheets, Income Statements, Cashflows statements,\n"
        " Statement of Shareholders equity, exective compensations.\n"
        " You may also freely map a public company name to a ticker, or vise verser \n"
        " by involking map_by_name or map_by_ticker.\n"
        " You also allow users to study sections of company's filings by function study_filing_items.\n" 
        ""
    ),
    # Python callables handed to the agent; presumably exposed to the
    # assistant as callable tools — confirm against the Agent definition.
    tools=[
        study_business,study_financial_performance, study_management,study_capital_allocation, 
        study_risks, get_subsidaries, get_new_releases, 
        get_balance_sheet, get_income_statement, get_cashflow_statement, get_shareholders_equity,
        get_exec_comp, map_by_ticker, map_by_name, study_filing_items
    ],
    
)

In [17]:
import ipywidgets as widgets
from IPython.display import display, clear_output


# Optional hard-coded assistant name (disabled):
#ass_name ="Equity Analyst Agent"
# Name -> id lookups that back the two dropdowns below.
ass_dict= list_assistants()
vc_dict = list_vectors()

# Disabled debug output: print the list of assistants
#print(" List of assistants:")
#for ass_name in ass_dict: 
#    print(f"---{ass_name}---")
# Disabled debug output: print all vector stores
#print_vec_list(list_files=False)


# GUI widgets
# Dropdown for assistant selection; the options are the assistant names.
assistant_dropdown = widgets.Dropdown(
    options=ass_dict.keys(),
    description="Assistants:",
    layout=widgets.Layout(width="50%")
)
    
# Dropdown for vector store selection; the options are the vector store names.
vector_store_dropdown = widgets.Dropdown(
    options=vc_dict.keys(),
    description="Vector stores:",
    layout=widgets.Layout(width="50%")
)

# Output area widget that collects everything printed by the callbacks.
# NOTE(review): `font_size` may not be a recognised Layout property in
# ipywidgets — confirm that it has any effect.
output_area = widgets.Output(
    layout=widgets.Layout(font_size="20px")
)
# Text box for the user's message. With continuous_update=False the value is
# expected to change on submit rather than on every keystroke — confirm
# against the ipywidgets documentation.
user_input = widgets.Text(
    description="User:",
    placeholder="Type your message here...",
    continuous_update=False,
    layout=widgets.Layout(font_size="20px", width="100%")
)
# Send button. It is created here, but the on_click hookup further down is
# commented out and the button is not part of the displayed layout.
send_button = widgets.Button(description="Send",layout=widgets.Layout(width="20%"))
# The chat transcript is written to the same Output widget.
chat_output = output_area

# Inject CSS: any element inside a `.busy-cursor` container shows the wait cursor.
display(HTML("""
<style>
.busy-cursor * {
    cursor: wait !important;
}
</style>
"""))
# Context manager to toggle the busy cursor
class BusyCursor:
    """Context manager that brackets a block with busy-cursor markup.

    On entry an opening `<div class="busy-cursor">` fragment is displayed and
    on exit a closing `</div>` fragment. Exceptions raised inside the block
    are not suppressed.

    NOTE(review): the two fragments are emitted by separate display() calls;
    whether they actually wrap anything in the rendered page should be
    confirmed in the target front end.
    """

    def __enter__(self):
        opening_fragment = HTML('<div class="busy-cursor">')  # Apply busy cursor CSS
        display(opening_fragment)

    def __exit__(self, exc_type, exc_value, traceback):
        closing_fragment = HTML('</div>')  # Remove busy cursor CSS
        display(closing_fragment)

# Currently selected assistant (name and id); filled in by on_assistant_change.
selected_assistant = {"name": None, "id": None}
# Currently selected vector store (name and id); filled in by on_vector_change.
selected_vector = {"name": None, "id": None}
# The agent driven by this UI.
agent = equity_analyst
# Create the conversation thread once, at cell execution time; the callbacks
# below post messages to it and update its tool resources.
#thread = client.beta.threads.create()
#                      
thread= client.beta.threads.create()
#
# attach event to change selection of dropdown
def on_assistant_change(change):
    """Observer for the assistant dropdown.

    On a change of the widget's `value`, record the chosen assistant's name
    and id in `selected_assistant`, report the selection in the output area
    and point the agent at that assistant. Other events are ignored.
    """
    if change["type"] != "change" or change["name"] != "value":
        return

    with output_area:
        chosen_name = change["new"]
        selected_assistant["name"] = chosen_name
        selected_assistant["id"] = ass_dict[chosen_name]
        chosen_id = selected_assistant["id"]
        print(f"Assistant {chosen_name} selected with ID {chosen_id}.\n")
        agent.assistant = chosen_id

def on_vector_change(change):
    """Observer for the vector store dropdown.

    On a change of the widget's `value`, record the chosen store's name and id
    in `selected_vector`, attach that store to the thread's file_search tool
    resources and print the filenames it contains. Other events are ignored.
    """
    if change["type"] != "change" or change["name"] != "value":
        return

    with output_area:
        chosen_name = change["new"]
        selected_vector["name"] = chosen_name
        selected_vector["id"] = vc_dict[chosen_name]
        chosen_id = selected_vector["id"]
        print(f"Vector {chosen_name} selected with ID {chosen_id}.\n")

        # Make the thread search the newly selected vector store.
        file_search_resources = {"file_search": {"vector_store_ids": [chosen_id]}}
        client.beta.threads.update(
            thread_id=thread.id,
            tool_resources=file_search_resources,
        )

        for filename in list_vector_files(chosen_id):
            print(f"----{filename}-----")
 
# Function to handle user messages
#def on_send_clicked(b):
def on_enter(change):
    """Observer for the user input box: send the typed message to the agent.

    On a change of the widget's `value` to a non-empty string, the agent is
    pointed at the assistant currently selected in the dropdown, the message
    is echoed to the chat output and posted to the thread, one full agent turn
    is run, and the input box is cleared. All other events are ignored.
    """
    if change['type'] != 'change' or change['name'] != 'value':
        return
    # Clearing the box at the end of this handler changes `value` again and
    # re-fires this observer with an empty string. Ignore that event instead
    # of re-entering the busy-cursor and output contexts for nothing.
    if not change['new']:
        return

    with BusyCursor(),chat_output:
        # Always run the turn against the assistant shown in the dropdown,
        # even if the dropdown was never changed by the user.
        ass_name = assistant_dropdown.value
        agent.assistant = ass_dict[ass_name]

        user_message = user_input.value
        if user_message:
            print(f"User: {user_message}")
            client.beta.threads.messages.create(
                thread_id=thread.id,
                role="user",
                content=user_message,
            )
            run_full_turn(agent, thread)

        # Clear the input field
        user_input.value = ""


# Display the UI: the two dropdowns side by side, the chat output below them,
# and the user input box at the bottom.
display(widgets.VBox([
        widgets.HBox([assistant_dropdown, vector_store_dropdown]), 
        chat_output,
        widgets.HBox([user_input])
]))


# Wire the callbacks to changes of each widget's `value`.
# NOTE(review): the dropdown callbacks are only attached here and are not
# called for the initially shown options — confirm whether the first vector
# store should be attached to the thread without a user selection.
assistant_dropdown.observe(on_assistant_change, names="value")
vector_store_dropdown.observe(on_vector_change, names="value")
# The send button is not wired up; messages are sent via the text box instead.
#send_button.on_click(on_send_clicked)
user_input.observe(on_enter, names='value')

VBox(children=(HBox(children=(Dropdown(description='Assistants:', layout=Layout(width='50%'), options=('Equity…