In [1]:
from llama_index.schema import TextNode
import json
import os
from dotenv import load_dotenv

env_file_path = "../.env"
load_dotenv(dotenv_path=env_file_path)
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
COHERE_API_KEY = os.environ.get("COHERE_API_KEY")

In [2]:
company = "Meta"

path = f"../data/chunks/{company}.json"


with open(path, "r") as f:
    chunk_json = json.load(f)


In [3]:
from llama_index import ServiceContext
from llama_index.llms import OpenAI
import os 

llm = OpenAI(temperature=0, model="gpt-3.5-turbo")

service_context = ServiceContext.from_defaults(
    llm=llm,
)

In [4]:
nodes = []
for chunk in chunk_json:
    node = TextNode()
    node.text = chunk["content"]
    node.metadata = chunk["metadata"]
    nodes.append(node)

nodes

[TextNode(id_='a2498df9-99eb-4a79-84aa-47f14a447391', embedding=None, metadata={'Header 2': '(I.R.S. Employer Identification Number)'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='44136fa355b3678a1146ad16f7e8649e94fb4fc21fe77e8310c060f61caaff8a', text='20-1665019  \n(Address of principal executive offices and Zip Code) 1601 Willow Road, Menlo Park, California 94025 (Registrant\'s telephone number, including area code) (650) 543-4800 Securities registered pursuant to Section 12(b) of the Act:  \nSecurities registered pursuant to Section 12(g) of the Act: None  \nIndicate by check mark whether the registrant (1) has filed all reports required to be filed by Section 13 or 15(d) of the Securities Exchange Act of 1934 (Exchange Act) during the preceding 12 months (or for such shorter period that the registrant was required to file such reports), and (2) has been subject to such filing requirements for the past 90 days. Yes ☒ No ☐  \nIndicate by ch

In [5]:
from llama_index.retrievers import RecursiveRetriever
from llama_index.query_engine import RetrieverQueryEngine
from llama_index import VectorStoreIndex
from llama_index.vector_stores import FaissVectorStore
import faiss
from llama_index import StorageContext
from llama_index.embeddings import CohereEmbedding, OpenAIEmbedding


service_context = ServiceContext.from_defaults(llm=llm) 
vector_index = VectorStoreIndex(
    nodes=nodes, 
    service_context=service_context,
)

vector_retriever = vector_index.as_retriever(similarity_top_k=2)
engine = vector_index.as_query_engine(similarity_top_k=2)

from llama_index.retrievers import RecursiveRetriever

# recursive_retriever = RecursiveRetriever(
#     "vector",
#     retriever_dict={"vector": vector_retriever},
#     verbose=True,
# )


In [6]:
from llama_index.query_engine import SubQuestionQueryEngine, RetrieverQueryEngine
from llama_index.tools import QueryEngineTool, ToolMetadata
import nest_asyncio

nest_asyncio.apply()

# engine = RetrieverQueryEngine.from_args(
#     retriever=recursive_retriever, 
#     service_context=service_context
# )

query_engine_tools = [
    QueryEngineTool(
        query_engine=engine,
        metadata=ToolMetadata(
            name="Annual Report",
            description=f"Provides information about the company from its annual report.",
        )
    
    )
]

sub_question_query_engine = SubQuestionQueryEngine.from_defaults(
    query_engine_tools=query_engine_tools,
    service_context=service_context,
    verbose=True,
    use_async=True
)

In [7]:
with open("../data/prompts/V1.json") as f:
    prompts = json.load(f)

prompt_template = """
You are a business analyst tasked with providing an insightful analysis of specific sections in a company's report.

Rules to be followed while generating the insight:
- Desired Length: {desired_length}
- Complexity Level: {complexity_level}
- format: {output_format}
----------------------------------------------

Generate Insight for:
- Section Name: {section_name}
- Insight to be generated: As a business analyst, {specific_topic} 
- Additional Details: {specific_elements}
----------------------------------------------

"""

In [8]:
section = "competition_strategy"
prompt = prompt_template.format(**prompts[section])
response = sub_question_query_engine.query(prompt)

Generated 3 sub questions.
[1;3;38;2;237;90;200m[Annual Report] Q: What are the primary competitors of the company?
[0m[1;3;38;2;90;149;237m[Annual Report] Q: What is the market position of the company?
[0m[1;3;38;2;11;159;203m[Annual Report] Q: What strategic approaches does the company use to compete with its competitors?
[0m[1;3;38;2;90;149;237m[Annual Report] A: The company has a global sales force that focuses on attracting and retaining advertisers, and it operates offices in more than 90 cities around the world. Additionally, the company invests in marketing its products and services to grow its brand and build community. Based on this information, it can be inferred that the company has a strong market position and is actively working to expand its presence.
[0m[1;3;38;2;11;159;203m[Annual Report] A: The company uses various strategic approaches to compete with its competitors. These approaches include focusing on innovation, rapid adaptation to change, and the develop

In [9]:
print(response)

The company faces competition from various companies that provide connection, sharing, discovery, and communication products and services online. Additionally, it competes with companies that sell advertising to businesses and develop tools for managing advertising campaigns. The company also competes with companies that enable users to create, share, communicate, and discover content online, as well as those that help marketers reach their target audiences. Furthermore, the company faces competition in attracting and retaining users, businesses, and developers who use its products.

In terms of market position, the company has a strong presence globally, with a sales force operating in over 90 cities worldwide. This indicates that the company has established itself as a key player in the market and is actively working to expand its reach. The company also invests in marketing its products and services to further enhance its brand and build a strong community.

To compete with its riva

In [10]:
total_node_score = 0
node_count = 0
for i, node in enumerate(response.source_nodes):
    if node.score is not None:
        total_node_score += node.score
        node_count += 1

print(f"Average node score: {total_node_score/node_count}")

Average node score: 0.8174614113331441
