In [28]:
from llama_index.schema import TextNode
import json
import os
from dotenv import load_dotenv

env_file_path = "../.env"
load_dotenv(dotenv_path=env_file_path)
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
COHERE_API_KEY = os.environ.get("COHERE_API_KEY")

In [29]:
company = "Apple"

path = f"../data/chunks/{company}.json"


with open(path, "r") as f:
    chunk_json = json.load(f)


In [30]:
from llama_index import ServiceContext
from llama_index.llms import OpenAI
import os 

llm = OpenAI(temperature=0, model="gpt-3.5-turbo")

service_context = ServiceContext.from_defaults(
    llm=llm,
)

In [31]:
nodes = []
for chunk in chunk_json:
    node = TextNode()
    node.text = chunk["content"]
    node.metadata = chunk["metadata"]
    nodes.append(node)

nodes

[TextNode(id_='50827071-a61d-45ad-9b84-6f64a849800e', embedding=None, metadata={'Header 2': '(Mark One)'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='44136fa355b3678a1146ad16f7e8649e94fb4fc21fe77e8310c060f61caaff8a', text='☒ ANNUAL REPORT PURSUANT TO SECTION 13 OR 15(d) OF THE SECURITIES EXCHANGE ACT OF 1934  \nFor the fiscal year ended September 24, 2022 or  \nFor the transition period from to. Commission File Number: 001-36743 (Exact name of Registrant as specified in its charter) Apple Inc.  \n(Registrant’s telephone number, including area code) (408) 996-1010 Securities registered pursuant to Section 12(b) of the Act:', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'),
 TextNode(id_='431e729b-40fc-4a1d-9997-6199c56d61fb', embedding=None, metadata={'Header 2': '(Mark One)', 'Header 3': 'Yes ☐ No ☒'}, excluded_embed_metadata_keys=[], excluded_l

In [39]:
from llama_index.retrievers import RecursiveRetriever
from llama_index.query_engine import RetrieverQueryEngine
from llama_index import VectorStoreIndex
from llama_index.vector_stores import FaissVectorStore
import faiss
from llama_index import StorageContext
from llama_index.embeddings import CohereEmbedding, OpenAIEmbedding


service_context = ServiceContext.from_defaults(llm=llm) 
vector_index = VectorStoreIndex(
    nodes=nodes, 
    service_context=service_context,
)

vector_retriever = vector_index.as_retriever(similarity_top_k=3)

from llama_index.retrievers import RecursiveRetriever

recursive_retriever = RecursiveRetriever(
    "vector",
    retriever_dict={"vector": vector_retriever},
    verbose=True,
)


In [40]:
query = "List out the products and services offered by the company in a neatly formatted way"
retrieved_nodes = recursive_retriever.retrieve(query)

[1;3;34mRetrieving with query id None: List out the products and services offered by the company in a neatly formatted way
[0m[1;3;38;5;200mRetrieving text node: Additionally, the Company faces significant competition as competitors imitate the Company’s product features and applications within their products or collaborate to offer solutions that are more competitive than those they currently offer. The Company also expects competition to intensify as competitors imitate the Company’s approach to providing components seamlessly within their offerings or work collaboratively to offer integrated solutions.  
The Company’s services also face substantial competition, including from companies that have significant resources and experience and have established service offerings with large customer bases. The Company competes with business models that provide content to users for free. The Company also competes with illegitimate means to obtain third-party digital content and applications

In [41]:
for node in retrieved_nodes:
    print(node.text)
    print(node.score)
    print(node.metadata)
    print("-"*30)

Additionally, the Company faces significant competition as competitors imitate the Company’s product features and applications within their products or collaborate to offer solutions that are more competitive than those they currently offer. The Company also expects competition to intensify as competitors imitate the Company’s approach to providing components seamlessly within their offerings or work collaboratively to offer integrated solutions.  
The Company’s services also face substantial competition, including from companies that have significant resources and experience and have established service offerings with large customer bases. The Company competes with business models that provide content to users for free. The Company also competes with illegitimate means to obtain third-party digital content and applications.  
The Company’s business, results of operations and financial condition depend substantially on the Company’s ability to continually improve its products and servi

In [42]:
# from llama_index.postprocessor.cohere_rerank import CohereRerank
from llama_index.postprocessor import LLMRerank
from llama_index.schema import QueryBundle

reranker = LLMRerank(
    top_n=5,
    service_context=service_context
)

query_bundle = QueryBundle(query)
reranked_nodes = reranker.postprocess_nodes(nodes=retrieved_nodes, query_bundle=query_bundle)


In [43]:
for node in reranked_nodes:
    print(node.text)
    print(node.score)
    print(node.metadata)
    print("-"*30)

The Company’s advertising services include various third-party licensing arrangements and the Company’s own advertising platforms.  
AppleCare The Company offers a portfolio of fee-based service and support products under the AppleCare® brand. The offerings provide priority access to Apple technical support, access to the global Apple authorized service network for repair and replacement services, and in many cases additional coverage for instances of accidental damage and/or theft and loss, depending on the country and type of product.  
The Company’s cloud services store and keep customers’ content up-to-date and available across multiple Apple devices and Windows personal computers.  
The Company operates various platforms, including the App Store®, that allow customers to discover and download applications and digital content, such as books, music, video, games and podcasts.  
The Company also offers digital content through subscription-based services, including Apple Arcade®, a ga