In [None]:
# Expose the OpenAI API key to this kernel as an environment variable; a later
# cell reads it back through os.environ. Replace the placeholder with a real
# key before running, and avoid saving/committing the notebook with it filled in.
%env OPENAI_API_KEY=<PUT_YOUR_API_KEY_HERE>


In [2]:
import openai, os
from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader

# Read the key from the environment and fail fast with an actionable message
# instead of silently handing None (or the unedited "<...>" placeholder) to
# the OpenAI client, which would only surface later as an opaque API error.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key or api_key.startswith("<"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set to a real key; set it in the environment "
        "before building the index."
    )
openai.api_key = api_key

# Read the documents under ./source_data and build a vector store index from them.
documents = SimpleDirectoryReader('./source_data').load_data()
index = GPTVectorStoreIndex.from_documents(documents)

# Persist the index to disk; a later cell loads it back from this directory.
index.storage_context.persist(persist_dir='./data/index_macbook_air_m3')


In [28]:
from llama_index import StorageContext, load_index_from_storage

# Load the index back from the directory it was persisted to.
storage_context = StorageContext.from_defaults(
    persist_dir="./data/index_macbook_air_m3",
)
index = load_index_from_storage(storage_context)

# Query engine with default settings; the next cell reuses it.
query_index = index.as_query_engine()

m3_release_question = (
    "What is the expected release date for the MacBook Air "
    "based on the M3 processor?"
)
response = query_index.query(m3_release_question)
print(response)


The expected release date for the MacBook Air based on the M3 processor is sometime in the first half of 2024, between the spring and summer of next year at the earliest.


In [29]:
# Ask a second question through the query engine created in the previous cell.
m2_release_question = "When was the 13-inch MacBook Air based on the M2 processor released?"
response = query_index.query(m2_release_question)
print(response)


The 13-inch MacBook Air based on the M2 processor was released in June 2022.


In [32]:
from llama_index import QuestionAnswerPrompt
# Question-answering template text. {context_str} and {query_str} are the two
# placeholders it contains. The lines are joined with "\n"; the final empty
# entry yields the trailing newline.
DEFAULT_TEXT_QA_PROMPT_TMPL = "\n".join(
    [
        "Context information is below. ",
        "---------------------",
        "{context_str}",
        "---------------------",
        "Given the context information and not prior knowledge, "
        "answer the question: {query_str}",
        "",
    ]
)
QA_PROMPT = QuestionAnswerPrompt(DEFAULT_TEXT_QA_PROMPT_TMPL)

# Rebuild the query engine so it answers with the template defined above.
query_index = index.as_query_engine(text_qa_template=QA_PROMPT)

query_str = "What is the expected release date for the MacBook Air based on the M3 processor?"
response = query_index.query(query_str)
print(response)


The expected release date for the MacBook Air based on the M3 processor is sometime in the first half of 2024, between the spring and summer of next year at the earliest.


In [33]:
# Template that names the product the sizes refer to and instructs the model to
# reply "I do not know." when it cannot answer. The lines are joined with
# "\n"; the final empty entry yields the trailing newline.
QA_PROMPT_TMPL = "\n".join(
    [
        "Based on the following information, where \"13-inch\" and \"15-inch\" refer to Apple's MacBook Air ",
        "---------------------",
        "{context_str}",
        "---------------------",
        "please answer the question: {query_str}",
        "If you do not know the answer, please respond with \"I do not know.\"",
        "",
    ]
)
QA_PROMPT = QuestionAnswerPrompt(QA_PROMPT_TMPL)
query_index = index.as_query_engine(text_qa_template=QA_PROMPT)

# Question about a different product line (presumably not covered by the
# indexed documents) to exercise the fallback instruction.
pixel_question = "When will the next generation of Google Pixel series smartphones be released?"
response = query_index.query(pixel_question)
print(response)


I do not know.


In [36]:
from langchain.chat_models import ChatOpenAI
from langchain.text_splitter import SpacyTextSplitter
from llama_index import GPTListIndex, LLMPredictor, ServiceContext
from llama_index.node_parser import SimpleNodeParser

# LLM configuration: gpt-3.5-turbo chat model at temperature 0, with
# max_tokens set to 1024, wrapped for use by llama_index.
chat_llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, max_tokens=1024)
llm_predictor = LLMPredictor(llm=chat_llm)
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor)

# Node parsing: split the source documents with a spaCy-based text splitter
# (pipeline "en_core_web_sm", chunk_size 2048).
text_splitter = SpacyTextSplitter(pipeline="en_core_web_sm", chunk_size=2048)
parser = SimpleNodeParser(text_splitter=text_splitter)
documents = SimpleDirectoryReader('./source_data').load_data()
nodes = parser.get_nodes_from_documents(documents)

# List index over the parsed nodes, using the service context defined above.
list_index = GPTListIndex(
    nodes=nodes,
    service_context=service_context,
)




<llama_index.indices.list.base.SummaryIndex object at 0x7fac33891a20>


In [39]:
# Bind the query engine to its own name instead of rebinding `index`: earlier
# cells use `index` for the vector index and call `.as_query_engine(...)` on
# it, so overwriting it with a query engine breaks re-running those cells.
summary_query_engine = list_index.as_query_engine(response_mode="tree_summarize")
response = summary_query_engine.query("Please summarize the news about the MacBook Air that was read in the previous steps.")
print(response)


Apple is planning to release new versions of the MacBook Air with M3 chips. These models are expected to have similar CPU and GPU core counts as the current M2 chip. The M3 chip is said to be up to 20% faster than the M2 chip. The release of the new MacBook Airs is scheduled for the first half of 2024, with a possible launch between spring and summer. The exact order of release for the 13-inch and 15-inch models is uncertain and may depend on supplier production capacity.


In [42]:
from llama_index import SimpleDirectoryReader, GPTVectorStoreIndex
from llama_index.readers.file.base import (
    DEFAULT_FILE_READER_CLS,
    ImageReader,
)
from llama_index.response.notebook_utils import display_response, display_image
from llama_index.indices.query.query_transform.base import ImageOutputQueryTransform

# Image reader configured to keep the image and parse its text.
image_parser = ImageReader(keep_image=True, parse_text=True)

# Build a *copy* of the library's default extension->reader mapping with the
# image extensions overridden. Calling .update() on DEFAULT_FILE_READER_CLS
# itself would mutate the shared module-level default for the whole kernel
# and silently affect every other reader that relies on it.
file_extractor = {
    **DEFAULT_FILE_READER_CLS,
    ".jpg": image_parser,
    ".png": image_parser,
    ".jpeg": image_parser,
}


# NOTE: we add filename as metadata for all documents
def filename_fn(filename):
    """Return the metadata dict for one file: its name under 'file_name'."""
    return {'file_name': filename}


receipt_reader = SimpleDirectoryReader(
    input_dir='./source_data/receipts',
    file_extractor=file_extractor,
    file_metadata=filename_fn,
)
receipt_documents = receipt_reader.load_data()


Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.


(…)netuned-cord-v2/resolve/main/config.json:   0%|          | 0.00/4.74k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/806M [00:00<?, ?B/s]

In [44]:
# Vector index over the receipt documents loaded in the previous cell.
receipts_index = GPTVectorStoreIndex.from_documents(receipt_documents)

# NOTE: `index` is rebound to a query engine here; the name is kept as-is in
# case later cells rely on it.
index = receipts_index.as_query_engine(query_transform=ImageOutputQueryTransform(width=400))

# Adjacent string literals are used instead of a backslash continuation inside
# the literal, which embedded the next line's indentation into the prompt.
receipts_response = index.query(
    "When was the last time I went to McDonald's and how much did I spend. "
    "Also show me the receipt from my visit."
)

display_response(receipts_response)


**`Final Response:`** The last time you went to McDonald's was on March 10, 2018 at 07:39:12 PM. You spent a total of $26.15. Here is the receipt from your visit:

QTY ITEM              TOTAL
1   10 McNuggets EVM  $10.29
1   Barbeque Sauce    $1.00
1   Barbeque Sauce    $0.40
1   L Coke            $0.40
1   M French Fries    $3.99
1   HM GrChS S-Fry Yog
1   Smoonya
1   HM Apple Juice    $2.89
6   Cookies           $2.89
6   Choc Chip Cookie  $1.19
1   Baked Apple Pie   $3.29
1   French Fries      $2.99
1   Iced Tea          $2.99

Subtotal: $25.04
Tax: $1.11
Total: $26.15

In [50]:
# Run the image parser directly on one receipt file and print the text of the
# first document it returns.
receipt_path = './source_data/receipts/1100-receipt.jpg'
output_image = image_parser.load_data(receipt_path)
first_document = output_image[0]
print(first_document.text)


<s_menu><s_nm> Story</s_nm><s_num> 16725 Stony Platin Rd</s_nm><s_num> Store#:</s_nm><s_num> 3659</s_num><s_price> 700-418-8362</s_price><sep/><s_nm> Welcome to all day breakfast dormist O Md Donald's</s_nm><s_num> 192</s_num><s_price> 192</s_price><sep/><s_nm> QTY ITEM</s_nm><s_num> OTAL</s_num><s_unitprice> 03/10/2018</s_unitprice><s_cnt> 1</s_cnt><s_price> 07:39:12 PM</s_price><sep/><s_nm> Delivery</s_nm><s_cnt> 1</s_cnt><s_price> 0.00</s_price><sep/><s_nm> 10 McNuggets EVM</s_nm><s_cnt> 1</s_cnt><s_price> 10.29</s_price><sep/><s_nm> Barbeque Sauce</s_nm><s_cnt> 1</s_cnt><s_price> 1</s_price><sep/><s_nm> Barbeque Sauce</s_nm><s_num> 1</s_cnt><s_price> 0.40</s_price><sep/><s_nm> L Coke</s_nm><s_cnt> 1</s_cnt><s_price> 0.40</s_price><sep/><s_nm> M French Fries</s_nm><s_cnt> 1</s_cnt><s_price> 3.99</s_price><sep/><s_nm> HM GrChS S-Fry Yog</s_nm><s_cnt> 1</s_cnt><sep/><s_nm> Smoonya</s_nm><s_cnt> 1</s_cnt><sep/><s_nm> HM Apple Juice</s_nm><s_cnt> 1</s_cnt><s_price> 2.89</s_price><sep/><