In [1]:
! pip install -q torch transformers accelerate transformers sentence-transformers faiss-cpu


[notice] A new release of pip is available: 24.0 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
! pip install -q langchain langchain-community jq


[notice] A new release of pip is available: 24.0 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
from langchain_community.document_loaders import JSONLoader
import os


# loop through PDFs and load them with PyPDFLoader
file_path = "../jsons"
docs = []
loader = JSONLoader(
    file_path+"/2023.json", 
    jq_schema=".PatentBulkData", 
    text_content=False
)
# for file in os.listdir(file_path):
#     if file.endswith('.pdf'):
#         pdf_path = os.path.join(file_path, file)
#         loader = PyPDFLoader(pdf_path)
#         docs.extend(loader.load())

collection = loader.load()

# Convert JSON data to a list of Document objects
docs = [Document(text=json.dumps(item)) for item in data["Data"]]



KeyboardInterrupt: 

In [2]:
# inspect the content of a document
collection[2].page_content

'Paleo Lunch Options:  a) Grilled chicken salad: • 4 oz grilled chicken breast • 2 cups mixed greens • 1/4 cup sliced cucumbers • 1/4 cup cherry tomatoes • 1/4 avocado, diced • 2 tbsp pumpkin seeds • Dressing: 1 tbsp olive oil, 1 tsp lemon juice, herbs b) Tuna lettuce wraps: • 4 oz canned tuna (in water) • 1 tbsp paleo-friendly mayonnaise • 1 tbsp diced celery • 1 tbsp diced red onion • 1 tsp dijon mustard • Serve in 3-4 large lettuce leaves c) Beef and vegetable stir-fry: • 4 oz grass-fed beef strips • 1 cup mixed vegetables (broccoli, carrots, snap peas) • 1 tbsp coconut aminos • 1 tbsp coconut oil for cooking • 1 clove garlic, minced • Serve over cauliflower rice         '

In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# split documents into smaller chunks
splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=0)

chunked_docs = splitter.split_documents(docs)

In [4]:
# inspect a chunk
chunked_docs[2]

Document(metadata={'source': '../pdfs/paleo_internal_plan.pdf', 'page': 1}, page_content='Paleo Breakfast Options:  a) Paleo breakfast bowl: • 2 eggs, fried or poached • 1/4 avocado, sliced • 1/2 cup sautéed mixed vegetables (spinach, mushrooms, bell peppers) • 2 slices of bacon (uncured, no added nitrates) • 1/4 cup berries on the side b) Sweet potato hash: • 1 small sweet potato, diced and roasted • 3 oz ground turkey or chicken, seasoned and cooked • 1/4 cup diced onions • 1/4 cup diced bell peppers • 1 tbsp coconut oil for cooking • Herbs: rosemary, thyme c) Paleo smoothie: • 1 cup unsweetened almond milk • 1/2 banana • 1 tbsp almond butter • 1 scoop paleo-friendly protein powder • 1 cup spinach • 1/2 cup mixed berries • Ice cubes')

In [5]:
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

# initialize vectorstore with FAISS and HuggingFace embeddings
db = FAISS.from_documents(chunked_docs, HuggingFaceEmbeddings(model_name="BAAI/bge-base-en-v1.5"))

  warn_deprecated(
  from tqdm.autonotebook import tqdm, trange


In [11]:
# create a retriever using the vectorstore
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 1})

In [12]:
# test the retriever
test = retriever.invoke('Paleo Breakfast')
print(len(test))

print(test)

1
[Document(metadata={'source': '../pdfs/paleo_internal_plan.pdf', 'page': 1}, page_content='Paleo Breakfast Options:  a) Paleo breakfast bowl: • 2 eggs, fried or poached • 1/4 avocado, sliced • 1/2 cup sautéed mixed vegetables (spinach, mushrooms, bell peppers) • 2 slices of bacon (uncured, no added nitrates) • 1/4 cup berries on the side b) Sweet potato hash: • 1 small sweet potato, diced and roasted • 3 oz ground turkey or chicken, seasoned and cooked • 1/4 cup diced onions • 1/4 cup diced bell peppers • 1 tbsp coconut oil for cooking • Herbs: rosemary, thyme c) Paleo smoothie: • 1 cup unsweetened almond milk • 1/2 banana • 1 tbsp almond butter • 1 scoop paleo-friendly protein powder • 1 cup spinach • 1/2 cup mixed berries • Ice cubes')]


In [13]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

model_name ="TinyLlama/TinyLlama-1.1B-Chat-v1.0"

save_directory = "model_directory"

# directly load model since we saved it in the previous notebook
model = AutoModelForCausalLM.from_pretrained(save_directory)
tokenizer = AutoTokenizer.from_pretrained(save_directory)

In [14]:
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from transformers import pipeline
from langchain_core.output_parsers import StrOutputParser

# create a text generation pipeline
text_generation_pipeline = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    temperature=0.2,
    do_sample=True,
    repetition_penalty=1.1,
    return_full_text=True,
    max_new_tokens=400,
)

llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

prompt_template = """
<|system|>
Answer the question based on your knowledge. Use the following context to help:

{context}

</s>
<|user|>
{question}
</s>
<|assistant|>

 """

prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

llm_chain = prompt | llm | StrOutputParser()

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
  warn_deprecated(


In [15]:
from langchain_core.runnables import RunnablePassthrough

# extend llm chain with RAG
rag_chain = {"context": retriever, "question": RunnablePassthrough()} | llm_chain

In [16]:
question = "What are some ketogenic breakfast options?"

In [17]:
# invoke LLM chain for baseline response
result = llm_chain.invoke({"context": "", "question": question})

result

"\n<|system|>\nAnswer the question based on your knowledge. Use the following context to help:\n\n\n\n</s>\n<|user|>\nWhat are some ketogenic breakfast options?\n</s>\n<|assistant|>\n\n 1. Greek yogurt with berries and granola: This is a great option for a protein-packed breakfast that's also low in carbs. Greek yogurt is high in protein, while berries provide fiber and antioxidants. Top it off with granola for added crunch and flavor.\n\n2. Smoothie: Blend together frozen berries, almond milk, spinach, and a scoop of protein powder or Greek yogurt for a nutrient-dense breakfast that's easy to make.\n\n3. Overnight oats: Mix rolled oats, almond milk, chia seeds, and your choice of toppings (such as nuts, dried fruit, and honey) in a jar and let it sit in the fridge overnight. In the morning, top with fresh fruit and enjoy a satisfying and filling breakfast.\n\n4. Avocado toast: Toast a slice of whole grain bread and spread mashed avocado on top. Add sliced tomatoes, red onions, and a s

In [18]:
# invoke RAG chain for response based on knowledge base
result = rag_chain.invoke(question)

result

"\n<|system|>\nAnswer the question based on your knowledge. Use the following context to help:\n\n[Document(metadata={'source': '../pdfs/ketogenic_internal_plan.pdf', 'page': 1}, page_content='Detailed Meal Plan Ketogenic Breakfast Options:  a) Keto breakfast bowl: • 2 scrambled eggs • 1/4 avocado, sliced • 2 strips of bacon • 1/4 cup sautéed spinach • 1 tbsp butter for cooking b) Cream cheese pancakes: • 2 oz cream cheese • 2 eggs • 1/4 tsp cinnamon • 1 tbsp butter for cooking • Top with 2 tbsp sugar-free syrup c) Keto smoothie: • 1 cup unsweetened almond milk • 1/2 avocado • 1 tbsp almond butter • 1 scoop low-carb protein powder • 1 cup spinach • Ice cubes')]\n\n</s>\n<|user|>\nWhat are some ketogenic breakfast options?\n</s>\n<|assistant|>\n\n 1. Keto breakfast bowl:\n    - 2 scrambled eggs\n    - 1/4 avocado, sliced\n    - 2 strips of bacon\n    - 1/4 cup sautéed spinach\n    - 1 tablespoon butter for cooking\n\n2. Cream cheese pancakes:\n    - 2 oz cream cheese\n    - 2 eggs\n    

In [19]:
from pprint import pprint

pprint(result)

('\n'
 '<|system|>\n'
 'Answer the question based on your knowledge. Use the following context to '
 'help:\n'
 '\n'
 "[Document(metadata={'source': '../pdfs/ketogenic_internal_plan.pdf', 'page': "
 "1}, page_content='Detailed Meal Plan Ketogenic Breakfast Options:  a) Keto "
 'breakfast bowl: • 2 scrambled eggs • 1/4 avocado, sliced • 2 strips of bacon '
 '• 1/4 cup sautéed spinach • 1 tbsp butter for cooking b) Cream cheese '
 'pancakes: • 2 oz cream cheese • 2 eggs • 1/4 tsp cinnamon • 1 tbsp butter '
 'for cooking • Top with 2 tbsp sugar-free syrup c) Keto smoothie: • 1 cup '
 'unsweetened almond milk • 1/2 avocado • 1 tbsp almond butter • 1 scoop '
 "low-carb protein powder • 1 cup spinach • Ice cubes')]\n"
 '\n'
 '</s>\n'
 '<|user|>\n'
 'What are some ketogenic breakfast options?\n'
 '</s>\n'
 '<|assistant|>\n'
 '\n'
 ' 1. Keto breakfast bowl:\n'
 '    - 2 scrambled eggs\n'
 '    - 1/4 avocado, sliced\n'
 '    - 2 strips of bacon\n'
 '    - 1/4 cup sautéed spinach\n'
 '    - 1 ta