# Setup

In [None]:
# Import the userdata module from Google Colab
from google.colab import userdata
# Retrieve the API key stored under 'genai_course' from Colab's userdata
api_key = userdata.get('genai_course')

In [None]:
# Mount the drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Change directory to this folder
%cd /content/drive/MyDrive/GenAI/RAG/RAG with OpenAI

# Perform OCR and transform to images

In [1]:
# Install the pdf2image library for converting PDF files to images
!pip install pdf2image
# Install the poppler-utils package, required by pdf2image to work with PDF files
!apt-get install -y poppler-utils

Collecting pdf2image
  Downloading pdf2image-1.17.0-py3-none-any.whl.metadata (6.2 kB)
Downloading pdf2image-1.17.0-py3-none-any.whl (11 kB)
Installing collected packages: pdf2image
Successfully installed pdf2image-1.17.0
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following NEW packages will be installed:
  poppler-utils
0 upgraded, 1 newly installed, 0 to remove and 49 not upgraded.
Need to get 186 kB of archives.
After this operation, 696 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 poppler-utils amd64 22.02.0-2ubuntu0.5 [186 kB]
Fetched 186 kB in 1s (319 kB/s)
Selecting previously unselected package poppler-utils.
(Reading database ... 123633 files and directories currently installed.)
Preparing to unpack .../poppler-utils_22.02.0-2ubuntu0.5_amd64.deb ...
Unpacking poppler-utils (22.02.0-2ubuntu0.5) ...
Setting up poppler-utils (22.02.0-2ubuntu0.5) ...
Processing tr

In [2]:
# Import the libraries
from pdf2image import convert_from_path
import os

In [None]:
# Function to converts pdfs into images and stores the image paths
def pdf_to_images(pdf_path, output_folder):
  # Create the output folder if it doesn't exist
  if not os.path.exists(output_folder):
    os.makedirs(output_folder)

  # Convert PDF into images
  images = convert_from_path(pdf_path) # Convert each page of the PDF to an image
  image_paths = []

  # Save images and store their paths
  for i, image in enumerate(images):
    image_path = os.path.join(output_folder, f"page{i+1}.jpg") # Generate the image file path
    image.save(image_path, "JPEG") # Save the image as a JPEG file
    image_paths.append(image_path) # Append the image path to the list

  return image_paths # Return the list of image paths

In [None]:
# Define the path to the PDF and the output folder for images
pdf_path = "Things mother used to make.pdf"
output_folder = "images"

# Convert the PDF into images and store the image paths
image_paths = pdf_to_images(pdf_path, output_folder)

In [3]:
# Install the openAI library
!pip install openai



In [4]:
# Import the libraries
from openai import OpenAI
import base64

In [None]:
# Set up connection to OpenAI API
client = OpenAI(
    api_key=api_key, # Use the provided API key for authentication
)
# Specify the model to be used
model = "gpt-4o-mini"

In [None]:
# Read and encode one image
image_path = "images/page23.jpg" # Path to the image to be encoded

# Encode the image in base64 and decode to string
with open(image_path, "rb") as image_file:
  image_data = base64.b64encode(image_file.read()).decode('utf-8')
image_data

'/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCAU2A0IDASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwD3+iiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAooooAKKKKACiiigAo

In [None]:
# Define the system prompt
system_prompt = """
Please analyze the content of this image and extract any related recipe information.
"""

In [None]:
# Call the OpenAI API use the chat completion method
response = client.chat.completions.create(
    model = model,
    messages = [
        # Provide the system prompt
        {"role": "system", "content": system_prompt},

        # The user message contains both the text and image URL / path
        {"role": "user", "content": [
            "This is the imsage from the recipe page.",
            {"type": "image_url",
             "image_url": {"url": f"data:image/jpeg;base64,{image_data}",
                           "detail": "low"}}
        ]}
    ]
)

In [None]:
# Retrieve the content
gpt_response = response.choices[0].message.content

In [5]:
from IPython.display import Markdown, display

# Display the GPT response as Markdown
display(Markdown(gpt_response))

In [None]:
# Define a function to get the GPT response and display it in Markdown
def get_gpt_response():
  gpt_response = response.choices[0].message.content # Extract the response content from the API response
  return display(Markdown(gpt_response)) # Display the response as Markdown

# Call the function to display the GPT response
get_gpt_response()

Here are the recipes extracted from the image:

### Bannocks
**Ingredients:**
- 1 Cupful of Thick Sour Milk
- ½ Cupful of Sugar
- 2 Cupfuls of Flour
- ½ Cupful of Indian Meal
- 1 Teaspoonful of Soda
- A pinch of Salt

**Instructions:**
1. Make the mixture stiff enough to drop from a spoon.
2. Drop mixture, size of a walnut, into boiling fat.
3. Serve warm with maple syrup.

---

### Boston Brown Bread
**Ingredients:**
- 1 Cupful of Rye Meal
- 1 Cupful of Sour Milk
- 1 Cupful of Graham Meal
- 1 Cupful of Molasses
- 1 Cupful of Flour
- ½ Teaspoonful of Indian Meal
- 1 Cupful of Sweet Milk
- 1 Heaping Teaspoonful of Soda

**Instructions:**
1. Stir the meals and salt together.
2. Beat the soda into the molasses until it foams; add sour milk, mix well, and pour into a tin pan which has been well greased.
3. If you have no brown-bread steamer, bake in the oven.

Feel free to let me know if you need any more help!

In [None]:
# Define improved system prompt
system_prompt2 = """
Please analyze the content of this image and extract any related recipe information into structure components.
Specifically, extra the recipe title, list of ingredients, step by step instructions, cuisine type, dish type, any relevant tags or metadata.
The output must be formatted in a way suited for embedding in a Retrieval Augmented Generation (RAG) system.
"""

In [None]:
# Call the api to extract the information
response = client.chat.completions.create(
    model = model,
    messages = [
        # Provide the system prompt
        {"role": "system", "content": system_prompt2},

         # The user message contains both the text and image URL / path
        {"role": "user", "content": [
            "This is the image from the recipe page",
            {"type": "image_url",
             "image_url": {"url": f"data:image/jpeg;base64,{image_data}",
                           "detail": "low"}}
        ]}
    ],
    temperature = 0, # Set the temperature to 0 for deterministic output
)

In [None]:
# Print the info from the page with the improved prompt
get_gpt_response()

Here’s the structured information extracted from the recipe image:

### Recipe Title
Breads

### Ingredients
#### Bannocks
- 1 Cupful of Thick Sour Milk
- ½ Cupful of Sugar
- 2 Cupfuls of Flour
- ½ Cupful of Indian Meal
- 1 Teaspoonful of Soda
- A pinch of Salt

#### Boston Brown Bread
- 1 Cupful of Rye Meal
- 1 Cupful of Graham Meal
- 1 Cupful of Molasses
- 1 Cupful of Flour
- 1 Cupful of Sweet Milk
- 1 Cupful of Sour Milk
- ½ Teaspoonful of Salt
- 1 Teaspoonful of Soda
- 1 Heaping Teaspoonful of Baking Powder

### Step-by-Step Instructions
#### Bannocks
1. Make the mixture stiff enough to drop from a spoon.
2. Drop mixture, size of a walnut, into boiling fat.
3. Serve warm, with maple syrup.

#### Boston Brown Bread
1. Stir the meals and salt together.
2. Beat the soda into the molasses until it foams; add sour milk, mix well, and pour into a tin pan which has been well greased.
3. If you have no brown-bread steamer, use a regular oven.

### Cuisine Type
Traditional American

### Dish Type
Breads

### Relevant Tags/Metadata
- Quick Bread
- Breakfast
- Comfort Food
- Homemade

This format is suitable for embedding in a Retrieval Augmented Generation (RAG) system.

In [None]:
# Extract information about all of the images/recipes
extracted_recipes = []

for image_path in image_paths:
  print(f"Processing image {image_path}")

  # Reading and decoding the image
  with open(image_path, "rb") as image_file:
    image_data = base64.b64encode(image_file.read()).decode("utf-8") # Encode the image to base64 format

  # Call the API to extract the information
  response = client.chat.completions.create(
      model = model,
      messages = [
          # Provide system prompt for guidance
          {"role": "system", "content": system_prompt2},

          # The user message contains both the text and image URL / path
          {"role": "user", "content": [
              "This is the image from the recipe page", # Context for the image
              {"type": "image_url",
              "image_url": {"url": f"data:image/jpeg;base64,{image_data}", # Provide the base64 image
                            "detail": "low"}}
          ]}
      ],
      temperature = 0, # Set the temperature to 0 for deterministic output
  )

  # Extract the content and store it
  gpt_response = response.choices[0].message.content # Get the response content
  extracted_recipes.append({"image_path": image_path, "recipe_info": gpt_response}) # Store the path and extracted info
  print(f"Extracted information for {image_path}:\n{gpt_response}\n") # Print the extracted information for review

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
### Cuisine Type
American

### Dish Type
Dessert

### Relevant Tags
- Baking
- Fruit Dessert
- Traditional

---

### Recipe Title
Quick Graham Bread

### Ingredients
- 1 Pint of Graham Meal
- 1 Cup of Soda
- ½ Cup of Molasses
- 1 Cup of Sour Milk
- ½ Teaspoon of Salt

### Instructions
1. Stir the molasses, add sour milk and salt.
2. Mix well and sweeten with soda.
3. Bake thirty minutes or according to the heat of the oven. A moderate oven is best.

### Cuisine Type
American

### Dish Type
Bread

### Relevant Tags
- Baking
- Quick Bread
- Traditional

This structured format is suitable for embedding in a Retrieval Augmented Generation (RAG) system.

Processing image images/page28.jpg
Extracted information for images/page28.jpg:
Here’s the structured information extracted from the recipe image:

### Recipe Title
Graham Bread (raised over night)

### Ingredients
- 3 Cupfuls of Graham Flour
- 1 Tablespoonful of Lard
- 3 Cupf

In [None]:
# Filter out non-recipe content based on key recipe-related terms
filtered_recipes = []

for recipe in extracted_recipes:
  # Check if the extracted content contains any key recipe-related terms
  if any(keyword in recipe["recipe_info"].lower() for keyword in ["ingredients",
                                                                  "instructions",
                                                                  "recipe title"]):
     # If it does, add it to the filtered list
    filtered_recipes.append(recipe)

  # Print a message for non-recipe content
  else:
    print(f"Skipping recipe: {recipe['image_path']}")

Skipping recipe: images/page1.jpg
Skipping recipe: images/page2.jpg
Skipping recipe: images/page3.jpg
Skipping recipe: images/page4.jpg
Skipping recipe: images/page5.jpg
Skipping recipe: images/page6.jpg
Skipping recipe: images/page8.jpg
Skipping recipe: images/page10.jpg
Skipping recipe: images/page11.jpg
Skipping recipe: images/page12.jpg
Skipping recipe: images/page20.jpg
Skipping recipe: images/page21.jpg
Skipping recipe: images/page22.jpg
Skipping recipe: images/page106.jpg
Skipping recipe: images/page107.jpg
Skipping recipe: images/page108.jpg
Skipping recipe: images/page112.jpg
Skipping recipe: images/page126.jpg
Skipping recipe: images/page133.jpg
Skipping recipe: images/page134.jpg
Skipping recipe: images/page135.jpg
Skipping recipe: images/page136.jpg


In [6]:
# import json library
import json

In [None]:
# Define the output file path
output_file = "recipe_info.json"

# Write the filtered list to a json file
with open(output_file, "w") as json_file:
  json.dump(filtered_recipes, json_file, indent = 4)

# Embeddings

In [7]:
# import libraries
import numpy as np

In [None]:
# Load the filtered recipes
with open("recipe_info.json", "r") as json_file:
  filtered_recipes = json.load(json_file)

another options would be to organize per recipe, but it should be done in the preprocessing

In [None]:
# Generate embeddings for each recipe
recipe_texts = [recipe["recipe_info"] for recipe in filtered_recipes] # Extract the text content of each recipe

# Call the API to generate embeddings for the recipe texts
embedding_response = client.embeddings.create(
    input = recipe_texts, # Provide the list of recipe texts as input
    model = "text-embedding-3-large" # Specify the embedding model to use
)

In [None]:
# Extract the embeddings
embeddings = [data.embedding for data in embedding_response.data]
embeddings

[[-0.018192430958151817,
  -0.03411807492375374,
  -0.0201831366866827,
  -0.015010208822786808,
  0.026213375851511955,
  -0.035687390714883804,
  -0.016187194734811783,
  0.0008405099506489933,
  -0.015751274302601814,
  0.023336296901106834,
  0.030049478635191917,
  -0.03905851021409035,
  -0.007512369658797979,
  0.013651588931679726,
  0.03034009411931038,
  -0.03728576749563217,
  -0.013956733047962189,
  0.031240995973348618,
  -0.010883491486310959,
  -0.054228559136390686,
  0.016419686377048492,
  -0.0006638711784034967,
  0.04240057244896889,
  0.008747478947043419,
  -0.015068331733345985,
  -0.0026500357780605555,
  -0.037605442106723785,
  0.006851223297417164,
  0.017131689935922623,
  0.01920958049595356,
  0.010542020201683044,
  0.023103807121515274,
  0.017451366409659386,
  -0.024309854954481125,
  -0.02778269164264202,
  -0.028392979875206947,
  0.008355149999260902,
  0.04292367771267891,
  0.03504803776741028,
  0.026271497830748558,
  0.026431335136294365,
  -0

In [None]:
# Convert the embeddings to numpy array
embedding_matrix = np.array(embeddings)
embedding_matrix

array([[-0.01819243, -0.03411807, -0.02018314, ..., -0.00173733,
        -0.02522529,  0.00684396],
       [-0.01819243, -0.03411807, -0.02018314, ..., -0.00173733,
        -0.02522529,  0.00684396],
       [-0.00356826, -0.03058816, -0.01480166, ..., -0.00345601,
        -0.01368646,  0.02147833],
       ...,
       [-0.01836957, -0.03246572, -0.01109092, ...,  0.00375077,
        -0.00479223,  0.00559542],
       [-0.00718078, -0.02741507, -0.01103076, ...,  0.00263969,
         0.00469953, -0.00361736],
       [-0.0362394 , -0.03605177, -0.01267173, ..., -0.00439255,
        -0.00796757,  0.00993099]])

In [None]:
# Verify the embedding matrix
print(f"Generated embeddings for {len(filtered_recipes)} recipes.")
print(f"Each embedding is of size {len(embeddings[0])}")

Generated embeddings for 114 recipes.
Each embedding is of size 3072


Each time we retrieve information, we may get different results

# Retrieval System

In [8]:
# Install the faiss-cpu library
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.9.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.9.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.5/27.5 MB[0m [31m19.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.9.0.post1


In [9]:
# Import the faiss library
import faiss

In [None]:
# Print the embedding matrix shape
print(f"Embedding matrix shape: {embedding_matrix.shape}")

Embedding matrix shape: (114, 3072)


In [None]:
# Initialize the FAISS index for similarity search
index = faiss.IndexFlatL2(embedding_matrix.shape[1]) # Create a FAISS index with L2 distance metric
index.add(embedding_matrix) # Add the embeddings to the index

In [None]:
# Save the FAISS index to a file
faiss.write_index(index, "filtered_recipe_index.index")

In [None]:
# Save the metadata for each recipe
metadata = [{'recipe_info': recipe['recipe_info'], # Include recipe information
             'image_path': recipe['image_path']} for recipe in filtered_recipes] # Include image path

# Write metadata to a JSON file with indentation
with open("recipe_metadata.json", "w") as json_file:
  json.dump(metadata, json_file, indent = 4)

In [None]:
# Generate the embeddings for the query
query = "How to make bread?"
k = 5 # Number of top results to retrieve
query_embedding = client.embeddings.create(
    input = [query],
    model = "text-embedding-3-large"
).data[0].embedding
print(f"The query embedding is {query_embedding}\n")
query_vector = np.array(query_embedding).reshape(1, -1)  # Convert embedding to a 2D numpy array for FAISS
print(f"The query vector is {query_vector}\n")

# Search the FAISS index for the nearest neighbors
distances, indices = index.search(query_vector, min(k, len(metadata))) # Perform the search
print(f"The distances are {distances}\n")
print(f"The indices are {indices}\n")

# Store the indices and distances
stored_indices = indices[0].tolist()
stored_distances = distances[0].tolist()
print(f"The stored indices are {stored_indices}\n")
print(f"The stored distances are {stored_distances}\n")

# Print the metadata content for the top results
print("The metadata content is")
for i, dist in zip(stored_indices, stored_distances):
  if 0 <=i < len(metadata):
    print(f"Distance: {dist}, Metadata: {metadata[i]['recipe_info']}")

# Return the results
results = [(metadata[i]['recipe_info'], dist) for i, dist in zip(stored_indices, stored_distances) if 0 <= i < len(metadata)]
results # Output the results as a list of tuples containing recipe info and distance

The query embedding is [-0.019708624109625816, -0.028040051460266113, -0.022090725600719452, 0.016627300530672073, -0.04790274053812027, -0.048874542117118835, 0.03797139599919319, 0.01790723390877247, 0.0015688088024035096, 0.004139048047363758, -0.004817531909793615, -0.013901513069868088, -0.012917859479784966, -0.0015480691799893975, 0.03806620463728905, -0.02385655976831913, 0.016994688659906387, -0.016651002690196037, -0.007258888799697161, 0.002297660568729043, -0.03185615316033363, 0.02015897072851658, 0.014991827309131622, -0.007780343759804964, 0.006287086755037308, 0.017836127430200577, 0.003407233627513051, -0.011205354705452919, -0.03963056951761246, 0.042308952659368515, -0.0036798121873289347, 0.023690642789006233, -0.03465304523706436, -0.021154476329684258, -0.01591622456908226, 0.011329792439937592, -0.01473109982907772, 0.005253065377473831, 0.008515121415257454, 0.019412342458963394, -0.012408255599439144, 0.007792194839566946, 0.00038775798748247325, -0.00825439393

[('Here’s the structured information extracted from the recipe image:\n\n### Recipe Title:\nNut Bread and Oatmeal Bread\n\n### Ingredients:\n#### Nut Bread:\n- 2½ Cups of Flour\n- 3 Teaspoons of Baking Powder\n- ¾ Cup of Milk\n- ½ Cup of Sugar\n- 1 Cup of Nuts, chopped\n\n#### Oatmeal Bread:\n- 2¾ Cups of Rolled Oats\n- 1½ Cups of Molasses\n- 1 Yeast Cake\n- Water\n\n### Instructions:\n#### Nut Bread:\n1. Mix flour, baking powder, and sugar together.\n2. Add chopped nuts.\n3. Stir in milk until well combined.\n4. Pour into a greased loaf pan.\n5. Bake for one hour.\n\n#### Oatmeal Bread:\n1. Boil oats and water until cool.\n2. Add molasses and yeast cake; stir until mixed.\n3. Let the mixture sit until it rises.\n4. Shape into a loaf and let rise again.\n5. Bake for one hour.\n\n### Cuisine Type:\nAmerican\n\n### Dish Type:\nBread\n\n### Tags/Metadata:\n- Baking\n- Quick Bread\n- Homemade Bread\n- Traditional Recipes\n\nThis format is suitable for embedding in a Retrieval Augmented Gen

In [None]:
# Define a function to query the embeddings
def query_embeddings(query, index, metadata, k = 5):
  # Generate the embeddings for the query
  query_embedding = client.embeddings.create(
      input = [query],
      model = "text-embedding-3-large"
  ).data[0].embedding
  print(f"The query embedding is {query_embedding}\n")
  query_vector = np.array(query_embedding).reshape(1, -1)
  print(f"The query vector is {query_vector}\n")

  # Search faiss index
  distances, indices = index.search(query_vector, min(k, len(metadata)))
  # print(f"The distances are {distances}\n")
  # print(f"The indices are {indices}\n")

  # Store the indices and distances
  stored_indices = indices[0].tolist()
  stored_distances = distances[0].tolist()
  print(f"The stored indices are {stored_indices}\n")
  print(f"The stored distances are {stored_distances}\n")

  # # Print the metadata content
  # print("The metadata content is")
  # for i, dist in zip(stored_indices, stored_distances):
  #   if 0 <=i < len(metadata):
  #     print(f"Distance: {dist}, Metadata: {metadata[i]['recipe_info']}")

  # Return the results
  results = [(
      metadata[i]['recipe_info'], dist) for i, dist in zip(
          stored_indices, stored_distances) if 0 <= i < len(metadata)]
  return results


In [None]:
# Test the retrieval system
query = "chocolate query"
results = query_embeddings(query, index, metadata)
print(f"The results are {results}")

The query embedding is [-0.014632204547524452, -0.005892922170460224, -0.016869252547621727, -0.006876526866108179, -0.03285064920783043, 0.00999272521585226, 0.023484645411372185, 0.051948241889476776, -0.015319856815040112, 0.0464470200240612, 0.005814582109451294, -0.018105285242199898, -0.04975471645593643, 0.007598997093737125, -0.025800032541155815, 0.014623500406742096, -0.013761757872998714, -0.015163176693022251, -0.020403264090418816, -0.030204491689801216, 0.02104739472270012, -0.01762654073536396, -0.005466403439640999, 0.02754092402756214, 0.023345373570919037, 0.049232449382543564, -0.01134191732853651, -0.00453067384660244, -0.03861300274729729, 0.012421270832419395, 0.012673700228333473, 0.018714597448706627, 0.0279065128415823, -0.021569661796092987, -0.020246583968400955, 0.009722886607050896, 0.0176091305911541, -0.03878709301352501, 0.015885647386312485, -0.009322481229901314, 0.02214415743947029, -0.0359320268034935, -0.02160448022186756, -0.0011413728352636099, 0.

In [None]:
# Combine the results into a single string
def combined_retrived_content(results):
  combined_content = "\n\n".join([result[0] for result in results]) # Join the recipe information with double newlines
  return combined_content

# Get the combined content from results
combined_content = combined_retrived_content(results)
print(f"The combined content is {combined_content}")

The combined content is Based on the content of the image, here is the structured information extracted:

### Recipe Information

- **Title**: Chocolate Sauce
- **Cuisine Type**: Not specified
- **Dish Type**: Sauce
- **Ingredients**: Not listed in the image
- **Instructions**: Not listed in the image
- **Relevant Tags/Metadata**: Sauces, Desserts

### Additional Recipes Listed
1. **COLD SAUCE**
2. **CRANBERRY SAUCE**
3. **CREAM MUSTARD**
4. **EGG SAUCE FOR CHOCOLATE PUDDING**
5. **PUDDING SAUCE**
6. **SAUCE FOR GRAHAM PUDDING**

### Sections
- **Soups**
  - Bean Porridge
  - Connecticut Clam Chowder
  - Massachusetts Clam Chowder
  - New England Fish Chowder
  - Lamb Broth
  - A Good Oyster Stew
  - Potato Soup

- **Vegetables**
  - Green Corn Fritters
  - Delicious Stuffed Baked Potatoes
  - Creamed Potatoes
  - Scalloped Potatoes
  - Baked Tomatoes
  - Fried Tomatoes

This format is suitable for embedding in a Retrieval Augmented Generation (RAG) system.

Here’s the structured infor

# Generative System

In [None]:
# Define the system prompt
system_prompt3 = f"""
You are highly experienced and expert chef specialized in providing cooking advice.
Your main task is to provide information precise and accurate on the combined content.
You answer diretly to the query using only information from the provided {combined_content}.
If you don't know the answer, just say that you don't know.
Your goal is to help the user and answer the {query}
"""

In [None]:
# Define function to retrieve a response from the API
def generate_response(query, combined_content, system_prompt):
  response = client.chat.completions.create(
      model = model,
      messages = [
          {"role": "system", "content": system_prompt3}, # Provide system prompt for guidance
          {"role": "user", "content": query}, # Provide the query as user input
          {"role": "assistant", "content": combined_content} # Provide the combined content from the results
      ],
      temperature = 0, # Set temperature to 0 for deterministic output
  )
  return response

In [None]:
# Get the results from the API
query = "How to make bread?"
combined_content = combined_retrived_content(results)
response = generate_response(query, combined_content, system_prompt3)

In [None]:
# Display the outcome
get_gpt_response()

I'm sorry, but the provided content does not include a recipe for making bread. If you have a specific bread recipe in mind or need guidance on a particular type of bread, please let me know!

In [None]:
# Get the results
query = "Get me the best chocolate cake recipe"
combined_content = combined_retrived_content(results)
response = generate_response(query, combined_content, system_prompt3)

In [None]:
# Display the outcome
get_gpt_response()

I'm sorry, but I don't have a specific chocolate cake recipe available. However, I can provide you with a chocolate sauce recipe if you're interested in making a sauce to accompany a cake. Would you like that?

# Rag system

In [None]:
# Build the function for Retrieval-Augmented Generation (RAG)
def rag_system(query, index, metadata, system_prompt, k = 5):
  # Retrieval System: Retrieve relevant results based on the query
  results = query_embeddings(query, index, metadata, k)

  # Content Merge: Combine the retrieved content into a single string
  combined_content = combined_retrived_content(results)

  # Generation: Generate a response based on the query and combined content
  response = generate_response(query, combined_content, system_prompt)

  # Return the generated response
  return response

In [None]:
# Test the rag system
query1 = "How to make the best chocolate cake?"
response = rag_system(query1, index, metadata, system_prompt3)
get_gpt_response()

The query embedding is [-0.0025930912233889103, -0.03965035080909729, -0.013300711289048195, 0.0075972783379256725, -0.019310662522912025, 0.022375846281647682, 0.00811179168522358, -0.005364072974771261, -0.006425940431654453, 0.03741714358329773, -0.0004070948052685708, 0.007121080067008734, -0.051801618188619614, -0.007602752186357975, 0.0003212285810150206, -0.0403071753680706, 0.02113882638514042, -0.004690826870501041, -0.004521146882325411, -0.015851382166147232, 0.014964668080210686, -0.024806099012494087, -0.011735277250409126, 0.03402354568243027, 0.0030405535362660885, 0.006721511483192444, 0.022551000118255615, -0.0006903507164679468, -0.03689168393611908, 0.056793488562107086, 0.04457654058933258, 0.0018993195844814181, -0.01622358337044716, -0.014745726250112057, -0.029163040220737457, 0.01690230332314968, -0.004753772635012865, -0.0019444763893261552, 0.03478984162211418, 0.01440636720508337, -0.005741747096180916, 4.0773622458800673e-05, 0.0025917228776961565, 0.0107719

To make the best chocolate cake, you can follow this classic recipe:

### Recipe Title: Chocolate Cake

### Ingredients:
- 1 ¾ cups all-purpose flour
- 1 ¾ cups granulated sugar
- ¾ cup unsweetened cocoa powder
- 1 ½ teaspoons baking powder
- 1 ½ teaspoons baking soda
- 1 teaspoon salt
- 2 large eggs
- 1 cup whole milk
- ½ cup vegetable oil
- 2 teaspoons vanilla extract
- 1 cup boiling water

### Instructions:
1. **Preheat the Oven**: Preheat your oven to 350°F (175°C). Grease and flour two 9-inch round cake pans.
   
2. **Mix Dry Ingredients**: In a large mixing bowl, combine the flour, sugar, cocoa powder, baking powder, baking soda, and salt. Whisk together until well combined.

3. **Add Wet Ingredients**: Add the eggs, milk, vegetable oil, and vanilla extract to the dry ingredients. Beat on medium speed for about 2 minutes until well blended.

4. **Incorporate Boiling Water**: Carefully stir in the boiling water (the batter will be thin). Mix until smooth.

5. **Bake**: Pour the batter evenly into the prepared cake pans. Bake for 30 to 35 minutes, or until a toothpick inserted in the center comes out clean.

6. **Cool**: Allow the cakes to cool in the pans for 10 minutes, then remove from pans and transfer to wire racks to cool completely.

7. **Frosting**: Once cooled, frost with your favorite chocolate frosting.

### Cuisine Type:
American

### Dish Type:
Dessert

### Relevant Tags/Metadata:
- Chocolate Cake
- Baking
- Dessert
- Sweet Treat

Enjoy your delicious homemade chocolate cake! If you need any variations or additional tips, feel free to ask!

In [None]:
# Test with a different query
query2 = "I want something vegan"
response = rag_system(query2, index, metadata, system_prompt3)
get_gpt_response()

The query embedding is [-0.034529902040958405, -0.026633311063051224, -0.017109284177422523, 0.03441669046878815, -0.01732155680656433, -0.008498035371303558, 3.413520607864484e-05, -0.01890653744339943, -0.033737413585186005, 0.02354826219379902, 0.003265481675043702, -0.012984092347323895, 0.0179583802819252, -0.0071642473340034485, 0.036850765347480774, -0.008830597624182701, 0.0097221489995718, -0.011399113573133945, 0.007479120511561632, -0.0183546245098114, -0.023519957438111305, 0.006028581410646439, 0.0375017374753952, -0.006902442779392004, 0.007719697430729866, 0.00817254837602377, -0.022090647369623184, -0.012241133488714695, 0.002090898808091879, 0.018467837944626808, 0.010189151391386986, 0.024496419355273247, -0.005041507072746754, 0.055304449051618576, -0.013684596866369247, -0.003852772992104292, 0.03874707967042923, -0.0019228485180065036, 0.01562336552888155, 0.026958797127008438, -0.047747496515512466, 0.022005736827850342, -0.02418508380651474, 0.001378896413370967,

It seems that the recipes provided do not include any vegan options. However, if you're looking for vegan alternatives or suggestions, I can help you create a vegan recipe or modify an existing one. Please let me know what type of dish you're interested in, and I'll be happy to assist!