# Retrieval

In [1]:
from langchain_openai import OpenAIEmbeddings
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment.
load_dotenv()

# Embedding client handed to the vector store below.
embedder = OpenAIEmbeddings(
    model='text-embedding-ada-002',
)

In [2]:
from langchain_pinecone import PineconeVectorStore
import os 

# Name of the Pinecone index, taken from the environment.
index_name = os.getenv("PINECONE_INDEX_NAME")

# Attach to the already-populated index, embedding queries with `embedder`.
docsearch = PineconeVectorStore.from_existing_index(
    embedding=embedder,
    index_name=index_name,
)

# docsearch.as_retriever()

In [4]:
# MMR search: fetch 10 candidates, keep the 6 selected by max-marginal-relevance.
docs = docsearch.max_marginal_relevance_search(
    'correction tape',
    k=6,
    fetch_k=10,
)
# Print every hit followed by a blank line.
for doc in docs:
    print(doc, end='\n\n')

page_content='{"material": "Plastic", "item": "Video tape", "similar_items": ["Video tape"], "recyclable": false, "instructions": "should be disposed of as general waste."}' metadata={'seq_num': 82.0, 'source': '/home/mightymagnus/projects/blooapp-api/app/data_cleaned.json'}

page_content='{"material": "Others", "item": "Rechargeable battery", "similar_items": ["Rechargeable battery"], "recyclable": true, "instructions": "can be recycled at e-waste collection points, located <a href=\'https://www.nea.gov.sg/our-services/waste-management/3r-programmes-and-resources/e-waste-management/where-to-recycle-e-waste\' target=\'_blank\' style=\'color:black; font-weight:600; text-decoration: underline; font-style: italic;\'>here</a>. <br/><br/> Remember to tape the ends of the battery and seal leaking batteries in a leak-proof container or bag first to prevent short-circuit or fire incidents."}' metadata={'seq_num': 261.0, 'source': '/home/mightymagnus/projects/blooapp-api/app/data_cleaned.json'}

In [25]:
from langchain_openai import ChatOpenAI

# Chat model used for the image prompts; temperature 0 is requested explicitly.
vision_model = ChatOpenAI(
    model="gpt-4o",
    temperature=0,
)


In [2]:
# base64 encode an image
import base64

def encode_image(image_path):
    """Read the file at ``image_path`` and return its bytes as a base64 string.

    The file is opened in binary mode; the base64 bytes are decoded as UTF-8
    so the result is a plain ``str``.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")


# Encode one sample image; later cells embed `img_base64` in their prompts.
img_base64 = encode_image("../images/test/Paper towel tube (2).jpeg")

# Keep a copy of the base64 string on disk as a text file.
with open("encoded_image.txt", "w") as file:
    file.write(img_base64)


In [28]:
import os
# Collect the JPEG test images. os.listdir returns entries in arbitrary order, so the
# names are sorted to keep the filename -> result pairing and the CSV row order
# reproducible across runs and machines.
filenames = sorted(
    f for f in os.listdir("../images/test") if f.endswith((".jpg", ".jpeg"))
)
len(filenames)


41

In [29]:
# Base64-encode every test image, in the same order as `filenames`.
base64_images = list(
    map(lambda name: encode_image(f"../images/test/{name}"), filenames)
)
len(base64_images)


41

In [13]:
import csv
# Dummy payloads used to try out the CSV layout.
responses = [{"items": ['item3', 'item4']}, {"items": 'item5'}]

# newline="" hands line-ending control to the csv module, as its documentation
# requires; without it, extra blank rows can appear on some platforms.
with open("responses.csv", "w", newline="") as file:
    writer = csv.writer(file)
    writer.writerow(["filename", "response"])
    # zip stops at the shorter sequence, so at most len(responses) rows are written.
    for filename, response in zip(filenames, responses):
        writer.writerow([filename, response])



In [34]:
# Write one row per image: the filename next to the `items` attribute of its result.
# `res` is iterated here, pairing each entry with the filename at the same position.
with open('results.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Filename', 'Result'])

    for filename, result in zip(filenames, res):
        row = [filename, result.items]
        writer.writerow(row)



In [None]:
from langchain_core.prompts import ChatPromptTemplate

# Identify the items in every test image and save one CSV row per image.
# Uses `csv`, `filenames`, `encode_image`, `vision_model`, `NEA_ITEM_NAMES` and `Items`
# defined in other cells, so those cells must be run first.

# region: Identifying the items in the image
# The instruction text is identical for every image, so it is built once up front.
# The literal keeps its original indentation so the prompt text itself is unchanged.
image_prompt = """
            I have an image containing items that I am unsure of whether they are recyclable. Please help me to identify the item(s) in the image. 
            Map the items you have identified to the following NEA_ITEM_NAMES: {NEA_ITEM_NAMES}

            Return the item as "Other" if the item is not in the list of NEA_ITEM_NAMES.

            Return the answer as JSON output according to the following schema:
            {{
                "items": ['item1', 'item2', ...]
            }}

            """.format(NEA_ITEM_NAMES=NEA_ITEM_NAMES)

# The structured-output wrapper does not depend on the image either.
vision_model_json_output = vision_model.with_structured_output(schema=Items)
# endregion

with open('results.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Filename', 'Result'])

    for filename in filenames:
        base64_image = encode_image(f"../images/test/{filename}")

        # The image is baked into the template through the f-string, so the template
        # has to be rebuilt for each file; only the text stays a template variable.
        image_prompt_template = ChatPromptTemplate.from_messages(
            [
                (
                    "system",
                    "You are an expert on answering questions briefly and accurately about recycling in Singapore. Your name is Bloo. Users may send you images of items to check if the items can be recycled, and your task is to correctly identify what are the items in the image.",
                ),
                (
                    "human",
                    [
                        {"type": "text", "text": "{image_prompt}"},
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                        },
                    ],
                ),
            ]
        )

        # The stray `vision_model.batch()` call was removed: it passed no inputs and
        # raised a TypeError before any image was processed.
        chain_text_stream = image_prompt_template | vision_model_json_output

        res = chain_text_stream.invoke({"image_prompt": image_prompt})
        print(res)

        # Persist the result; previously only the header row was ever written.
        writer.writerow([filename, res.items])

# Returning item names by placing all NEA items in prompt

In [6]:
from langchain_openai import ChatOpenAI

# Chat model used for the image prompts in this section; temperature fixed at 0.
vision_model = ChatOpenAI(
    model="gpt-4o",
    temperature=0,
)


In [1]:
# base64 encode an image
import base64

def encode_image(image_path):
    """Read the file at ``image_path`` and return its bytes as a base64 string.

    The file is opened in binary mode; the base64 bytes are decoded as UTF-8
    so the result is a plain ``str``.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")


# Encode the sample image for this section.
base64_image = encode_image("../images/test/Sports shoes.jpg")

# Keep a copy of the base64 string on disk as a text file.
with open("encoded_image.txt", "w") as file:
    file.write(base64_image)


In [31]:
# Item names that the vision model is asked to map image contents onto.
# The raw list below repeats many names (e.g. "Laptop", "Plastic bottle"); since the
# whole list is pasted into the prompt, duplicates only add tokens. dict.fromkeys
# drops the repeats while keeping the order in which each name first appears.
NEA_ITEM_NAMES = list(dict.fromkeys([
    "Printed paper (Glossy and non-glossy)",
    "Writing paper",
    "Paper",
    "Clothes",
    "Newspaper",
    "Flyer (Glossy and non-glossy)",
    "Magazine (Glossy and non-glossy)",
    "Telephone directory",
    "Envelope (With and without plastic window)",
    "Plastic envelope",
    "Red packet",
    "Namecard",
    "Greeting card",
    "Shredded paper",
    "Paper receipt",
    "Carton box",
    "Cardboard box",
    "Printed paper box",
    "Paper egg trays",
    "Milk carton",
    "Drink packet",
    "Juice packet",
    "Paper towel tube",
    "Toilet roll tube",
    "Tissue box",
    "Paper bag",
    "Paper disposables",
    "Paper cup",
    "Paper plate",
    "Glitter paper",
    "Crayon drawing",
    "Tissue paper",
    "Paper towel",
    "Toilet paper",
    "Disposable wooden chopsticks",
    "Wooden chopsticks",
    "Pizza boxes",
    "Wax paper",
    "Paper packaging contaminated with food",
    "Paper packaging with food",
    "Plastic bottle",
    "Plastic container",
    "Soft drink bottle",
    "Carbonated drink bottle",
    "Milk bottles",
    "Medicine bottle",
    "Shampoo bottle",
    "Bodywash bottle",
    "Facial cleanser bottle",
    "Detergent bottle",
    "Soap bottle",
    "DVD",
    "CD casing",
    "DVD casing",
    "Plastic bag",
    "Toilet paper packaging",
    "Tissue box packaging",
    "Plastic film",
    "Magazine wrapper",
    "Plastic packaging for packet drink",
    "Bubble wrap",
    "Fruit box",
    "Ziplock bag",
    "Plastic packaging",
    "Plastic egg trays",
    "Plastic clothes hanger",
    "Plastic takeaway food container",
    "Bubble tea cups",
    "Polystyrene foam product",
    "Styrofoam",
    "Styrofoam cup",
    "Styrofoam clamshell container",
    "Plastic disposables",
    "Plastic crockery",
    "Plastic packaging with foil",
    "Potato chip bags",
    "Expired credit cards",
    "Oxo-degradable bag",
    "Bio-degradable bag",
    "Drinking straw",
    "Cassette",
    "Video tape",
    "Plastic packaging contaminated with food/oil stains",
    "Melamine products",
    "Melamine cups",
    "Melamine plates",
    "Beverage glass bottle",
    "Glass bottle",
    "Wine bottle",
    "Liquor bottle",
    "Food glass bottle",
    "Sauce bottle",
    "Condiment bottle",
    "Jam spread bottle",
    "Food jars",
    "Cosmetic glass bottle",
    "Perfume glass bottle",
    "Medicine glass bottle",
    "Supplement glass bottle",
    "Glassware",
    "Glass cup",
    "Glass plate",
    "Drinking glass",
    "Wine glass",
    "Borosilicate glassware",
    "Pyrex glassware",
    "Bakeware",
    "Tempered glass",
    "Oven-safe food containers",
    "Crystal glass",
    "Glass with metal wires",
    "Windows",
    "Mirror",
    "Ceramic products",
    "Ceramic plate",
    "Tea pot",
    "Porcelain",
    "Light bulb",
    "Lamp",
    "Tube",
    "Incandescent lamp",
    "Fluorescent lamp",
    "Incandescent bulb",
    "Fluorescent bulb",
    "LED lamp",
    "LED bulb",
    "Beverage metal can",
    "Carbonated drink can",
    "Soft drink can",
    "Beer can",
    "Food metal can",
    "Biscuit tin",
    "Food tin",
    "Canned food",
    "Medals",
    "Metal container",
    "Metal bottle cap",
    "Clean aluminium tray",
    "Clean aluminium foil",
    "Non-food metal container",
    "Paint container",
    "Paint cans",
    "Laptop",
    "Router",
    "Docking station",
    "Hard disk drive",
    "Computer mouse",
    "Household battery",
    "Bag",
    "Toys",
    "Umbrella",
    "Spectacles",
    "Textile",
    "Curtains",
    "Bedsheet",
    "Blanket",
    "Food waste",
    "Leftover medicine",
    "Diaper",
    "Sanitary pad",
    "Stationery",
    "Pen",
    "Pencil",
    "Plant waste",
    "Horticultural waste",
    "Luggage bag",
    "Furniture",
    "Sports shoes",
    "School shoes",
    "Football shoes (without metal studs)",
    "Large household appliances",
    "Refrigerator",
    "Fridge",
    "Washing machine",
    "Dryer",
    "Television",
    "TV",
    "Air Conditioner",
    "Electric mobility devices",
    "Personal mobility devices",
    "Electric scooter",
    "E-scooter",
    "Power-assisted bicycle",
    "Electric bicycle",
    "Electric mobility scooter",
    "Rice cooker",
    "Microwave oven",
    "Toaster oven",
    "Electric kettle",
    "Food processor",
    "Food blender",
    "Electric fan",
    "Standing fan",
    "CD player",
    "DVD player",
    "Music player",
    "Speaker",
    "Audio sound system",
    "Radio",
    "Vacuum cleaner",
    "Gaming console",
    "Lamp stand",
    "Lamp fixture",
    "Rusty metal cans",
    "Dirty aluminium foil",
    "Dirty aluminium tray",
    "Metal cutlery",
    "Steel wool",
    "Metal accessories",
    "Cigarettes",
    "Joss sticks",
    "9-Volt battery",
    "Button cell battery",
    "Spray cans",
    "Aerosol cans",
    "Table",
    "Chairs",
    "Printed paper (Glossy and non-glossy)",
    "Writing paper",
    "Newsletter",
    "Flyer (Glossy and non-glossy)",
    "Brochure (Glossy and non-glossy)",
    "Magazine (Glossy and non-glossy)",
    "Books",
    "Textbooks",
    "Telephone directory",
    "Envelope (With and without plastic window)",
    "Plastic envelope",
    "Red packet",
    "Namecard",
    "Calendar",
    "Greeting card",
    "Gift wrapping paper",
    "Shredded paper",
    "Carton box",
    "Cardboard box",
    "Paper Packaging (printed paper box etc)",
    "Printed paper box",
    "Paper box",
    "Beverage carton",
    "Plastic bottle",
    "Plastic container",
    "Mineral water bottle",
    "Soft drink bottle",
    "Carbonated drink bottle",
    "Water bottle",
    "Medicine bottle",
    "Fruit box",
    "Ziplock bag",
    "Plastic packaging",
    "Bread bag",
    "Plastic cups",
    "Plastic disposables",
    "Plastic cutlery",
    "Plastic crockery",
    "Plastic packaging with foil",
    "Potato chip bags",
    "Blister pack",
    "Expired credit cards",
    "Plastic packaging contaminated with food/oil stains",
    "Shampoo",
    "CD",
    "Glass bottle",
    "Beer bottle",
    "Wine bottle",
    "Liquor bottle",
    "Food glass bottle",
    "Sauce bottle",
    "Condiment bottle",
    "Jam spread bottle",
    "Food jars",
    "Cosmetic glass bottle",
    "Perfume glass bottle",
    "Drinking glass",
    "Paint container",
    "Paint cans",
    "Ribbons",
    "Cotton bud",
    "Cotton wool",
    "Wet wipes",
    "Briefcase",
    "Child seat",
    "Vase",
    "Cooking pot",
    "Disposable shaver",
    "Electric shaver",
    "Plastic bottle cap",
    "Mouthwash bottle",
    "Masks",
    "Coffee capsules",
    "Plastic food wrap",
    "Glad wrap",
    "Saran wrap",
    "Cling film",
    "Laptop",
    "Tablet computer",
    "Mobile phone",
    "Mobile phone battery",
    "Desktop monitor",
    "Battery charger",
    "Portable charger",
    "Keyboard",
    "Rechargeable battery",
    "Desktop computer",
    "Laptop",
    "Tablet computer",
    "Mobile phone",
    "Computer battery",
    "Printer",
    "Modem",
    "Power bank",
    "Electronic cables",
    "Electronic waste",
    "Household battery",
    "Alkaline battery",
    "Shoes",
    "Food waste",
    "Stationery",
    "Bulky waste",
    "Furniture",
    "Sports shoes",
    "School shoes",
]))


In [32]:
from langchain_core.prompts import ChatPromptTemplate

image_prompt = """
What are the item(s) in the image?
If you are not sure what the item is in the image, just say you are unsure of what the item is from the image, but do provide a list of maximum 5 possible items that you think this item is.

Return the answer in a numbered format arranged in the order of the items that are most prominent (obvious) in the image to items that are least prominent (obvious) in the image.
items:
1. item1
2. item2
...
5. item5

For each of the image item that you have identified, find the item in the context that is most similar to it, and return the item from the context that is most similar to it instead of the original image items.

Return the answer in JSON format.

Context:
{context}
"""

image_prompt = image_prompt.format(context=NEA_ITEM_NAMES)

image_prompt_template = ChatPromptTemplate.from_messages([
            ("system", "You are an expert on answering questions briefly and accurately about recycling in Singapore. Your name is Bloo. Users may send you images of items to check if the items can be recycled, and your task is to correctly identify what are the items in the image, and eventually answer the users questions in a helpful, correct and concise manner."),
            ("human", [
                    {"type": "text", "text": image_prompt},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{img_base64}"},
                    },
                ]),
        ])


In [33]:
from langchain_core.pydantic_v1 import BaseModel, Field
from typing import List
# Structured-output schema with a single field: `items`, a list of item-name strings.
# The Field description below is part of the schema, so it is left untouched.
class Items(BaseModel):
    items: List[str] = Field(description="List of item names from the context that best match the names of the items in the image")


In [34]:
# Request JSON-mode output from the vision model, parsed against the `Items` schema.
vision_model_json_output = vision_model.with_structured_output(
    Items,
    method="json_mode",
)

chain = image_prompt_template | vision_model_json_output

vision_res = chain.invoke(
    {
        "image_prompt": image_prompt,
        "image_base64": img_base64,
    }
)
vision_res


Items(items=['Envelope (With and without plastic window)'])

# Streaming text

In [28]:
# JSON Output parser
from langchain_core.pydantic_v1 import BaseModel, Field
from typing import List
from enum import Enum


# Three-valued recyclability flag; the member values are the lowercase strings
# "true", "false" and "partial".
class RecyclableStatus(Enum):
    TRUE = "true"
    FALSE = "false"
    PARTIAL = "partial"


# One identified item: its name, a description, a RecyclableStatus and instructions.
# Field descriptions are part of the generated schema and are left untouched.
class Item(BaseModel):
    name: str = Field(description="item identified by the model")
    description: str = Field(description="description of the item")
    recyclable: RecyclableStatus = Field(description="whether the item is recyclable, non-recyclable or partially recyclable if there are some sub-items that are recyclable")
    instructions: str = Field(description="recycling instructions")


# Response for one image: a primary `item` plus a list of `other_items`.
class ImageResponse(BaseModel):
    item: Item = Field(description="primary item identified by the model")
    other_items: List[Item] = Field(
        description="list of other possible items identified by the model"
    )


In [5]:
from langchain_openai import ChatOpenAI

# This section uses gpt-4-turbo for the image prompts; temperature fixed at 0.
vision_model = ChatOpenAI(
    model="gpt-4-turbo",
    temperature=0,
)


In [1]:
# base64 encode an image
import base64

def encode_image(image_path):
    """Read the file at ``image_path`` and return its bytes as a base64 string.

    The file is opened in binary mode; the base64 bytes are decoded as UTF-8
    so the result is a plain ``str``.
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")


# Encode the sample image for this section.
base64_image = encode_image("../images/plastic-bread_packaging.JPEG")

# Keep a copy of the base64 string on disk as a text file.
with open("encoded_image.txt", "w") as file:
    file.write(base64_image)


In [23]:
image_prompt = """
I have an image containing items that I am unsure of whether they are recyclable. Please help me to identify the item(s) in the image. Return the answer as image_items where the number of items is according to the items you have identified.
"""

image_prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an expert on answering questions briefly and accurately about recycling in Singapore. Your name is Bloo. Users may send you images of items to check if the items can be recycled, and your task is to correctly identify what are the items in the image, and help answer users' questions on whether the items are recyclable or not in a helpful, correct and concise manner.",
        ),
        (
            "human",
            [
                {"type": "text", "text": "{image_prompt}"},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                },
            ],
        ),
    ]
)

In [24]:
# Show which variables the template expects to receive at invoke time.
image_prompt_template.input_variables

['image_prompt']

In [17]:
from langchain_core.runnables import RunnablePassthrough, RunnableLambda, RunnableParallel
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
from langchain_openai import ChatOpenAI

from langchain_core.prompts import ChatPromptTemplate
from IPython.display import Markdown

# region: Identifying the items in the image
image_prompt = """
I have an image containing items that I am unsure of whether they are recyclable. Please help me to identify the item(s) in the image. Return the answer as image_items where the number of items is according to the items you have identified.
"""

image_prompt_template = ChatPromptTemplate.from_messages([
            ("system", "You are an expert on answering questions briefly and accurately about recycling in Singapore. Your name is Bloo. Users may send you images of items to check if the items can be recycled, and your task is to correctly identify what are the items in the image, and help answer users' questions on whether the items are recyclable or not in a helpful, correct and concise manner."),
            ("human", [
                    {"type": "text", "text": image_prompt},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                    },
                ]),
        ])

# endregion

# region: Answering question on what items are recyclable, and providing instructions
qa_model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

retriever = docsearch.as_retriever(
    search_type="mmr", search_kwargs={"k": 6, "fetch_k": 10}
)

recycling_question = """
For each of the image_items, is the item recyclable in Singapore? If so, provide the recycling instructions. If the item is not recyclable. If the item is not recyclable, answer why the item(s) are not recyclable and how to properly dispose it.


"""

template = """Answer the question referring to the following context:
{context}

If the answer is not found in the context, refer to your pre-trained knowledge and/or online information to answer the question, and if you do answer the question based on information gotten aside from the context, do tell the user that the answer is generated from information obtained from the source where you got your information from.

Question: {question}

"""

prompt = ChatPromptTemplate.from_template(template)

# endregion

chain_text_stream = image_prompt_template \
| vision_model \
| StrOutputParser() \
| RunnableParallel(
    {
        "context": retriever,
        "question": RunnableLambda(
            lambda vision_model_output: f"{vision_model_output}\n {recycling_question}"
        ),
    }
) \
| prompt \
| qa_model \
| StrOutputParser()

async for chunk in chain_text_stream.astream({"image_prompt": image_prompt, "image_base64": base64_image}):
    print(chunk, end='|', flush=True)


|Based| on| the| context| provided|,| the| item| in| the| image| is| a| Milk| cart|on| made| of| Paper|.| Milk| cart|ons| are| recycl|able| in| Singapore|.| The| recycling| instructions| for| the| Milk| cart|on| are| to| empty|,| rinse|,| and| flatten| it| before| recycling|.||

In [13]:
# Render the text held in `res` as Markdown.
# NOTE(review): `res` is not assigned in this cell; presumably it comes from a chain
# invocation in another cell. Confirm the execution order.
display(Markdown(res))

image_items: 1

The item in the image is a Tetra Pak carton used for packaging low fat fresh milk. In Singapore, Tetra Pak cartons are recyclable. You should rinse the carton to remove any milk residue and flatten it before placing it in the recycling bin.

In [9]:
# Vision-only chain: run the image prompt and keep just the message text.
chain_text_stream = (
    image_prompt_template
    | vision_model
    | RunnableLambda(lambda ai_msg: ai_msg.content)
)

res = chain_text_stream.invoke(
    {"image_prompt": image_prompt, "image_base64": base64_image}
)
res


'image_items:\n1. Envelope with a plastic window\n2. Paper\n3. Plastic window (part of the envelope)\n4. Adhesive label (postage stamp area)\n5. Torn edge (part of the envelope)'

In [47]:
from langchain_core.runnables import RunnablePassthrough, RunnableLambda, RunnableParallel
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
from langchain_openai import ChatOpenAI

from langchain_core.prompts import ChatPromptTemplate
from IPython.display import Markdown

image_prompt = """
What are the item(s) in the image?
If you are not sure what the item is in the image, just say you are unsure of what the item is from the image, but do provide a list of maximum 5 possible items that you think this item is.

Return the answer in a numbered format arranged in the order of the items that are most prominent (obvious) in the image to items that are least prominent (obvious) in the image.
image_items:
1. item1
2. item2
...
5. item5

"""

image_prompt_template = ChatPromptTemplate.from_messages([
            ("system", "You are an expert on answering questions briefly and accurately about recycling in Singapore. Your name is Bloo. Users may send you images of items to check if the items can be recycled, and your task is to correctly identify what are the items in the image, and eventually answer the users questions in a helpful, correct and concise manner."),
            ("human", [
                    {"type": "text", "text": image_prompt},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{img_base64}"},
                    },
                ]),
        ])


qa_model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

retriever = docsearch.as_retriever(search_type="mmr",
                search_kwargs={'k': 6, 'fetch_k': 10})

question = """
For each of the image_items, is the item recyclable in Singapore? If so, provide the recycling instructions. If the item is not recyclable. If the item is not recyclable, answer why the item(s) are not recyclable and how to properly dispose it.


"""

template = """Answer the question referring to the following context:
{context}

If the answer is not found in the context, refer to your pre-trained knowledge and/or online information to answer the question, and if you do answer the question based on information gotten aside from the context, do tell the user that the answer is generated from information obtained from the source where you got your information from.

Question: {question}

"""
prompt = ChatPromptTemplate.from_template(template)

# output_parser = JsonOutputParser(pydantic_object=ImageResponse)
output_parser = StrOutputParser()

chain_text_stream = image_prompt_template \
| vision_model \
| RunnableLambda(lambda ai_msg: ai_msg.content) \
| RunnableParallel({"context": retriever, "question": RunnableLambda(lambda image_items: f"{image_items}\n {question}")}) \
| prompt \
| qa_model \
| output_parser

res = chain_text_stream.invoke({"image_prompt": image_prompt, "image_base64": img_base64})

display(Markdown(res))



# async for chunk in chain_text_stream.astream({"image_prompt": image_prompt, "image_base64": img_base64}):
#     print(chunk, end='|', flush=True)


1. Envelope: The item "Envelope" is recyclable in Singapore. The recycling instructions are to make sure it is clean before recycling.

2. Plastic window on the envelope: The item "Plastic window on the envelope" is not recyclable in Singapore due to the presence of plastic. To properly dispose of it, the plastic window should be removed and disposed of in the general waste bin, while the paper envelope can be recycled.

3. Postage label: The item "Postage label" is not recyclable in Singapore as it is typically made of paper with adhesive backing. To properly dispose of it, the postage label should be removed from the envelope or package and disposed of in the general waste bin. The paper part of the envelope or package can be recycled.

# Returning JSON of items (according to NEA list) including their description, recyclable status and instructions



In [None]:
# JSON Output parser
from langchain_core.pydantic_v1 import BaseModel, Field
from typing import List
from enum import Enum


# Three-valued recyclability flag; the member values are the lowercase strings
# "true", "false" and "partial".
class RecyclableStatus(Enum):
    TRUE = "true"
    FALSE = "false"
    PARTIAL = "partial"


# One identified item: its name, a description, a RecyclableStatus and instructions.
# Field descriptions are part of the generated schema and are left untouched.
class Item(BaseModel):
    name: str = Field(description="item identified by the model")
    description: str = Field(description="description of the item")
    recyclable: RecyclableStatus = Field(description="whether the item is recyclable, non-recyclable or partially recyclable if there are some sub-items that are recyclable")
    instructions: str = Field(description="recycling instructions")


# Response for one image: a primary `item` plus a list of `other_items`.
class ImageResponse(BaseModel):
    item: Item = Field(description="primary item identified by the model")
    other_items: List[Item] = Field(
        description="list of other possible items identified by the model"
    )


In [48]:
from langchain_core.runnables import RunnablePassthrough, RunnableLambda, RunnableParallel
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
from langchain_openai import ChatOpenAI

from langchain_core.prompts import ChatPromptTemplate

image_prompt = """
What are the item(s) in the image?
If you are not sure what the item is in the image, just say you are unsure of what the item is from the image, but do provide a list of maximum 5 possible items that you think this item is.

Return the answer in a numbered format arranged in the order of the items that are most prominent (obvious) in the image to items that are least prominent (obvious) in the image.
image_items:
1. item1
2. item2
...
5. item5

"""

image_prompt_template = ChatPromptTemplate.from_messages([
            ("system", "You are an expert on answering questions briefly and accurately about recycling in Singapore. Your name is Bloo. Users may send you images of items to check if the items can be recycled, and your task is to correctly identify what are the items in the image, and eventually answer the users questions in a helpful, correct and concise manner."),
            ("human", [
                    {"type": "text", "text": image_prompt},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{img_base64}"},
                    },
                ]),
        ])


qa_model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

retriever = docsearch.as_retriever(search_type="mmr",
                search_kwargs={'k': 6, 'fetch_k': 10})

question = """
For each of the image_items, is the item recyclable in Singapore? If so, provide the recycling instructions. If the item is not recyclable. If the item is not recyclable, answer why the item(s) are not recyclable and how to properly dispose it.


"""

template = """Answer the question referring to the following context:
{context}

If the answer is not found in the context, refer to your pre-trained knowledge and/or online information to answer the question, and if you do answer the question based on information gotten aside from the context, do tell the user that the answer is generated from information obtained from the source where you got your information from.

Question: {question}

Return the response in JSON format.
Example of a valid response with single item identified in the image:
{{
    "item": {{
        "name": <item name>,
        "description": <item description from the image item>,
        "recyclable": 'true' | 'false' | 'partial',
        "instructions": <recycling instructions>
    }},
    "other_items": []
}}

Example of a valid response with multiple items identified in the image:
{{
    "item": {{
        "name": <item name>,
        "description": <item description from the image item>,
        "recyclable": 'true' | 'false' | 'partial',
        "instructions": <recycling instructions>
    }},
    "other_items": [
        {{
            "name": <item name>,
            "description": <item description from the image item>,
            "recyclable": 'true' | 'false' | 'partial',
            "instructions": <recycling instructions>
        }},
        ...
    ]
}}

"""
prompt = ChatPromptTemplate.from_template(template)
qa_model_json_output = qa_model.with_structured_output(ImageResponse, method="json_mode")

# output_parser = JsonOutputParser(pydantic_object=ImageResponse)
# output_parser = StrOutputParser()

chain_text_stream = image_prompt_template \
| vision_model \
| RunnableLambda(lambda ai_msg: ai_msg.content) \
| RunnableParallel({"context": retriever, "question": RunnableLambda(lambda image_items: f"{image_items}\n {question}")}) \
| prompt \
| qa_model_json_output

res = chain_text_stream.invoke({"image_prompt": image_prompt, "image_base64": img_base64})
res

# async for chunk in chain_text_stream.astream({"image_prompt": image_prompt, "image_base64": img_base64}):
#     print(chunk, end='|', flush=True)


ImageResponse(item=Item(name='Envelope', description='Envelope (With and without plastic window)', recyclable=<RecyclableStatus.TRUE: 'true'>, instructions='Make sure it is clean before recycling.'), other_items=[Item(name='Plastic window on the envelope', description='Plastic envelope', recyclable=<RecyclableStatus.TRUE: 'true'>, instructions='Make sure it is clean before recycling.'), Item(name='Postage label', description='N/A', recyclable=<RecyclableStatus.FALSE: 'false'>, instructions='Postage labels are not recyclable. Please remove and dispose of them properly in the general waste.')])

# Returning only the names of the items that are most similar to the image items (with retrieval)



In [17]:
from langchain_core.pydantic_v1 import BaseModel, Field
from typing import List
# Structured-output schema with a single field: `items`, a list of item-name strings.
# The Field description below is part of the schema, so it is left untouched.
class Items(BaseModel):
    items: List[str] = Field(description="List of item names from the context that best match the names of the items in the image")


In [19]:
from langchain_core.prompts import ChatPromptTemplate

image_prompt = f"""
What are the item(s) in the image?
If you are not sure what the item is in the image, just say you are unsure of what the item is from the image, but do provide a list of maximum 5 possible items that you think this item is.

Return the answer in a numbered format arranged in the order of the items that are most prominent (obvious) in the image to items that are least prominent (obvious) in the image.
image_items:
1. item1
2. item2
...
5. item5

"""

image_prompt_template = ChatPromptTemplate.from_messages([
            ("system", "You are an expert on answering questions briefly and accurately about recycling in Singapore. Your name is Bloo. Users may send you images of items to check if the items can be recycled, and your task is to correctly identify what are the items in the image, and eventually answer the users questions in a helpful, correct and concise manner."),
            ("human", [
                    {"type": "text", "text": image_prompt},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{img_base64}"},
                    },
                ]),
        ])


from langchain_core.runnables import RunnablePassthrough, RunnableLambda, RunnableParallel
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
from langchain_openai import ChatOpenAI

qa_model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
qa_model_json_output = qa_model.with_structured_output(Items, method="json_mode")

retriever = docsearch.as_retriever(search_type="mmr",
                search_kwargs={'k': 6, 'fetch_k': 10})

question = """
Based on the image_items, return the name of the items in the context that best match each item in the image_items.
Refer to the 'items' and 'similar_items' fields in the context to match each item in image_items.
If the item in the image is not found in the context, simply do not return that item in the response.
"""

template = """Answer the question based only on the following context:
{context}

Question: {question}

Return the response in JSON format.
Example of a valid response if the image_items are 'Bottled Milk', 'Drinking Straw', and 'BPA-free Water Bottle':
{{
    "items": ['Milk Bottle', 'Plastic Straw', 'Water Bottle']
}}
"""
prompt = ChatPromptTemplate.from_template(template)


# output_parser = JsonOutputParser(pydantic_object=ImageResponse)
# output_parser = StrOutputParser()

chain = image_prompt_template \
| vision_model \
| RunnableLambda(lambda ai_msg: ai_msg.content) \
| RunnableParallel({"context": retriever, "question": RunnableLambda(lambda image_items: f"{image_items}\n {question}")}) \
| prompt \
| qa_model_json_output
# | output_parser

res = chain.invoke({"image_prompt": image_prompt, "image_base64": img_base64})
res


Items(items=['Plastic packaging', 'Bubble wrap'])