In [14]:
import csv
from dotenv import load_dotenv
import os
import random
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.llms import GPT4All
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

In [15]:
# Prompt template for the substitution-extraction task. It has two placeholders:
#   {context} - few-shot examples inserted under "Some examples of what is expected:"
#   {caption} - the caption the model is asked to answer for
# NOTE(review): the prompt text contains typos ("a the given food", "There maybe").
# They are left untouched here because editing the prompt may change model output.
template_with_context = """
You are Bob, a knowledgeable food expert. I will be giving you an Instagram text caption about ingredient substitutions. This text contains the given food, and its substitute. I want you to figure out and output the given food, and the substitute food accurately. The output should be a the given food and the replacement food, with each new pair in a new line.

If there are no obvious food substitutions, output -1, instead of the given food and the replacement food. There maybe multiple substitutions in a caption, so make sure to output all of them.

Some examples of what is expected:
{context}

Now answer as Bob and provide the output for the following caption, which would be the given food and the replacement food, each in a new line.

Caption: {caption}
Bob(Your Answer):
"""



In [16]:
def set_prompt(context=True):
    """Build the prompt template used for caption extraction.

    Args:
        context: Accepted but not used; the same template is returned
            whatever value is passed.

    Returns:
        A ``PromptTemplate`` wrapping ``template_with_context`` with the
        input variables ``caption`` and ``context``.
    """
    return PromptTemplate(
        input_variables=["caption", "context"],
        template=template_with_context,
    )

In [17]:
# Load the scraped posts, keeping every row whose third column is not "no"
# (compared case-insensitively).
posts = []
# newline='' lets the csv module handle newlines embedded in quoted captions;
# the explicit encoding keeps decoding independent of the platform default.
with open('./t5_results/eatthisnotthat_2023-09-04_image_text.csv', newline='', encoding='utf-8') as file:
    reader = csv.reader(file)
    # Consume the header row; the default avoids StopIteration on an empty file.
    header = next(reader, None)
    for row in reader:
        # csv.reader yields [] for blank lines; skip them instead of
        # failing with IndexError on row[2].
        if not row:
            continue
        if row[2].lower() == 'no':
            continue
        posts.append(row)


In [18]:
# Number of posts kept after dropping rows whose third column is "no".
print(len(posts))

57


In [19]:
def gen_context(path, num=10, seed=None):
    """Build a few-shot example string from an annotated CSV file.

    The CSV must have a header row containing the columns ``Post``,
    ``Ingredient`` and ``Substitute``. A random subset of rows is rendered
    as ``Caption: <Post>`` / ``Bob: (<Ingredient>, <Substitute>)`` pairs,
    each followed by a blank line.

    Args:
        path: Path to the annotated CSV file.
        num: Maximum number of examples to include. If the file has fewer
            rows than ``num``, all rows are used.
        seed: Optional seed for a private random generator, making the
            selection reproducible. When ``None`` the module-level
            ``random`` state is used.

    Returns:
        The concatenated examples as a single string ('' when no rows are
        selected).

    Raises:
        KeyError: If a required column is missing from the CSV.
        ValueError: If ``num`` is negative.
    """
    # newline='' is required by the csv module to parse newlines embedded in
    # quoted fields; utf-8 is explicit so captions decode the same everywhere.
    with open(path, newline='', encoding='utf-8') as file:
        annotated_posts = list(csv.DictReader(file))

    # random.sample raises ValueError when asked for more items than exist,
    # so cap the request at the number of available rows.
    sample_size = min(num, len(annotated_posts))
    rng = random if seed is None else random.Random(seed)
    chosen_posts = rng.sample(annotated_posts, sample_size)

    # The answer is rendered as the repr of a 2-tuple, e.g. ('agar', 'gelatin').
    return ''.join(
        f"Caption: {post['Post']}\nBob: {(post['Ingredient'], post['Substitute'])}\n\n"
        for post in chosen_posts
    )

In [20]:
# Seed the global RNG so the same few-shot examples are drawn on every run,
# which keeps the prompt reproducible after a kernel restart.
random.seed(42)
context = gen_context('./data/output.csv', num=8)

In [21]:
# Relative path of the local model file that is passed to GPT4All as `model`.
local_path = "./mistral-7b-instruct-v0.1.Q4_0.gguf"

In [22]:
# Stream tokens to stdout one at a time while the model generates.
callbacks = [StreamingStdOutCallbackHandler()]

# verbose=True must be set for output to reach the callback manager.
llm = GPT4All(
    model=local_path,
    callbacks=callbacks,
    verbose=True,
)

In [23]:
# set_prompt ignores its `context` argument, so the few-shot string is not
# passed here; it is supplied per call through llm_chain.run(context=...).
prompt = set_prompt()
llm_chain = LLMChain(prompt=prompt, llm=llm)

In [24]:
# Show the few-shot examples string that is passed to the chain as `context`.
context

"Caption:  Agar is a vegan substitute for gelatin, commonly used in desserts and marshmallows. To make a delicious vegan chocolate mousse, use agar instead of gelatin. Mix it with cocoa powder, sugar, and non-dairy milk, then whip until light and fluffy. Chill before serving. Enjoy! #vegan #chocolatemousse #agar\nBob: ('agar', 'gelatin')\n\nCaption: Almond extract is a common ingredient in baking, particularly for cakes and cookies. It has a distinct nutty flavor that can be overpowering if used directly. To avoid this, you can use bitter almond as an alternative with less of the strong flavor. Bitter almonds are also known to have health benefits due to their high antioxidant content. Try using them in your next baking recipe for a unique taste experience!\nBob: ('almond extract', 'bitter almond')\n\nCaption:  Hey friends! ���� Today, I'm in the mood for some hearty German food. Let me show you how to make a delicious Bockwurst Sausage Pasta! �������� All you need is some fresh pasta,

In [25]:
# Run the chain over the first ten filtered posts, one record per post.
# Records are appended as each call finishes, so anything already generated
# is still in `results` if the run is interrupted.
results = []
for post in posts[:10]:
    pk, caption = post[0], post[1]
    out = llm_chain.run(caption=caption, context=context)
    results.append(dict(pk=pk, caption=caption, output=out))

('apple', 'almond')
('lemon', 'dates')
('milk', 'prayer')('Bean dip', 'Five minute bean dip')

Exception ignored on calling ctypes callback function: <function LLModel._prompt_callback at 0x127db9bc0>
Traceback (most recent call last):
  File "/Users/akshitsinha3/Library/CloudStorage/OneDrive-InternationalInstituteofInformationTechnology/Projects/Precog/FoodComputation/.venv/lib/python3.11/site-packages/gpt4all/pyllmodel.py", line 479, in _prompt_callback
    @staticmethod

KeyboardInterrupt: 


In [22]:
# Save results to CSV.
# newline='' stops the csv module's own line endings from being translated
# again (which produces blank rows on Windows); utf-8 is explicit so captions
# with emoji are written the same way on every platform.
with open('./results.csv', 'w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=['pk', 'caption', 'output'])
    writer.writeheader()
    writer.writerows(results)