In [2]:
import pandas as pd
import json

In [3]:
df = pd.read_csv('recipes.csv')
columns_to_check = ['name', 'description', 'ingredients', 'id', 'steps']

# A row is counted once, however many of the checked columns are missing in it.
nan_counts = df[columns_to_check].isnull().any(axis='columns').sum()

print(f"Number of rows containing NaNs in specific columns: {nan_counts}")

Number of rows containing NaNs in specific columns: 4980


In [4]:
# No `subset=` is passed, so a row is dropped when ANY column holds a NaN --
# broader than the `columns_to_check` audit done when the CSV was loaded.
df = df.dropna()

In [5]:
# Preview the first rows of the frame.
df.head()

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients
0,arriba baked winter squash mexican style,137739,55,47892,2005-09-16,"['60-minutes-or-less', 'time-to-make', 'course...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"['winter squash', 'mexican seasoning', 'mixed ...",7
1,a bit different breakfast pizza,31490,30,26278,2002-06-17,"['30-minutes-or-less', 'time-to-make', 'course...","[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",9,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,"['prepared pizza crust', 'sausage patty', 'egg...",6
2,all in the kitchen chili,112140,130,196586,2005-02-25,"['time-to-make', 'course', 'preparation', 'mai...","[269.8, 22.0, 32.0, 48.0, 39.0, 27.0, 5.0]",6,"['brown ground beef in large pot', 'add choppe...",this modified version of 'mom's' chili was a h...,"['ground beef', 'yellow onions', 'diced tomato...",13
3,alouette potatoes,59389,45,68585,2003-04-14,"['60-minutes-or-less', 'time-to-make', 'course...","[368.1, 17.0, 10.0, 2.0, 14.0, 8.0, 20.0]",11,['place potatoes in a large pot of lightly sal...,"this is a super easy, great tasting, make ahea...","['spreadable cheese with garlic and herbs', 'n...",11
4,amish tomato ketchup for canning,44061,190,41706,2002-10-25,"['weeknight', 'time-to-make', 'course', 'main-...","[352.9, 1.0, 337.0, 23.0, 3.0, 0.0, 28.0]",5,['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,"['tomato juice', 'apple cider vinegar', 'sugar...",8


In [98]:
# Raw `steps` cell of the row labelled 0: a single string holding a Python list literal.
step = df.loc[0, 'steps']
step

"['make a choice and proceed with recipe', 'depending on size of squash , cut into half or fourths', 'remove seeds', 'for spicy squash , drizzle olive oil or melted butter over each cut squash piece', 'season with mexican seasoning mix ii', 'for sweet squash , drizzle melted honey , butter , grated piloncillo over each cut squash piece', 'season with sweet mexican spice mix', 'bake at 350 degrees , again depending on size , for 40 minutes up to an hour , until a fork can easily pierce the skin', 'be careful not to burn the squash especially if you opt to use sugar or butter', 'if you feel more comfortable , cover the squash with aluminum foil the first half hour , give or take , of baking', 'if desired , season with salt']"

In [99]:
# Raw `ingredients` cell of the row labelled 0: also a stringified list.
ingred = df.loc[0, 'ingredients']
ingred

"['winter squash', 'mexican seasoning', 'mixed spice', 'honey', 'butter', 'olive oil', 'salt']"

In [6]:
import ast

def list_to_text_steps(row):
    """Parse a stringified list of steps and join the items with single spaces.

    Args:
        row: String containing a Python list literal of step strings.

    Returns:
        One string with every step separated by a space.
    """
    return " ".join(ast.literal_eval(row))

def list_to_text_ingredients(row):
    """Parse a stringified list and join the items with commas (no spaces).

    Args:
        row: String containing a Python list literal of strings.

    Returns:
        One comma-separated string of the list items.
    """
    return ",".join(ast.literal_eval(row))

# Flatten the stringified list columns on a new frame so `df` itself is untouched:
# steps become space-separated text, ingredients and tags comma-separated text.
df_processed = df.assign(
    steps=df['steps'].apply(list_to_text_steps),
    ingredients=df['ingredients'].apply(list_to_text_ingredients),
    tags=df['tags'].apply(list_to_text_ingredients),
)

In [7]:
# Flattened tags of the row labelled 0.
df_processed.loc[0, 'tags']

'60-minutes-or-less,time-to-make,course,main-ingredient,cuisine,preparation,occasion,north-american,side-dishes,vegetables,mexican,easy,fall,holiday-event,vegetarian,winter,dietary,christmas,seasonal,squash'

In [13]:
# `index=False`: the frame already has an 'Unnamed: 0' column (it appears to hold
# the original row numbers), so writing the index too would add a second,
# redundant unnamed column to the cleaned file.
df_processed.to_csv('recipes_cleaned.csv', index=False)

# Keep a small working subset for embedding and indexing. `.loc[:2000]` slices by
# index LABEL (inclusive), so labels removed earlier by dropna() leave gaps and
# fewer than 2001 rows remain.
df_processed = df_processed.loc[:2000]

In [104]:
from elasticsearch import Elasticsearch
from openai import OpenAI
from dotenv import load_dotenv
import os
es_client = Elasticsearch('http://localhost:9200')

# Read the OpenAI key from the environment after loading any local .env file.
load_dotenv()
secret_key = os.environ.get('OPEN_AI_API')
client = OpenAI(api_key=secret_key)

# Index definition for the recipe documents.
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            "ingredients": {"type": "text"},
            "steps": {"type": "text"},
            "name": {"type": "text"},
            "description": {"type": "text"},
            "tags": {"type": "text"},
            # Counts are numeric, so map them as integers rather than analysed text.
            "n_ingredients": {"type": "integer"},
            "n_steps": {"type": "integer"},
            "id": {"type": "keyword"},
            # Embedding of the combined recipe text; `dims` should match the
            # length of the vectors produced by the embedding model.
            "text_vector": {
                "type": "dense_vector",
                "dims": 384,
                "index": True,
                "similarity": "cosine"
            },
        }
    }
}

index_name = "recipes"

# Start from a clean index on every run: drop any existing one, then recreate it.
es_client.indices.delete(index=index_name, ignore_unavailable=True)
es_client.indices.create(index=index_name, body=index_settings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'recipes'})

In [8]:
from sentence_transformers import SentenceTransformer
from tqdm.auto import tqdm

# Sentence-embedding model; its `encode` method is used both when building the
# recipe documents and when embedding search queries.
model_name = 'multi-qa-MiniLM-L6-cos-v1'
model = SentenceTransformer(model_name)

  from tqdm.autonotebook import tqdm, trange


In [9]:
# Sanity check: the embedding length shown here should match the `dims` value of
# the `text_vector` field in the index mapping.
len(model.encode('recipe with squash and tomatoes?'))

384

In [14]:
def row_to_dict(row):
    """Convert one processed recipe row into an Elasticsearch document.

    Args:
        row: Mapping-like record (for example a pandas Series from
            ``iterrows``) with string values under ``name``, ``description``,
            ``steps``, ``tags`` and ``ingredients``, plus an ``id`` entry.

    Returns:
        dict with the recipe's text fields, its ``id``, and ``text_vector``:
        the embedding of the combined text, converted to a plain list.
    """
    name = row['name']
    desc = row['description']
    steps = row['steps']
    tags = row['tags']
    ingred = row['ingredients']
    id_num = row['id']

    # The text that gets embedded, in this fixed order:
    # ingredients, name, description, tags, steps.
    total_string = ' '.join([ingred, name, desc, tags, steps])

    return {
        "name": name,
        "description": desc,
        "steps": steps,
        "ingredients": ingred,
        "id": id_num,
        "tags": tags,
        "text_vector": model.encode(total_string).tolist()
    }

In [11]:
# Smoke-test the conversion on the row labelled 0.
row_to_dict(df_processed.loc[0])

{'name': 'arriba   baked winter squash mexican style',
 'description': 'autumn is my favorite time of year to cook! this recipe \r\ncan be prepared either spicy or sweet, your choice!\r\ntwo of my posted mexican-inspired seasoning mix recipes are offered as suggestions.',
 'steps': 'make a choice and proceed with recipe depending on size of squash , cut into half or fourths remove seeds for spicy squash , drizzle olive oil or melted butter over each cut squash piece season with mexican seasoning mix ii for sweet squash , drizzle melted honey , butter , grated piloncillo over each cut squash piece season with sweet mexican spice mix bake at 350 degrees , again depending on size , for 40 minutes up to an hour , until a fork can easily pierce the skin be careful not to burn the squash especially if you opt to use sugar or butter if you feel more comfortable , cover the squash with aluminum foil the first half hour , give or take , of baking if desired , season with salt',
 'ingredients': 

In [15]:
from tqdm.auto import tqdm
# One document (including its embedding) per recipe row, with a progress bar.
dict_list = [
    row_to_dict(recipe)
    for _label, recipe in tqdm(df_processed.iterrows(), total=len(df_processed))
]

  0%|          | 0/1974 [00:00<?, ?it/s]

In [124]:
# Save to a JSON file
# Write the document list to disk as pretty-printed JSON.
with open('recipes.json', 'w') as recipes_file:
    json.dump(dict_list, fp=recipes_file, indent=4)

In [63]:
# Index the documents already built in `dict_list` rather than calling
# row_to_dict() again, which would re-run the embedding model on every recipe.
for document in tqdm(dict_list, total=len(dict_list)):
    es_client.index(index=index_name, document=document)

  0%|          | 0/1974 [00:00<?, ?it/s]

In [74]:
def recipe_vector_knn(q):
    """Embed the query text ``q`` and return the matching recipes from the kNN search."""
    return elastic_search_knn(model.encode(q))

def elastic_search_knn(vector):
    """Run a kNN search against the recipe index and return the matching documents.

    Args:
        vector: Query embedding, either an object exposing ``tolist()`` (such
            as a numpy array) or any iterable of floats.

    Returns:
        list of ``_source`` dicts for the hits, each limited to the ``name``,
        ``description``, ``steps``, ``ingredients`` and ``id`` fields.
    """
    # Send a plain list so the request body does not depend on the client
    # knowing how to serialise array types.
    query_vector = vector.tolist() if hasattr(vector, "tolist") else list(vector)

    knn = {
        # Must be the dense_vector field declared in the index mapping and
        # filled in by row_to_dict(); a different name matches no indexed vectors.
        "field": "text_vector",
        "query_vector": query_vector,
        "k": 5,
        "num_candidates": 10000
    }

    search_query = {
        "knn": knn,
        "_source": ["name", "description", "steps", "ingredients", "id"]
    }

    es_results = es_client.search(
        index=index_name,
        body=search_query
    )

    return [hit['_source'] for hit in es_results['hits']['hits']]
    

In [78]:
# Example question; fetch the nearest recipes for it via the kNN search.
query = 'I have sausages, apples, and potatoes. What can I make?'
results = recipe_vector_knn(query)

In [80]:
def build_prompt(query, search_results):
    """Render the LLM prompt for a question and the recipes retrieved for it.

    Args:
        query: The user's question, inserted after ``QUESTION:``.
        search_results: Iterable of dicts with ``name``, ``description``,
            ``ingredients`` and ``steps`` keys.

    Returns:
        The prompt text with one blank-line-separated entry per recipe under
        ``CONTEXT:``, stripped of leading and trailing whitespace.
    """
    template = (
        "You're a recipe creator. Provide an answer to the QUERY based on the CONTEXT from the recipe database.\n"
        "Utilize the recipes from the CONTEXT when answering the QUERY.\n"
        "\n"
        "QUESTION: {question}\n"
        "\n"
        "CONTEXT: \n"
        "{context}"
    )

    entries = [
        f"name: {doc['name']}\ndescription: {doc['description']}\ningredients: {doc['ingredients']}\nsteps: {doc['steps']}\n\n"
        for doc in search_results
    ]

    return template.format(question=query, context="".join(entries)).strip()

def rag(query: str, model='gpt-4o') -> str:
    """Answer a question with retrieval-augmented generation.

    Retrieves recipes for the query, builds a prompt from them, and returns
    the chat model's answer.

    Args:
        query: The user's question as plain text; it is embedded for the
            search and inserted into the prompt.
        model: Chat model name passed through to ``llm``.

    Returns:
        The answer text produced by ``llm``.
    """
    search_results = recipe_vector_knn(query)
    prompt = build_prompt(query, search_results)
    return llm(prompt, model=model)

def llm(prompt, model='gpt-4o'):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Full prompt text for the chat model.
        model: Name of the chat model to call.

    Returns:
        The message content of the first choice in the response.
    """
    user_message = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(model=model, messages=[user_message])
    first_choice = completion.choices[0]
    return first_choice.message.content

In [83]:
# Assemble the LLM prompt from the example query and its retrieved recipes.
prompt = build_prompt(query, results)


In [85]:
# Send the prompt to the chat model and show the generated answer.
answer = llm(prompt)
print(answer)

You can make a delicious **Sausage, Apple, and Potato Bake** using your ingredients. Here's a modified version inspired by the recipes in the context:

### Sausage, Apple, and Potato Bake

**Description:** This hearty dish combines the earthiness of potatoes, the sweetness of apples, and the savory flavor of sausages. It's a perfect meal for brunch or supper.

**Ingredients:**
- 4 sausages (smoked or kielbasa)
- 4 medium potatoes, diced
- 2 apples, cored and diced
- Salt and pepper to taste
- 1 tablespoon olive oil
- Optional: A sprinkle of your favorite herbs (rosemary, thyme, etc.)

**Steps:**
1. **Preheat Oven:** Preheat your oven to 350°F (175°C).
2. **Prepare Ingredients:** Cut the sausages into bite-sized pieces. Dice the potatoes and apples into similar-sized chunks.
3. **Mix Ingredients:** In a large mixing bowl, toss the sausages, potatoes, and apples together with olive oil, salt, and pepper. If you're using any herbs, add them now.
4. **Bake:** Transfer the mixture to a cass