In [2]:
from dotenv import load_dotenv
import tiktoken
import openai
import pandas as pd
import os
import numpy as np
from openai.embeddings_utils import cosine_similarity, get_embedding

In [3]:
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") 
OPENAI_DEPLOYMENT_ENDPOINT = os.getenv("OPENAI_DEPLOYMENT_ENDPOINT")
OPENAI_DEPLOYMENT_NAME = os.getenv("OPENAI_DEPLOYMENT_NAME")
OPENAI_MODEL_NAME = os.getenv("OPENAI_MODEL_NAME")
OPENAI_DEPLOYMENT_VERSION = os.getenv("OPENAI_DEPLOYMENT_VERSION")

OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME = os.getenv("OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME")
OPENAI_ADA_EMBEDDING_MODEL_NAME = os.getenv("OPENAI_ADA_EMBEDDING_MODEL_NAME")

OPENAI_DAVINCI_DEPLOYMENT_NAME = os.getenv("OPENAI_DAVINCI_DEPLOYMENT_NAME")
OPENAI_DAVINCI_MODEL_NAME = os.getenv("OPENAI_DAVINCI_MODEL_NAME")

# Configure OpenAI API
openai.api_type = "azure"
openai.api_version = OPENAI_DEPLOYMENT_VERSION
openai.api_base = OPENAI_DEPLOYMENT_ENDPOINT
openai.api_key = OPENAI_API_KEY

#### Read CSV using pandas

In [4]:
input_datapath = "./data/recipes_onecol.csv"  
df = pd.read_csv(input_datapath)

In [6]:
df.head()

Unnamed: 0,Recipe
0,Recipe: Caramelized Bread Pudding with Chocola...
1,Recipe: White Sauce or Bechamel Sauce - Ingred...
2,"Recipe: Grilled Whole Mackerel with Lemon, Ore..."
3,Recipe: Christmas Croquembouche - Ingredients:...
4,Recipe: Chocolate Roll-Out Cookies - Ingredien...


### Using OpenAI model ada for embeddings

OpenAI offers one second-generation embedding model (denoted by -002 in the model ID) and 16 first-generation models (denoted by -001 in the model ID).
We recommend using text-embedding-ada-002 for nearly all use cases. It’s better, cheaper, and simpler to use. 

#### Initialize Embeddings model 

In [36]:
# embedding model parameters
# encoding for text-embedding-ada-002
embedding_encoding = "cl100k_base"  
# the maximum for text-embedding-ada-002 is 8191
max_tokens = 8000 
# the number of reviews to embed
#in case of rate limit error, reduce the number of reviews
top_n = 150
encoding = tiktoken.get_encoding(embedding_encoding)

#### Count number of tokens in each row

In [37]:
# skip Recipes that are too long to embed > max_tokens
df["n_tokens"] = df.Recipe.apply(lambda x: len(encoding.encode(x)))
df = df[df.n_tokens <= max_tokens].tail(top_n)

In [38]:
df.head()

Unnamed: 0.1,Unnamed: 0,Recipe,n_tokens,embedding,similarity
0,489,Recipe: Cumin-Crusted Monster Pork Chop with P...,1057,"[0.013822183012962341, 0.01743599772453308, 0....",0.747383
1,490,Recipe: Ham Persillade with Mustard Potato Sal...,637,"[-0.01765388622879982, 0.0025091650895774364, ...",0.747511
2,491,Recipe: Moroccan Lamb with Garbanzo Bean Mash ...,830,"[0.005156847648322582, 0.00642572483047843, -0...",0.727041
3,492,Recipe: Spaghetti with Tomatoes and Anchovy Bu...,357,"[0.016582747921347618, 0.007461572531610727, 0...",0.734125
4,493,Recipe: SSC Brownies - Ingredients: Nonstick c...,801,"[-0.007866213098168373, 0.007626429665833712, ...",0.787025


#### Create embeddings

In [39]:
# This may take a few minutes
df["embedding"] = df.Recipe.apply(lambda x: get_embedding(x, engine=OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME))
# save the file
df.to_csv("./data/recipes_onecol_with_embeddings.csv")

#### Read embeddings

In [40]:
datafile_path = "./data/recipes_onecol_with_embeddings.csv"
df = pd.read_csv(datafile_path)

# convert the string representation of the embedding to a numpy array
df["embedding"] = df.embedding.apply(eval).apply(np.array)

#### Define a function to ask a question and get an answer from GPT-3.5

In [68]:
def ask_question(question, n=1):
    # get embedding for question
    question_embedding = get_embedding(
        question,
        engine=OPENAI_ADA_EMBEDDING_DEPLOYMENT_NAME
    )

    # find the most similar embedding in the dataset
    df["similarity"] = df.embedding.apply(lambda x: cosine_similarity(x, question_embedding))

    # sort the results by similarity
    results = (
        df.sort_values("similarity", ascending=False)
        .head(n)
    )
    
    # if we asked for more than one anwser (n=1 or more), we would return a list of answers
    return (results.similarity.iloc[0], ' '.join(results.Recipe.tolist()) )
    

#### Berries is similar to raspberries

In [70]:
answer = ask_question("Chocolate and berries")
print(f"Similarity Score: {answer[0]}, {answer[1]}")

Similarity Score: 0.7870251500340449, Recipe: SSC Brownies - Ingredients: Nonstick cooking spray or unsalted butter for greasing;1 cup (8 oz/250 g) cold unsalted butter, cut into small cubes;2 cups (12 oz/375 g) semisweet chocolate chips, plus 1 cup (6 oz/185 g);4 oz (125 g) bittersweet chocolate, chopped;3 large eggs;1 cup (8 oz/250 g) sugar;1 tbsp pure vanilla extract;3/4 cup (4 oz/125 g) all-purpose flour;1 tsp baking powder;1/2 tsp kosher salt;3/4 cup (1 1/2 oz/45 g) crisped rice cereal (optional)- Instructions: 1. Preheat the oven to 375°F (190°C). Lightly spray or grease a 9-by-12-inch (23-by-30-cm) baking pan with cooking spray or butter. 2. Combine the butter, the 2 cups chocolate chips, and the bittersweet chocolate in a metal bowl or the top pan of a double boiler and place over (not touching) simmering water. Heat, stirring, until all of the chocolate is melted and the mixture is smooth. Remove from the heat and let cool until still warm but not too hot to touch, about 5 min

#### Typo: Bukgur instead of Bulgur

In [72]:
answer = ask_question("Bukgur vegan recipe")
print(f"Similarity Score: {answer[0]}, {answer[1]}")

Similarity Score: 0.792000634945569, Recipe: Moroccan Lamb with Garbanzo Bean Mash - Ingredients: 1 1 1/2-pound piece butterflied boneless leg of lamb, trimmed;2 tablespoons olive oil, divided;1/2 cup dry red wine;6 tablespoons fresh orange juice;1 tablespoon grated peeled fresh ginger;2 teaspoons finely grated orange peel;2 teaspoons ground cumin;1 teaspoon ground coriander;1 teaspoon ground cinnamon;4 tablespoons chopped fresh cilantro, divided;2 cups chopped red onions;6 garlic cloves, chopped;2 15 1/2-ounce cans garbanzo beans (chickpeas), rinsed, drained;1 cup low-salt chicken broth- Instructions: Place lamb in shallow bowl or glass baking dish; coat with 1 tablespoon oil. Add wine, orange juice, ginger, and orange peel to bowl. Mix to coat. Mix cumin, coriander, and cinnamon in small bowl. Sprinkle spice mixture over lamb, turning to coat evenly. Sprinkle with 2 tablespoons cilantro. Cover and marinate at room temperature 1 hour or chill up to 4 hours. Preheat oven to 400°F. Heat

In [73]:
answer = ask_question("Fish no alcohol")
print(f"Similarity Score: {answer[0]}, {answer[1]}")

Similarity Score: 0.7293221340369455, Recipe: Ham Persillade with Mustard Potato Salad and Mashed Peas - Ingredients: 6 long parsley sprigs, divided;1 3/4 cups reduced-sodium chicken broth;1 large garlic clove, minced;2 teaspoon unflavored gelatin (from 1 envelope);3 tablespoons water;1 (3/4-pound) piece baked ham, cut into 1/2-inch cubes (2 cups);1/2 cup mayonnaise;2 tablespoons Dijon mustard;2 tablespoons white-wine vinegar;2 celery ribs, finely chopped (1 cup);1/4 cup chopped cornichons or sour gherkins;1 pound boiled potatoes, peeled and cut into 1/2-inch cubes (2 1/2 cups);1 (10-ounce) box frozen baby peas, thawed;2 teaspoons finely chopped marjoram;3 tablespoons extra-virgin olive oil;Equipment: 4 (16-ounce) wide jars or containers with lids;Garnish: celery leaves- Instructions: Make ham persillade: Chop enough parsley leaves to measure 1 tablespoon; reserve. Chop remaining leaves and stems and simmer with broth and garlic in a small saucepan, covered, 5 minutes. Meanwhile, sprin

In [74]:
answer = ask_question("Fish with wine")
print(f"Similarity Score: {answer[0]}, {answer[1]}")

Similarity Score: 0.7712677951217192, Recipe: Spaghetti with Tomatoes and Anchovy Butter - Ingredients: 1/2 pound spaghetti;4 tablespoons unsalted butter;2 anchovy fillets;4 sliced garlic cloves;2 pounds quartered medium tomatoes;Salt;Pepper;Chopped tender herbs (such as flat-leaf parsley, tarragon, and/or chives)- Instructions: Cook 1/2 pound spaghetti; drain, reserving 1/2 cup cooking liquid. Meanwhile, heat 4 tablespoons unsalted butter in a large skillet over medium heat. Cook 2 anchovy fillets and 4 sliced garlic cloves, stirring often, until anchovies are broken down and garlic is soft, about 4 minutes. Add 2 pounds quartered medium tomatoes; season with salt and pepper and cook, stirring occasionally, until falling apart, 8–10 minutes. Toss in pasta and cooking liquid; cook until sauce coats pasta, about 2 minutes. Toss in chopped tender herbs (such as flat-leaf parsley, tarragon, and/or chives).
Cook 1/2 pound spaghetti; drain, reserving 1/2 cup cooking liquid. Meanwhile, heat 

In [75]:

answer = ask_question("Fish with alcohol")
print(f"Similarity Score: {answer[0]}, {answer[1]}")

Similarity Score: 0.7521979154412288, Recipe: Spaghetti with Tomatoes and Anchovy Butter - Ingredients: 1/2 pound spaghetti;4 tablespoons unsalted butter;2 anchovy fillets;4 sliced garlic cloves;2 pounds quartered medium tomatoes;Salt;Pepper;Chopped tender herbs (such as flat-leaf parsley, tarragon, and/or chives)- Instructions: Cook 1/2 pound spaghetti; drain, reserving 1/2 cup cooking liquid. Meanwhile, heat 4 tablespoons unsalted butter in a large skillet over medium heat. Cook 2 anchovy fillets and 4 sliced garlic cloves, stirring often, until anchovies are broken down and garlic is soft, about 4 minutes. Add 2 pounds quartered medium tomatoes; season with salt and pepper and cook, stirring occasionally, until falling apart, 8–10 minutes. Toss in pasta and cooking liquid; cook until sauce coats pasta, about 2 minutes. Toss in chopped tender herbs (such as flat-leaf parsley, tarragon, and/or chives).
Cook 1/2 pound spaghetti; drain, reserving 1/2 cup cooking liquid. Meanwhile, heat 

In [76]:

answer = ask_question("Meat with alcohol")
print(f"Similarity Score: {answer[0]}, {answer[1]}")

Similarity Score: 0.7765437897896662, Recipe: Belgian Ale-Braised Brisket - Ingredients: 1 4-pound piece flat-cut beef brisket, untrimmed;Kosher salt;1/4 cup Dijon mustard;1/4 cup (packed) dark brown sugar;1 tablespoon grated peeled ginger;2 tablespoons bacon fat or vegetable oil;2 medium yellow onions, thinly sliced;1/4 cup all-purpose flour;1 bay leaf;1 750-milliliter bottle Belgian-style tripel ale;4 cups beef stock or low-sodium chicken broth- Instructions: Season brisket with salt. Wrap tightly in plastic and chill at least 8 hours. Let brisket sit at room temperature 1 hour. Preheat oven to 400°F. Combine mustard, brown sugar, and ginger in a small bowl. Unwrap brisket, place on a wire rack set inside a large rimmed baking sheet, and rub mustard mixture all over brisket. Roast until top is nicely browned, 30–40 minutes. Remove brisket from oven and reduce oven temperature to 300°F. Meanwhile, heat bacon fat in a large heavy ovenproof pot over medium heat. Add onions; season with 