In [None]:
import ast
import os

import numpy as np
import openai
import pandas as pd
import tiktoken
from dotenv import load_dotenv
from openai.embeddings_utils import cosine_similarity, get_embedding

In [None]:
# Load variables from a local .env file (if any) into the process environment.
load_dotenv()

# Credentials, endpoint and API version for the OpenAI service.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
OPENAI_DEPLOYMENT_ENDPOINT = os.environ.get("OPENAI_DEPLOYMENT_ENDPOINT")
OPENAI_DEPLOYMENT_VERSION = os.environ.get("OPENAI_DEPLOYMENT_VERSION")

# Deployment and model names; each is None when the variable is not set.
OPENAI_DEPLOYMENT_NAME = os.environ.get("OPENAI_DEPLOYMENT_NAME")
OPENAI_MODEL_NAME = os.environ.get("OPENAI_MODEL_NAME")
OPENAI_EMBEDDING_DEPLOYMENT_NAME = os.environ.get("OPENAI_EMBEDDING_DEPLOYMENT_NAME")
OPENAI_EMBEDDING_MODEL_NAME = os.environ.get("OPENAI_EMBEDDING_MODEL_NAME")

# Point the openai client at the Azure-hosted service using the values above.
openai.api_type = "azure"
openai.api_key = OPENAI_API_KEY
openai.api_base = OPENAI_DEPLOYMENT_ENDPOINT
openai.api_version = OPENAI_DEPLOYMENT_VERSION

#### Read CSV using pandas

In [None]:
# Path to the recipes CSV, relative to the current working directory.
input_datapath = "./data/recipes_onecol.csv"
# Load the recipes into the working DataFrame used by the following cells.
df = pd.read_csv(filepath_or_buffer=input_datapath)

### Using OpenAI model ada for embeddings

OpenAI offers one second-generation embedding model (denoted by -002 in the model ID) and 16 first-generation models (denoted by -001 in the model ID).
We recommend using text-embedding-ada-002 for nearly all use cases. It’s better, cheaper, and simpler to use. 

#### Initialize Embeddings model 

In [None]:
# Parameters for the text-embedding-ada-002 embedding model.

# Tokenizer encoding that text-embedding-ada-002 uses, and the tokenizer itself.
embedding_encoding = "cl100k_base"
encoding = tiktoken.get_encoding(embedding_encoding)

# text-embedding-ada-002 accepts at most 8191 tokens; stay below that limit.
max_tokens = 8_000

# Upper bound on the number of recipes to embed.
top_n = 1_000

#### Count number of tokens in each row

In [None]:
# Count the tokens of every recipe so that over-long ones can be skipped.
df["n_tokens"] = df.Recipe.apply(lambda x: len(encoding.encode(x)))
# Keep only recipes that fit within max_tokens, then the last `top_n` of those.
# .copy() makes the result an independent frame, so adding the "embedding"
# column in the next step does not trigger pandas' SettingWithCopyWarning.
df = df[df.n_tokens <= max_tokens].tail(top_n).copy()

#### Create embeddings

In [32]:
# This may take a few minutes: get_embedding is called once per recipe.
# With openai.api_type = "azure", `engine` has to be the embedding *deployment*
# name, not the model name. ask_question embeds queries with the same
# deployment, which keeps recipe and query vectors comparable.
df["embedding"] = df.Recipe.apply(
    lambda x: get_embedding(x, engine=OPENAI_EMBEDDING_DEPLOYMENT_NAME)
)
# Save the recipes together with their embeddings so they can be reloaded
# without calling the API again.
df.to_csv("./data/recipes_onecol_with_embeddings.csv")

#### Read embeddings

In [None]:
datafile_path = "./data/recipes_onecol_with_embeddings.csv"
df = pd.read_csv(datafile_path)
# The CSV stores each embedding as the text of a Python list. Parse it with
# ast.literal_eval, which only accepts literals, rather than eval(), which
# would execute any code present in the file; then convert to a numpy array.
df["embedding"] = df.embedding.apply(ast.literal_eval).apply(np.array)

#### Define a function to ask a question and get the most similar recipe (embedding search; no GPT-3.5 call)

In [None]:
def ask_question(question, n=1):
    """Print the recipe(s) whose embeddings are closest to a question.

    The question is embedded with the OPENAI_EMBEDDING_DEPLOYMENT_NAME
    deployment and compared, by cosine similarity, with every vector in the
    global ``df["embedding"]`` column. The ``Recipe`` text of the ``n`` best
    matches is joined with spaces and printed after the prefix "Answer:".

    Args:
        question: Free-text query to search the recipes with.
        n: Number of top-scoring recipes to include (default 1).

    Returns:
        None. The result is printed, not returned.

    Raises:
        ValueError: If ``n`` is smaller than 1.

    Side effects:
        Writes (or overwrites) the ``similarity`` column of the global ``df``.
    """
    if n < 1:
        raise ValueError("n must be at least 1")

    question_embedding = get_embedding(
        question,
        engine=OPENAI_EMBEDDING_DEPLOYMENT_NAME
    )
    # Score every recipe against the question; the scores are kept on the
    # shared frame, replacing those of any previous call.
    df["similarity"] = df.embedding.apply(lambda x: cosine_similarity(x, question_embedding))

    # Highest similarity first, then keep the n best rows.
    results = df.sort_values("similarity", ascending=False).head(n)
    answer = ' '.join(results.Recipe.tolist())
    print("Answer:", answer)

#### "Berries" is semantically similar to "raspberries"

In [None]:
# Print the recipe whose embedding is closest to this ingredient-style query.
ask_question(question="Chocolate and berries")

#### Misspelled query: "Bukgur" instead of "Bulgur"

In [None]:
# Query containing the misspelling "Bukgur" (for "Bulgur").
ask_question(question="Bukgur vegan recipe")

In [None]:
# Query phrased with a negation ("no alcohol").
ask_question(question="Fish no alcohol")

In [None]:
# Query naming a specific kind of alcohol ("wine").
ask_question(question="Fish with wine")

Answer: Recipe: Trout in Riesling - Ingredients: 2 tablespoons (1 oz) unsalted butter plus additional for greasing;1 medium carrot;1 celery rib;4 (6- to 8-oz) trout fillets with skin;1 1/2 teaspoons salt;1 teaspoon black pepper;4 medium shallots, cut lengthwise into 1/8-inch-thick slices (1 cup);2 fresh parsley sprigs;5 black peppercorns;1 1/2 cups water;1 1/2 cups dry Riesling;3/4 cup heavy cream;2 teaspoons cornstarch;2 teaspoons chopped fresh tarragon;1/2 teaspoon fresh lemon juice;Special equipment: an adjustable-blade slicer; tweezers or needlenose pliers- Instructions: Put oven rack in middle position and preheat oven to 375°F. Butter bottom and sides of a 13- by 9-inch glass baking dish. Cut carrot and celery lengthwise with slicer into very thin matchsticks (less than 1/8 inch wide and 2 1/2 inches long). Rinse trout and remove any pin bones with tweezers or needlenose pliers. Pat dry, then sprinkle 1/2 teaspoon each of salt and pepper on fish (flesh sides only). Arrange fillet

In [None]:
# Same idea as the wine query, but using the generic term "alcohol".
ask_question(question="Fish with alcohol")