In [27]:
from pinecone import Pinecone,ServerlessSpec
import os
import dotenv
import time
dotenv.load_dotenv()
from openai import OpenAI
import pandas as pd

In [4]:
# Read the connection settings from the environment in one pass.
# Any variable that is not set comes back as None.
token, open_ai_base_url, model_name, Pinecone_api_key, index_name = (
    os.getenv(env_key)
    for env_key in (
        "RUNPOD_TOKEN",
        "RUNPOD_EMBEDDING_URL",
        "MODEL_NAME",
        "PINECONE_API_KEY",
        "PINECONE_INDEX_NAME",
    )
)

In [5]:
# Pinecone client, authenticated with the key read from PINECONE_API_KEY.
pc = Pinecone(api_key=Pinecone_api_key)

# OpenAI SDK client pointed at the base URL read from RUNPOD_EMBEDDING_URL.
client = OpenAI(base_url=open_ai_base_url, api_key=token)

# Try out embedding

In [6]:
# Smoke test: embed a single sentence and inspect the returned vector.
output = client.embeddings.create(
    model=model_name,
    input=["Hello world!"],
)
embeddings = output.data[0].embedding  # vector for the one input string
print(embeddings)

[-0.0034090846311300993, -0.011942288838326931, 0.0417560413479805, -0.03821532428264618, 0.02428138256072998, 0.013712647370994091, 0.010973170399665833, 0.0481049120426178, 0.02124430239200592, 0.0142391761764884, -0.0030962196178734303, -0.03845951333642006, 0.00943173747509718, 0.04923427850008011, 0.038154277950525284, 0.022419452667236328, 0.035346124321222305, -0.013048762455582619, -0.1088465228676796, -0.020649094134569168, 0.10616046190261841, 0.04743339866399765, -0.02048121578991413, -0.04987527057528496, 0.0010606888681650162, -0.015276972204446793, -0.014643611386418343, 0.035498738288879395, -0.001640633912757039, -0.17605909705162048, -0.013804217800498009, -0.011927027255296707, 0.09682029485702515, -0.010431379079818726, 0.03757433220744133, -0.018237745389342308, -0.0025963985826820135, 0.047769155353307724, -0.04456419497728348, 0.008119230158627033, 0.041939180344343185, -0.012537495233118534, -0.0015004170127213001, -0.009607247076928616, 0.014063666574656963, -0.

In [7]:
# Length of the embedding vector obtained in the previous cell.
len(embeddings)

384

# Wrangle dataset

In [8]:
# The file holds one JSON record per line, hence lines=True.
df = pd.read_json(
    "products/products.jsonl",
    lines=True,
)

In [9]:
# Preview the first five rows of the product table.
df.head(n=5)

Unnamed: 0,name,category,description,ingredients,price,rating,image_path
0,Cappuccino,Coffee,A rich and creamy cappuccino made with freshly...,"[Espresso, Steamed Milk, Milk Foam]",4.5,4.7,cappuccino.jpg
1,Jumbo Savory Scone,Bakery,"Deliciously flaky and buttery, this jumbo savo...","[Flour, Butter, Cheese, Herbs, Baking Powder, ...",3.25,4.3,SavoryScone.webp
2,Latte,Coffee,"Smooth and creamy, our latte combines rich esp...","[Espresso, Steamed Milk, Milk Foam]",4.75,4.8,Latte.jpg
3,Chocolate Chip Biscotti,Bakery,"Crunchy and delightful, this chocolate chip bi...","[Flour, Sugar, Chocolate Chips, Eggs, Almonds,...",2.5,4.6,chocolat_biscotti.jpg
4,Espresso shot,Coffee,"A bold shot of rich espresso, our espresso is ...",[Espresso],2.0,4.9,Espresso_shot.webp


In [10]:
# Flatten each product row into a single descriptive string: this is the
# text that gets embedded and stored as metadata further down.
df["text"] = (
    df["name"]
    + " : "
    + df["description"]
    + " -- Ingredients: "
    + df["ingredients"].astype(str)
    + " -- Price: "
    + df["price"].astype(str)
    + " -- rating: "
    + df["rating"].astype(str)
)

In [11]:
# Preview the first five combined text strings.
df["text"].head(n=5)

0    Cappuccino : A rich and creamy cappuccino made...
1    Jumbo Savory Scone : Deliciously flaky and but...
2    Latte : Smooth and creamy, our latte combines ...
3    Chocolate Chip Biscotti : Crunchy and delightf...
4    Espresso shot : A bold shot of rich espresso, ...
Name: text, dtype: object

In [12]:
# Plain Python list of the per-product strings; more documents are appended below.
texts = df["text"].tolist()

In [14]:
# Show the first two documents in full.
texts[0:2]

["Cappuccino : A rich and creamy cappuccino made with freshly brewed espresso, steamed milk, and a frothy milk cap. This delightful drink offers a perfect balance of bold coffee flavor and smooth milk, making it an ideal companion for relaxing mornings or lively conversations. -- Ingredients: ['Espresso', 'Steamed Milk', 'Milk Foam'] -- Price: 4.5 -- rating: 4.7",
 "Jumbo Savory Scone : Deliciously flaky and buttery, this jumbo savory scone is filled with herbs and cheese, creating a mouthwatering experience. Perfect for a hearty snack or a light lunch, it pairs beautifully with your favorite coffee or tea. -- Ingredients: ['Flour', 'Butter', 'Cheese', 'Herbs', 'Baking Powder', 'Salt'] -- Price: 3.25 -- rating: 4.3"]

In [None]:
# Load the coffee shop's "about us" text so it is embedded alongside the products.
# The explicit encoding keeps the read independent of the platform's locale default.
with open("products/Merry's_way_about_us.txt", encoding="utf-8") as f:
    Merry_way_about_section = f.read()

# Prefix the document with a label; the part before the first ":" later becomes its vector id.
Merry_way_about_section = "Coffee shop Merry's Way about section: " + Merry_way_about_section

# Only append when missing, so re-running this cell does not duplicate the document.
if Merry_way_about_section not in texts:
    texts.append(Merry_way_about_section)

In [None]:
# Load the plain-text menu overview so it is embedded alongside the products.
# The explicit encoding keeps the read independent of the platform's locale default.
with open("products/menu_items_text.txt", encoding="utf-8") as f:
    menue_items_text = f.read()

# Prefix the document with a label; the part before the first ":" later becomes its vector id.
menue_items_text = "Menu Items: " + menue_items_text

# Only append when missing, so re-running this cell does not duplicate the document.
if menue_items_text not in texts:
    texts.append(menue_items_text)

# Generate Embeddings

In [17]:
# Embed every document in `texts` with a single request.
output = client.embeddings.create(
    model=model_name,
    input=texts,
)

In [18]:
# NOTE: this rebinds `embeddings`. Earlier it held one vector (output.data[0].embedding);
# from here on it is the whole output.data sequence, whose items expose `.embedding`.
embeddings = output.data

# Push data to database

In [26]:
# Create the serverless index only if it does not exist yet, so that
# re-running this cell does not fail on an already-existing index.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=384,  # Replace with your model dimensions (must equal the embedding length)
        metric="cosine",  # Replace with your model metric
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1",
        ),
    )

In [31]:
# Poll until Pinecone reports the index as ready.
while not pc.describe_index(index_name).status['ready']:
    time.sleep(1)

index = pc.Index(index_name)

# zip() silently drops trailing items when its inputs differ in length
# (e.g. `embeddings` is stale relative to `texts`), so check up front.
if len(texts) != len(embeddings):
    raise ValueError(
        f"Got {len(embeddings)} embeddings for {len(texts)} texts; "
        "re-run the embedding cells before upserting."
    )

# One record per document; the id is the text before the first ":".
vectors = [
    {
        "id": text.split(":", 1)[0].strip(),
        "values": e.embedding,
        "metadata": {'text': text},
    }
    for text, e in zip(texts, embeddings)
]

# Upsert overwrites records that share an id, so colliding ids would lose documents.
vector_ids = [v["id"] for v in vectors]
duplicate_ids = sorted({i for i in vector_ids if vector_ids.count(i) > 1})
if duplicate_ids:
    raise ValueError(f"Duplicate vector ids derived from texts: {duplicate_ids}")

index.upsert(
    vectors=vectors,
    namespace="ns1"
)

{'upserted_count': 20}

# Get closest documents

In [32]:
# Embed the user question with the same model that embedded the documents.
output = client.embeddings.create(
    model=model_name,
    input=["Is Cappuccino lactose-free?"],
)
embeding = output.data[0].embedding  # query vector, read by the next cell

In [34]:
# Fetch the three best matches for the query vector from namespace "ns1",
# returning the stored metadata but not the vector values.
results = index.query(
    vector=embeding,
    top_k=3,
    namespace="ns1",
    include_metadata=True,
    include_values=False,
)

print(results)

{'matches': [{'id': 'Cappuccino',
              'metadata': {'text': 'Cappuccino : A rich and creamy cappuccino '
                                   'made with freshly brewed espresso, steamed '
                                   'milk, and a frothy milk cap. This '
                                   'delightful drink offers a perfect balance '
                                   'of bold coffee flavor and smooth milk, '
                                   'making it an ideal companion for relaxing '
                                   'mornings or lively conversations. -- '
                                   "Ingredients: ['Espresso', 'Steamed Milk', "
                                   "'Milk Foam'] -- Price: 4.5 -- rating: 4.7"},
              'score': 0.734829128,
              'values': []},
             {'id': 'Sugar Free Vanilla syrup',
              'metadata': {'text': 'Sugar Free Vanilla syrup : Enjoy the sweet '
                                   'flavor of vanilla without the 