In [2]:
## Load Settings from .env file
from dotenv import find_dotenv, dotenv_values

# Read the settings from the .env file located by find_dotenv().
# For debugging, the loaded mapping can be inspected with print(config) --
# be aware that doing so echoes the connection string and API key.
config = dotenv_values(find_dotenv())

ATLAS_URI, OPENAI_API_KEY = (
    config.get(setting) for setting in ('ATLAS_URI', 'OPENAI_API_KEY')
)

# The MongoDB connection string is mandatory: stop here when it is missing or empty.
if not ATLAS_URI:
    raise Exception ("'ATLAS_URI' is not set.  Please set it above to continue...")

In [3]:
import openai
import pymongo

# Give the openai module the API key read from the settings.
openai.api_key = OPENAI_API_KEY

# Create the MongoDB client and pick the database / collection used by this notebook.
client = pymongo.MongoClient(ATLAS_URI)
db = client["PromptingPractice"]
collection = db["practices"]

In [4]:
# Index name passed as "index" to the $vectorSearch aggregation stage.
ATLAS_VECTOR_SEARCH_INDEX_NAME = 'vector_index'

# Document field that holds the embedding; also used as the $vectorSearch "path".
EMBEDDING_FIELD_NAME = 'description_embedding'

In [5]:
# Embedding model requested from the OpenAI API; read at call time by generate_embedding().
model = "text-embedding-3-small"


def generate_embedding(text: str) -> list[float]:
    """Return the embedding vector for ``text`` using the OpenAI embeddings API.

    Args:
        text: The text to embed. Must be non-empty.

    Returns:
        The ``embedding`` attribute of the first item in the API response.

    Raises:
        ValueError: If ``text`` is empty or ``None``. The embeddings endpoint
            rejects empty input, so this fails fast without a network call.
    """
    if not text:
        raise ValueError("generate_embedding() requires non-empty text")

    response = openai.embeddings.create(input=[text], model=model)
    return response.data[0].embedding


In [6]:
from pymongo import ReplaceOne

# Backfill the embedding field for (up to 500) documents that have a description.
#
# Only non-empty string descriptions are selected: the embeddings endpoint rejects
# empty input, and a single bad document would otherwise abort the loop before
# anything is written.
requests = []

docs_to_embed = collection.find(
    {'description': {'$type': 'string', '$ne': ''}},
    {'description': 1},  # _id is returned by default; no other field is needed
).limit(500)

for doc in docs_to_embed:
    embedding = generate_embedding(doc['description'])
    # $set touches only the embedding field, so fields changed by other writers
    # since the read above are not overwritten with stale values.
    requests.append(
        pymongo.UpdateOne({'_id': doc['_id']}, {'$set': {EMBEDDING_FIELD_NAME: embedding}})
    )

# bulk_write() raises InvalidOperation when given no operations, so skip it
# when nothing matched. The trailing expression keeps the result as cell output.
bulk_result = collection.bulk_write(requests) if requests else None
bulk_result

BulkWriteResult({'writeErrors': [], 'writeConcernErrors': [], 'nInserted': 0, 'nUpserted': 0, 'nMatched': 2, 'nModified': 0, 'nRemoved': 0, 'upserted': []}, acknowledged=True)

In [7]:
def query_results(query, k):
    """Run an Atlas Vector Search for ``query`` and return the top ``k`` matches.

    Args:
        query: Free-text search string; it is embedded with generate_embedding().
        k: Maximum number of documents to return. Must be at least 1.

    Returns:
        The cursor returned by ``collection.aggregate``. Each document carries
        ``_id``, ``ex_problem`` (when present on the stored document) and
        ``search_score`` (the ``vectorSearchScore`` metadata).

    Raises:
        ValueError: If ``k`` is less than 1.
    """
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    # Scale the candidate pool with k while keeping the original 50 candidates
    # for small k (k=5 -> 50). Capped at 10,000, which is understood to be the
    # Atlas upper bound for numCandidates -- verify for very large k.
    num_candidates = min(max(50, 10 * k), 10_000)

    pipeline = [
        {
            '$vectorSearch': {
                "index": ATLAS_VECTOR_SEARCH_INDEX_NAME,
                "path": EMBEDDING_FIELD_NAME,
                "queryVector": generate_embedding(query),
                "numCandidates": num_candidates,
                "limit": k,
            }
        },
        {
            "$project": {
                '_id': 1,
                'ex_problem': 1,
                "search_score": {"$meta": "vectorSearchScore"},
            }
        },
    ]
    return collection.aggregate(pipeline)

In [8]:
# Run a sample search and print each hit's search score followed by its practice problem.
query = "sponge bob"
practices = query_results(query, k=5)

for practice in practices:
    print(f'Search Score: {practice["search_score"]}\nPractice Problem: {practice["ex_problem"]}\n')

Search Score: 0.5516932010650635
Practice Problem: Please read the chat transcript between the user and the assistant above. What could be the system message for this conversation? Note that the model output can be slightly different every time you run a same prompt.

Search Score: 0.5375242829322815
Practice Problem: 

| Category       | Cost  | 
| -------------- | ----- | 
| Groceries      | $150  | 
| Utilities      | $75   | 
| Transportation | $100  | 
| Rent           | $1200 |
                    
You have a table in Excel like above, and you want to create a new column (with its value calculated automatically) so that the table looks like below. Write a prompt so that the LLM can teach you how to do it.
                    
| Category       | Cost  | Total  |
| -------------- | ----- | ------ |
| Groceries      | $150  | $1525  |
| Utilities      | $75   |        |
| Transportation | $100  |        |
| Rent           | $1200 |        |


