In [1]:
import os
import openai
import pandas as pd
from tqdm import tqdm
from openai import OpenAI, AzureOpenAI
from dotenv import load_dotenv
import qa_package.dataclasses.orm as d
from sqlalchemy.engine import Engine, create_engine
from sqlalchemy.orm import Session, sessionmaker
from sqlalchemy.sql import select

# Pull settings from a local .env file (if one exists) into the environment.
load_dotenv()

# Azure OpenAI connection settings; each is None when its variable is unset.
API_BASE = os.getenv('API_BASE')
API_KEY = os.getenv('API_KEY')
API_VERSION = os.getenv('API_VERSION')
CHAT_DEPLOYMENT_NAME = os.getenv('CHAT_DEPLOYMENT_NAME')
EMBEDDING_DEPLOYMENT_NAME = os.getenv('EMBEDDING_DEPLOYMENT_NAME')

# Database URL: set DB_URL (e.g. in .env) to keep real credentials out of the
# notebook. The default is the local development database used so far.
db_url = os.getenv("DB_URL", "postgresql://postgres:postgres@localhost/postgres")
engine = create_engine(db_url)

# Article catalogue CSV: set ARTICLES_CSV to point at the file on other
# machines. The default is the original author's local path.
CSV_FILE = os.getenv("ARTICLES_CSV", "/Users/spare/Documents/data/articles.csv")
df = pd.read_csv(CSV_FILE)

# Shared Azure OpenAI client used for the embedding calls below.
client = AzureOpenAI(
    azure_endpoint=API_BASE,
    api_version=API_VERSION,
    api_key=API_KEY
)

def embed_docs(docs: list[str]) -> list[list[float]]:
    """Embed a batch of texts with the configured embedding deployment.

    Args:
        docs: Texts to embed.

    Returns:
        One embedding vector per entry of the response's ``data`` list, in
        the order the service returned them. An empty ``docs`` yields ``[]``
        without contacting the service.
    """
    # Nothing to embed: skip the network round-trip for an empty batch.
    if not docs:
        return []

    response = client.embeddings.create(input=docs, model=EMBEDDING_DEPLOYMENT_NAME)
    return [item.embedding for item in response.data]

In [2]:
# Sample customer question and its embedding (a one-element batch).
TEST_QUERY = "Do you have the Howie shorts in stock in blue?"
VEC_QUERY = embed_docs([TEST_QUERY])

# Select the ids of the three records whose `factors` column has the smallest
# cosine distance to the query embedding.
nearest_stmt = (
    select(d.record.id)
    .order_by(d.record.factors.cosine_distance(VEC_QUERY[0]))
    .limit(3)
)

with Session(engine) as sess:
    res = sess.execute(nearest_stmt).scalars().all()

In [3]:
# Show the record ids returned by the nearest-neighbour query, closest first.
res

[663463002, 717196001, 651242002]

In [6]:
# Show the descriptions of the retrieved articles in ranking order (best match
# first). A plain `isin` filter keeps the DataFrame's row order, which hides
# the ranking held in `res`, so the matched rows are sorted by rank here.
rank_by_id = {article_id: rank for rank, article_id in enumerate(res)}
(
    df[df.article_id.isin(res)]
    .sort_values("article_id", key=lambda ids: ids.map(rank_by_id), kind="stable")
    .detail_desc
    .tolist()
)

['Shorts in a cotton weave with an elasticated drawstring waist, side pockets and a welt back pocket.',
 'Shorts in sweatshirt fabric with striped ribbing and a drawstring at the waist, side pockets and a zipped back pocket.',
 'Short shorts in sweatshirt fabric with an elasticated drawstring waist, side pockets and slits in the sides.']