In [52]:
import psycopg2
import numpy as np
from langchain.embeddings import OpenAIEmbeddings

# 구조화된 문자열

In [53]:
# texts for content
texts = [
    "Type: Desktop, OS: Ubuntu, GPU: NVIDIA, CPU: AMD, RAM: 64GB, SSD: 2TB",
    "Type: Desktop, OS: Linux Mint, GPU: NVIDIA, CPU: AMD, RAM: 64GB, SSD: 2TB",
    "Type: Desktop, OS: Manjaro, GPU: NVIDIA, CPU: AMD, RAM: 64GB, SSD: 2TB",
    "Type: Desktop, OS: Windows, GPU: NVIDIA, CPU: AMD, RAM: 64GB, SSD: 2TB",
    "Type: Desktop, OS: Fedora, GPU: AMD, CPU: AMD, RAM: 16GB, SSD: 1TB",
    "Type: Desktop, OS: Windows, GPU: NVIDIA, CPU: AMD, RAM: 16GB, SSD: 1TB",
    "Type: Desktop, OS: Ubuntu, GPU: AMD, CPU: AMD, RAM: 32GB, SSD: 1TB",
    "Type: Laptop, OS: Windows, GPU: NVIDIA, CPU: Intel, RAM: 16GB, SSD: 1TB",
    "Type: Laptop, OS: Ubuntu, GPU: AMD, CPU: AMD, RAM: 16GB, SSD: 500GB",
    "Type: Laptop, OS: Mac OS, GPU: NVIDIA, CPU: AMD, RAM: 16GB, SSD: 1TB"
]

def show(rows):
    for row in rows:
        print(row)

In [57]:
# OpenAI 임베딩 사용
import os
os.environ['OPENAI_API_KEY'] = open('API_KEY', 'r').read()
embeddings = OpenAIEmbeddings()

In [55]:
# embeddings_list에 텍스트의 순서와 동일하게 임베딩을 생성해서 저장
embeddings_list = []

for text in texts:
    embeddings_list.append(embeddings.embed_query(text))

emb_dim = len(embeddings_list[0])

# PG에 연결
conn = psycopg2.connect(host='192.168.0.47', dbname='postgres', user='postgres', password='postgres1016', port=55432)
cursor = conn.cursor()

# items라는 이름의 테이블 생성
# embedding vector열은 emb_dim 차원의 벡터
cursor.execute("CREATE TABLE IF NOT EXISTS items (id serial primary key, content text, embedding vector({}));".format(emb_dim))

# content와 embedding을 짝지워 INSERT
for i in range(len(embeddings_list)):
    content= texts[i]
    embedding = embeddings_list[i]
    cursor.execute("INSERT INTO items (content, embedding) VALUES (%s, %s)", (content, embedding))

# PG에 커밋
conn.commit()

In [56]:
# 자연어 쿼리와 임베딩 벡터
query_text = "Type: Desktop, OS: Arch Linux, GPU: NVIDA, CPU: AMD, RAM: 64GB, SSD: 2TB"
query_embedding = embeddings.embed_query(query_text)

# query_embedding과 L2거리 기준으로 유사한 행을 5개 찾아서 반환
cursor.execute("""SELECT id, content
FROM items
ORDER BY embedding <-> %s::vector
LIMIT 5
""", (query_embedding,))

# results에 쿼리 결과를 저장하고 show()를 통해 확인
results = cursor.fetchall()
show(results)

# PG 연결 해제
cursor.close()
conn.close()

(23, 'Type: Desktop, OS: Manjaro, GPU: NVIDIA, CPU: AMD, RAM: 64GB, SSD: 2TB')
(3, 'Type: Desktop, OS: Manjaro, GPU: NVIDIA, CPU: AMD, RAM: 64GB, SSD: 2TB')
(13, 'Type: Desktop, OS: Manjaro, GPU: NVIDIA, CPU: AMD, RAM: 64GB, SSD: 2TB')
(11, 'Type: Desktop, OS: Ubuntu, GPU: NVIDIA, CPU: AMD, RAM: 64GB, SSD: 2TB')
(1, 'Type: Desktop, OS: Ubuntu, GPU: NVIDIA, CPU: AMD, RAM: 64GB, SSD: 2TB')
