In [1]:
import psycopg2
import numpy as np
from langchain.embeddings import OpenAIEmbeddings

In [2]:
# texts for content
texts = [
    "Type: Desktop, OS: Ubuntu, GPU: NVIDIA, CPU: AMD, RAM: 64GB, SSD: 2TB",
    "Type: Desktop, OS: Linux Mint, GPU: NVIDIA, CPU: AMD, RAM: 64GB, SSD: 2TB",
    "Type: Desktop, OS: Manjaro, GPU: NVIDIA, CPU: AMD, RAM: 64GB, SSD: 2TB",
    "Type: Desktop, OS: Windows, GPU: NVIDIA, CPU: AMD, RAM: 64GB, SSD: 2TB",
    "Type: Desktop, OS: Fedora, GPU: AMD, CPU: AMD, RAM: 16GB, SSD: 1TB",
    "Type: Desktop, OS: Windows, GPU: NVIDIA, CPU: AMD, RAM: 16GB, SSD: 1TB",
    "Type: Desktop, OS: Ubuntu, GPU: AMD, CPU: AMD, RAM: 32GB, SSD: 1TB",
    "Type: Laptop, OS: Windows, GPU: NVIDIA, CPU: Intel, RAM: 16GB, SSD: 1TB",
    "Type: Laptop, OS: Ubuntu, GPU: AMD, CPU: AMD, RAM: 16GB, SSD: 500GB",
    "Type: Laptop, OS: Mac OS, GPU: NVIDIA, CPU: AMD, RAM: 16GB, SSD: 1TB"
]

def show_items(rows):
    for row in rows:
        print(row)
    print("-"*50)

### OpenAI Embedding

In [3]:
# OpenAI 임베딩 사용
import os
os.environ['OPENAI_API_KEY'] = open('API_KEY', 'r').read()
embeddings = OpenAIEmbeddings()

  embeddings = OpenAIEmbeddings()


In [4]:
# embeddings_list에 텍스트의 순서와 동일하게 임베딩을 생성해서 저장
embeddings_list = []

for text in texts:
    embeddings_list.append(embeddings.embed_query(text))

emb_dim = len(embeddings_list[0])

# PG에 연결
conn = psycopg2.connect(host='localhost', dbname='my_vec_db',user='jaesolshin',port=5432)
cursor = conn.cursor()

# computer_spec라는 이름의 테이블 생성
# embedding vector열은 emb_dim 차원의 벡터
cursor.execute("DROP TABLE IF EXISTS computer_spec;")
cursor.execute("CREATE TABLE IF NOT EXISTS computer_spec (id serial primary key, content text, embedding vector({}));".format(emb_dim))

# content와 embedding을 짝지워 INSERT
for content, embedding in zip(texts, embeddings_list):
    cursor.execute("INSERT INTO computer_spec (content, embedding) VALUES (%s, %s)", (content, embedding))

# PG에 커밋
conn.commit()

In [5]:

conn = psycopg2.connect(host='localhost', dbname='my_vec_db',user='jaesolshin',port=5432)
cursor = conn.cursor()


In [6]:
# 자연어 쿼리와 임베딩 벡터
query_text = "Redhat"
query_embedding = embeddings.embed_query(query_text)

# query_embedding과 L2거리 기준으로 유사한 행을 5개 찾아서 반환
cursor.execute("""SELECT id, content
FROM computer_spec
ORDER BY embedding <-> %s::vector
LIMIT 5
""", (query_embedding,))

# results에 쿼리 결과를 저장하고 show()를 통해 확인
results = cursor.fetchall()
show_items(results)

# PG 연결 해제
cursor.close()
conn.close()

(5, 'Type: Desktop, OS: Fedora, GPU: AMD, CPU: AMD, RAM: 16GB, SSD: 1TB')
(2, 'Type: Desktop, OS: Linux Mint, GPU: NVIDIA, CPU: AMD, RAM: 64GB, SSD: 2TB')
(9, 'Type: Laptop, OS: Ubuntu, GPU: AMD, CPU: AMD, RAM: 16GB, SSD: 500GB')
(3, 'Type: Desktop, OS: Manjaro, GPU: NVIDIA, CPU: AMD, RAM: 64GB, SSD: 2TB')
(7, 'Type: Desktop, OS: Ubuntu, GPU: AMD, CPU: AMD, RAM: 32GB, SSD: 1TB')
--------------------------------------------------


### SentenceTransformer Embedding

In [7]:
import psycopg2
from sentence_transformers import SentenceTransformer

# embedding model 정의
model = SentenceTransformer('all-MiniLM-L6-v2')

# 텍스트 임베딩
embeddings = model.encode(texts).tolist()
emb_dim2 = len(embeddings[0])
print(emb_dim2)

  from .autonotebook import tqdm as notebook_tqdm


384


In [8]:
# PG 연결
conn = psycopg2.connect(host='localhost', dbname='my_vec_db',user='jaesolshin',port=5432) 
cursor = conn.cursor()

# embedding2 열 추가
cursor.execute("ALTER TABLE computer_spec ADD COLUMN IF NOT EXISTS embedding2 vector(%s)", (emb_dim2,))

# content와 embedding2를 짝지워 INSERT
for content, embedding in zip(texts, embeddings):
    cursor.execute('INSERT INTO computer_spec (content, embedding2) VALUES (%s, %s)', (content, embedding))

In [9]:
# 자연어 쿼리와 임베딩 벡터
query_text = "Type: Desktop, OS: Arch Linux, GPU: NVIDA, CPU: AMD, RAM: 64GB, SSD: 2TB"
query_embedding2 = model.encode(texts).tolist()[0]

# query_embedding과 L2거리 기준으로 유사한 행을 5개 찾아서 반환
cursor.execute("""SELECT id, content
FROM computer_spec
ORDER BY embedding2 <-> %s::vector
LIMIT 5
""", (query_embedding2,))

# results에 쿼리 결과를 저장하고 show()를 통해 확인
results = cursor.fetchall()
show_items(results)

(11, 'Type: Desktop, OS: Ubuntu, GPU: NVIDIA, CPU: AMD, RAM: 64GB, SSD: 2TB')
(14, 'Type: Desktop, OS: Windows, GPU: NVIDIA, CPU: AMD, RAM: 64GB, SSD: 2TB')
(16, 'Type: Desktop, OS: Windows, GPU: NVIDIA, CPU: AMD, RAM: 16GB, SSD: 1TB')
(17, 'Type: Desktop, OS: Ubuntu, GPU: AMD, CPU: AMD, RAM: 32GB, SSD: 1TB')
(18, 'Type: Laptop, OS: Windows, GPU: NVIDIA, CPU: Intel, RAM: 16GB, SSD: 1TB')
--------------------------------------------------


In [10]:
# 
document_id = 1
cursor.execute("""SELECT content 
FROM computer_spec
WHERE id != %(id)s 
ORDER BY
    embedding2 
    <=> (
        SELECT embedding2
        FROM computer_spec 
        WHERE id = %(id)s
        LIMIT 1
        ) 
LIMIT 5""", {'id': document_id})
               
results = cursor.fetchall()
show_items(results)

# PG 연결 해제
cursor.close()
conn.close()

('Type: Desktop, OS: Manjaro, GPU: NVIDIA, CPU: AMD, RAM: 64GB, SSD: 2TB',)
('Type: Desktop, OS: Windows, GPU: NVIDIA, CPU: AMD, RAM: 64GB, SSD: 2TB',)
('Type: Desktop, OS: Fedora, GPU: AMD, CPU: AMD, RAM: 16GB, SSD: 1TB',)
('Type: Desktop, OS: Windows, GPU: NVIDIA, CPU: AMD, RAM: 16GB, SSD: 1TB',)
('Type: Desktop, OS: Linux Mint, GPU: NVIDIA, CPU: AMD, RAM: 64GB, SSD: 2TB',)
--------------------------------------------------
