### 데이터베이스 연결 및 조작

In [1]:
import psycopg2

# Open a connection to the local PostgreSQL database.
conn = psycopg2.connect(host='localhost', dbname='my_vec_db', user='jaesolshin', port=5432)  # connection object

try:
    cursor = conn.cursor()  # cursor object used to run statements

    # Create the demo table on first run; later runs reuse the existing one.
    cursor.execute("CREATE TABLE IF NOT EXISTS test (id serial PRIMARY KEY, num integer, data varchar);")
    # Values are passed separately from the SQL text so psycopg2 does the quoting.
    cursor.execute("INSERT INTO test (num, data) VALUES (%s, %s)", (100, "abcdef"))
    cursor.execute("select * from test;")

    # fetchall() returns every remaining row. Alternatives: cursor.fetchone()
    # for a single row, cursor.fetchmany(3) for up to three rows.
    print(cursor.fetchall())

    # Make the CREATE TABLE / INSERT permanent.
    conn.commit()
finally:
    # Always release the connection, even when a statement above fails;
    # anything not committed by then is discarded by the server.
    conn.close()

[(1, 100, 'abcdef'), (2, 100, 'abcdef')]


In [2]:
# Remove the demo table created by the first cell.
conn = psycopg2.connect(host='localhost', dbname='my_vec_db', user='jaesolshin', port=5432)  # connection object

try:
    cursor = conn.cursor()

    cursor.execute("DROP TABLE IF EXISTS test;")

    # psycopg2 runs statements inside a transaction, so the DROP only takes
    # effect once it is committed; closing without a commit discards it and
    # leaves the table in place.
    conn.commit()
finally:
    # Release the connection even if the DROP fails.
    conn.close()

In [3]:
def show_items(rows):
    """Print every row on its own line, followed by a 50-dash separator line."""
    # Build the whole output first so it is written with a single print call.
    lines = [str(row) for row in rows]
    lines.append("-" * 50)
    print("\n".join(lines))

# Query the existing `items` table: first every row, then the rows ranked by
# distance to a fixed query vector.
conn = psycopg2.connect(host='localhost', dbname='my_vec_db', user='jaesolshin', port=5432)  # connection object

try:
    cursor = conn.cursor()

    cursor.execute("SELECT * FROM items;")
    show_items(cursor.fetchall())

    # `<->` yields the L2 (Euclidean) distance between `embedding` and
    # [1,1,1]; the closest rows come first, capped at five.
    cursor.execute("SELECT *, embedding <-> '[1,1,1]' AS L2 FROM items ORDER BY L2 LIMIT 5;")
    show_items(cursor.fetchall())
finally:
    # Release the connection even when one of the queries fails.
    conn.close()

(1, '[1,2,3]')
(2, '[4,5,6]')
--------------------------------------------------
(1, '[1,2,3]', 2.23606797749979)
(2, '[4,5,6]', 7.0710678118654755)
--------------------------------------------------


### 무작위 벡터 삽입

In [4]:
import numpy as np

# Draw 10 sample vectors of dimension 5 from the standard normal distribution.
example_vectors = np.random.standard_normal(size=(10, 5))
example_vectors

array([[-0.37861862, -2.04965323, -1.72553916,  1.27299132, -0.21074173],
       [ 0.1406984 , -0.43985981, -0.10925659, -1.22730744,  0.04562907],
       [-2.29232094,  0.47463031,  0.90942389,  2.13287989, -2.76614894],
       [-0.41944542,  1.00630069, -1.35412563, -0.10537534,  0.75007022],
       [ 0.17224844,  0.36601097,  0.66658565, -0.14565509,  0.28451986],
       [-0.46745956, -0.48037994, -1.67375843,  0.4214595 , -0.9116874 ],
       [-0.55323478,  0.38511212,  1.90185414, -0.19315131, -0.51533999],
       [-0.21200342,  0.35358292, -0.78109513,  1.50956701, -0.76630032],
       [-1.29078538,  0.66690273, -0.67548623,  0.50326409, -0.35064311],
       [ 1.08737223,  1.79720239, -0.81917937, -0.80967003, -0.32665767]])

In [5]:
import psycopg2

# Open the connection and cursor; they are left open here because the
# following cells keep using them.
conn = psycopg2.connect(host='localhost', dbname='my_vec_db', user='jaesolshin', port=5432)  # connection object
cursor = conn.cursor()

# Create the table that stores one 5-dimensional embedding per row.
create_sql = "CREATE TABLE IF NOT EXISTS vectors (id bigserial PRIMARY KEY, embedding vector(5));"
cursor.execute(create_sql)

# Insert every sample vector. tolist() turns the NumPy array into nested
# Python lists, so each embedding is a plain list of floats.
insert_sql = "INSERT INTO vectors (embedding) VALUES (%s)"
for embedding in example_vectors.tolist():
    cursor.execute(insert_sql, (embedding,))

# Commit so the inserted rows are persisted.
conn.commit()

In [6]:
# Read the whole table back to confirm the inserts.
select_all_sql = "SELECT * FROM vectors;"
cursor.execute(select_all_sql)
results = cursor.fetchall()

# Header line, then one row per line via show_items.
print('\nAll Rows:')
show_items(results)


All Rows:
(1, '[-0.9562891,-0.759813,0.59435976,0.0077875503,-0.80438113]')
(2, '[-1.2348526,0.6862422,-0.6120079,0.49805138,0.008737438]')
(3, '[0.0240263,1.2761552,1.1962513,0.65458846,-1.3365945]')
(4, '[0.6180711,-0.10998694,0.44672072,0.6442783,1.2122089]')
(5, '[0.17362301,-0.08542972,-0.4284525,1.0409049,0.18013716]')
(6, '[0.4827253,-0.37992033,-1.4451149,-1.7272892,-0.76796126]')
(7, '[-0.20629711,0.055571597,-1.2165215,0.29449007,0.23211904]')
(8, '[-1.8292468,1.0811973,1.0158238,0.6767827,0.9377151]')
(9, '[-0.30108055,-0.12144249,0.42995632,-0.41742995,-0.61486197]')
(10, '[0.4043061,-0.6895977,0.7050093,0.9187715,0.9954731]')
(11, '[-0.37861863,-2.0496533,-1.7255392,1.2729913,-0.21074173]')
(12, '[0.1406984,-0.4398598,-0.109256595,-1.2273074,0.045629073]')
(13, '[-2.292321,0.4746303,0.9094239,2.13288,-2.766149]')
(14, '[-0.41944543,1.0063007,-1.3541256,-0.105375335,0.7500702]')
(15, '[0.17224844,0.36601096,0.6665856,-0.1456551,0.28451985]')
(16, '[-0.46745956,-0.48037994,

In [7]:
# Vector search: rank the stored embeddings by distance to a random query vector.
test_vec = np.random.randn(1,5)
# squeeze() drops the leading length-1 axis so tolist() yields a flat list of 5 floats.
test_vec_list = test_vec.squeeze().tolist()

# Bind the query vector as a parameter instead of formatting it into the SQL
# text. It is sent in its '[x, y, ...]' text form and cast to `vector`
# explicitly so the `<->` operator receives a vector operand.
cursor.execute(
    "SELECT * FROM vectors ORDER BY embedding <-> %s::vector LIMIT 5",
    (str(test_vec_list),),
)
results = cursor.fetchall()

print('\nRetrieved Rows:')
show_items(results)

# Close the cursor and the connection
cursor.close()
conn.close()


Retrieved Rows:
(15, '[0.17224844,0.36601096,0.6665856,-0.1456551,0.28451985]')
(20, '[1.0873722,1.7972023,-0.81917936,-0.80967003,-0.32665768]')
(4, '[0.6180711,-0.10998694,0.44672072,0.6442783,1.2122089]')
(12, '[0.1406984,-0.4398598,-0.109256595,-1.2273074,0.045629073]')
(10, '[0.4043061,-0.6895977,0.7050093,0.9187715,0.9954731]')
--------------------------------------------------
