# Initialize PostgreSQL and pgvector


In [None]:
import psycopg2
import os
import torch
# Install CLIP: pip3 install git+https://github.com/openai/CLIP.git
import clip
import numpy as np
from PIL import Image

# Run on the GPU when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the "RN50x64" CLIP checkpoint onto the chosen device; the second
# returned value is used below as the image preprocessing callable.
model, preprocess = clip.load("RN50x64", device=device)
model.eval()

In [None]:
# Rebuild the image_features table: one row per file found under "pic",
# holding the file's path (relative to "pic") and its CLIP image embedding.
# NOTE(review): the connection credentials are hard-coded; consider moving
# them to configuration.
dbconn = psycopg2.connect(
    host="127.0.0.1", user="postgres", password="root", port=5432, connect_timeout=10
)
cur = None
try:
    # Autocommit: every statement is committed as soon as it runs.
    dbconn.set_session(autocommit=True)

    cur = dbconn.cursor()
    cur.execute("CREATE EXTENSION IF NOT EXISTS vector;")
    cur.execute("DROP TABLE IF EXISTS image_features;")
    # NOTE(review): vector(1024) assumes the loaded model produces
    # 1024-dimensional embeddings -- confirm if the checkpoint changes.
    cur.execute(
        """CREATE TABLE IF NOT EXISTS image_features(
               id bigserial primary key, 
               image text, 
               feature vector(1024));"""
    )

    for root, dirs, files in os.walk("pic"):
        for file in files:
            file_path = os.path.join(root, file)
            # Store the path relative to "pic" so that files found in
            # sub-directories can be reopened with os.path.join("pic", image).
            # For files directly inside "pic" this equals the bare file name.
            relative_path = os.path.relpath(file_path, "pic")
            # Close the file handle as soon as the tensor has been built.
            with Image.open(file_path) as picture:
                image = preprocess(picture).unsqueeze(0).to(device)
            # Inference only: do not record an autograd graph per image.
            with torch.no_grad():
                feat = model.encode_image(image)
            feat_vector = feat[0].cpu().numpy().tolist()
            print("file %s %s" % (relative_path, feat_vector))
            cur.execute(
                """INSERT INTO image_features
                          (image, feature) 
                      VALUES(%s, %s);""",
                (relative_path, feat_vector),
            )

    # The index is created after loading the rows. It uses the cosine operator
    # class so that it matches the `<=>` operator used when searching this
    # table; an inner-product index would not be used for that operator.
    cur.execute(
        """CREATE INDEX ON image_features 
                   USING ivfflat (feature vector_cosine_ops) WITH (lists = 100);"""
    )
    cur.execute("VACUUM ANALYZE image_features;")
finally:
    # Release database resources even when loading fails part-way through.
    if cur is not None:
        cur.close()
    dbconn.close()
print("Vector embeddings has been successfully loaded into PostgreSQL")

In [None]:
import matplotlib.pyplot as plt

# Interactive text-to-image search over image_features.
# Reads a query from stdin, embeds it with the CLIP text encoder and shows the
# five rows with the smallest `<=>` distance. Typing "quit" ends the loop.
dbconn = psycopg2.connect(
    host="127.0.0.1", user="postgres", password="root", port=5432, connect_timeout=10
)
cur = None
try:
    dbconn.set_session(autocommit=True)
    cur = dbconn.cursor()

    while True:
        user_input = input("请输入：")
        if user_input == "quit":
            break

        # The tokens must live on the same device as the model, otherwise
        # encode_text fails when the model was loaded on CUDA.
        text = clip.tokenize([user_input]).to(device)
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            search_feat = model.encode_text(text)[0]

        # Move the embedding to host memory before converting it; a CUDA
        # tensor cannot be turned into a NumPy array directly.
        cur.execute(
            """SELECT image, feature <=> %s::vector  FROM image_features 
                ORDER BY 2 limit 5;""",
            (search_feat.cpu().numpy().tolist(),),
        )
        matching_result = cur.fetchall()
        print(matching_result)
        image_paths = [os.path.join("pic", image) for (image, _) in matching_result]
        # At least two columns so that `axes` is always an array of Axes.
        fig, axes = plt.subplots(1, max(2, len(image_paths)), figsize=(10, 10))
        # Hide the frames of every subplot, including those left without an image.
        for ax in axes:
            ax.axis("off")
        for i, image_path in enumerate(image_paths):
            image = Image.open(image_path)
            ax = axes[i]
            # Annotate each picture with its distance to the query.
            ax.text(0.5, 0.5, matching_result[i][1], ha="center", va="top", fontsize=6)
            ax.imshow(image)
        plt.show()
finally:
    # Release database resources when the loop ends or an error occurs.
    if cur is not None:
        cur.close()
    dbconn.close()

In [None]:
import cv2

# Rebuild the vedio_features table from 'vedio.mp4': every 30th frame is
# resized to 256x256, embedded with the CLIP image encoder and stored together
# with its raw RGB bytes.
# NOTE(review): the connection credentials are hard-coded; consider moving
# them to configuration.
dbconn = psycopg2.connect(
    host="127.0.0.1", user="postgres", password="root", port=5432, connect_timeout=10
)
cur = None
video = None
try:
    dbconn.set_session(autocommit=True)

    cur = dbconn.cursor()
    cur.execute("CREATE EXTENSION IF NOT EXISTS vector;")
    cur.execute("DROP TABLE IF EXISTS vedio_features;")
    # NOTE(review): vector(1024) assumes the loaded model produces
    # 1024-dimensional embeddings -- confirm if the checkpoint changes.
    cur.execute(
        """CREATE TABLE IF NOT EXISTS vedio_features(
                   id bigserial primary key, 
                   image bytea, 
                   feature vector(1024));"""
    )

    # Open the video file.
    video = cv2.VideoCapture('vedio.mp4')

    # Stop if the video could not be opened. Raising SystemExit (instead of
    # calling exit()) lets the finally clause below release the resources.
    if not video.isOpened():
        print("无法打开视频文件")
        raise SystemExit

    # Read the video frame by frame and store the sampled frames.
    frame_count = 0
    while True:
        # Read the next frame.
        ret, frame = video.read()

        # No frame returned: end of the video (or a read failure).
        if not ret:
            break

        # Sample one frame out of every 30.
        if frame_count % 30 == 0:
            # OpenCV delivers frames in BGR channel order, while PIL and the
            # reader of this table (Image.frombytes("RGB", ...)) expect RGB.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(rgb_frame)
            image = image.resize((256, 256))
            image_tensor = preprocess(image).unsqueeze(0).to(device)
            # Inference only: do not record an autograd graph per frame.
            with torch.no_grad():
                feat = model.encode_image(image_tensor).cpu()
            feat_vector = feat[0].numpy().tolist()
            print(f"第{frame_count}帧：{feat}")
            cur.execute(
                """INSERT INTO vedio_features
                          (image, feature) 
                      VALUES(%s, %s);""",
                (image.tobytes(), feat_vector),
            )

        frame_count += 1

    cur.execute(
        """CREATE INDEX ON vedio_features 
                   USING ivfflat (feature vector_cosine_ops) WITH (lists = 100);"""
    )
    cur.execute("VACUUM ANALYZE vedio_features;")
finally:
    # Release the video object and the database resources on every exit path.
    if video is not None:
        video.release()
    if cur is not None:
        cur.close()
    dbconn.close()

In [None]:
import matplotlib.pyplot as plt

# Interactive text-to-frame search over vedio_features.
# Reads a query from stdin, embeds it with the CLIP text encoder and displays
# the five stored frames with the smallest `<=>` distance. Typing "quit" ends
# the loop.
dbconn = psycopg2.connect(
    host="127.0.0.1", user="postgres", password="root", port=5432, connect_timeout=10
)
cur = None
try:
    dbconn.set_session(autocommit=True)
    cur = dbconn.cursor()

    while True:
        user_input = input("请输入：")
        if user_input == "quit":
            break

        # The tokens must live on the same device as the model, otherwise
        # encode_text fails when the model was loaded on CUDA.
        text = clip.tokenize([user_input]).to(device)
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            search_feat = model.encode_text(text)[0]

        # Move the embedding to host memory before converting it; a CUDA
        # tensor cannot be turned into a NumPy array directly.
        cur.execute(
            """SELECT image   FROM vedio_features 
                ORDER BY feature <=> %s::vector limit 5;""",
            (search_feat.cpu().numpy().tolist(),),
        )
        for row in cur.fetchall():
            # Copy the bytea column value into a plain bytes object before
            # handing it to PIL.
            image_byte = bytes(row[0])
            # Frames are stored as raw 256x256 RGB pixel data.
            image = Image.frombytes("RGB", (256, 256), image_byte)
            plt.imshow(image)
            plt.axis("off")  # optional: hide the axes
            plt.show()
finally:
    # Release database resources when the loop ends or an error occurs.
    if cur is not None:
        cur.close()
    dbconn.close()