In [1]:
import dspy
import faiss
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from sentence_transformers import SentenceTransformer
from langchain.vectorstores import FAISS as LangchainFAISS
from langchain_community.vectorstores import FAISS
# from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings
import pandas as pd
from dotenv import load_dotenv
import os

In [2]:
class CustomFAISSRetriever(dspy.Retrieve):
    def load_index(self, idx_path=None, model_name:str=None):
        try:
            index = faiss.read_index(idx_path)
            print(f"成功載入FAISS索引，包含 {index.ntotal} 個向量")
            return index
        except Exception as e:
            print(f"索引載入失敗: {str(e)}")
            return None

    def load_local_db(self, local_db_path=None, embeddings=None):
        try:
            db = LangchainFAISS.load_local(
                folder_path=local_db_path,
                embeddings=embeddings,
                allow_dangerous_deserialization=True
            )
            print(f"載入成功，共 {db.index.ntotal} 筆技術問答")
            return db
        except Exception as e:
            print(f"向量庫載入異常: {str(e)}")
            return None
            
    def __init__(self, faiss_index_path, vector_db_path, k=2):
        super().__init__()
        self.k = k
        # 使用同一個模型名稱
        self.model_name = "paraphrase-multilingual-MiniLM-L12-v2"
        
        # 初始化 embeddings
        self.embeddings = HuggingFaceEmbeddings(
            model_name=self.model_name
        )
        
        # 載入 FAISS 索引
        self.index = self.load_index(faiss_index_path)
        
        # 載入向量庫
        self.vector_db = self.load_local_db(vector_db_path, self.embeddings)
        
        # 使用相同的模型進行查詢編碼
        self.model = SentenceTransformer(self.model_name)

    def __call__(self, query):
        # 編碼查詢
        query_embedding = self.model.encode(
            query,
            convert_to_tensor=False,
            show_progress_bar=False  # 對單一查詢關閉進度條
        )
        query_embedding = query_embedding.reshape(-1,1).T
        # query_embedding = query_embedding.cpu().numpy()
        query_embedding = query_embedding.astype(np.float32)
        
        # 搜索向量庫
        """
        question = input("Enter your query:")
        searchDocs = db.similarity_search(question)
        print(searchDocs[0].page_content)
        """
        # docs = self.vector_db.similarity_search_with_score(query_embedding, k=self.k)
        # docs = self.vector_db.similarity_search(query_embedding, k=self.k)
        distances, pos = self.index.search(query_embedding, k=self.k)
        # print(distance)
        # print(pos)
        # return the pos for retrieving data from answers
        return pos, distances;

In [3]:
# load_dotenv();
# model_name = os.getenv("SENTENCE_MODEL");
# print(f"mode is {model_name}");
# faiss_index_path = os.getenv("INDEX_PATH")
# print(f"Iindex is {faiss_index_path}");
# faiss_db_path = os.getenv("FAISS_DB")
# print(f"DB is {faiss_db_path}")

In [4]:
retriever=CustomFAISSRetriever(faiss_index_path="./db/20250307v1/dqe_learn_vdb_index.faiss", vector_db_path="./db/20250307v1/dqe_learn_vdb_faiss/")
if retriever:
    print("faiss retriever is created successfully");

成功載入FAISS索引，包含 28 個向量
載入成功，共 28 筆技術問答
faiss retriever is created successfully


In [10]:
# def search_similar_questions_by_vdb(retriever, question):
#     _docs= retriever(question)
#     print(f"{_docs[0].page_content}")
    # return pos, distances

In [11]:
# search_similar_questions_by_vdb(retriever, "电池鼓包的问题")

In [6]:
def search_similar_questions_by_index(retriever, question):
    pos, distances = retriever(question)
    return pos, distances

In [12]:
df = pd.read_csv("../../../../doc/deq_learn_refine2_correct.csv");

In [37]:
def do_query_by_index(query:str=None):
    _pos, _distances = search_similar_questions(retriever, query)
    print(f"問題: {query}")
    print("\nAI-Response:")
    count = 0
    for i in _pos[0]:
        count+=1;
        if i >= 0:
            print(f"第{count}筆回答：\n")
            print(f"模块:\n{df['模块'][i]}")
            print(f"严重度:\n{df['严重度'][i]}")
            print(f"问题现象描述:\n{df['问题现象描述'][i]}")
            print(f"原因分析:\n{df['原因分析'][i]}")
            print(f"改善对策:\n{df['改善对策'][i]}")
            print(f"经验萃取:\n{df['经验萃取'][i]}")
            print(f"评审后优化:\n{df['评审后优化'][i]}")
            print(f"评分:\n{df['评分'][i]}")
            print("=============================================================================================================")
        else:
            print(f"第{count}筆回答：\n")
            print("未找到资料")

In [35]:
question = "电池鼓包的问题"
do_query(query=question)

[[8.3733521e+00 3.4028235e+38]]
[[27 -1]]
問題: 电池鼓包的问题

AI-Response:
第1筆回答：

模块:
软件
严重度:
A
问题现象描述:
高温运行过程中电池不充电
原因分析:
电池在超过45℃时（电池反馈给EC的温度）有电池保护，不充电
改善对策:
经确认电池规格为60℃温度保护，与EC确认更改EC设置与电池保持一致，即按照60℃温度保护设定
经验萃取:
EC设定电池保护温度时需要确认与电池规格书一致
评审后优化:
EC设定电池保护温度时需要确认与电池规格书一致
评分:
4.0
第2筆回答：

未找到资料
