In [23]:
import dspy
import faiss
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from sentence_transformers import SentenceTransformer
from langchain.vectorstores import FAISS as LangchainFAISS
from langchain_huggingface import HuggingFaceEmbeddings
import pandas as pd
# from langchain.embeddings import HuggingFaceEmbeddings
# from langchain_community.vectorstores import FAISS

In [4]:
# !pip install dspy-ai transformers torch faiss-cpu sentence-transformers langchain

In [42]:
class CustomFAISSRetriever(dspy.Retrieve):
    """Nearest-neighbour retriever backed by a raw FAISS index.

    A query is embedded with a SentenceTransformer model and searched in a
    FAISS index read from disk. Calling the retriever returns the *positions*
    of the ``k`` closest vectors inside the index, not the documents
    themselves; the caller maps those positions back to its own records.
    """

    def __init__(self, faiss_index_path, vector_db_path, k=2):
        """Load the embedding model and the FAISS index.

        Args:
            faiss_index_path: Path of the serialized FAISS index file.
            vector_db_path: Folder of a LangChain FAISS vector store. Accepted
                for interface compatibility but currently unused: loading the
                vector store via ``load_local_db`` is disabled.
            k: Number of neighbours returned per query.
        """
        super().__init__()
        self.k = k
        # A single model name is shared by both embedding wrappers below.
        self.model_name = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2"

        # LangChain embedding wrapper; only needed by load_local_db().
        self.embeddings = HuggingFaceEmbeddings(
            model_name=self.model_name
        )

        # May be None: load_index() reports failures instead of raising.
        self.index = self.load_index(faiss_index_path)

        # Encoder used for queries (same model name as self.embeddings).
        self.model = SentenceTransformer(self.model_name)

    def load_index(self, idx_path=None):
        """Read a FAISS index from ``idx_path``.

        Best effort: any failure is printed and ``None`` is returned instead
        of raising, so callers must check the result.
        """
        try:
            index = faiss.read_index(idx_path)
            print(f"成功載入FAISS索引，包含 {index.ntotal} 個向量")
            return index
        except Exception as e:
            print(f"索引載入失敗: {str(e)}")
            return None

    def load_local_db(self, local_db_path=None, embeddings=None):
        """Load a LangChain FAISS vector store from ``local_db_path``.

        Best effort: any failure is printed and ``None`` is returned.

        NOTE(security): ``allow_dangerous_deserialization=True`` opts in to
        unpickling the stored data (see the LangChain ``FAISS.load_local``
        documentation); only use this on folders you created yourself.
        """
        try:
            db = LangchainFAISS.load_local(
                folder_path=local_db_path,
                embeddings=embeddings,
                allow_dangerous_deserialization=True
            )
            print(f"載入成功，共 {db.index.ntotal} 筆技術問答")
            return db
        except Exception as e:
            print(f"向量庫載入異常: {str(e)}")
            return None

    def __call__(self, query):
        """Return the index positions of the ``k`` vectors closest to ``query``.

        Args:
            query: The text to search for. A single string produces one result
                row; a list of strings is expected to produce one row per
                string (per the SentenceTransformer ``encode`` documentation).

        Returns:
            The position array returned by ``index.search``; for a single
            query it has one row of ``k`` positions.

        Raises:
            RuntimeError: If the FAISS index could not be loaded at construction.
        """
        if self.index is None:
            # load_index() swallows read errors and returns None; fail here
            # with an actionable message instead of an AttributeError later.
            raise RuntimeError(
                "FAISS index is not loaded; check the faiss_index_path passed "
                "to CustomFAISSRetriever"
            )

        query_embedding = self.model.encode(
            query,
            convert_to_tensor=False,
            show_progress_bar=False  # no progress bar for a single query
        )

        # FAISS searches a 2-D float32 matrix with one row per query. Keeping
        # the last axis as the embedding dimension turns a single vector into
        # one row and leaves an already 2-D batch untouched.
        query_embedding = query_embedding.reshape(-1, query_embedding.shape[-1])
        query_embedding = query_embedding.astype(np.float32)

        distance, pos = self.index.search(query_embedding, k=self.k)
        # Diagnostic output, kept so the recorded notebook output stays valid.
        print(distance)
        print(pos)
        return pos

# Thin convenience wrappers around CustomFAISSRetriever (unchanged from the earlier version)
def setup_retriever(faiss_index_path, vector_db_path):
    """Build a CustomFAISSRetriever from an index file path and a vector-store folder."""
    return CustomFAISSRetriever(faiss_index_path, vector_db_path)

def search_similar_questions(retriever, question):
    """Call ``retriever`` with ``question`` and hand back its result unchanged."""
    return retriever(question)

# Column names of the Q&A table.
qKey = "Question"
qAns = "Answer"

# Raw question/answer rows as stored on disk.
df = pd.read_csv("./data/qa.csv")


def _merge_answers(answers):
    """Join the distinct answers of one question, one per line."""
    return '\n'.join(answers.unique())


# One row per distinct question, with all of its distinct answers merged into
# a single newline-separated string; main() looks rows up in this frame.
df_clean = df.groupby(qKey)[qAns].apply(_merge_answers).reset_index()
# print(df[qAns][10])
def main():
    """Run one demo query and print the answers of the closest indexed entries.

    Uses the module-level ``df_clean`` and ``qAns`` to turn FAISS positions
    into answer text. This assumes the row order of ``df_clean`` matches the
    vector order of the FAISS index — TODO confirm against the code that built
    the index, which is not part of this notebook.
    """
    retriever = setup_retriever(
        faiss_index_path="./db2/q_index.faiss",
        vector_db_path="./db2/qa_vecdb_faiss_new/"
    )

    question = "How can we fix the BSOD problem when flashing the BIOS under the OS?"
    positions = search_similar_questions(retriever, question)

    print(f"問題: {question}")
    print("\n相關文件:")
    # positions has one row per query; only one question was asked.
    for i in positions[0]:
        if i < 0:
            # FAISS pads the result with -1 when it finds fewer than k
            # neighbours; there is no row to show for such a slot.
            continue
        # Single .loc lookup instead of chained indexing df_clean[qAns][i].
        print(f"{i}:\n{df_clean.loc[i, qAns]}")

# if __name__ == "__main__":
#     main()


In [43]:
# Run the demo: build the retriever, search the sample question, print the matched answers.
main()

成功載入FAISS索引，包含 145 個向量
[[3.4226779e-12 1.4949589e+00]]
[[56 47]]
問題: How can we fix the BSOD problem when flashing the BIOS under the OS?

相關文件:
56:
Modify the following sections in the original code:

# ELINK
#    Name        = "EmdoorSmiFlashPreUpdate"
#    Parent      = "SMIFlashPreUpdateList"
#    InvokeOrder = AfterParent
#    Token       = "EmdoorSmmLib_INF_SUPPORT" = "1"
# End

# ELINK
#    Name        = "EmdoorSmiFlashEndUpdate"
#    Parent      = "SMIFlashEndUpdateList"
#    InvokeOrder = AfterParent
#    Token       = "EmdoorSmmLib_INF_SUPPORT" = "1"
# End
47:
Revise the code to configure the Flash BIOS update sequence to prevent BSOD issues occurring under the operating system.
