# LangChain with FAISS Vector DB

Example by Joselin James, adapted to use README.md as the source of the documents stored in the vector DB.

In [None]:
# Extra packages may be necessary:
# ! pip install faiss-cpu

In [None]:
import os
# Placeholder only: replace "fill in" with a real OpenAI API key before running
# the cells below (and avoid committing the real key with the notebook).
os.environ["OPENAI_API_KEY"] = "fill in"

In [None]:
from typing import List

from langchain.callbacks.manager import CallbackManagerForRetrieverRun
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredMarkdownLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.schema import Document
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.vectorstores.base import VectorStoreRetriever
import numpy as np

from trulens_eval import feedback
from trulens_eval import Feedback
from trulens_eval import Select
from trulens_eval import Tru

In [None]:
# Build a local FAISS vector DB from the contents of README.md.
loader = UnstructuredMarkdownLoader("README.md")
documents = loader.load()

# Split the loaded documents into chunks (chunk_size=1000, no overlap).
text_splitter = CharacterTextSplitter(chunk_overlap=0, chunk_size=1000)
docs = text_splitter.split_documents(documents)

# Embed the chunks with OpenAI embeddings and index them in FAISS.
embeddings = OpenAIEmbeddings()
db = FAISS.from_documents(docs, embedding=embeddings)

# Persist the index to the "faiss_index" folder so later cells can reload it.
db.save_local(folder_path="faiss_index")

In [None]:
# Create the example app.

class VectorStoreRetrieverWithScore(VectorStoreRetriever):
    """Retriever that filters similarity hits by their relevance score.

    For the "similarity" search type, only documents whose relevance score is
    strictly greater than 0.6 are returned, and that score is stored in each
    returned document's metadata under the "score" key. The "mmr" search type
    is forwarded to the vector store without any score filtering.
    """

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Return the documents considered relevant to ``query``.

        Args:
            query: Text to search the vector store for.
            run_manager: Callback manager for this retriever run; it is not
                used by this implementation.

        Returns:
            The score-filtered documents for "similarity" searches, or the
            vector store's max-marginal-relevance results for "mmr" searches.

        Raises:
            ValueError: If ``search_type`` is neither "similarity" nor "mmr".
        """
        mode = self.search_type

        if mode == "mmr":
            return self.vectorstore.max_marginal_relevance_search(
                query, **self.search_kwargs
            )

        if mode != "similarity":
            raise ValueError(f"search_type of {self.search_type} not allowed.")

        scored_hits = self.vectorstore.similarity_search_with_relevance_scores(
            query, **self.search_kwargs
        )
        print("From relevant doc in vec store")

        relevant = []
        for hit, relevance in scored_hits:
            # Keep only hits scoring strictly above the 0.6 cut-off.
            if not relevance > 0.6:
                continue
            # Record the score on the document itself (mutates its metadata).
            hit.metadata["score"] = relevance
            relevant.append(hit)
        return relevant


class FAISSWithScore(FAISS):
    """FAISS vector store whose retriever is a ``VectorStoreRetrieverWithScore``."""

    def as_retriever(self, **kwargs) -> VectorStoreRetrieverWithScore:
        """Build a score-aware retriever backed by this vector store.

        The override accepts ``**kwargs`` so that calls written against the
        base-class ``as_retriever(**kwargs)`` form keep working on this
        subclass. With no arguments the result is the same as before:
        ``search_type="similarity"`` and ``search_kwargs={"k": 4}``.

        Args:
            **kwargs: Optional retriever fields (for example ``search_type``
                or ``search_kwargs``) that override the defaults above and are
                forwarded to ``VectorStoreRetrieverWithScore``.

        Returns:
            A ``VectorStoreRetrieverWithScore`` bound to this store.
        """
        # Defaults first, so caller-supplied values take precedence.
        retriever_args = {
            "search_type": "similarity",
            "search_kwargs": {"k": 4},
            **kwargs,
        }
        return VectorStoreRetrieverWithScore(vectorstore=self, **retriever_args)


class FAISSStore:
    """Namespace for loading the FAISS index saved in the "faiss_index" folder."""

    @staticmethod
    def load_vector_store():
        """Load the saved index as a ``FAISSWithScore`` store.

        Returns:
            The value returned by ``FAISSWithScore.load_local`` for the
            "faiss_index" folder, using a fresh ``OpenAIEmbeddings`` instance.
        """
        store = FAISSWithScore.load_local("faiss_index", OpenAIEmbeddings())
        print("Faiss vector DB loaded")
        return store

# Create a feedback function backed by the trulens_eval OpenAI provider.
openai = feedback.OpenAI()

# qs_relevance takes the app input as its first argument and, as its second,
# the page_content of every document passed as `input_documents` to the
# combine_docs_chain call. The per-document results are reduced with np.min
# (presumably so the least relevant document sets the score — confirm).
f_qs_relevance = (
    Feedback(openai.qs_relevance)
    .on_input()
    .on(
        Select.Record.app.combine_docs_chain._call.args.inputs.input_documents[:].page_content
    )
    .aggregate(np.min)
)


# Bring it all together.
def load_conversational_chain(vector_store):
    """Wrap a conversational retrieval chain over ``vector_store`` with TruLens.

    Args:
        vector_store: Object exposing ``as_retriever()``; its retriever is
            used by the chain to fetch source documents.

    Returns:
        The result of ``Tru().Chain(...)`` wrapping the chain, with
        ``f_qs_relevance`` as its only feedback function.
    """
    chat_model = ChatOpenAI(model_name="gpt-4", temperature=0)

    qa_chain = ConversationalRetrievalChain.from_llm(
        llm=chat_model,
        retriever=vector_store.as_retriever(),
        return_source_documents=True,
    )

    return Tru().Chain(
        qa_chain,
        feedbacks=[f_qs_relevance],
        with_hugs=False,
    )

In [None]:
# Run example: load the saved index, build the instrumented chain and ask one
# question.
vector_store = FAISSStore.load_vector_store()
chain = load_conversational_chain(vector_store)

# There are no prior turns, so chat_history is an empty list. The chain expects
# a list of turns here; an empty string only worked because it iterates as empty.
ret, record = chain.call_with_record({"question": "What is trulens?", "chat_history": []})

In [None]:
# Check result: `ret` is the first value returned by `chain.call_with_record`.
ret

In [None]:
# Check record: `record` is the second value returned by
# `chain.call_with_record`; `.dict()` is called to inspect its contents.
record.dict()

In [None]:
# Check that components of the app have been instrumented despite the various
# subclasses used (VectorStoreRetrieverWithScore, FAISSWithScore).
chain.print_instrumented()

In [None]:
# Start dashboard to inspect records. The return value is kept in `proc`
# (presumably a handle to the dashboard process — confirm).
proc = Tru().start_dashboard()