# LangChain with a FAISS vector DB
Example by Joselin James.

In [None]:
%load_ext autoreload
%autoreload 2
from pathlib import Path
import sys

# If running from a checkout of the GitHub repo, make the in-repo packages
# importable: append the directory three levels above the current working
# directory to the module search path.
sys.path.append(str(Path().cwd().parent.parent.parent.resolve()))

# Uncomment for more debugging printouts. The string literal below holds
# disabled code that would attach a DEBUG-level stdout handler to the root
# logger; remove the surrounding triple quotes to enable it.
"""
import logging
root = logging.getLogger()
root.setLevel(logging.DEBUG)

handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
root.addHandler(handler)
"""
# End the cell on None — presumably so the notebook does not echo the string
# literal above as the cell's output.
None

In [None]:
# ! pip install faiss-cpu

In [None]:
from trulens_eval.keys import check_keys
# The OpenAI embeddings and chat model used below need OPENAI_API_KEY.
# NOTE(review): presumably check_keys fails early when the key is missing —
# confirm against trulens_eval.keys.
check_keys("OPENAI_API_KEY")

In [None]:
# Create a local FAISS db: load the repository README, split it into chunks,
# embed the chunks with OpenAI embeddings, and save the resulting index to the
# "faiss_index" folder so a later cell can load it from disk.

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
# NOTE(review): TextLoader is not used in any cell visible in this notebook.
from langchain.document_loaders import TextLoader
from langchain.document_loaders import UnstructuredMarkdownLoader

# The path is relative to the notebook's working directory.
loader = UnstructuredMarkdownLoader("../../../README.md")
documents = loader.load()
# Split with chunk_size=1000 and no overlap between consecutive chunks.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(documents)

embeddings = OpenAIEmbeddings()

# Build the index from the chunks and write it to ./faiss_index (the same
# folder name FAISSStore.load_vector_store reads from).
db = FAISS.from_documents(docs, embeddings)
db.save_local("faiss_index")

In [None]:
# Example from Joselin James

from langchain.vectorstores.base import VectorStoreRetriever
from langchain.callbacks.manager import CallbackManagerForRetrieverRun
from langchain.schema import Document
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from trulens_eval import Tru, TruChain
from trulens_eval import feedback, Feedback

from typing import List
import numpy as np

from trulens_eval import Select

class VectorStoreRetrieverWithScore(VectorStoreRetriever):
    """Retriever that records each hit's relevance score in its metadata.

    For ``search_type == "similarity"`` only documents scoring above 0.6 are
    returned, each with the score stored under ``metadata["score"]``. For
    ``search_type == "mmr"`` the vector store's max-marginal-relevance results
    are returned unchanged (no score is attached).
    """

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[Document]:
        """Fetch the documents for ``query`` according to ``self.search_type``.

        Args:
            query: Text to search the vector store with.
            run_manager: Callback manager for the retriever run; not used here.

        Returns:
            The retrieved documents; in similarity mode, only those whose
            relevance score is greater than 0.6.

        Raises:
            ValueError: If ``search_type`` is neither "similarity" nor "mmr".
        """
        mode = self.search_type

        # MMR results carry no scores, so hand them back as they are.
        if mode == "mmr":
            return self.vectorstore.max_marginal_relevance_search(
                query, **self.search_kwargs
            )

        if mode != "similarity":
            raise ValueError(f"search_type of {self.search_type} not allowed.")

        scored_hits = self.vectorstore.similarity_search_with_relevance_scores(
            query, **self.search_kwargs
        )
        print("From relevant doc in vec store")

        kept: List[Document] = []
        for hit, relevance in scored_hits:
            # Drop anything that does not clear the 0.6 relevance threshold.
            if not relevance > 0.6:
                continue
            # Attach the score to the document's metadata.
            hit.metadata["score"] = relevance
            kept.append(hit)
        return kept


class FAISSWithScore(FAISS):
    """FAISS vector store whose retriever is a ``VectorStoreRetrieverWithScore``."""

    def as_retriever(self, **kwargs) -> VectorStoreRetrieverWithScore:
        """Return a score-annotating retriever over this store.

        The override accepts keyword arguments, like the base class's
        ``as_retriever(**kwargs)``, so callers that pass ``search_type`` or
        ``search_kwargs`` no longer hit a ``TypeError``. Called without
        arguments it behaves as before.

        Args:
            **kwargs: Forwarded to ``VectorStoreRetrieverWithScore``. When not
                supplied, ``search_type`` defaults to ``"similarity"`` and
                ``search_kwargs`` to ``{"k": 4}``.

        Returns:
            A ``VectorStoreRetrieverWithScore`` bound to this vector store.
        """
        kwargs.setdefault("search_type", "similarity")
        # A fresh dict per call, so retrievers never share a mutable default.
        kwargs.setdefault("search_kwargs", {"k": 4})
        return VectorStoreRetrieverWithScore(vectorstore=self, **kwargs)


class FAISSStore:
    """Namespace for loading a saved FAISS index as a ``FAISSWithScore`` store."""

    @staticmethod
    def load_vector_store(index_path: str = "faiss_index"):
        """Load a FAISS index from disk using OpenAI embeddings.

        Args:
            index_path: Folder handed to ``FAISSWithScore.load_local``.
                Defaults to "faiss_index", the folder name this notebook saves
                its index under.

        Returns:
            The store returned by ``FAISSWithScore.load_local``.
        """
        # NOTE(review): LangChain's FAISS.load_local is understood to unpickle
        # the stored docstore — only load index folders you created yourself.
        embeddings = OpenAIEmbeddings()
        faiss_store = FAISSWithScore.load_local(index_path, embeddings)
        print("Faiss vector DB loaded")
        return faiss_store


# Feedback provider from trulens_eval's `feedback` module.
# NOTE(review): this binds the global name `openai`, which would shadow the
# `openai` package if that were imported in this notebook.
openai = feedback.OpenAI()

# Feedback function built on `openai.qs_relevance`. Its first argument comes
# from `on_input()` (presumably the record's main input — confirm against the
# trulens_eval docs); its second is the `page_content` of every document in the
# `input_documents` passed to `combine_docs_chain._call`. The per-document
# results are reduced with `np.min`, so the lowest-scoring document determines
# the final value.
f_qs_relevance = Feedback(openai.qs_relevance).on_input().on(
    Select.Record.app.combine_docs_chain._call.args.inputs.input_documents[:].page_content
).aggregate(np.min)


def load_conversational_chain(vector_store):
    """Build a conversational retrieval chain over ``vector_store`` and wrap it
    for recording with trulens.

    Args:
        vector_store: Object exposing ``as_retriever()``; its retriever feeds
            the chain.

    Returns:
        The wrapper produced by ``Tru().Chain`` around a
        ``ConversationalRetrievalChain`` (gpt-4, temperature 0, source
        documents returned), with ``f_qs_relevance`` as its only feedback.
    """
    qa_chain = ConversationalRetrievalChain.from_llm(
        ChatOpenAI(temperature=0, model_name="gpt-4"),
        vector_store.as_retriever(),
        return_source_documents=True,
    )

    # Wrap the chain so each call is recorded and evaluated.
    return Tru().Chain(
        qa_chain,
        feedbacks=[f_qs_relevance],
        with_hugs=False,
    )

In [None]:
# Load the saved index, build the instrumented chain, and ask one question.
vector_store = FAISSStore.load_vector_store()
chain = load_conversational_chain(vector_store)
# call_with_record returns two values: the chain's result and the record.
# NOTE(review): chat_history is passed as an empty string here; confirm
# whether the chain expects a list of prior turns instead.
ret, record = chain.call_with_record({"question": "What is trulens?", "chat_history":""})

In [None]:
# Display the result returned by the chain call above.
ret

In [None]:
# Display the record from the chain call above as a dictionary.
record.dict()

In [None]:
# Presumably prints which components of the wrapped chain are instrumented —
# confirm against the trulens_eval docs.
chain.print_instrumented()

In [None]:
# Start the trulens dashboard and keep its handle in `proc`. `_dev` is given
# the directory three levels above the working directory (the same directory
# the first cell adds to sys.path).
# NOTE(review): `force=True` presumably restarts a dashboard that is already
# running — confirm.
proc = Tru().start_dashboard(force=True, _dev=Path.cwd().parent.parent.parent)