In [16]:
from langchain.document_loaders import PyPDFLoader
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings

# Loaders: one PyPDFLoader per source PDF.
# m1 / m2 wrap the two marketing documents; coke1 wraps the PDF that feeds
# the Coca Cola retriever further down.
m1, m2 = map(PyPDFLoader, ("./doc/marketing-1.pdf", "./doc/marketing-2.pdf"))
coke1 = PyPDFLoader("./doc/500067-PDF-ENG.pdf")

# Split and load documents
from langchain.text_splitter import RecursiveCharacterTextSplitter
# One splitter for every PDF: chunk_size=1000 with no overlap between chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)

# Chunk each loaded PDF.
splits = text_splitter.split_documents(m1.load())
splits2 = text_splitter.split_documents(m2.load())
coke = text_splitter.split_documents(coke1.load())

# Both marketing PDFs feed a single retriever, so merge their chunks into
# `splits`. extend() replaces the element-by-element append loop and leaves no
# stray loop variable behind in the kernel namespace.
splits.extend(splits2)

# Retrievers
# Each corpus gets its own named Chroma collection. Without collection_name,
# every in-memory store falls back to the same default collection name, and
# stores created in one kernel can then end up sharing a collection, so each
# retriever would also return the other corpus's chunks.
# NOTE(review): sharing behaviour depends on the installed chromadb version;
# explicit names are safe either way.
marketing_retriever = Chroma.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(),
    collection_name="marketing",
).as_retriever()
coke_retriever = Chroma.from_documents(
    documents=coke,
    embedding=OpenAIEmbeddings(),
    collection_name="coca_cola",
).as_retriever()

# Routing table handed to MultiRetrievalQAChain.from_retrievers in a later
# cell: one entry per retriever with a name and a description.
# Presumably the descriptions are what the router LLM reads to pick a
# destination, so keep them distinct.
retriever_infos = [
    {
        "name": "marketing",
        "description": "Good for answering questions related to marketing strategy",
        "retriever": marketing_retriever,
    },
    {
        "name": "coca cola marketing strategy",
        "description": "Good for answering questions related to marketing strategy by example of Coca Cola",
        "retriever": coke_retriever,
    },
]

In [12]:
# Sanity check: number of chunks produced from the Coca Cola PDF.
len(coke)

128

In [13]:
from langchain.chains.router import MultiRetrievalQAChain
from langchain.llms import OpenAI

# Router chain over the retrievers listed in retriever_infos, driven by an
# OpenAI LLM. verbose=True prints the chain trace, including which retriever
# a question was routed to.
chain = MultiRetrievalQAChain.from_retrievers(
    llm=OpenAI(),
    retriever_infos=retriever_infos,
    verbose=True,
)

In [17]:
# Ask the router chain a Coca Cola question and print the answer text.
# The verbose trace above the answer shows the destination that was chosen.
print(chain.run("What is the marketing strategy of Coca Cola?"))



[1m> Entering new MultiRetrievalQAChain chain...[0m




coca cola marketing strategy: {'query': 'What is the marketing strategy of Coca Cola?'}
[1m> Finished chain.[0m
 Coca Cola's marketing strategy in the 1940s and 1950s was to associate the brand with the American way of life and the idea of taking a "pause" to refresh. They used print and television ads to spread their message, and enlisted Eddie Fischer as a spokesperson.


In [None]:
# Embed and store splits

from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
# Dedicated store for the single-retriever RAG chain built below.
# The explicit collection_name keeps these vectors out of the default
# collection that unnamed in-memory Chroma stores fall back to. Sharing that
# default would mix in the Coca Cola chunks and duplicate the marketing ones
# already added by the first cell.
# NOTE(review): sharing behaviour depends on the installed chromadb version.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(),
    collection_name="marketing_rag",
)
retriever = vectorstore.as_retriever()

In [None]:
# Prompt 
# https://smith.langchain.com/hub/rlm/rag-prompt

from langchain import hub
# Pull the "rlm/rag-prompt" prompt from the LangChain Hub (URL above).
# Presumably needs network access when the cell runs.
rag_prompt = hub.pull("rlm/rag-prompt")

In [None]:
# LLM

from langchain.chat_models import ChatOpenAI
# Chat model used as the last stage of rag_chain in the next cell
# (gpt-3.5-turbo, temperature=0).
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

In [None]:
# RAG chain 

from langchain.schema.runnable import RunnablePassthrough
# Pipeline: the input is sent to `retriever` to fill "context" and passed
# through unchanged as "question"; the resulting mapping feeds rag_prompt,
# whose output goes to llm.
# Presumably invoked with the question string as input; it is not invoked in
# this notebook.
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()} 
    | rag_prompt 
    | llm 
)

In [None]:
# Query the vector store directly (no LLM involved) and display the documents
# it returns for a sample question.
question = "what is marketing?"
docs = vectorstore.similarity_search(question)
docs


In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Demonstration splitter with tiny chunks.
# NOTE(review): this rebinds `text_splitter`, replacing the chunk_size=1000
# splitter from the first cell, and no later visible cell uses it.
text_splitter = RecursiveCharacterTextSplitter(
    # Set a really small chunk size, just to show.
    chunk_size = 100,
    chunk_overlap  = 20,
    # Chunk length is measured with the built-in len.
    length_function = len,
    add_start_index = True,
)

In [None]:
from langchain.embeddings import OpenAIEmbeddings
from os import getenv


# OpenAI embedding client with the API key read explicitly from the
# environment; getenv returns None when OPENAI_API_KEY is unset.
# NOTE(review): embeddings_model is not referenced by any later visible cell.
embeddings_model = OpenAIEmbeddings(openai_api_key=getenv("OPENAI_API_KEY"))


In [None]:
from langchain.llms import OpenAI
from langchain.chains import AnalyzeDocumentChain


# OpenAI LLM at temperature 0, used by load_qa_chain in the next cell.
# Rebinds `llm`, which previously named the ChatOpenAI model; rag_chain was
# built before this point and keeps the object it was given.
llm = OpenAI(temperature=0)

In [None]:
from langchain.chains.question_answering import load_qa_chain
# Question-answering chain over documents, built on `llm` with the
# "map_reduce" chain type.
qa_chain = load_qa_chain(llm, chain_type="map_reduce")

In [None]:
# Wrap qa_chain so it can be run against a single raw text: the final cell
# calls it with input_document=<text> and question=<str>.
qa_document_chain = AnalyzeDocumentChain(combine_docs_chain=qa_chain)

In [None]:
# NOTE(review): `pages` was not assigned in any visible cell, so this cell
# raised NameError on a fresh kernel. It is assumed here to be the Documents
# loaded from the first marketing PDF (PyPDFLoader.load() is expected to yield
# one Document per page) -- confirm which PDF was intended.
pages = m1.load()

# Raw text of the first ten entries of `pages`; these strings are what the
# next cell embeds.
text = [page.page_content for page in pages[:10]]

text

In [None]:
import os
import chromadb
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.document_transformers import (
    LongContextReorder,
)
from langchain.chains import StuffDocumentsChain, LLMChain
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI

# Embeddings from the "all-MiniLM-L6-v2" model via HuggingFaceEmbeddings.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Store the page texts in their own named collection. The earlier stores in
# this notebook were embedded with OpenAIEmbeddings; letting this one fall back
# to the same default collection name could mix vectors from two different
# embedding models in one collection.
# NOTE(review): sharing behaviour depends on the installed chromadb version.
# k=10 asks the retriever for up to ten documents per query.
# Rebinds `retriever`; rag_chain keeps the retriever object it was built with.
retriever = Chroma.from_texts(
    text,
    embedding=embeddings,
    collection_name="marketing_pages_minilm",
).as_retriever(search_kwargs={"k": 10})

In [None]:
# Fetch the documents the current `retriever` returns for a sample query and
# display them. Rebinds `docs` from the earlier similarity-search cell.
query = "What can you tell me about branding?"

docs = retriever.get_relevant_documents(query)
docs

In [None]:
# Reorder the documents:
# Less relevant documents end up in the middle of the list and more
# relevant ones at the beginning / end.
reordering = LongContextReorder()
reordered_docs = reordering.transform_documents(docs)

# Preview the first three reordered documents only; the tail of the list,
# which also holds highly relevant documents, is not shown here.
reordered_docs[:3]


In [None]:
# We prepare and run a custom Stuff chain with reordered docs as context.

# Override prompts
# Each document is rendered as its bare page_content.
document_prompt = PromptTemplate(
    input_variables=["page_content"], template="{page_content}"
)
# Name of the prompt variable that receives the combined document text.
document_variable_name = "context"
# Rebinds `llm` to an OpenAI LLM with default settings for this chain.
llm = OpenAI()
stuff_prompt_override = """Given this text extracts:
-----
{context}
-----
Please answer the following question:
{query}"""
prompt = PromptTemplate(
    template=stuff_prompt_override, input_variables=["context", "query"]
)

# Instantiate the chain
# NOTE(review): this rebinds `chain`, which earlier named the
# MultiRetrievalQAChain; re-running that earlier cell's query afterwards would
# hit this chain instead.
llm_chain = LLMChain(llm=llm, prompt=prompt)
chain = StuffDocumentsChain(
    llm_chain=llm_chain,
    document_prompt=document_prompt,
    document_variable_name=document_variable_name,
)

# Only three documents are sent to keep the prompt small. Reordering puts the
# most relevant documents at both ends of the list, so slicing the head of
# `reordered_docs` would drop the ones at the tail. Take the first three
# retrieved hits and reorder just those instead.
# Assumes `docs` is still in the retriever's ranked order, best first -- TODO confirm.
chain.run(input_documents=reordering.transform_documents(docs[:3]), query=query)

In [None]:
# Run the map_reduce QA chain on the text of pages[1] (the second entry).
# NOTE(review): depends on `pages` existing in the kernel; make sure the cell
# that defines it has been run first.
qa_document_chain.run(input_document=pages[1].page_content, question="So how we could brand our bussiness?")
