In [None]:
pip install chromadb pypdf langchain_community

In [None]:
pip install langchain --upgrade

In [None]:
pip install -U langchain-openai

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import DirectoryLoader
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.memory import ConversationBufferMemory,ConversationSummaryBufferMemory, ConversationBufferWindowMemory, ChatMessageHistory
from langchain.chains import ConversationChain
from langchain.chains import ConversationalRetrievalChain
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

In [None]:
import chromadb
from langchain.vectorstores import Chroma

In [None]:
import os
import sys

import openai

# Extend the module search path by the directory two levels above the
# working directory.
sys.path.append('../..')

from dotenv import find_dotenv, load_dotenv

# Locate a local .env file and load its variables into the environment.
_ = load_dotenv(find_dotenv())

# Indexing os.environ directly makes a missing key fail here with KeyError.
openai.api_key = os.environ['OPENAI_API_KEY']

In [None]:
# Alternative kept for reference: loader = TextLoader('single_text_file.txt')
# Load the files under PDFs/ that match the glob, parsing each with PyPDFLoader.
loader = DirectoryLoader(
    'PDFs/',
    glob="./*.pdf",
    loader_cls=PyPDFLoader,
)
docs = loader.load()

In [None]:
# Split the loaded documents into overlapping chunks: size 1000 with an
# overlap of 200 (units are presumably characters — confirm against the
# splitter's documentation).
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

documents = text_splitter.split_documents(docs)

In [None]:
# Inspect the first chunk produced by the splitter (shown as the cell output).
documents[0]

In [None]:
# Number of chunks produced by the splitter.
len(documents)

In [None]:
# Import the embeddings class from the maintained `langchain_openai` package.
# The legacy `langchain.embeddings.openai` path is deprecated, and re-importing
# it here would shadow the `langchain_openai` class already imported above.
from langchain_openai import OpenAIEmbeddings

# Embedding model used both to index the chunks and to embed incoming queries.
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

In [None]:
# Directory handed to Chroma as `persist_directory` when the vector store is built.
persist_directory = 'docs/'

In [None]:
# Open (or create) the Chroma collection persisted under `persist_directory`.
# Calling `Chroma.from_documents` on every run would re-embed all chunks and
# append them again to the persisted collection, so a second "Run All" would
# leave every chunk stored twice and the retriever returning duplicates.
vectordb = Chroma(
    persist_directory=persist_directory,
    embedding_function=embeddings,
)

# Embed and insert the chunks only when the collection is still empty.
# NOTE: if the source PDFs change, delete `persist_directory` to rebuild the index.
if vectordb._collection.count() == 0:
    vectordb.add_documents(documents)

In [None]:
# Number of entries stored in the underlying collection (read through the
# private `_collection` attribute of the vector store).
print(vectordb._collection.count())

In [None]:
# Retriever over the vector store: similarity search with k=7 results per query.
retriever = vectordb.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 7},
)

In [None]:
# Chat model that writes the final answer; max_tokens limits the completion length.
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo-1106",
    temperature=0.6,
    max_tokens=500,
)

In [None]:
from langchain.prompts import PromptTemplate

# Prompt passed to the QA chain through `chain_type_kwargs`; `{context}` and
# `{question}` are the template's placeholders.
#
# A trailing backslash inside the triple-quoted string joins the next line with
# no separator, so every continued sentence must end with its own punctuation
# and a space. Without that the rendered prompt contained "availableAnswer" and
# "paragraphsContext:".
template = """You are an assistant for question-answering tasks. \
Use the following pieces of retrieved context to answer the question. \
Answer the question using every piece of relevant context available. \
Answer in about 200-300 words. If you don't find answer in context, just say that you don't know. \
If two questions are asked together, answer them in different paragraphs.
Context: {context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

In [None]:
# Retrieval QA chain: answers with `llm` over chunks fetched by `retriever`,
# using the custom prompt, and returns the retrieved documents with the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    retriever=retriever,
    return_source_documents=True,
)

In [None]:
import textwrap

def process_llm_response(llm_response):
    """Print the chain's answer followed by the distinct sources it drew on.

    Args:
        llm_response: Mapping with a 'result' string and a 'source_documents'
            iterable whose items expose a ``metadata`` dict (keys 'source'
            and 'page' are read).

    The answer is wrapped to 100 columns. Each distinct (source, page) pair is
    printed once, in first-seen order.
    """
    print(textwrap.fill(llm_response['result'], 100))
    print('\n\nSources:')

    # Deduplicate on the (source, page) PAIR. Tracking sources and pages in two
    # separate sets would drop a new pair such as (a.pdf, 2) once a.pdf and
    # page 2 had each been seen in other pairs.
    seen = set()

    for doc in llm_response['source_documents']:
        # .get() keeps the listing usable for documents whose loader does not
        # record a page number (the missing value is shown as None).
        source = doc.metadata.get('source')
        page = doc.metadata.get('page')

        key = (source, page)
        if key in seen:
            continue
        seen.add(key)
        print("pdf name:", source, "page no:", page)

In [None]:
# Run the chain through `invoke`; calling the chain object directly
# (`qa_chain({...})`) is deprecated in LangChain.
result = qa_chain.invoke({"query": "what does traditional growth strategies focuses on?"})
process_llm_response(result)

In [None]:
# Two questions in one query; the prompt asks for separate paragraphs.
# `invoke` replaces the deprecated direct call `qa_chain({...})`.
result = qa_chain.invoke({"query": "what does traditional growth strategies focuses on in terms of economics. Also explain how inceptionNet is better than other models"})
process_llm_response(result)

In [None]:
# Run the chain through `invoke`; calling the chain object directly
# (`qa_chain({...})`) is deprecated in LangChain.
result = qa_chain.invoke({"query": "How does shortcut connection addresses the problem of degradation as networks are made deeper"})
process_llm_response(result)

In [None]:
# Run the chain through `invoke`; calling the chain object directly
# (`qa_chain({...})`) is deprecated in LangChain.
result = qa_chain.invoke({"query": "differenciate between MobileNet and InceptionV3 model?"})
process_llm_response(result)

In [None]:
# Run the chain through `invoke`; calling the chain object directly
# (`qa_chain({...})`) is deprecated in LangChain.
result = qa_chain.invoke({"query": "differenciate between MobileNet and InceptionV3 and residual nets model?"})
process_llm_response(result)

In [None]:
# Run the chain through `invoke`; calling the chain object directly
# (`qa_chain({...})`) is deprecated in LangChain.
result = qa_chain.invoke({"query": "explain the architecture of residual nets"})
process_llm_response(result)

In [None]:
# Run the chain through `invoke`; calling the chain object directly
# (`qa_chain({...})`) is deprecated in LangChain.
result = qa_chain.invoke({"query": "How does MobileNetV2 improves the state of the art performance of mobile models"})
process_llm_response(result)

In [None]:
# Run the chain through `invoke`; calling the chain object directly
# (`qa_chain({...})`) is deprecated in LangChain.
result = qa_chain.invoke({"query": "Summarize the artchitecture of Mobilenetv2 and inception V2"})
process_llm_response(result)

In [None]:
# Run the chain through `invoke`; calling the chain object directly
# (`qa_chain({...})`) is deprecated in LangChain.
result = qa_chain.invoke({"query": " what are the ways of factorizing convolutions in various settings?"})
process_llm_response(result)

In [None]:
# Presumably an out-of-scope question, checking that the prompt's "say that
# you don't know" instruction is followed — confirm against the indexed PDFs.
# `invoke` replaces the deprecated direct call `qa_chain({...})`.
result = qa_chain.invoke({"query": " Who is Virat Kohli?"})
process_llm_response(result)