**Step 01: Install All the Required Packages**

**Step 02: Import All the Required Libraries**

In [1]:
import google.generativeai as genai

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#dotenv package to load the API key
from dotenv import load_dotenv
from IPython.display import display
from IPython.display import Markdown
import textwrap
import os

**Step 03: Setup your API Key**

Before you can use the Gemini API, you must first obtain an API key. If you don't already have one, create a key with one click in Google AI Studio.


<a class="button button-primary" href="https://makersuite.google.com/app/apikey" target="_blank" rel="noopener noreferrer">Get an API key</a>

In [3]:
# Load variables from the local .env file into the process environment.
load_dotenv()
# Pass the key straight to the SDK; it is read from the environment and never
# echoed into the cell output.
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))

In [4]:
# Create a new .env file in the workspace and store the API key in it.
# Never commit a real key to the notebook: use a placeholder and keep .env out of version control.
# !echo -e 'GOOGLE_API_KEY=<your-api-key>' > .env

In [5]:
# List the working directory, including dotfiles, to confirm that .env is present
!ls -a

.
..
.env
Artificial_Intelligence.pdf
Chat_with_Multiple_documents_with_Gemini_Pro_and_LangChain.ipynb
main.py
pdfs
requirements.txt
test.py
venv


In [6]:
# Load the .env file again now that it exists; the value shown below the cell is load_dotenv()'s return value
load_dotenv()

True

In [7]:
#Create a helper function that will convert the markdown into nicely formatted text
def to_markdown(text):
  """Wrap model output in a Markdown block quote for display in the notebook.

  Bullet characters ('•') are turned into Markdown list markers ('*'), and
  every line, blank ones included, is prefixed with '>'.

  Args:
    text: The raw text to render.

  Returns:
    An IPython ``Markdown`` object holding the quoted text.
  """
  bulleted = text.replace('•', '*')
  # The always-true predicate makes textwrap.indent prefix empty lines as well.
  quoted = textwrap.indent(bulleted, '>', predicate=lambda _line: True)
  return Markdown(quoted)

In [8]:
# Instantiate the Gemini model used for the direct text-generation example below
model = genai.GenerativeModel('gemini-1.5-flash')

In [9]:
%%time
# Send a single text prompt to the model; %%time reports the CPU and wall time of the request
response = model.generate_content("What is the meaning of life?")

CPU times: total: 15.6 ms
Wall time: 5.69 s


In [15]:
# Print the whole response object (candidates, content parts, text) to inspect its structure
print(response)

response:
GenerateContentResponse(
    done=True,
    iterator=None,
    result=protos.GenerateContentResponse({
      "candidates": [
        {
          "content": {
            "parts": [
              {
                "text": "The meaning of life is a question that has been pondered by philosophers, theologians, and individuals for centuries. There is no single, definitive answer, as it is a deeply personal and subjective question.  \n\nHere are some different perspectives on the meaning of life:\n\n**Philosophical Perspectives:**\n\n* **Nihilism:**  Life has no inherent meaning or purpose.  \n* **Existentialism:**  Individuals are responsible for creating their own meaning and purpose.\n* **Absurdism:**  The search for meaning in a meaningless universe is inherently absurd, yet we must embrace this absurdity.\n* **Hedonism:**  The pursuit of pleasure and happiness is the ultimate goal.\n* **Utilitarianism:**  The greatest good for the greatest number of people is the ultimate goa

**Step 07: Chat with Documents**

In [18]:
from langchain import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

**Download the PDF Files**

**Extract the Text from the PDFs**

In [26]:
def get_pdf_text(pdf_docs):
    """Return the text of every page of every PDF as one string.

    Args:
        pdf_docs: Iterable of PDF sources, each passed as-is to ``PdfReader``.

    Returns:
        The extracted page texts joined in document order, then page order,
        with no separator between pages. Empty input yields ``""``.
    """
    page_texts = [
        page.extract_text()
        for pdf in pdf_docs
        for page in PdfReader(pdf).pages
    ]
    return "".join(page_texts)
    

In [25]:
# Show the loaded documents.
# NOTE(review): `data` is not assigned in any visible cell and the saved output is an empty list — confirm which loader populates it.
print(data)

[]


In [27]:
#print(data[0].page_content)


In [17]:
# Merge the content of every loaded document into one newline-separated string.
# Assumes each item in `data` exposes a .page_content attribute — TODO confirm where `data` is loaded.
context = "\n".join(
    [str(doc.page_content) for doc in data]
)

In [9]:
# len() on a string counts characters, not words, so the label says "characters".
print("The total number of characters in the context:", len(context))

The total number of words in the context: 76045


**Split the Extracted Data into Text Chunks**

In [10]:
# Configure the splitter with chunk_size=10000 and chunk_overlap=200.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=10000,
    chunk_overlap=200,
)
# Rebuild the combined text, this time separating documents with a blank line.
context = "\n\n".join(
    [str(doc.page_content) for doc in data]
)

In [11]:
# Split the combined text into chunks with the splitter configured above
texts = text_splitter.split_text(context)

In [12]:
# Number of chunks produced by the splitter
print(len(texts))

9


In [13]:
# Peek at the first chunk to sanity-check the extracted text
texts[0]

'3 grad.illinois.edu/CareerDevelopment Rachel Green  \n2 1 0  W .  G R E E N  S T . ,  C H A M P A I G N ,  I L  \n( 2 1 7 )  5 5 5 - 1 2 3 4  •  R S T U D E N T @ I L L I N O I S . E D U  \nEDUCATION  \nPhD in English May 20xx \nUniversity of Illinois at Urbana-Champaign \nDissertation title:  “Down on the Farm: World War One and the Emergence of Literary  \nModernism in the American South”  \nCommittee : Margaret Black, Naomi Blue, John Jay, Robert Roberts (Chair) \nMA in English  20xx \nUniversity of Illinois at Urbana-Champaign \nBA in English and Communications, summa cum laude  20xx \nButler University, Indianapolis, IN  \nTEACHING  & A DVISING   \nComposition Instructor  20xx-present \nResearch Writing Program, University of Illinois \n\uf0b7Facilitator for seven sections of English composition.\n\uf0b7Planned and taught a writing-intensive course based upon current events.\n\uf0b7Used instructional technology to enhance pedagogical technique.\n\uf0b7Taught in part with an innov

**Download the Embeddings from Google**

In [16]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

In [23]:
# Embedding client (model "models/embedding-001") used to vectorise the text chunks
embeddings = GoogleGenerativeAIEmbeddings(model = "models/embedding-001")

**Create Embeddings for Each Text Chunk and Save Them in a Vector Store**

In [24]:
# Build a Chroma vector store from the chunks and their embeddings, then expose it as a retriever
vector_index = Chroma.from_texts(texts, embeddings).as_retriever()

In [54]:
# Example query used to exercise the retriever
question = "YOLOv7 is used for object detection or not"
# Fetch the chunks the retriever returns for the question
# NOTE(review): get_relevant_documents appears to be deprecated in newer LangChain releases in favour of invoke() — confirm against the installed version
docs = vector_index.get_relevant_documents(question)

In [55]:
# Inspect the retrieved documents
docs

[Document(page_content='References\n[1] anonymous. Designing network design strategies. anony-\nmous submission , 2022. 3\n[2] Irwan Bello, William Fedus, Xianzhi Du, Ekin Dogus\nCubuk, Aravind Srinivas, Tsung-Yi Lin, Jonathon Shlens,\nand Barret Zoph. Revisiting ResNets: Improved training\nand scaling strategies. Advances in Neural Information Pro-\ncessing Systems (NeurIPS) , 34, 2021. 2\n[3] Alexey Bochkovskiy, Chien-Yao Wang, and Hong-\nYuan Mark Liao. YOLOv4: Optimal speed and accuracy of\nobject detection. arXiv preprint arXiv:2004.10934 , 2020.\n2, 6, 7\n[4] Yue Cao, Thomas Andrew Geddes, Jean Yee Hwa Yang,\nand Pengyi Yang. Ensemble deep learning in bioinformat-\nics.Nature Machine Intelligence , 2(9):500–508, 2020. 2\n[5] Nicolas Carion, Francisco Massa, Gabriel Synnaeve, Nico-\nlas Usunier, Alexander Kirillov, and Sergey Zagoruyko.\nEnd-to-end object detection with transformers. In Pro-\nceedings of the European Conference on Computer Vision\n(ECCV) , pages 213–229, 2020. 10\

**Create a Prompt Template**

In [56]:
# Prompt that restricts the model to the supplied context and tells it to say so
# when the answer is missing. The context placeholder is followed by a plain
# newline, so no stray punctuation is appended to the retrieved text.
prompt_template = """
  Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
  provided context just say, "answer is not available in the context", don't provide the wrong answer\n\n
  Context:\n {context}\n
  Question: \n{question}\n

  Answer:
"""

# The template exposes two variables: "context" and "question".
prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

In [57]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model handed to the question-answering chain.
# Note: this rebinds `model`, replacing the genai.GenerativeModel created earlier in the notebook.
model = ChatGoogleGenerativeAI(
    model="gemini-pro",
    temperature=0.3,
)


In [58]:
# Build a "stuff" question-answering chain around the chat model and the custom prompt
chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)

In [59]:
# Run the QA chain over the retrieved documents and keep only the chain's outputs.
# Note: this rebinds `response`, which earlier held the direct Gemini result.
response = chain(
    dict(input_documents=docs, question=question),
    return_only_outputs=True,
)

In [60]:
# Display the chain result: a dict with the answer under 'output_text'
response

{'output_text': 'Yes, YOLOv7 is used for object detection'}