In [1]:
# import Libraries

import openai # we are going to use OPen AI LLM
import langchain # framework for developing applications powered by language models
import pinecone # cloud-native vector database
from langchain.document_loaders import PyPDFDirectoryLoader # As have PDF file in Directory(with in documents folder) we are using PYPDFDirectoryLoader. Else we can use PyPDF
from langchain.text_splitter import RecursiveCharacterTextSplitter # Splits text in the document as chunks. vectors can be created from chunks only
from langchain.embeddings.openai import OpenAIEmbeddings# For embedding text chunks as vectors
from langchain.vectorstores import Pinecone # for Storing vectors using this DB
from langchain.llms import OpenAI

  from tqdm.autonotebook import tqdm


In [2]:
from dotenv import load_dotenv
load_dotenv() # Loads all the environment variables

True

In [3]:

import os

In [4]:
## Lets Read the document
def read_doc(directory):
    file_loader=PyPDFDirectoryLoader(directory)
    documents=file_loader.load()
    return documents

In [5]:
doc=read_doc('documents/') # loading from documents folder
len(doc) # It tells How many pages document contains 

58

In [6]:
## Divide the docs into chunks
### https://api.python.langchain.com/en/latest/text_splitter/langchain.text_splitter.RecursiveCharacterTextSplitter.html#
def chunk_data(docs,chunk_size=800,chunk_overlap=50):

    #chunk_size:the maximum character length of each text chunk.
    #chunk_overlap: the number of tokens/units (IN (NLP), a token can be a word, part of a word, or even just characters like punctuation) that overlap between two consecutive chunks.
    text_splitter=RecursiveCharacterTextSplitter(chunk_size=chunk_size,chunk_overlap=chunk_overlap) # split a document into chunks with a specified chunk_size and chunk_overlap
    text_splitter.split_documents(docs)# Splits the document
    return docs

In [7]:
documents=chunk_data(docs=doc)
len(documents)

58

In [8]:
## Embedding Technique Of OPENAI
embeddings=OpenAIEmbeddings(api_key=os.environ['OPENAI_API_KEY'])
embeddings

  warn_deprecated(


OpenAIEmbeddings(client=<openai.resources.embeddings.Embeddings object at 0x000001E7B21CA5C0>, async_client=<openai.resources.embeddings.AsyncEmbeddings object at 0x000001E7CCA51D20>, model='text-embedding-ada-002', deployment='text-embedding-ada-002', openai_api_version='', openai_api_base=None, openai_api_type='', openai_proxy='', embedding_ctx_length=8191, openai_api_key='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', openai_organization=None, allowed_special=set(), disallowed_special='all', chunk_size=1000, max_retries=2, request_timeout=None, headers=None, tiktoken_enabled=True, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={}, skip_empty=False, default_headers=None, default_query=None, retry_min_seconds=4, retry_max_seconds=20, http_client=None)

In [9]:
# testing the size if the vector
vectors=embeddings.embed_query("How are you?")
print (f"The length of the vector which is generated from OPENAI embeddings is : {len(vectors)}")
vectors

The length of the vector which is generated from OPENAI embeddings is : 1536


[-0.016785908412158042,
 -0.012151270116836888,
 0.006627965687606043,
 -0.026018159342696565,
 -0.01616878054948239,
 0.01762520513661754,
 -0.011114493891931487,
 -0.0099234347700346,
 -0.018131250431046412,
 -0.010417137246439636,
 0.0278695466560138,
 0.0016508201293049156,
 -0.00733766375413787,
 -0.011651395765758651,
 0.007238923072592348,
 -0.015391197915142053,
 0.028363250063741408,
 -0.011830363367475229,
 0.013959458032731997,
 -0.0205997656070393,
 0.00252868606461311,
 0.006344086460993312,
 0.0009997490514024384,
 -0.008263357876457773,
 -0.01588490039154709,
 -0.007794339173455263,
 0.025117151322085612,
 -0.012404292764051323,
 0.02230304282936083,
 -0.02515417884483454,
 0.005609702758413819,
 0.0076955989575710265,
 -0.013169533511690397,
 0.004014424029647617,
 0.008757060352862809,
 -0.022290699080014423,
 0.004020595438659533,
 -0.01043565193913667,
 0.0203282291984504,
 -0.006337915051981395,
 0.0270302499315543,
 0.001255857519538152,
 -0.005239425202618115,
 -0

In [23]:

## Vector Search DB In Pinecone
from pinecone import Pinecone
pc = Pinecone(api_key=os.environ['PINECONE_API_KEY'], environment="gcp-starter")
index_name="vectordbtry"

In [25]:
from langchain_pinecone import PineconeVectorStore

In [26]:
vector_database_index = PineconeVectorStore.from_documents(
    index_name='vectordbtry', 
    documents=doc, 
    embedding=embeddings
)


In [27]:

## Cosine Similarity Retreive Results from VectorDB
def retrieve_query(query,k=2):
    matching_results=vector_database_index.similarity_search(query,k=k)
    return matching_results

In [28]:
from langchain.chains.question_answering import load_qa_chain
from langchain import OpenAI

In [39]:
llm=OpenAI(model_name="gpt-3.5-turbo",temperature=0.5)
chain=load_qa_chain(llm,chain_type="stuff")




In [40]:
## Search answers from VectorDB
def retrieve_answers(query):
    doc_search=retrieve_query(query)
    print(doc_search)
    response=chain.run(input_documents=doc_search,question=query)
    return response

In [41]:
our_query = "How much the agriculture target will be increased by how many crore?"
answer = retrieve_answers(our_query)
print(answer)

[Document(page_content="7 \n \n \n farmers in contributing to the health of fellow citizens by growing these \n‘Shree Anna’.   \n22. Now to make India a global hub for ' Shree Anna' , the Indian Institute \nof Millet Research, Hyderabad  will be supported as the Centre of Excellence \nfor sharing best practices, research and technologies at the international \nlevel.    \nAgriculture Credit  \n23. The agriculture credit target will be increased  \nto ` 20 lakh crore with focus on animal husbandry, dairy and fisheries.  \nFisheries \n24. We will launch a new sub-scheme of PM Matsya Sampada Yojana \nwith targeted investment of ` 6,000 crore to further enable activities of \nfishermen, fish vendors, and micro & small enterprises, improve value chain \nefficiencies, and expand the market. \nCooperation \n25. For farmers, especially small and marginal farmers, and other \nmarginalised sections, the government is promoting cooperative-based \neconomic development model. A new Ministry of Coo

APIRemovedInV1: 

You tried to access openai.ChatCompletion, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API.

You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface. 

Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28`

A detailed migration guide is available here: https://github.com/openai/openai-python/discussions/742
