In [20]:
from git import Repo

from langchain.text_splitter import Language
from langchain.document_loaders.generic import GenericLoader
from langchain.document_loaders.parsers import LanguageParser
from langchain.text_splitter import RecursiveCharacterTextSplitter

import os 

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma 
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationSummaryMemory
from langchain.chains import ConversationalRetrievalChain


In [3]:
# Create the checkout directory in pure Python so the cell is portable and can
# be re-run safely: exist_ok=True makes it a no-op when the folder already exists.
os.makedirs("test_repo", exist_ok=True)

In [8]:
# Local checkout location and the repository to analyse.
repo_path="test_repo/"
repo_url = "https://github.com/vishnugnair/MEDICAL-CHATBOT.git"

# Cloning into a directory that already holds a checkout fails, which breaks
# Restart & Run All. Reuse the existing clone when one is present.
if os.path.isdir(os.path.join(repo_path, ".git")):
    repo = Repo(repo_path)
else:
    repo = Repo.clone_from(repo_url, to_path=repo_path)

In [9]:
# Load every Python source file found anywhere under the cloned repository.
# NOTE(review): per the LangChain docs, parser_threshold is the minimum number of
# lines a file needs before LanguageParser segments it; confirm 500 is intended.
loader = GenericLoader.from_filesystem(
    repo_path,
    glob="**/*",
    suffixes=[".py"],  # restrict the walk to Python files
    parser=LanguageParser(language=Language.PYTHON, parser_threshold=500),  # parse with Python syntax rules
)

documents = loader.load()


In [10]:
# Preview only the first loaded document; dumping the whole list floods the
# notebook output with full file contents.
documents[:1]

[Document(page_content='from flask import Flask, render_template, request\nimport os\nfrom dotenv import load_dotenv\n\n# LangChain + Pinecone\nfrom langchain.chains import create_retrieval_chain\nfrom langchain.chains.combine_documents import create_stuff_documents_chain\nfrom langchain_openai import OpenAI\n\n# Local modules\nfrom store_index import create_index_if_not_exists, load_or_create_docsearch, index_name\nfrom src.prompt import prompt\n\napp = Flask(__name__)\n\nload_dotenv()\nOPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")\n\n# 1. Create or verify Pinecone index\ncreate_index_if_not_exists()\n\n# 2. Load documents and create docsearch\ndocsearch, embeddings = load_or_create_docsearch()\n\n# 3. Build retriever\nretriever = docsearch.as_retriever(search_type="similarity", search_kwags={"k": 3})\n\n# 4. Initialize OpenAI LLM\nllm = OpenAI(api_key=OPENAI_API_KEY, temperature=0.4, max_tokens=500)\n\n# 5. Create Q&A chain and RAG chain\nquestion_answer_chain = create_stuff_docu

In [11]:
# Number of Document objects returned by the loader.
len(documents)

7

In [12]:
# Splitter that uses Python-specific separators, producing chunks of at most
# 500 characters with 20 characters of overlap between neighbouring chunks.
documents_splitter = RecursiveCharacterTextSplitter.from_language(
    Language.PYTHON, chunk_size=500, chunk_overlap=20
)


In [13]:
# Break the loaded documents into chunks using the splitter configured above.
texts=documents_splitter.split_documents(documents)

In [14]:
# Number of chunks produced by the splitter.
len(texts)

21

In [15]:
from dotenv import load_dotenv

# Copy variables from a local .env file (if one exists) into os.environ.
load_dotenv()

# Read the key once and fail fast with an actionable message. Writing None back
# into os.environ would otherwise raise an opaque TypeError, and writing the
# same value back is redundant because the key already lives in the environment.
OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
if not OPENAI_API_KEY:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it or add it to a .env file before running this notebook."
    )

In [16]:
# OpenAI embedding client used to vectorise the code chunks.
# NOTE(review): disallowed_special=() presumably stops the tokenizer from rejecting
# text that happens to contain special-token strings — confirm against the LangChain docs.
embeddings = OpenAIEmbeddings(disallowed_special=())

In [21]:
# Embed every chunk and index the vectors in a Chroma store kept under ./db.
vectordb = Chroma.from_documents(
    texts,
    embedding=embeddings,
    persist_directory="./db",
)

In [22]:
# Ask the vector store to persist itself.
# NOTE(review): presumably writes to the persist_directory given at construction;
# newer Chroma releases may persist automatically — TODO confirm.
vectordb.persist()

In [23]:
# Chat model built with library defaults (no model name or temperature is set here).
llm=ChatOpenAI()

In [None]:
# Conversation memory backed by the same LLM, exposed under the "chat_history" key
# and configured to return message objects. It only influences answers once it is
# handed to a chain through that chain's `memory` argument.
memory=ConversationSummaryMemory(llm=llm, memory_key="chat_history", return_messages=True)  

In [None]:
# Conversational retrieval chain over the code index.
# - The retriever uses MMR search and returns 8 chunks per query.
# - memory=memory wires in the summary memory built earlier; without it that
#   object is never used and every question is answered with no conversation context.
# NOTE(review): with memory attached, the chain is expected to load "chat_history"
# from memory, superseding any chat_history value supplied by the caller — confirm.
qa = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=vectordb.as_retriever(search_type="mmr", search_kwargs={"k": 8}),
    memory=memory,
)

In [42]:
# Question to put to the retrieval chain.
question="How is RAG achieved in my app?"

In [43]:
# Build the chain inputs explicitly (the question plus an empty chat_history list),
# run the chain, and print only the generated answer text.
chain_inputs = {"question": question, "chat_history": []}
result = qa(chain_inputs)
print(result["answer"])


RAG (Retriever, Answerer, and Generator) is achieved in your app through the RAG chain that is created using the `rag_chain` variable. The RAG chain is initiated by invoking the chain with the user query using the `rag_chain.invoke({"input": user_query})` function. This chain integrates retriever, answerer, and generator components to process the user query and provide a response. The response is then used to render the template with the answer and the user query.
