Install the packages used by the RAG (retrieval-augmented generation) pipeline

In [None]:
!pip install langchain langchain-community langchain-core langchain-openai
!pip install chromadb
!pip install tiktoken google-search-results
!pip install pydantic
!pip install typing-inspect typing_extensions
!pip install loguru

Collecting langchain
  Downloading langchain-0.2.14-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-community
  Downloading langchain_community-0.2.12-py3-none-any.whl.metadata (2.7 kB)
Collecting langchain-core
  Downloading langchain_core-0.2.33-py3-none-any.whl.metadata (6.2 kB)
Collecting langchain-openai
  Downloading langchain_openai-0.1.22-py3-none-any.whl.metadata (2.6 kB)
Collecting langchain-text-splitters<0.3.0,>=0.2.0 (from langchain)
  Downloading langchain_text_splitters-0.2.2-py3-none-any.whl.metadata (2.1 kB)
Collecting langsmith<0.2.0,>=0.1.17 (from langchain)
  Downloading langsmith-0.1.99-py3-none-any.whl.metadata (13 kB)
Collecting tenacity!=8.4.0,<9.0.0,>=8.1.0 (from langchain)
  Downloading tenacity-8.5.0-py3-none-any.whl.metadata (1.2 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain-core)
  Downloading jsonpatch-1.33

Importing libraries

In [None]:
import openai
import os

from langchain_community.document_loaders.directory import DirectoryLoader

from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores.chroma import Chroma

from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI
from loguru import logger

from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

Configure OS environment to utilize OpenAI and LangChain APIs

In [None]:
# Supply the API credentials without ever writing them into the notebook.
# A key that is already present in the environment is reused; otherwise the
# user is prompted and the input is not echoed or saved in the cell output.
from getpass import getpass

for _secret_name in ("OPENAI_API_KEY", "LANGCHAIN_API_KEY"):
    if not os.environ.get(_secret_name):
        os.environ[_secret_name] = getpass(f"Enter {_secret_name}: ")

# Turn on LangChain tracing for the calls made later in this notebook.
os.environ["LANGCHAIN_TRACING_V2"] = "true"

In [None]:
# Attach Google Drive to this Colab runtime so the data folder can be read
# like a local directory.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Path to the data folder stored in Google Drive (Drive is mounted at
# /content/drive above).
# TODO: replace this placeholder with the actual folder that holds the .txt files.
directory_path = "/content/drive/MyDrive/data"

In [None]:
from langchain_community.document_loaders import TextLoader

Load the `.txt` files from the data folder into LangChain using `DirectoryLoader`

In [None]:
# Collect every .txt file under directory_path (the "**" glob also matches
# nested folders), reading each file with TextLoader and showing a progress bar.
loader1 = DirectoryLoader(
    directory_path,
    glob="**/*.txt",
    loader_cls=TextLoader,
    show_progress=True,
)

# Load the files and split them into chunks; no splitter is passed, so the
# library's default applies.
txt_docs = loader1.load_and_split()

100%|██████████| 23/23 [00:00<00:00, 167.78it/s]


Create embeddings and generate local ChromaDB vector database using the documents provided

In [None]:
import chromadb

# Embedding model used to vectorize the document chunks. No arguments are
# passed, so it relies on the OPENAI_API_KEY configured earlier.
embeddings = OpenAIEmbeddings()

# NOTE(review): this client is created but is not passed to
# Chroma.from_documents below; it is kept so that any other cell referencing
# `client` keeps working.
client = chromadb.Client()

# Local vector database containing document embeddings
txt_docsearch = Chroma.from_documents(documents=txt_docs, embedding=embeddings)

# Writes each generated token to stdout as it arrives.
handler = StreamingStdOutCallbackHandler()

# Define LLM
# The streaming handler only receives tokens when the model itself streams and
# the handler is attached to the model: callbacks given to a chain constructor
# are scoped to the chain object and are not forwarded to the model inside it.
llm = ChatOpenAI(
    model_name="gpt-4",
    temperature=0.6,
    streaming=True,
    callbacks=[handler],
)

# Create the retrieval QA chain: the retriever pulls matching chunks from the
# vector store and the "stuff" chain type places them all into a single prompt.
qa_txt = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    callbacks=[handler],
    retriever=txt_docsearch.as_retriever(),
)

In [None]:
# Prompt for the retrieval QA chain. It is split across adjacent string
# literals purely for readability; Python joins them into one string.
q = (
    "The MD Program at George Washington (GW) School of Medicine and Health Sciences "
    "includes substantial content in Clinical Public Health (population health, "
    "health systems science, health policy, and community health) to prepare GW "
    "graduates for an expanded scope of practice required to be successful 21st "
    "century physicians. Write a 350 character short essay on Ishani Nautiyal's "
    "specific experiences related to that aspect of the MD Program? Write in first "
    "person tone as if you are Ishani. Cite concrete examples."
)

# Run the chain; the returned dict (query + result) is displayed as the cell output.
qa_txt.invoke(q)

{'query': "The MD Program at George Washington (GW) School of Medicine and Health Sciences includes substantial content in Clinical Public Health (population health, health systems science, health policy, and community health) to prepare GW graduates for an expanded scope of practice required to be successful 21st century physicians. Write a 350 character short essay on Ishani Nautiyal's specific experiences related to that aspect of the MD Program? Write in first person tone as if you are Ishani. Cite concrete examples.",
 'result': 'Through my volunteer work at the Cuyahoga County Juvenile Detention Center, I gained insights into the disparities in underserved communities. This motivated me to advocate for equitable access to healthcare. As the president of Global Medical Brigades, I led initiatives to provide accessible healthcare in rural Honduras, underlining the importance of population health.'}