# In [None]:
import os
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.llms import OpenAI

# Provide the OpenAI API key through the environment. A key that is already
# exported (e.g. in the shell) is kept as-is; the placeholder below is only a
# fallback and must be replaced or overridden with a real key before running.
os.environ.setdefault("OPENAI_API_KEY", "your-openai-api-key")

def load_document(file_path):
    """Load a document with the loader that matches its file extension.

    The extension check is case-insensitive, so ``report.PDF`` is handled
    the same way as ``report.pdf``.

    Args:
        file_path: Path to a ``.pdf``, ``.txt`` or ``.docx`` file, given as
            a string or any ``os.PathLike`` object.

    Returns:
        Whatever the selected loader's ``load()`` call returns.

    Raises:
        ValueError: If the extension is not one of the supported types.
    """
    path = os.fspath(file_path)
    # Compare against a lowercased copy so upper/mixed-case extensions match,
    # while the loader still receives the path with its original casing.
    lowered = path.lower()
    if lowered.endswith(".pdf"):
        loader = PyPDFLoader(path)
    elif lowered.endswith(".txt"):
        loader = TextLoader(path)
    elif lowered.endswith(".docx"):
        loader = Docx2txtLoader(path)
    else:
        raise ValueError("Unsupported file type")
    return loader.load()

def build_qa_chain(file_path, *, chunk_size=1000, chunk_overlap=200, k=3,
                   temperature=0):
    """Create a retrieval QA chain over the document at *file_path*.

    The document is loaded, split into overlapping chunks, embedded with
    ``OpenAIEmbeddings``, indexed in a Chroma vector store, and wrapped in a
    ``RetrievalQA`` chain backed by an ``OpenAI`` LLM.

    Args:
        file_path: Path of the document to index; passed to
            ``load_document``.
        chunk_size: ``chunk_size`` given to the text splitter.
        chunk_overlap: ``chunk_overlap`` given to the text splitter.
        k: Value of ``"k"`` in the retriever's ``search_kwargs``.
        temperature: ``temperature`` given to the ``OpenAI`` LLM.

    Returns:
        The chain returned by ``RetrievalQA.from_chain_type``.

    Raises:
        ValueError: Propagated from ``load_document`` for unsupported file
            types.
    """
    docs = load_document(file_path)

    # Split long documents into manageable chunks
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    split_docs = splitter.split_documents(docs)

    # Embed the chunks and store them in a Chroma vector store
    embeddings = OpenAIEmbeddings()
    vectorstore = Chroma.from_documents(split_docs, embedding=embeddings)

    retriever = vectorstore.as_retriever(search_kwargs={"k": k})

    # Create the QA chain with OpenAI
    llm = OpenAI(temperature=temperature)
    return RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

def ask_question(qa_chain, question):
    """Run *question* through the QA chain and hand back its reply.

    Args:
        qa_chain: Object exposing a ``run(question)`` method, such as the
            chain produced by ``build_qa_chain``.
        question: The question to pass to the chain.

    Returns:
        The value returned by ``qa_chain.run(question)``.
    """
    return qa_chain.run(question)

if __name__ == "__main__":
    # Interactive loop: index one document, then answer questions typed at
    # the prompt until the user enters "exit"/"quit" or closes the input.
    file_path = "example.pdf"  # Replace with your document
    qa_chain = build_qa_chain(file_path)

    print("Document loaded and indexed. Ask your questions:")
    while True:
        try:
            query = input("Q: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C ends the session cleanly instead of a traceback.
            print()
            break
        if not query:
            # Nothing was typed; re-prompt rather than sending an empty
            # question to the chain.
            continue
        if query.lower() in ("exit", "quit"):
            break
        answer = ask_question(qa_chain, query)
        print(f"A: {answer}\n")
