In [1]:
import os

import streamlit as st
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredFileLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain.chains import RetrievalQA

# Sidebar: collects the OpenAI API key and links to the project repository.
github_repo_url = "https://github.com/xcaliver2001/challenge_gpt"

with st.sidebar:
    st.title("Settings")
    openai_api_key = st.text_input(
        label="Enter your OpenAI API Key",
        type="password",  # mask the key while it is typed
    )
    st.markdown("[GitHub Repository](" + github_repo_url + ")")

# Gate: with no key entered, show a warning and end this script run here.
key_missing = not openai_api_key
if key_missing:
    st.warning("Please enter your OpenAI API Key to proceed.")
    st.stop()

# Streamlit Main App
TEMP_DIR = "./temp"      # where uploaded documents are written
CACHE_DIR = "./cache/"   # on-disk store for cached embedding vectors


@st.cache_resource
def build_retriever(file, api_key):
    """Save the uploaded file, split and embed it, and return a FAISS retriever.

    Streamlit re-executes the whole script on every widget interaction.
    Caching this function keeps the load / split / index work from being
    repeated for an upload and API key that have already been processed.

    Args:
        file: The object returned by ``st.file_uploader``.
        api_key: OpenAI API key used for the embedding requests.

    Returns:
        The retriever produced by ``FAISS.as_retriever()`` over the
        document's chunks.
    """
    # The upload directory does not exist on a fresh checkout; without this
    # the open() below fails with FileNotFoundError.
    os.makedirs(TEMP_DIR, exist_ok=True)
    # Keep only the final path component so a crafted file name cannot
    # make the write land outside TEMP_DIR.
    document_path = os.path.join(TEMP_DIR, os.path.basename(file.name))
    with open(document_path, "wb") as f:
        f.write(file.getbuffer())

    splitter = CharacterTextSplitter.from_tiktoken_encoder(
        separator='\n',
        chunk_size=600,
        chunk_overlap=100,
    )
    loader = UnstructuredFileLoader(document_path)
    docs = loader.load_and_split(text_splitter=splitter)

    embeddings = OpenAIEmbeddings(api_key=api_key)
    # Namespace the cache by model name so vectors produced by different
    # embedding models never collide in the shared store.
    cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
        embeddings,
        LocalFileStore(CACHE_DIR),
        namespace=embeddings.model,
    )
    vectorstore = FAISS.from_documents(docs, cached_embeddings)
    return vectorstore.as_retriever()


st.title("RAG Pipeline with LangChain")
uploaded_file = st.file_uploader("Upload a Document", type=["txt"])

if uploaded_file:
    # Initialize Components
    retriever = build_retriever(uploaded_file, openai_api_key)
    llm = ChatOpenAI(api_key=openai_api_key)
    chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type='refine',
        retriever=retriever,
    )

    # Chat Interface
    st.subheader("Chat with Your Document")
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []
    if "last_asked" not in st.session_state:
        st.session_state.last_asked = None

    question = st.text_input("Enter your question")

    # The text box keeps its value across reruns, so an unrelated rerun
    # (e.g. editing the API key) would otherwise call the LLM again and
    # append a duplicate entry. Only answer a (file, question) pair once
    # in a row.
    asked = (uploaded_file.name, question)
    if question and asked != st.session_state.last_asked:
        answer = chain.run(question)
        st.session_state.chat_history.append((question, answer))
        st.session_state.last_asked = asked

    for q, a in st.session_state.chat_history:
        st.markdown(f"**Question:** {q}")
        st.markdown(f"**Answer:** {a}")
else:
    st.info("Please upload a document to start.")


2024-12-19 06:45:25.211 
  Warning: to view this Streamlit app on a browser, run it with the following
  command:

    streamlit run c:\Users\xcali\project_gpt\env\lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
