In [None]:
import os
import sys
import re
from dotenv import load_dotenv
import PyPDF2
from PyPDF2 import PdfReader
import gradio as gr
from langchain.document_loaders import TextLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain_openai import OpenAIEmbeddings

In [None]:
def process_text_stripe(text: str) -> str:
    """Remove single spaces and halve each pair of consecutive spaces.

    Scanning left to right, every two adjacent spaces become one space and
    every remaining lone space is deleted (so ``"H e l l o  w o r l d"``
    becomes ``"Hello world"``). All other characters are left untouched.

    Args:
        text: The text to clean.

    Returns:
        The cleaned text.
    """
    # Each match is one space optionally followed by a second one; the
    # replacement keeps only that optional second space, so "  " -> " " and
    # " " -> "". Doing it in a single substitution avoids a placeholder
    # round-trip, which would also turn any literal "TEMP_MARKER" already
    # present in the input into a space.
    return re.sub(r" ( ?)", r"\1", text)

In [None]:
def process_text_manual(text: str) -> str:
    """Strip manual-specific artifacts from a piece of text.

    Three clean-ups are applied in order:
      1. every capital ``X`` is removed;
      2. each span starting at ``RZ_CoffeeB_Cover_BDA_Globe_DE.indd`` and
         running to the last ``digits:digits`` group on the same line is
         removed;
      3. every occurrence of a space followed by a hyphen is removed.

    Args:
        text: The text to clean.

    Returns:
        The cleaned text.
    """
    cleaned = text.replace('X', '')
    # Order matters: the footer pattern is applied before " -" is stripped.
    for pattern in (r"RZ_CoffeeB_Cover_BDA_Globe_DE\.indd.*\d+:\d+", r' -'):
        cleaned = re.sub(pattern, '', cleaned)
    return cleaned

In [None]:
def extract_text_from_pdf(file_path: str, process_func) -> str:
    """Read a PDF and return its processed page texts joined by newlines.

    Args:
        file_path: Path of the PDF file, opened in binary mode.
        process_func: Callable applied to the extracted text of each page;
            its return values are what get joined.

    Returns:
        The processed page texts, in page order, separated by a newline.
    """
    processed_pages = []
    with open(file_path, 'rb') as pdf_handle:
        # Pages are read while the file is still open.
        for page in PyPDF2.PdfReader(pdf_handle).pages:
            processed_pages.append(process_func(page.extract_text()))
    return '\n'.join(processed_pages)

In [None]:
def write_text_to_file(file_path: str, text: str):
    """Write ``text`` plus a trailing newline to ``file_path`` as UTF-8.

    An existing file at ``file_path`` is overwritten.

    Args:
        file_path: Destination path.
        text: Content to write; a single newline is appended after it.
    """
    with open(file_path, "w", encoding="utf-8") as out:
        out.writelines((text, "\n"))

In [None]:
def ask_question(user_query):
    """Answer a question from the document index, or acknowledge "quit".

    Relies on the module-level ``index`` object, which must exist before
    this function is called with anything other than "quit".

    Args:
        user_query: The question text. The word "quit" in any letter case
            is treated as an exit request.

    Returns:
        A fixed exit message for "quit", otherwise whatever
        ``index.query(user_query)`` returns.
    """
    wants_exit = user_query.lower() == "quit"
    if not wants_exit:
        return index.query(user_query)
    return "You have chosen to exit."

In [None]:
if __name__ == "__main__":
    # Pull variables from a local .env file (if any) into the environment.
    load_dotenv()
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        # Fail early with an actionable message: assigning None into
        # os.environ would otherwise raise an opaque TypeError.
        raise RuntimeError(
            "OPENAI_API_KEY is not set; define it in the environment or in a .env file."
        )
    os.environ["OPENAI_API_KEY"] = api_key

    # Extract and clean both PDFs; keys are the text files they are saved to.
    texts = {
        "data_stripe.txt": extract_text_from_pdf('stripe-2022-update.pdf', process_text_stripe),
        "data_manual.txt": extract_text_from_pdf('CoffeeB_Manual Globe_EN_10.08.2022.pdf', process_text_manual)
    }

    for filename, text in texts.items():
        write_text_to_file(filename, text)

    # The files above are written as UTF-8, so read them back as UTF-8
    # instead of relying on the platform's default encoding.
    loader1 = TextLoader('data_stripe.txt', encoding="utf-8")
    loader2 = TextLoader('data_manual.txt', encoding="utf-8")

    # Pass the embedding model explicitly rather than depending on the
    # creator's implicit default. `index` is a module-level name that
    # ask_question() reads.
    index = VectorstoreIndexCreator(embedding=OpenAIEmbeddings()).from_loaders([loader1, loader2])

    iface = gr.Interface(
        fn=ask_question,
        inputs=gr.Textbox(lines=2, placeholder="Enter your question here",label="question"),
        outputs="text",
        title="Test",
        description="Enter your question based on 2 given docs."
    )

    # NOTE(review): share=True requests a publicly reachable Gradio link;
    # confirm that exposing this app outside the local machine is intended.
    iface.launch(share=True)
