https://huggingface.co/spaces/rafaldembski/PDF-CHATBOT/blob/main/app.py


In [None]:
import numpy as np
import streamlit as st
import os
import base64
from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate
from llama_index.llms.huggingface import HuggingFaceInferenceAPI
from dotenv import load_dotenv
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings
from PIL import Image

# Load environment variables
load_dotenv()

# Configure the Llama index settings
Settings.llm = HuggingFaceInferenceAPI(
    model_name="google/gemma-1.1-7b-it",
    tokenizer_name="google/gemma-1.1-7b-it",
    context_window=3000,
    token=os.getenv("HF_TOKEN"),
    max_new_tokens=512,
    generate_kwargs={"temperature": 0.1},
)
Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)

# Define the directory for persistent storage and data
PERSIST_DIR = "./db"
DATA_DIR = "data"

# Ensure data directory exists
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(PERSIST_DIR, exist_ok=True)

# Language descriptions
descriptions = {
    "pl": """
    # ChatPDF
    **ChatPDF** to zaawansowane narzędzie oparte na sztucznej inteligencji, zaprojektowane do analizy i generowania odpowiedzi na pytania związane z treścią załadowanych dokumentów PDF. Aplikacja umożliwia użytkownikom wprowadzanie zapytań dotyczących zawartości dokumentów i otrzymywanie precyzyjnych odpowiedzi w oparciu o zaawansowane algorytmy uczenia maszynowego.
    **Jak korzystać z aplikacji**:
    1. Wgraj plik PDF, korzystając z przycisku **Submit & Process**.
    2. Poczekaj, aż plik PDF zostanie przetworzony.
    3. Zadawaj pytania dotyczące zawartości pliku, określając język, w jakim ma być wygenerowana odpowiedź.
    **Technologie**:
    - Model: Gemma 1.1-7b-it
    - Stworzony przez: Rafał Dembski
    - Technologie: LlamaIndex, PyTorch, Streamlit
    """,
    "en": """
    # ChatPDF
    **ChatPDF** is an advanced AI-powered tool designed to analyze and generate answers to questions related to the content of uploaded PDF documents. The application allows users to input queries about document contents and receive precise responses based on advanced machine learning algorithms.
    **How to use the application**:
    1. Upload a PDF file using the **Submit & Process** button.
    2. Wait for the PDF file to be processed.
    3. Ask questions about the content of the file, specifying the language in which you want the response to be generated.
    **Technologies**:
    - Model: Gemma 1.1-7b-it
    - Developed by: Rafał Dembski
    - Technologies: LlamaIndex, PyTorch, Streamlit
    """,
    "de": """
    # ChatPDF
    **ChatPDF** ist ein fortschrittliches, KI-gesteuertes Tool, das entwickelt wurde, um Fragen zur Analyse und Beantwortung von Fragen im Zusammenhang mit dem Inhalt hochgeladener PDF-Dokumente zu generieren. Die Anwendung ermöglicht es Benutzern, Anfragen bezüglich des Dokumenteninhalts einzugeben und präzise Antworten basierend auf fortschrittlichen maschinellen Lernalgorithmen zu erhalten.
    **So verwenden Sie die Anwendung**:
    1. Laden Sie eine PDF-Datei über die Schaltfläche **Submit & Process** hoch.
    2. Warten Sie, bis die PDF-Datei verarbeitet wurde.
    3. Stellen Sie Fragen zum Inhalt der Datei und geben Sie an, in welcher Sprache die Antwort generiert werden soll.
    **Technologien**:
    - Modell: Gemma 1.1-7b-it
    - Entwickelt von: Rafał Dembski
    - Technologien: LlamaIndex, PyTorch, Streamlit
    """
}

def displayPDF(file):
    with open(file, "rb") as f:
        base64_pdf = base64.b64encode(f.read()).decode('utf-8')
    pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="100%" height="600" type="application/pdf"></iframe>'
    st.markdown(pdf_display, unsafe_allow_html=True)

def data_ingestion():
    documents = SimpleDirectoryReader(DATA_DIR).load_data()
    storage_context = StorageContext.from_defaults()
    index = VectorStoreIndex.from_documents(documents)
    index.storage_context.persist(persist_dir=PERSIST_DIR)

def handle_query(query):
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)
    chat_text_qa_msgs = [
    (
        "user",
        """You are a Q&A assistant named ChatPDF. You have a specific response programmed for when users specifically ask about your creator, Suriya. The response is: "I was created by Suriya, an enthusiast in Artificial Intelligence. He is dedicated to solving complex problems and delivering innovative solutions. With a strong focus on machine learning, deep learning, Python, generative AI, NLP, and computer vision, Suriya is passionate about pushing the boundaries of AI to explore new possibilities." For all other inquiries, your main goal is to provide answers as accurately as possible, based on the instructions and context you have been given. If a question does not match the provided context or is outside the scope of the document, kindly advise the user to ask questions within the context of the document.
        Context:
        {context_str}
        Question:
        {query_str}
        """
    )
    ]
    text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)

    query_engine = index.as_query_engine(text_qa_template=text_qa_template)
    answer = query_engine.query(query)

    if hasattr(answer, 'response'):
        return answer.response
    elif isinstance(answer, dict) and 'response' in answer:
        return answer['response']
    else:
        return "Sorry, I couldn't find an answer."

# Streamlit app initialization
# Language selection
selected_language = st.sidebar.selectbox("Wybierz język / Select Language / Sprache auswählen", ("pl", "en", "de"))

# Display description based on selected language
st.markdown(descriptions[selected_language])

if 'messages' not in st.session_state:
    st.session_state.messages = [{'role': 'assistant', "content": 'Hello! Upload a PDF and ask me anything about its content.'}]

with st.sidebar:
    st.title("Menu:")
    uploaded_file = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button")
    if st.button("Submit & Process"):
        with st.spinner("Processing..."):
            filepath = "data/saved_pdf.pdf"
            with open(filepath, "wb") as f:
                f.write(uploaded_file.getbuffer())
            data_ingestion()  # Process PDF every time new file is uploaded
            st.success("Done")

user_prompt = st.chat_input("Ask me anything about the content of the PDF:")
if user_prompt:
    st.session_state.messages.append({'role': 'user', "content": user_prompt})
    response = handle_query(user_prompt)
    st.session_state.messages.append({'role': 'assistant', "content": response})

for message in st.session_state.messages:
    with st.chat_message(message['role']):
        st.write(message['content'])


In [None]:
streamlit
python-dotenv
llama-index
llama-index-embeddings-huggingface
llama-index-llms-huggingface

In [None]:
import numpy as np
import streamlit as st
import os
import base64
from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate
from llama_index.llms.huggingface import HuggingFaceLLM # Changed import here
from transformers import pipeline # Import pipeline from transformers
from dotenv import load_dotenv
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings
from PIL import Image

# Load environment variables (optional, if you still need dotenv for other things)
load_dotenv()

# Configure the Llama index settings
# Load the Gemma model using Hugging Face pipeline
hf_pipeline = pipeline(
    "text-generation",
    model="google/gemma-1.1-7b-it",
    tokenizer="google/gemma-1.1-7b-it",
    torch_dtype=torch.float16, # Optional: Use float16 for less memory if you have CUDA and torch >= 2.0.1
    device_map="auto", # or "cuda:0" if you have specific GPU
)

Settings.llm = HuggingFaceLLM(
    pipeline=hf_pipeline,
)

Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)

# Define the directory for persistent storage and data
PERSIST_DIR = "./db"
DATA_DIR = "data"

# Ensure data directory exists
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(PERSIST_DIR, exist_ok=True)

# Language descriptions
descriptions = {
    "pl": """
    # ChatPDF
    **ChatPDF** to zaawansowane narzędzie oparte na sztucznej inteligencji, zaprojektowane do analizy i generowania odpowiedzi na pytania związane z treścią załadowanych dokumentów PDF. Aplikacja umożliwia użytkownikom wprowadzanie zapytań dotyczących zawartości dokumentów i otrzymywanie precyzyjnych odpowiedzi w oparciu o zaawansowane algorytmy uczenia maszynowego.
    **Jak korzystać z aplikacji**:
    1. Wgraj plik PDF, korzystając z przycisku **Submit & Process**.
    2. Poczekaj, aż plik PDF zostanie przetworzony.
    3. Zadawaj pytania dotyczące zawartości pliku, określając język, w jakim ma być wygenerowana odpowiedź.
    **Technologie**:
    - Model: Gemma 1.1-7b-it (Lokalnie)
    - Stworzony przez: Rafał Dembski
    - Technologie: LlamaIndex, PyTorch, Streamlit
    """,
    "en": """
    # ChatPDF
    **ChatPDF** is an advanced AI-powered tool designed to analyze and generate answers to questions related to the content of uploaded PDF documents. The application allows users to input queries about document contents and receive precise responses based on advanced machine learning algorithms.
    **How to use the application**:
    1. Upload a PDF file using the **Submit & Process** button.
    2. Wait for the PDF file to be processed.
    3. Ask questions about the content of the file, specifying the language in which you want the response to be generated.
    **Technologies**:
    - Model: Gemma 1.1-7b-it (Local)
    - Developed by: Rafał Dembski
    - Technologies: LlamaIndex, PyTorch, Streamlit
    """,
    "de": """
    # ChatPDF
    **ChatPDF** ist ein fortschrittliches, KI-gesteuertes Tool, das entwickelt wurde, um Fragen zur Analyse und Beantwortung von Fragen im Zusammenhang mit dem Inhalt hochgeladener PDF-Dokumente zu generieren. Die Anwendung ermöglicht es Benutzern, Anfragen bezüglich des Dokumenteninhalts einzugeben und präzise Antworten basierend auf fortschrittlichen maschinellen Lernalgorithmen zu erhalten.
    **So verwenden Sie die Anwendung**:
    1. Laden Sie eine PDF-Datei über die Schaltfläche **Submit & Process** hoch.
    2. Warten Sie, bis die PDF-Datei verarbeitet wurde.
    3. Stellen Sie Fragen zum Inhalt der Datei und geben Sie an, in welcher Sprache die Antwort generiert werden soll.
    **Technologien**:
    - Modell: Gemma 1.1-7b-it (Lokal)
    - Entwickelt von: Rafał Dembski
    - Technologien: LlamaIndex, PyTorch, Streamlit
    """
}

def displayPDF(file):
    with open(file, "rb") as f:
        base64_pdf = base64.b64encode(f.read()).decode('utf-8')
    pdf_display = f'<iframe src="data:application/pdf;base64,{base64_pdf}" width="100%" height="600" type="application/pdf"></iframe>'
    st.markdown(pdf_display, unsafe_allow_html=True)

def data_ingestion():
    documents = SimpleDirectoryReader(DATA_DIR).load_data()
    storage_context = StorageContext.from_defaults()
    index = VectorStoreIndex.from_documents(documents)
    index.storage_context.persist(persist_dir=PERSIST_DIR)

def handle_query(query):
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)
    chat_text_qa_msgs = [
    (
        "user",
        """You are a Q&A assistant named ChatPDF. You have a specific response programmed for when users specifically ask about your creator, Suriya. The response is: "I was created by Suriya, an enthusiast in Artificial Intelligence. He is dedicated to solving complex problems and delivering innovative solutions. With a strong focus on machine learning, deep learning, Python, generative AI, NLP, and computer vision, Suriya is passionate about pushing the boundaries of AI to explore new possibilities." For all other inquiries, your main goal is to provide answers as accurately as possible, based on the instructions and context you have been given. If a question does not match the provided context or is outside the scope of the document, kindly advise the user to ask questions within the context of the document.
        Context:
        {context_str}
        Question:
        {query_str}
        """
    )
    ]
    text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)

    query_engine = index.as_query_engine(text_qa_template=text_qa_template)
    answer = query_engine.query(query)

    if hasattr(answer, 'response'):
        return answer.response
    elif isinstance(answer, dict) and 'response' in answer:
        return answer['response']
    else:
        return "Sorry, I couldn't find an answer."

# Streamlit app initialization
# Language selection
selected_language = st.sidebar.selectbox("Wybierz język / Select Language / Sprache auswählen", ("pl", "en", "de"))

# Display description based on selected language
st.markdown(descriptions[selected_language])

if 'messages' not in st.session_state:
    st.session_state.messages = [{'role': 'assistant', "content": 'Hello! Upload a PDF and ask me anything about its content.'}]

with st.sidebar:
    st.title("Menu:")
    uploaded_file = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button")
    if st.button("Submit & Process"):
        with st.spinner("Processing..."):
            filepath = "data/saved_pdf.pdf"
            with open(filepath, "wb") as f:
                f.write(uploaded_file.getbuffer())
            data_ingestion()  # Process PDF every time new file is uploaded
            st.success("Done")

user_prompt = st.chat_input("Ask me anything about the content of the PDF:")
if user_prompt:
    st.session_state.messages.append({'role': 'user', "content": user_prompt})
    response = handle_query(user_prompt)
    st.session_state.messages.append({'role': 'assistant', "content": response})

for message in st.session_state.messages:
    with st.chat_message(message['role']):
        st.write(message['content'])

In [None]:
streamlit
python-dotenv
llama-index
llama-index-embeddings-huggingface
llama-index-llms-huggingface
numpy
llama-index
transformers
python-dotenv
torch

In [2]:
!pip install -r a.txt



In [1]:
!huggingface-cli login --token ْْْْْْْْْْXXXXXXX

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
The token `read` has been saved to /root/.cache/huggingface/stored_tokens
Your token has been saved to /root/.cache/huggingface/token
Login successful.
The current active token is: `read`


In [None]:
google/gemma-1.1-7b-it

In [None]:
import numpy as np
import os
import base64
from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate
from llama_index.llms.huggingface import HuggingFaceLLM
from transformers import pipeline
from dotenv import load_dotenv
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings
import torch # Import torch if you are using torch_dtype

# Load environment variables (optional, if you still need dotenv for other things)
load_dotenv()

# Configure the Llama index settings
# Load the Gemma model using Hugging Face pipeline
hf_pipeline = pipeline(
    "text-generation",
    model="meta-llama/Llama-3.2-1B-Instruct",
    tokenizer="meta-llama/Llama-3.2-1B-Instruct",
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, # Use float16 if CUDA is available, otherwise float32
    device_map="auto" if torch.cuda.is_available() else "cpu", # Use GPU if available, otherwise CPU
)

Settings.llm = HuggingFaceLLM(
    pipeline=hf_pipeline,
)

Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)

# Define the directory for persistent storage and data
PERSIST_DIR = "./db"
DATA_DIR = "data"

# Ensure data directory exists
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(PERSIST_DIR, exist_ok=True)

# Language descriptions (not used in non-UI version, but kept for reference)
descriptions = {
    "pl": """
    # ChatPDF (Non-UI)
    **ChatPDF** to zaawansowane narzędzie oparte na sztucznej inteligencji, zaprojektowane do analizy i generowania odpowiedzi na pytania związane z treścią załadowanych dokumentów PDF. Działa w trybie wiersza poleceń.
    **Technologie**:
    - Model: Gemma 1.1-7b-it (Lokalnie)
    - Stworzony przez: Rafał Dembski
    - Technologie: LlamaIndex, PyTorch, Transformers
    """,
    "en": """
    # ChatPDF (Non-UI)
    **ChatPDF** is an advanced AI-powered tool designed to analyze and generate answers to questions related to the content of uploaded PDF documents. Runs in command-line mode.
    **Technologies**:
    - Model: Gemma 1.1-7b-it (Local)
    - Developed by: Rafał Dembski
    - Technologies: LlamaIndex, PyTorch, Transformers
    """,
    "de": """
    # ChatPDF (Non-UI)
    **ChatPDF** ist ein fortschrittliches, KI-gesteuertes Tool, das entwickelt wurde, um Fragen zur Analyse und Beantwortung von Fragen im Zusammenhang mit dem Inhalt hochgeladener PDF-Dokumente zu generieren. Läuft im Kommandozeilenmodus.
    **Technologien**:
    - Modell: Gemma 1.1-7b-it (Lokal)
    - Entwickelt von: Rafał Dembski
    - Technologien: LlamaIndex, PyTorch, Transformers
    """
}

def data_ingestion():
    documents = SimpleDirectoryReader(DATA_DIR).load_data()
    storage_context = StorageContext.from_defaults()
    index = VectorStoreIndex.from_documents(documents)
    index.storage_context.persist(persist_dir=PERSIST_DIR)
    print("PDF data ingested and vector store created/updated.")

def handle_query(query):
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)
    chat_text_qa_msgs = [
    (
        "user",
        """You are a Q&A assistant named ChatPDF. You have a specific response programmed for when users specifically ask about your creator, Suriya. The response is: "I was created by Suriya, an enthusiast in Artificial Intelligence. He is dedicated to solving complex problems and delivering innovative solutions. With a strong focus on machine learning, deep learning, Python, generative AI, NLP, and computer vision, Suriya is passionate about pushing the boundaries of AI to explore new possibilities." For all other inquiries, your main goal is to provide answers as accurately as possible, based on the instructions and context you have been given. If a question does not match the provided context or is outside the scope of the document, kindly advise the user to ask questions within the context of the document.
        Context:
        {context_str}
        Question:
        {query_str}
        """
    )
    ]
    text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)

    query_engine = index.as_query_engine(text_qa_template=text_qa_template)
    answer = query_engine.query(query)

    if hasattr(answer, 'response'):
        return answer.response
    elif isinstance(answer, dict) and 'response' in answer:
        return answer['response']
    else:
        return "Sorry, I couldn't find an answer."

# Main function to run without UI
def main():
    pdf_file_path = "/content/sample.pdf" # Path to your PDF file
    user_question = "What is the main topic of this document?" # Your question

    # Check if PDF data directory is empty, or vector store doesn't exist, then ingest data
    if not os.listdir(DATA_DIR) or not os.path.exists(PERSIST_DIR):
        print("No PDF data or vector store found. Processing PDF...")
        data_ingestion()
    else:
        print("Vector store already exists. Loading existing store.")

    print(f"Question: {user_question}")
    response = handle_query(user_question)
    print(f"Answer: {response}")


if __name__ == "__main__":
    main()

In [1]:
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings
import torch
from transformers import pipeline # Make sure to keep this import

# ... other imports and code ...

# Configure the Llama index settings
# Initialize HuggingFaceLLM directly with model_name
Settings.llm = HuggingFaceLLM(
    model_name="google/gemma-1.1-7b-it",
    tokenizer_name="google/gemma-1.1-7b-it", # You can explicitly set tokenizer_name, though often it's inferred from model_name
    tokenizer_kwargs={"torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32}, # Tokenizer kwargs
    model_kwargs={"torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32, "device_map": "auto" if torch.cuda.is_available() else "cpu"}, # Model kwargs
    context_window=3000, # Keep other parameters if you need them, adjust as necessary
    max_new_tokens=512,
    generate_kwargs={"temperature": 0.1},
)

Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)

# ... rest of your code ...



import numpy as np
import os
import base64
from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate
from llama_index.llms.huggingface import HuggingFaceLLM
from transformers import pipeline
from dotenv import load_dotenv
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings
import torch # Import torch if you are using torch_dtype
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings
import torch
from transformers import pipeline
# Load environment variables (optional, if you still need dotenv for other things)
load_dotenv()


Settings.llm = HuggingFaceLLM(
    model_name="meta-llama/Llama-3.2-1B-Instruct",
    tokenizer_name="meta-llama/Llama-3.2-1B-Instruct", # You can explicitly set tokenizer_name, though often it's inferred from model_name
    tokenizer_kwargs={"torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32}, # Tokenizer kwargs
    model_kwargs={"torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32}, # Model kwargs - REMOVED device_map HERE
    context_window=3000, # Keep other parameters if you need them, adjust as necessary
    max_new_tokens=5,
    generate_kwargs={"temperature": 0.1},
)

Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)










# Define the directory for persistent storage and data
PERSIST_DIR = "./db"
DATA_DIR = "data"

# Ensure data directory exists
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(PERSIST_DIR, exist_ok=True)

# Language descriptions (not used in non-UI version, but kept for reference)
descriptions = {
    "pl": """
    # ChatPDF (Non-UI)
    **ChatPDF** to zaawansowane narzędzie oparte na sztucznej inteligencji, zaprojektowane do analizy i generowania odpowiedzi na pytania związane z treścią załadowanych dokumentów PDF. Działa w trybie wiersza poleceń.
    **Technologie**:
    - Model: Gemma 1.1-7b-it (Lokalnie)
    - Stworzony przez: Rafał Dembski
    - Technologie: LlamaIndex, PyTorch, Transformers
    """,
    "en": """
    # ChatPDF (Non-UI)
    **ChatPDF** is an advanced AI-powered tool designed to analyze and generate answers to questions related to the content of uploaded PDF documents. Runs in command-line mode.
    **Technologies**:
    - Model: Gemma 1.1-7b-it (Local)
    - Developed by: Rafał Dembski
    - Technologies: LlamaIndex, PyTorch, Transformers
    """,
    "de": """
    # ChatPDF (Non-UI)
    **ChatPDF** ist ein fortschrittliches, KI-gesteuertes Tool, das entwickelt wurde, um Fragen zur Analyse und Beantwortung von Fragen im Zusammenhang mit dem Inhalt hochgeladener PDF-Dokumente zu generieren. Läuft im Kommandozeilenmodus.
    **Technologien**:
    - Modell: Gemma 1.1-7b-it (Lokal)
    - Entwickelt von: Rafał Dembski
    - Technologien: LlamaIndex, PyTorch, Transformers
    """
}

def data_ingestion():
    documents = SimpleDirectoryReader(DATA_DIR).load_data()
    storage_context = StorageContext.from_defaults()
    index = VectorStoreIndex.from_documents(documents)
    index.storage_context.persist(persist_dir=PERSIST_DIR)
    print("PDF data ingested and vector store created/updated.")

def handle_query(query):
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)
    chat_text_qa_msgs = [
    (
        "user",
        """You are a Q&A assistant named ChatPDF. You have a specific response programmed for when users specifically ask about your creator, Suriya. The response is: "I was created by Suriya, an enthusiast in Artificial Intelligence. He is dedicated to solving complex problems and delivering innovative solutions. With a strong focus on machine learning, deep learning, Python, generative AI, NLP, and computer vision, Suriya is passionate about pushing the boundaries of AI to explore new possibilities." For all other inquiries, your main goal is to provide answers as accurately as possible, based on the instructions and context you have been given. If a question does not match the provided context or is outside the scope of the document, kindly advise the user to ask questions within the context of the document.
        Context:
        {context_str}
        Question:
        {query_str}
        """
    )
    ]
    text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)

    query_engine = index.as_query_engine(text_qa_template=text_qa_template)
    answer = query_engine.query(query)

    if hasattr(answer, 'response'):
        return answer.response
    elif isinstance(answer, dict) and 'response' in answer:
        return answer['response']
    else:
        return "Sorry, I couldn't find an answer."

# Main function to run without UI
def main():
    pdf_file_path = "/content/sample.pdf" # Path to your PDF file
    user_question = "What is the main topic of this document?" # Your question

    # Check if PDF data directory is empty, or vector store doesn't exist, then ingest data
    if not os.listdir(DATA_DIR) or not os.path.exists(PERSIST_DIR):
        print("No PDF data or vector store found. Processing PDF...")
        data_ingestion()
    else:
        print("Vector store already exists. Loading existing store.")

    print(f"Question: {user_question}")
    response = handle_query(user_question)
    print(f"Answer: {response}")


if __name__ == "__main__":
    main()

TypeError: transformers.models.auto.auto_factory._BaseAutoModelClass.from_pretrained() got multiple values for keyword argument 'device_map'

In [None]:
Settings.llm = HuggingFaceLLM(
    model_name="google/gemma-1.1-7b-it",
    tokenizer_name="google/gemma-1.1-7b-it", # You can explicitly set tokenizer_name, though often it's inferred from model_name
    tokenizer_kwargs={"torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32}, # Tokenizer kwargs
    model_kwargs={"torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32}, # Model kwargs - REMOVED device_map HERE
    context_window=3000, # Keep other parameters if you need them, adjust as necessary
    max_new_tokens=512,
    generate_kwargs={"temperature": 0.1},
)

In [1]:

import numpy as np
import os
import base64
from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate
from llama_index.llms.huggingface import HuggingFaceLLM
from transformers import pipeline
from dotenv import load_dotenv
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings
import torch # Import torch if you are using torch_dtype
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings
import torch
from transformers import pipeline
# Load environment variables (optional, if you still need dotenv for other things)
load_dotenv()


Settings.llm = HuggingFaceLLM(
    model_name="meta-llama/Llama-3.2-1B-Instruct",
    tokenizer_name="meta-llama/Llama-3.2-1B-Instruct", # You can explicitly set tokenizer_name, though often it's inferred from model_name
    tokenizer_kwargs={"torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32}, # Tokenizer kwargs
    model_kwargs={"torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32}, # Model kwargs - REMOVED device_map HERE
    context_window=3000, # Keep other parameters if you need them, adjust as necessary
    max_new_tokens=5,
    generate_kwargs={"temperature": 0.1},
)

Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)










# Define the directory for persistent storage and data
PERSIST_DIR = "./db"
DATA_DIR = "data"

# Ensure data directory exists
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(PERSIST_DIR, exist_ok=True)

# Language descriptions (not used in non-UI version, but kept for reference)
descriptions = {
    "pl": """
    # ChatPDF (Non-UI)
    **ChatPDF** to zaawansowane narzędzie oparte na sztucznej inteligencji, zaprojektowane do analizy i generowania odpowiedzi na pytania związane z treścią załadowanych dokumentów PDF. Działa w trybie wiersza poleceń.
    **Technologie**:
    - Model: Gemma 1.1-7b-it (Lokalnie)
    - Stworzony przez: Rafał Dembski
    - Technologie: LlamaIndex, PyTorch, Transformers
    """,
    "en": """
    # ChatPDF (Non-UI)
    **ChatPDF** is an advanced AI-powered tool designed to analyze and generate answers to questions related to the content of uploaded PDF documents. Runs in command-line mode.
    **Technologies**:
    - Model: Gemma 1.1-7b-it (Local)
    - Developed by: Rafał Dembski
    - Technologies: LlamaIndex, PyTorch, Transformers
    """,
    "de": """
    # ChatPDF (Non-UI)
    **ChatPDF** ist ein fortschrittliches, KI-gesteuertes Tool, das entwickelt wurde, um Fragen zur Analyse und Beantwortung von Fragen im Zusammenhang mit dem Inhalt hochgeladener PDF-Dokumente zu generieren. Läuft im Kommandozeilenmodus.
    **Technologien**:
    - Modell: Gemma 1.1-7b-it (Lokal)
    - Entwickelt von: Rafał Dembski
    - Technologien: LlamaIndex, PyTorch, Transformers
    """
}

def data_ingestion():
    documents = SimpleDirectoryReader(DATA_DIR).load_data()
    storage_context = StorageContext.from_defaults()
    index = VectorStoreIndex.from_documents(documents)
    index.storage_context.persist(persist_dir=PERSIST_DIR)
    print("PDF data ingested and vector store created/updated.")

def handle_query(query):
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)
    chat_text_qa_msgs = [
    (
        "user",
        """You are a Q&A assistant named ChatPDF. You have a specific response programmed for when users specifically ask about your creator, Suriya. The response is: "I was created by Suriya, an enthusiast in Artificial Intelligence. He is dedicated to solving complex problems and delivering innovative solutions. With a strong focus on machine learning, deep learning, Python, generative AI, NLP, and computer vision, Suriya is passionate about pushing the boundaries of AI to explore new possibilities." For all other inquiries, your main goal is to provide answers as accurately as possible, based on the instructions and context you have been given. If a question does not match the provided context or is outside the scope of the document, kindly advise the user to ask questions within the context of the document.
        Context:
        {context_str}
        Question:
        {query_str}
        """
    )
    ]
    text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)

    query_engine = index.as_query_engine(text_qa_template=text_qa_template)
    answer = query_engine.query(query)

    if hasattr(answer, 'response'):
        return answer.response
    elif isinstance(answer, dict) and 'response' in answer:
        return answer['response']
    else:
        return "Sorry, I couldn't find an answer."

# Main function to run without UI
def main():
    pdf_file_path = "/content/data/sample.pdf" # Path to your PDF file
    user_question = "What is the main topic of this document?" # Your question

    # Check if PDF data directory is empty, or vector store doesn't exist, then ingest data
    if not os.listdir(DATA_DIR) or not os.path.exists(PERSIST_DIR):
        print("No PDF data or vector store found. Processing PDF...")
        data_ingestion()
    else:
        print("Vector store already exists. Loading existing store.")

    print(f"Question: {user_question}")
    response = handle_query(user_question)
    print(f"Answer: {response}")


if __name__ == "__main__":
    main()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Vector store already exists. Loading existing store.
Question: What is the main topic of this document?


FileNotFoundError: [Errno 2] No such file or directory: '/content/db/docstore.json'

In [None]:

Settings.llm = HuggingFaceLLM(
    model_name="meta-llama/Llama-3.2-1B-Instruct",
    tokenizer_name="meta-llama/Llama-3.2-1B-Instruct", # You can explicitly set tokenizer_name, though often it's inferred from model_name
    tokenizer_kwargs={"torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32}, # Tokenizer kwargs
    model_kwargs={"torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32}, # Model kwargs - REMOVED device_map HERE
    context_window=3000, # Keep other parameters if you need them, adjust as necessary
    max_new_tokens=5,
    generate_kwargs={"temperature": 0.1},
)

In [None]:
import numpy as np
import os
import base64
from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate
from llama_index.llms.huggingface import HuggingFaceLLM
from transformers import pipeline
from dotenv import load_dotenv
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings
import torch # Import torch if you are using torch_dtype
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings
import torch
from transformers import pipeline
# Load environment variables (optional, if you still need dotenv for other things)
load_dotenv()


Settings.llm = HuggingFaceLLM(
    model_name="meta-llama/Llama-3.2-1B-Instruct",
    tokenizer_name="meta-llama/Llama-3.2-1B-Instruct",
    context_window=3000,
    max_new_tokens=5,
    generate_kwargs={"temperature": 0.1},
)

Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)


# Define the directory for persistent storage and data
PERSIST_DIR = os.path.abspath("./db") # Use absolute path
DATA_DIR = "data" # Keep as relative for now, adjust if needed

# Ensure data directory exists
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(PERSIST_DIR, exist_ok=True)

# ... (descriptions dictionary - unchanged) ...

def data_ingestion():
    print("Starting data ingestion...") # Debug print
    try:
        documents = SimpleDirectoryReader(DATA_DIR).load_data()
        print(f"Loaded {len(documents)} documents.") # Debug print
        storage_context = StorageContext.from_defaults()
        index = VectorStoreIndex.from_documents(documents)
        index.storage_context.persist(persist_dir=PERSIST_DIR)
        print(f"Vector store persisted to: {PERSIST_DIR}") # Debug print
        print("PDF data ingested and vector store created/updated.")
    except Exception as e:
        print(f"Error during data ingestion: {e}") # Error handling
        raise # Re-raise the exception for debugging


def handle_query(query):
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    print(f"Loading vector store from: {PERSIST_DIR}") # Debug print
    index = load_index_from_storage(storage_context) # This is where FileNotFoundError occurs
    # ... (rest of handle_query - unchanged) ...


# Main function to run without UI
def main():
    pdf_file_path = "/content/data/sample.pdf" # Path to your PDF file
    user_question = "What is the main topic of this document?" # Your question

    # Force data ingestion for debugging
    print("Forcing data ingestion for debugging...") # Debug print
    data_ingestion() # Always run data ingestion

    print("Vector store should be created/updated now.") # Debug print

    print(f"Question: {user_question}")
    response = handle_query(user_question)
    print(f"Answer: {response}")


if __name__ == "__main__":
    main()

In [1]:
import numpy as np
import os
import base64
from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate
from llama_index.llms.huggingface import HuggingFaceLLM
from transformers import pipeline
from dotenv import load_dotenv
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings
import torch  # Import torch if you are using torch_dtype

# Load environment variables (optional, if you still need dotenv for other things)
load_dotenv()

# Configure the Llama index settings
Settings.llm = HuggingFaceLLM(
    model_name="meta-llama/Llama-3.2-1B-Instruct",
    tokenizer_name="meta-llama/Llama-3.2-1B-Instruct",
    context_window=3000,
    max_new_tokens=5,  # Reduced max_new_tokens for testing, adjust as needed
    generate_kwargs={"temperature": 0.1},
)

Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)


# Define the directory for persistent storage and data
PERSIST_DIR = os.path.abspath("./db")  # Use absolute path
DATA_DIR = "data"  # Keep as relative for now, adjust if needed

# Ensure data directory exists
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(PERSIST_DIR, exist_ok=True)

# Language descriptions (not used in non-UI version, but kept for reference)
descriptions = {
    "pl": """
    # ChatPDF (Non-UI)
    **ChatPDF** to zaawansowane narzędzie oparte na sztucznej inteligencji, zaprojektowane do analizy i generowania odpowiedzi na pytania związane z treścią załadowanych dokumentów PDF. Działa w trybie wiersza poleceń.
    **Technologie**:
    - Model: Llama-3.2-1B-Instruct (Lokalnie)
    - Stworzony przez: Rafał Dembski
    - Technologie: LlamaIndex, PyTorch, Transformers
    """,
    "en": """
    # ChatPDF (Non-UI)
    **ChatPDF** is an advanced AI-powered tool designed to analyze and generate answers to questions related to the content of uploaded PDF documents. Runs in command-line mode.
    **Technologies**:
    - Model: Llama-3.2-1B-Instruct (Local)
    - Developed by: Rafał Dembski
    - Technologies: LlamaIndex, PyTorch, Transformers
    """,
    "de": """
    # ChatPDF (Non-UI)
    **ChatPDF** ist ein fortschrittliches, KI-gesteuertes Tool, das entwickelt wurde, um Fragen zur Analyse und Beantwortung von Fragen im Zusammenhang mit dem Inhalt hochgeladener PDF-Dokumente zu generieren. Läuft im Kommandozeilenmodus.
    **Technologien**:
    - Modell: Llama-3.2-1B-Instruct (Lokal)
    - Entwickelt von: Rafał Dembski
    - Technologien: LlamaIndex, PyTorch, Transformers
    """
}

def data_ingestion():
    print("Starting data ingestion...")  # Debug print
    try:
        print(f"DATA_DIR is: {DATA_DIR}")  # Debug print
        print(f"Contents of DATA_DIR: {os.listdir(DATA_DIR)}")  # Debug print
        pdf_file_path = os.path.abspath(os.path.join(DATA_DIR, "sample.pdf"))  # Assuming "sample.pdf" in DATA_DIR
        print(f"Attempting to load PDF from path: {pdf_file_path}")  # Debug print
        documents = SimpleDirectoryReader(input_files=[pdf_file_path]).load_data()  # Load specific file
        print(f"Loaded {len(documents)} documents.")  # Debug print
        storage_context = StorageContext.from_defaults()
        index = VectorStoreIndex.from_documents(documents)
        index.storage_context.persist(persist_dir=PERSIST_DIR)
        print(f"Vector store persisted to: {PERSIST_DIR}")  # Debug print
        print("PDF data ingested and vector store created/updated.")
    except Exception as e:
        print(f"Error during data ingestion: {e}")  # Error handling
        raise  # Re-raise the exception for debugging


def handle_query(query):
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    print(f"Loading vector store from: {PERSIST_DIR}")  # Debug print
    index = load_index_from_storage(storage_context)  # This is where FileNotFoundError occurred
    chat_text_qa_msgs = [
        (
            "user",
            """You are a Q&A assistant named ChatPDF. You have a specific response programmed for when users specifically ask about your creator, Suriya. The response is: "I was created by Suriya, an enthusiast in Artificial Intelligence. He is dedicated to solving complex problems and delivering innovative solutions. With a strong focus on machine learning, deep learning, Python, generative AI, NLP, and computer vision, Suriya is passionate about pushing the boundaries of AI to explore new possibilities." For all other inquiries, your main goal is to provide answers as accurately as possible, based on the instructions and context you have been given. If a question does not match the provided context or is outside the scope of the document, kindly advise the user to ask questions within the context of the document.
        Context:
        {context_str}
        Question:
        {query_str}
        """
        )
    ]
    text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)

    query_engine = index.as_query_engine(text_qa_template=text_qa_template)
    answer = query_engine.query(query)

    if hasattr(answer, 'response'):
        return answer.response
    elif isinstance(answer, dict) and 'response' in answer:
        return answer['response']
    else:
        return "Sorry, I couldn't find an answer."


# Main function to run without UI
def main():
    pdf_file_path = "/content/data/sample.pdf"  # Path to your PDF file
    user_question = "What is the main topic of this document?"  # Your question

    # Force data ingestion for debugging - always run it
    print("Forcing data ingestion for debugging...")  # Debug print
    data_ingestion()  # Always run data ingestion
    print("Vector store should be created/updated now.")  # Debug print

    print(f"Question: {user_question}")
    response = handle_query(user_question)
    print(f"Answer: {response}")


if __name__ == "__main__":
    main()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Forcing data ingestion for debugging...
Starting data ingestion...
DATA_DIR is: data
Contents of DATA_DIR: ['sample.pdf']
Attempting to load PDF from path: /content/data/sample.pdf
Loaded 6 documents.
Vector store persisted to: /content/db
PDF data ingested and vector store created/updated.
Vector store should be created/updated now.
Question: What is the main topic of this document?
Loading vector store from: /content/db


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Answer: The main topic of this


In [1]:
import numpy as np
import os
import base64
from llama_index.core import StorageContext, load_index_from_storage, VectorStoreIndex, SimpleDirectoryReader, ChatPromptTemplate
from llama_index.llms.huggingface import HuggingFaceLLM
from transformers import pipeline
from dotenv import load_dotenv
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings
import torch  # Import torch if you are using torch_dtype

# Load environment variables (optional, if you still need dotenv for other things)
load_dotenv()

# Configure the Llama index settings
Settings.llm = HuggingFaceLLM(
    model_name="meta-llama/Llama-3.2-1B-Instruct",
    tokenizer_name="meta-llama/Llama-3.2-1B-Instruct",
    context_window=3000,
    max_new_tokens=55,  # Reduced max_new_tokens for testing, adjust as needed
    generate_kwargs={"temperature": 0.1},
)

Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)


# Define the directory for persistent storage and data
PERSIST_DIR = os.path.abspath("./db")  # Use absolute path
DATA_DIR = "data"  # Keep as relative for now, adjust if needed

# Ensure data directory exists
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(PERSIST_DIR, exist_ok=True)

# Language descriptions (not used in non-UI version, but kept for reference)
descriptions = {
    "pl": """
    # ChatPDF (Non-UI)
    **ChatPDF** to zaawansowane narzędzie oparte na sztucznej inteligencji, zaprojektowane do analizy i generowania odpowiedzi na pytania związane z treścią załadowanych dokumentów PDF. Działa w trybie wiersza poleceń.
    **Technologie**:
    - Model: Llama-3.2-1B-Instruct (Lokalnie)
    - Stworzony przez: Rafał Dembski
    - Technologie: LlamaIndex, PyTorch, Transformers
    """,
    "en": """
    # ChatPDF (Non-UI)
    **ChatPDF** is an advanced AI-powered tool designed to analyze and generate answers to questions related to the content of uploaded PDF documents. Runs in command-line mode.
    **Technologies**:
    - Model: Llama-3.2-1B-Instruct (Local)
    - Developed by: Rafał Dembski
    - Technologies: LlamaIndex, PyTorch, Transformers
    """,
    "de": """
    # ChatPDF (Non-UI)
    **ChatPDF** ist ein fortschrittliches, KI-gesteuertes Tool, das entwickelt wurde, um Fragen zur Analyse und Beantwortung von Fragen im Zusammenhang mit dem Inhalt hochgeladener PDF-Dokumente zu generieren. Läuft im Kommandozeilenmodus.
    **Technologien**:
    - Modell: Llama-3.2-1B-Instruct (Lokal)
    - Entwickelt von: Rafał Dembski
    - Technologien: LlamaIndex, PyTorch, Transformers
    """
}

def data_ingestion():
    print("Starting data ingestion...")  # Debug print
    try:
        print(f"DATA_DIR is: {DATA_DIR}")  # Debug print
        print(f"Contents of DATA_DIR: {os.listdir(DATA_DIR)}")  # Debug print
        pdf_file_path = os.path.abspath(os.path.join(DATA_DIR, "sample.pdf"))  # Assuming "sample.pdf" in DATA_DIR
        print(f"Attempting to load PDF from path: {pdf_file_path}")  # Debug print
        documents = SimpleDirectoryReader(input_files=[pdf_file_path]).load_data()  # Load specific file
        print(f"Loaded {len(documents)} documents.")  # Debug print
        storage_context = StorageContext.from_defaults()
        index = VectorStoreIndex.from_documents(documents)
        index.storage_context.persist(persist_dir=PERSIST_DIR)
        print(f"Vector store persisted to: {PERSIST_DIR}")  # Debug print
        print("PDF data ingested and vector store created/updated.")
    except Exception as e:
        print(f"Error during data ingestion: {e}")  # Error handling
        raise  # Re-raise the exception for debugging


def handle_query(query):
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    print(f"Loading vector store from: {PERSIST_DIR}")  # Debug print
    index = load_index_from_storage(storage_context)  # This is where FileNotFoundError occurred
    chat_text_qa_msgs = [
        (
            "user",
            """You are a Q&A assistant named ChatPDF. You have a specific response programmed for when users specifically ask about your creator, Suriya. The response is: "I was created by Suriya, an enthusiast in Artificial Intelligence. He is dedicated to solving complex problems and delivering innovative solutions. With a strong focus on machine learning, deep learning, Python, generative AI, NLP, and computer vision, Suriya is passionate about pushing the boundaries of AI to explore new possibilities." For all other inquiries, your main goal is to provide answers as accurately as possible, based on the instructions and context you have been given. If a question does not match the provided context or is outside the scope of the document, kindly advise the user to ask questions within the context of the document.
        Context:
        {context_str}
        Question:
        {query_str}
        """
        )
    ]
    text_qa_template = ChatPromptTemplate.from_messages(chat_text_qa_msgs)

    query_engine = index.as_query_engine(text_qa_template=text_qa_template)
    answer = query_engine.query(query)

    if hasattr(answer, 'response'):
        return answer.response
    elif isinstance(answer, dict) and 'response' in answer:
        return answer['response']
    else:
        return "Sorry, I couldn't find an answer."


# Main function to run without UI
def main():
    pdf_file_path = "/content/data/sample.pdf"  # Path to your PDF file
    user_question = "What is the main topic of this document?"  # Your question

    # Force data ingestion for debugging - always run it
    print("Forcing data ingestion for debugging...")  # Debug print
    data_ingestion()  # Always run data ingestion
    print("Vector store should be created/updated now.")  # Debug print

    print(f"Question: {user_question}")
    response = handle_query(user_question)
    print(f"Answer: {response}")


if __name__ == "__main__":
    main()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Forcing data ingestion for debugging...
Starting data ingestion...
DATA_DIR is: data
Contents of DATA_DIR: ['sample.pdf']
Attempting to load PDF from path: /content/data/sample.pdf
Loaded 6 documents.
Vector store persisted to: /content/db
PDF data ingested and vector store created/updated.
Vector store should be created/updated now.
Question: What is the main topic of this document?
Loading vector store from: /content/db


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Answer: The main topic of this document is the German economy and its challenges, specifically focusing on the need for reform, the impact of demographic aging, climate change, and digitalization on the economy.


In [2]:
!pip install faiss-cpu


Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl (30.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m13.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.10.0


In [3]:
!pip install PyMuPDFPro

Collecting PyMuPDFPro
  Downloading pymupdfpro-1.25.3-cp39-abi3-manylinux2014_x86_64.whl.metadata (3.6 kB)
Collecting PyMuPDF==1.25.3 (from PyMuPDFPro)
  Downloading pymupdf-1.25.3-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)
Downloading pymupdfpro-1.25.3-cp39-abi3-manylinux2014_x86_64.whl (13.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.7/13.7 MB[0m [31m28.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pymupdf-1.25.3-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (20.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.0/20.0 MB[0m [31m21.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyMuPDF, PyMuPDFPro
Successfully installed PyMuPDF-1.25.3 PyMuPDFPro-1.25.3


In [5]:
!pip install -U langchain-community

Collecting langchain-community
  Downloading langchain_community-0.3.18-py3-none-any.whl.metadata (2.4 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.8.1-py3-none-any.whl.metadata (3.5 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Downloading langchain_community-0.3.18-py3-none-any.whl (2.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m22.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpx_sse-0.4.0-py3-none-any.whl (7.8 kB)
Downloading pydantic_settings-2.8.1-py3-none-any.whl (30 kB)
Installing collected packages: httpx-sse, pydantic-settings, langchain-community
Successfully installed httpx-sse-0.4.0 langchain-community-0.3.18 pydantic-settings-2.8.1


In [6]:
import fitz  # PyMuPDF - أسرع من PyPDF2
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS  # استخدام faiss-cpu
from langchain.prompts import PromptTemplate
from transformers import pipeline
import torch

# Function to extract text from PDFs (using PyMuPDF for speed)
def extract_pdf_text(pdf_file_paths):
    text = ""
    for pdf_path in pdf_file_paths:
        try:
            doc = fitz.open(pdf_path)
            for page in doc:
                text += page.get_text("text")  # Faster than PyPDF2
        except Exception as e:
            print(f"Error processing PDF file at {pdf_path}: {e}")
            continue
    return text

# Function to split text into optimized chunks
def split_text_into_chunks(text):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=200)
    return text_splitter.split_text(text)

# Function to create vector store using optimized HuggingFace embeddings
def create_and_save_vector_store(text_chunks):
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-MiniLM-L3-v2")  # Lighter model
    vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
    vector_store.save_local("faiss_index")
    print("Vector store created and saved locally.")

# Function to create the conversational prompt template
def create_prompt_template():
    prompt_template = """
    Answer the question as detailed as possible from the provided context.
    If the answer contains any structured data like tables or lists, respond in the same format.
    If the answer is not in the provided context, say: "The answer is not available in the context." Do not make up an answer.

    Context:
    {context}

    Question:
    {question}
    """
    return PromptTemplate(template=prompt_template, input_variables=['context', 'question'])

# Function to handle user queries efficiently
def handle_user_query(user_question):
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-MiniLM-L3-v2")  # Lighter model
    try:
        new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
    except Exception as e:
        print(f"Error loading vector store: {e}. Ensure 'faiss_index' exists and is correctly created.")
        return

    docs = new_db.similarity_search(user_question)
    context = "\n\n".join([doc.page_content for doc in docs])
    prompt = create_prompt_template()
    formatted_prompt = prompt.format(context=context, question=user_question)

    # Use a smaller and optimized model for text generation
    generator = pipeline(
        'text-generation',
        model='meta-llama/Llama-3.2-1B-Instruct',  # Faster model for CPU
        torch_dtype=torch.float32,
        device='cpu'
    )

    response = generator(formatted_prompt, max_new_tokens=5, num_return_sequences=1)
    reply_text = response[0]['generated_text'] if response else "No response generated."

    print("Question:", user_question)
    print("Reply:", reply_text)

# Main function
def main():
    pdf_file_paths = [
        "/content/data/sample.pdf",  # Replace with actual PDF file paths
    ]
    user_question = "What is the main topic of these documents?"

    # Process PDF and create vector store if needed
    if not os.path.exists("faiss_index"):
        print("Processing PDF and creating vector store...")
        raw_text = extract_pdf_text(pdf_file_paths)
        if raw_text:
            text_chunks = split_text_into_chunks(raw_text)
            create_and_save_vector_store(text_chunks)
        else:
            print("No text extracted from PDFs. Skipping vector store creation.")
            return
    else:
        print("Vector store already exists. Loading existing store.")

    handle_user_query(user_question)

if __name__ == "__main__":
    main()


Processing PDF and creating vector store...


  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-MiniLM-L3-v2")  # Lighter model
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/4.04k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/69.6M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/314 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling%2Fconfig.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Vector store created and saved locally.


Device set to use cpu
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Question: What is the main topic of these documents?
Reply: 
    Answer the question as detailed as possible from the provided context.
    If the answer contains any structured data like tables or lists, respond in the same format.
    If the answer is not in the provided context, say: "The answer is not available in the context." Do not make up an answer.

    Context:
    von einer Stagnation der Erwerbstätigkeit aus. Einem Be-
schäftigungsaufbau im Bereich der sozialen Dienstleistun-
gen dürfte dabei zunächst ein weiterer Abbau der Beschäf-
tigung in den produzierenden Branchen gegenüberstehen. 
Angesichts eines zunehmenden qualifikatorischen Mis-
matches zwischen Arbeitsangebot und -nachfrage dürften 
die Beschäftigungsperspektiven für Arbeitslose jedoch 
weiterhin schwierig bleiben, so dass die Arbeitslosigkeit im 
Jahresdurchschnitt voraussichtlich um 120.000 Personen 
steigt. 
Bei den Verbraucherpreisen rechnet die Bundesregierung 
für dieses Jahr mit einer Inflationsrate von 2

In [1]:
import fitz  # PyMuPDF - أسرع من PyPDF2
from langchain.text_splitter import RecursiveCharacterTextSplitter
import os
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS  # استخدام faiss-cpu
from langchain.prompts import PromptTemplate
from transformers import pipeline
import torch

# Function to extract text from PDFs (using PyMuPDF for speed)
def extract_pdf_text(pdf_file_paths):
    text = ""
    for pdf_path in pdf_file_paths:
        try:
            doc = fitz.open(pdf_path)
            for page in doc:
                text += page.get_text("text")  # Faster than PyPDF2
        except Exception as e:
            print(f"Error processing PDF file at {pdf_path}: {e}")
            continue
    return text

# Function to split text into optimized chunks
def split_text_into_chunks(text):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=200)
    return text_splitter.split_text(text)

# Function to create vector store using optimized HuggingFace embeddings
def create_and_save_vector_store(text_chunks):
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-MiniLM-L3-v2")  # Lighter model
    vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
    vector_store.save_local("faiss_index")
    print("Vector store created and saved locally.")

# Function to create the conversational prompt template
def create_prompt_template():
    prompt_template = """
    Answer the question as detailed as possible from the provided context.
    If the answer contains any structured data like tables or lists, respond in the same format.
    If the answer is not in the provided context, say: "The answer is not available in the context." Do not make up an answer.

    Context:
    {context}

    Question:
    {question}
    """
    return PromptTemplate(template=prompt_template, input_variables=['context', 'question'])

# Function to handle user queries efficiently
def handle_user_query(user_question):
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-MiniLM-L3-v2")  # Lighter model
    try:
        new_db = FAISS.load_local("faiss_index", embeddings, allow_dangerous_deserialization=True)
    except Exception as e:
        print(f"Error loading vector store: {e}. Ensure 'faiss_index' exists and is correctly created.")
        return

    docs = new_db.similarity_search(user_question)
    context = "\n\n".join([doc.page_content for doc in docs])
    prompt = create_prompt_template()
    formatted_prompt = prompt.format(context=context, question=user_question)

    # Use a smaller and optimized model for text generation
    generator = pipeline(
        'text-generation',
        model='meta-llama/Llama-3.2-1B-Instruct',  # Faster model for CPU
        torch_dtype=torch.float32,
        device='cpu'
    )

    response = generator(formatted_prompt, max_new_tokens=55, num_return_sequences=1)
    reply_text = response[0]['generated_text'] if response else "No response generated."

    print("Question:", user_question)
    print("Reply:", reply_text)

# Main function
def main():
    pdf_file_paths = [
        "/content/data/sample.pdf",  # Replace with actual PDF file paths
    ]
    user_question = "What is the main topic of these documents?"

    # Process PDF and create vector store if needed
    if not os.path.exists("faiss_index"):
        print("Processing PDF and creating vector store...")
        raw_text = extract_pdf_text(pdf_file_paths)
        if raw_text:
            text_chunks = split_text_into_chunks(raw_text)
            create_and_save_vector_store(text_chunks)
        else:
            print("No text extracted from PDFs. Skipping vector store creation.")
            return
    else:
        print("Vector store already exists. Loading existing store.")

    handle_user_query(user_question)

if __name__ == "__main__":
    main()


Vector store already exists. Loading existing store.


  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-MiniLM-L3-v2")  # Lighter model
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Device set to use cpu
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Question: What is the main topic of these documents?
Reply: 
    Answer the question as detailed as possible from the provided context.
    If the answer contains any structured data like tables or lists, respond in the same format.
    If the answer is not in the provided context, say: "The answer is not available in the context." Do not make up an answer.

    Context:
    von einer Stagnation der Erwerbstätigkeit aus. Einem Be-
schäftigungsaufbau im Bereich der sozialen Dienstleistun-
gen dürfte dabei zunächst ein weiterer Abbau der Beschäf-
tigung in den produzierenden Branchen gegenüberstehen. 
Angesichts eines zunehmenden qualifikatorischen Mis-
matches zwischen Arbeitsangebot und -nachfrage dürften 
die Beschäftigungsperspektiven für Arbeitslose jedoch 
weiterhin schwierig bleiben, so dass die Arbeitslosigkeit im 
Jahresdurchschnitt voraussichtlich um 120.000 Personen 
steigt. 
Bei den Verbraucherpreisen rechnet die Bundesregierung 
für dieses Jahr mit einer Inflationsrate von 2

https://huggingface.co/spaces/Raijin-ASR/RAG-chat-pdf/blob/main/app.py

https://huggingface.co/spaces/rafaldembski/PDF-CHATBOT/blob/main/app.py