In [1]:
import os

import torch
from dotenv import load_dotenv
from huggingface_hub import login
from langchain.document_loaders import TextLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from transformers import pipeline

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load environment variables (a local .env file is read if one exists) and
# fetch the Hugging Face access token that the login cell needs.
load_dotenv()
huggingface_token = os.getenv("HUGGINGFACE_TOKEN")

if not huggingface_token:
    # Raise instead of calling exit(): in a notebook exit() asks the kernel to
    # shut down, which throws away all state and shows no traceback. An
    # exception stops "Run All" at this cell and keeps the kernel alive.
    raise RuntimeError("HUGGINGFACE_TOKEN not found in .env file. Please add it.")

In [3]:
# Log in to Hugging Face with the token read from the environment.
try:
    login(token=huggingface_token)
except Exception as e:
    # Re-raise with context instead of print + exit(): exit() would shut the
    # notebook kernel down, and printing alone hides the original traceback.
    raise RuntimeError(f"Error logging in to Hugging Face: {e}") from e

print("Successfully logged in to Hugging Face!")

Successfully logged in to Hugging Face!


In [4]:
# Step 1 - Load data.
# Point the loader at the source text file (swap in your own file here),
# then read it into the `documents` list used by the splitting step.
loader = TextLoader(file_path="sample_data.txt")
documents = loader.load()


In [5]:
# Step 2 - Split text into chunks.
# Chunks are capped at 1000 characters and consecutive chunks share 100
# characters of overlap.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)
chunks = text_splitter.split_documents(documents)


In [6]:
# Step 3 - Create embeddings.
# Sentence-transformers model used to embed both the chunks and the queries.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
)

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")


In [7]:
# Step 4 - Create vector database.
# Embed every chunk and index it in a Chroma store for similarity search.
# NOTE(review): no persist_directory is passed, so the index presumably lives
# only in memory; re-running this cell may add the same chunks again depending
# on the Chroma version - confirm before relying on re-runs.
vectordb = Chroma.from_documents(documents=chunks, embedding=embeddings)

In [8]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
# (`torch` is imported in the notebook's top import cell.)
device = "cuda" if torch.cuda.is_available() else "cpu"

# Single source of truth for the checkpoint so that the model and tokenizer
# arguments cannot drift apart when the model is swapped.
QA_MODEL_NAME = "bert-large-uncased-whole-word-masking-finetuned-squad"

# Question-answering pipeline used by the retrieval helper defined below.
question_answerer = pipeline(
    "question-answering",
    model=QA_MODEL_NAME,
    tokenizer=QA_MODEL_NAME,
    device=device,
)

Some weights of the model checkpoint at bert-large-uncased-whole-word-masking-finetuned-squad were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Device set to use cpu


In [9]:
def ask_llm_with_context(question: str, k: int = 3) -> None:
    """
    Answer a question with the question-answering pipeline, using retrieved chunks as context.

    The `k` chunks most similar to `question` are fetched from the module-level
    `vectordb`, their text is joined with single spaces into one context string,
    and that context is passed together with the question to the module-level
    `question_answerer` pipeline. The question, the answer and the context are
    printed; nothing is returned.

    Args:
        question: The question to answer.
        k: Number of chunks to retrieve. Defaults to 3, the value that used to
            be hard-coded.

    Raises:
        ValueError: If `k` is smaller than 1.
    """
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")

    # Search the vector database for the chunks most relevant to the question
    relevant_chunks = vectordb.similarity_search(question, k=k)

    # Merge the text of the retrieved chunks into a single context string
    context = " ".join(doc.page_content for doc in relevant_chunks)

    if not context.strip():
        # Nothing was retrieved (e.g. an empty index): skip the model call,
        # which would have no text to extract an answer from.
        print(f"Question: {question}")
        print("Answer: (no relevant context found in the vector database)")
        return

    # Prepare the input for the question-answering pipeline
    qa_input = {
        "question": question,
        "context": context
    }

    # Get the answer from the pipeline
    result = question_answerer(qa_input)

    print(f"Question: {question}")
    print(f"Answer: {result['answer']}")
    print(f"\nContext:\n{context}")

In [10]:
# Demo: send a few sample questions through the retrieval + QA helper,
# printing a divider line between consecutive answers.
sample_questions = (
    "When was the Eiffel Tower constructed?",
    "Who was the Eiffel Tower named after?",
    "How tall is the Eiffel Tower?",
)

for position, sample_question in enumerate(sample_questions):
    if position > 0:
        print("\n---")
    ask_llm_with_context(sample_question)



Question: When was the Eiffel Tower constructed?
Answer: 1887 to 1889

Context:
The Eiffel Tower is a wrought-iron lattice tower on the Champ de Mars in Paris, France.
It is named after the engineer Gustave Eiffel, whose company designed and built the tower.
Constructed from 1887 to 1889 as the centerpiece of the 1889 World's Fair, it was initially criticized by some of France's leading artists and intellectuals for its design, but it has become a global cultural icon of France and one of the most recognizable structures in the world.
The tower is 330 metres (1,083 ft) tall, about the same height as an 81-story building. Its base is square, measuring 125 metres (410 ft) on each side. During its construction, the Eiffel Tower surpassed the Washington Monument to become the tallest man-made structure in the world, a title it held for 41 years until the Chrysler Building in New York City was finished in 1930. Due to the addition of a broadcasting aerial at the top in 1957, it is now talle