In [19]:
import os
import textwrap
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from PyPDF2 import PdfReader

# 1. Document Ingestion: Load PDF, TXT, or MD
def load_text(file_path):
    """Read a PDF, plain-text, or Markdown file and return its text.

    The format is chosen from the file name's suffix, compared
    case-insensitively so that ``REPORT.PDF`` and ``notes.TXT`` are accepted.

    Args:
        file_path: Path to the document, as a ``str`` or ``os.PathLike``.

    Returns:
        The document text as a single string. For PDFs, the text of every
        page that yielded any text, joined with newlines.

    Raises:
        ValueError: If the suffix is not ``.pdf``, ``.txt``, or ``.md``.
    """
    path = os.fspath(file_path)
    lowered = path.lower()

    if lowered.endswith(".pdf"):
        reader = PdfReader(path)
        # Extract each page exactly once; extraction is the expensive step,
        # so the result is reused for both the emptiness filter and the join.
        page_texts = (page.extract_text() for page in reader.pages)
        return "\n".join(page_text for page_text in page_texts if page_text)

    if lowered.endswith((".txt", ".md")):
        with open(path, "r", encoding="utf-8") as f:
            return f.read()

    raise ValueError("Unsupported file format. Use PDF, TXT, or Markdown.")

# 2. Chunking into semantic segments
def semantic_chunking(text, chunk_size=512, chunk_overlap=100):
    """Split text into overlapping chunks for embedding.

    Args:
        text: The full document text.
        chunk_size: Maximum size of each chunk, passed straight to
            ``RecursiveCharacterTextSplitter``. Defaults to 512.
        chunk_overlap: Amount of content shared between consecutive chunks,
            passed straight to the splitter. Defaults to 100.

    Returns:
        The list of chunk strings produced by the splitter's ``split_text``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_text(text)

# 3. Embedding + Vector Store (FAISS)
def build_vector_index(chunks):
    """Embed the text chunks and index them in a FAISS vector store.

    Args:
        chunks: Iterable of text chunks to index.

    Returns:
        The vector store built by ``FAISS.from_texts`` over ``chunks``, using
        the ``sentence-transformers/all-MiniLM-L6-v2`` embedding model.

    Raises:
        ValueError: If ``chunks`` is empty.
    """
    chunks = list(chunks)
    if not chunks:
        # Fail early with a clear message, before paying for the embedding
        # model load; an empty input cannot produce a usable index anyway.
        raise ValueError("Cannot build a vector index from an empty list of chunks.")

    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return FAISS.from_texts(chunks, embedding=embeddings)

# 4. Summary Generation (BART, Free model)
_BART_MODEL_ID = "facebook/bart-large-cnn"

# Loaded (tokenizer, model) pairs keyed by model id, so repeated summaries in
# one session do not re-instantiate the model on every call.
_BART_CACHE = {}


def _load_bart(model_id=_BART_MODEL_ID):
    """Return the ``(tokenizer, model)`` pair for ``model_id``, loading it once."""
    if model_id not in _BART_CACHE:
        _BART_CACHE[model_id] = (
            AutoTokenizer.from_pretrained(model_id),
            AutoModelForSeq2SeqLM.from_pretrained(model_id),
        )
    return _BART_CACHE[model_id]


def summarize_with_bart(context):
    """Summarize ``context`` with the ``facebook/bart-large-cnn`` model.

    Args:
        context: Text to summarize. It is truncated to 1024 tokens by the
            tokenizer call, so anything beyond that is ignored.

    Returns:
        The decoded summary string, or ``""`` when ``context`` is empty or
        whitespace-only (there is nothing to summarize, and ``min_length=64``
        would otherwise force the model to invent text).
    """
    if not context or not context.strip():
        return ""

    tokenizer, model = _load_bart()

    inputs = tokenizer(context, return_tensors="pt", truncation=True, max_length=1024)
    summary_ids = model.generate(
        inputs["input_ids"],
        # Pass the mask explicitly rather than leaving generate() to infer it.
        attention_mask=inputs["attention_mask"],
        max_length=256,
        min_length=64,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# 5. Pretty Print Helper
def print_wrapped(title, text, width=100):
    """Print ``text`` word-wrapped to ``width`` columns under a titled rule.

    The output is a blank line, the title (indented by one space), a dashed
    rule ``width`` characters long, the wrapped text, and a closing rule.
    """
    rule = "-" * width
    print(f"\n {title}\n{rule}")
    print(textwrap.fill(text, width=width))
    print(rule)

# 6. Full RAG Flow
def offline_rag_summary(file_path):
    """Run the full load -> chunk -> index -> retrieve -> summarize flow.

    Progress messages, the retrieved context, and the final summary are
    printed to stdout; nothing is returned.

    Args:
        file_path: Path to a PDF, TXT, or Markdown document.

    Raises:
        ValueError: If the file format is unsupported (raised by
            ``load_text``) or the document yields no text chunks.
    """
    print(" Loading document...")
    text = load_text(file_path)

    print("✂️ Chunking into semantic blocks...")
    chunks = semantic_chunking(text)
    if not chunks:
        # E.g. a scanned PDF with no text layer: nothing to index or summarize.
        raise ValueError(f"No text could be extracted from {file_path!r}.")

    print(" Building FAISS vector index...")
    vector_store = build_vector_index(chunks)

    print(" Retrieving top relevant chunks...")
    # The number of results must go through search_kwargs; a bare k=...
    # keyword on as_retriever is not a search parameter.
    retriever = vector_store.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 5},
    )
    docs = retriever.get_relevant_documents("Summarize this document")
    context = " ".join(doc.page_content for doc in docs)

    print(" Generating summary using BART (free)...")
    summary = summarize_with_bart(context)

    print_wrapped("Retrieved Context", context)
    print_wrapped("Final Summary", summary)

# ▶️ Run
# Summarize the PDF sample; the path is relative to the working directory.
offline_rag_summary("global_warming.pdf")


 Loading document...
✂️ Chunking into semantic blocks...
 Building FAISS vector index...
 Retrieving top relevant chunks...
 Generating summary using BART (free)...

 Retrieved Context
----------------------------------------------------------------------------------------------------
Review, 100(2), 52 -59. fully understand the implications of global warming or consider it a
significant problem for the future. However, global  warming is already happening, and some of its
devastating consequences are already being felt. It significantly impacts  biodiversity and disrupts
ecological balance. Due to the dangerous effects of global warming, many strategies need to  be
established. The report discusses global warming, outlines its causes and risks, and proposes
solutions to this urgent driving it. The rapid increase in greenhouse gases is problematic, as it
affects the environment faster than many living  organisms can a dapt. This changing and
increasingly complex world presents signific

In [20]:
# Same pipeline on a plain-text input (exercises the .txt branch of load_text).
offline_rag_summary("illiteracy.txt")

 Loading document...
✂️ Chunking into semantic blocks...
 Building FAISS vector index...
 Retrieving top relevant chunks...
 Generating summary using BART (free)...

 Retrieved Context
----------------------------------------------------------------------------------------------------
interpret a table about blood pressure, age, and physical activity; or compute and compare the cost
per ounce of food items. that countries with lower levels of functional illiteracy among their adult
populations tend to be those with the highest levels of scientific literacy among the lower stratum
of young people nearing the end of their formal academic studies. This correspondence suggests that
the capacity of schools to ensure students attain the functional literacy required to comprehend the
basic texts and documents associated with competent citizenship contributes to a society's level of
civic literacy.[3] The National Center for Education Statistics provides more detail.[10] Literacy
is broken dow

In [21]:
# Same pipeline on a Markdown input (exercises the .md branch of load_text).
offline_rag_summary("future_of_ai.md")

 Loading document...
✂️ Chunking into semantic blocks...
 Building FAISS vector index...
 Retrieving top relevant chunks...
 Generating summary using BART (free)...

 Retrieved Context
----------------------------------------------------------------------------------------------------
## Conclusion  AI will undoubtedly shape the future of our society. However, it is essential to
address its challenges proactively and ensure its development benefits all of humanity. ## Key
Developments  - **Machine Learning & Deep Learning:** These have enabled computers to learn from
data and make decisions with minimal human intervention. - **Natural Language Processing:** With
advancements in NLP, machines can now understand, generate, and translate human languages more
effectively. - **Computer Vision:** AI-powered systems can interpret visual inputs, aiding in facial
recognition, medical imaging, and autonomous vehicles.  ## Ethical Considerations # The Future of
Artificial Intelligence  Artificial