In [5]:
# ===============================
# 📚 StudyMate: AI PDF Q&A System (Google Colab)
# ✅ Updated for OpenAI SDK v1.x
# ===============================

# Step 1: Install required packages
!pip install -q faiss-cpu PyMuPDF sentence-transformers openai ipywidgets

# Step 2: Import libraries
import fitz  # PyMuPDF
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from IPython.display import display
import ipywidgets as widgets
from google.colab import files

# Step 3: OpenAI (new SDK) - Import & Initialize
from openai import OpenAI

# Step 3.1: Securely get your OpenAI API key from a hidden prompt
# getpass() does not echo what is typed, so the key does not end up in the
# notebook's saved cell output the way it does with input().
from getpass import getpass

api_key = getpass("Enter your OpenAI API key (sk-...): ").strip()
if not api_key:
    # Fail here rather than on the first API request made much later.
    raise ValueError("No OpenAI API key entered. Re-run this cell and provide a key.")

# Initialize OpenAI client with your API key
client = OpenAI(api_key=api_key)

# Step 4: Upload PDF
uploaded = files.upload()
if not uploaded:
    # Nothing came back from the upload dialog (e.g. it was cancelled);
    # indexing into the empty mapping would raise a bare IndexError.
    raise ValueError("No file was uploaded. Re-run this cell and choose a PDF.")

# Only the first uploaded file is used; any additional files are ignored.
pdf_path = next(iter(uploaded))
print(f"✅ PDF uploaded: {pdf_path}")

# Step 5: Extract text from PDF
def extract_text_from_pdf(file_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        file_path: Path of the PDF file; passed straight to ``fitz.open``.

    Returns:
        str: The ``page.get_text()`` output of each page, joined with no
        separator between pages.
    """
    # The context manager closes the document when the block exits, including
    # when a page raises while being read.
    with fitz.open(file_path) as doc:
        # join() builds the result once instead of re-copying a growing string.
        return "".join(page.get_text() for page in doc)

text = extract_text_from_pdf(pdf_path)
if not text.strip():
    # With no words there are no chunks to embed, and the indexing step
    # further down cannot build an index from zero chunks. Stop here with a
    # message that points at the likely cause.
    raise ValueError(
        "No extractable text found in the PDF (it may be scanned or image-only)."
    )
print(f"✅ Extracted {len(text)} characters from PDF.")

# Step 6: Chunk text into smaller pieces
def chunk_text(text, chunk_size=500, overlap=0):
    """Split text into chunks of whitespace-separated words.

    Args:
        text: The text to split. Words are whatever ``str.split()`` yields.
        chunk_size: Maximum number of words per chunk; must be positive.
        overlap: Number of words shared between consecutive chunks. The
            default of 0 gives back-to-back chunks with no shared words.

    Returns:
        list[str]: Chunks with their words re-joined by single spaces. An
        empty or whitespace-only ``text`` gives an empty list.

    Raises:
        ValueError: If ``chunk_size`` is not positive, or ``overlap`` is
            negative or not smaller than ``chunk_size``.
    """
    if chunk_size <= 0:
        # A negative step would make range() empty and silently drop all text.
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    words = text.split()
    step = chunk_size - overlap
    chunks = []
    for start in range(0, len(words), step):
        chunks.append(' '.join(words[start:start + chunk_size]))
        # Once a chunk reaches the last word, any further window would only
        # repeat words that are already covered.
        if start + chunk_size >= len(words):
            break
    return chunks

chunks = chunk_text(text)

# Step 7: Embed chunks and create FAISS index
# NOTE(review): the encoder presumably truncates inputs longer than its
# maximum sequence length, so the tail of a 500-word chunk may not influence
# its embedding. Confirm against the model card before tuning chunk_size.
model = SentenceTransformer('all-MiniLM-L6-v2')
embeddings = model.encode(chunks, convert_to_numpy=True)
embeddings = embeddings.astype(np.float32)

# Flat L2 index whose dimensionality matches the embedding width
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

print(f"✅ FAISS index created with {len(chunks)} chunks.")

# Step 8: Define the question answering function using new OpenAI SDK
def answer_question(question, top_k=3):
    """Answer a question using the most similar PDF chunks as context.

    The question is embedded with the module-level ``model``, its nearest
    neighbours are looked up in the module-level FAISS ``index``, and the
    matching entries of ``chunks`` are sent to the chat model as context.

    Args:
        question: The user's question text.
        top_k: Maximum number of chunks to retrieve (default 3). Capped at
            the number of available chunks.

    Returns:
        str: The model's reply with surrounding whitespace stripped, or an
        empty string if the reply carried no text content.

    Raises:
        ValueError: If there are no chunks to search or ``top_k`` is not
            positive.
    """
    # Never request more neighbours than there are chunks: FAISS fills the
    # missing slots with label -1, and chunks[-1] would silently repeat the
    # last chunk in the context.
    k = min(top_k, len(chunks))
    if k <= 0:
        raise ValueError(
            "Nothing to search: top_k must be positive and at least one chunk must be indexed."
        )

    # Embed the question
    q_embedding = model.encode([question], convert_to_numpy=True).astype('float32')
    D, I = index.search(q_embedding, k)

    # Get relevant chunks (skip any -1 padding labels defensively)
    relevant_chunks = [chunks[int(i)] for i in I[0] if i >= 0]
    context = "\n".join(relevant_chunks)

    # Prepare prompt
    messages = [
        {"role": "system", "content": "You are a helpful study assistant. Answer based on the context."},
        {"role": "user", "content": f"Context:\n{context}\n\nQuestion:\n{question}"}
    ]

    # Call GPT using new SDK
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
        max_tokens=300,
        temperature=0.3
    )

    # The message content can be None; treat that as an empty answer instead
    # of failing on None.strip().
    content = response.choices[0].message.content
    return (content or "").strip()

# Step 9: Interactive Q&A Widget
q_box = widgets.Text(
    value='',
    placeholder='Ask a question about the uploaded PDF...',
    description='Question:',
    # Update `value` only on Enter / focus loss. With the default (True) the
    # value changes on every keystroke, and the observer registered on this
    # widget would send one API request per typed character.
    continuous_update=False,
    layout=widgets.Layout(width='100%')
)

# Answers and errors are rendered into this output area.
output_box = widgets.Output()

def handle_question(change):
    """Widget callback: answer the newly entered text and show the result.

    Args:
        change: Change dict passed by the widget observer; its ``'new'``
            entry holds the current text of the question box.
    """
    with output_box:
        # Start from a clean output area on every change.
        output_box.clear_output()
        asked = change['new']
        if asked.strip() == "":
            # Blank input: leave the output area empty.
            return
        print("🤖 Thinking...")
        try:
            reply = answer_question(asked)
            print(f"\n✅ Answer:\n{reply}")
        except Exception as err:
            # Show the failure in the output area instead of raising from the callback.
            print(f"❌ Error: {err}")

# Call handle_question whenever the text box's `value` changes
q_box.observe(handle_question, 'value')

# Render the question box first, then the answer area beneath it
display(q_box, output_box)



Enter your OpenAI API key (sk-...): sk-...5ZMA


Saving mc-web-mech2-11-2009.pdf to mc-web-mech2-11-2009 (3).pdf
✅ PDF uploaded: mc-web-mech2-11-2009 (3).pdf
✅ Extracted 3523 characters from PDF.
✅ FAISS index created with 2 chunks.


Text(value='', description='Question:', layout=Layout(width='100%'), placeholder='Ask a question about the upl…

Output()