<a href="https://colab.research.google.com/github/yosefshatila00/capstone_project/blob/main/capstone.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [20]:
!pip install -U \
  langchain \
  langchain-community \
  langchain-core \
  langchain-text-splitters \
  langchain-groq \
  sentence-transformers \
  pinecone==3.0.3 \
  gradio




In [21]:
import os
from google.colab import userdata
# Read the API keys from the Colab secrets store (secret names: 'groq' and
# 'pinecone') so that no credential is hard-coded in the notebook.
groq_api = userdata.get('groq')
pinecone_api = userdata.get('pinecone')

# Publish both keys as environment variables in a single step.
os.environ.update(
    {
        "GROQ_API_KEY": groq_api,
        "PINECONE_API_KEY": pinecone_api,
    }
)

In [22]:
from langchain_groq import ChatGroq

# Shared Groq chat model used by the RAG chain and by the simplify/quiz helpers.
llm = ChatGroq(temperature=0.7, model="llama-3.1-8b-instant")


In [23]:
from pinecone import Pinecone, ServerlessSpec
import os

import time  # only needed for the readiness poll below

INDEX_NAME = "study-assistant"

pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

existing_indexes = [i["name"] for i in pc.list_indexes()]

# Create the index only on the first run so that re-running the cell is safe.
if INDEX_NAME not in existing_indexes:
    pc.create_index(
        name=INDEX_NAME,
        dimension=384,  # all-MiniLM-L6-v2
        metric="cosine",
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1"
        )
    )

# A freshly created index is not usable immediately: poll until Pinecone
# reports it as ready, but give up after two minutes instead of hanging forever.
_ready_deadline = time.monotonic() + 120
while not pc.describe_index(INDEX_NAME).status["ready"]:
    if time.monotonic() > _ready_deadline:
        raise TimeoutError(
            f"Pinecone index '{INDEX_NAME}' was not ready after 120 seconds"
        )
    time.sleep(1)

index = pc.Index(INDEX_NAME)
print("Pinecone index ready")


Pinecone index ready


In [24]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Pinecone as LangchainPinecone
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.messages import HumanMessage


# Sentence-transformers model used to embed note chunks and questions.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

def ingest_notes(text, chunk_size=500, chunk_overlap=50):
    """Split the notes into overlapping chunks and store them in Pinecone.

    Args:
        text: The raw notes as a single string.
        chunk_size: ``chunk_size`` passed to ``RecursiveCharacterTextSplitter``.
            Defaults to 500, the value that used to be hard-coded.
        chunk_overlap: ``chunk_overlap`` passed to the splitter.
            Defaults to 50, the value that used to be hard-coded.

    Returns:
        The vector store returned by ``LangchainPinecone.from_documents`` for
        the index named ``INDEX_NAME``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap
    )

    docs = splitter.create_documents([text])

    # Embeds every chunk with the module-level `embeddings` model and upserts
    # the vectors into the existing Pinecone index.
    vectorstore = LangchainPinecone.from_documents(
        docs,
        embeddings,
        index_name=INDEX_NAME
    )

    return vectorstore

In [25]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

def format_docs(docs):
    """Concatenate the ``page_content`` of each document, separated by a blank line."""
    chunks = [doc.page_content for doc in docs]
    separator = "\n\n"
    return separator.join(chunks)

def create_rag_chain(vectorstore):
    """Build the retrieval-augmented question-answering chain.

    The chain takes a question string, retrieves the 4 nearest chunks from
    ``vectorstore``, fills the prompt with them, calls the module-level ``llm``
    and parses the reply to a plain string.
    """
    template = """You are a helpful study assistant.
Use ONLY the context below to answer the question.
If the answer is not in the context, say "I don't know."

Context:
{context}

Question:
{question}

Answer:"""
    prompt = ChatPromptTemplate.from_template(template)

    retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

    # The incoming question is both sent to the retriever (to build the
    # context) and forwarded unchanged to the prompt.
    chain_inputs = {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }

    return chain_inputs | prompt | llm | StrOutputParser()


In [26]:
def simplify_notes(text):
    """Ask the LLM to rewrite ``text`` as simplified study notes and return its reply text."""
    request = HumanMessage(
        content=f"""Simplify the following notes for studying.
Use bullet points and clear language.

Notes:
{text}
"""
    )
    return llm.invoke([request]).content


def generate_quiz(text, n=3):
    """Ask the LLM for ``n`` multiple-choice questions about ``text`` and return its reply text."""
    request = HumanMessage(
        content=f"""Create {n} multiple-choice questions from the text.
Each question should have 4 options (A, B, C, D).
Provide the correct answers at the end.

Text:
{text}
"""
    )
    return llm.invoke([request]).content

In [27]:
import gradio as gr

# Session state shared by the UI callbacks; both stay None until notes are ingested.
vectorstore = rag_chain = None

def upload_notes(notes):
    """UI callback: ingest the pasted notes and rebuild the question-answering chain.

    Args:
        notes: Text taken from the notes textbox.

    Returns:
        A status message for the UI. Blank input is rejected with a hint
        instead of being sent to the embedding model and the vector store.
    """
    global vectorstore, rag_chain

    if not notes or not notes.strip():
        return "Please paste some notes first."

    # Build both objects before publishing them, so a failure while creating
    # the chain cannot leave the globals half-updated.
    new_vectorstore = ingest_notes(notes)
    new_rag_chain = create_rag_chain(new_vectorstore)
    vectorstore, rag_chain = new_vectorstore, new_rag_chain

    return "Notes ingested successfully!"

def qa_ui(question):
    """UI callback: answer ``question`` with the current RAG chain.

    Returns a hint instead of calling the chain when the question is blank or
    when no notes have been ingested yet.
    """
    # Reject blank questions up front: they would only waste a retrieval and an LLM call.
    if not question or not question.strip():
        return "Please enter a question."
    if rag_chain is None:
        return "Please ingest notes first."
    return rag_chain.invoke(question)

def simplify_ui(notes):
    """UI callback: return a simplified version of ``notes``.

    Blank input returns a hint instead of triggering an LLM call.
    """
    if not notes or not notes.strip():
        return "Please paste some notes first."
    return simplify_notes(notes)

def quiz_ui(notes):
    """UI callback: return a multiple-choice quiz generated from ``notes``.

    Blank input returns a hint instead of triggering an LLM call.
    """
    if not notes or not notes.strip():
        return "Please paste some notes first."
    return generate_quiz(notes)


In [28]:
import gradio as gr
# Gradio layout: a narrow left column for pasting notes and launching the study
# tools, and a wide right column showing answers, simplified notes and the quiz.
# The emoji below were stored as mis-decoded bytes (mojibake) and are restored.
with gr.Blocks() as demo:
    gr.Markdown("# 📚 AI Study Assistant")

    with gr.Row():
        # 🔹 SIDEBAR
        with gr.Column(scale=1):
            gr.Markdown("### 📂 Notes")
            notes_input = gr.Textbox(
                label="Paste your notes",
                lines=12
            )
            ingest_btn = gr.Button("📥 Ingest Notes")
            status_output = gr.Textbox(
                label="Status",
                interactive=False
            )

            gr.Markdown("### 🧠 Study Tools")
            simplify_btn = gr.Button("📝 Simplify Notes")
            quiz_btn = gr.Button("🧪 Generate Quiz")

        # 🔹 MAIN CONTENT
        with gr.Column(scale=3):
            gr.Markdown("### 🤖 Ask a Question")
            question_input = gr.Textbox(
                label="Your Question"
            )
            answer_output = gr.Textbox(
                lines=6,
                label="Answer"
            )

            gr.Markdown("### ✨ Simplified Notes")
            simplified_output = gr.Textbox(
                lines=8
            )

            gr.Markdown("### 📋 Quiz")
            quiz_output = gr.Textbox(
                lines=10
            )

    # 🔗 Button bindings
    # Each button reads the notes textbox; the question box is submitted with Enter.
    ingest_btn.click(upload_notes, inputs=notes_input, outputs=status_output)
    simplify_btn.click(simplify_ui, inputs=notes_input, outputs=simplified_output)
    quiz_btn.click(quiz_ui, inputs=notes_input, outputs=quiz_output)
    question_input.submit(qa_ui, inputs=question_input, outputs=answer_output)

demo.launch()


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://0aebd3fd33b53a1c09.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


