In [3]:
import streamlit as st
import fitz  # PyMuPDF instead of pypdf
import io
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import re
from datetime import datetime, timedelta
import json
import hashlib
from typing import List, Dict, Tuple
import time

# Page-level Streamlit settings: wide layout with the sidebar expanded,
# plus the title and icon shown for the page.
st.set_page_config(
    layout="wide",
    initial_sidebar_state="expanded",
    page_title="StudyMate - AI PDF Q&A Assistant",
    page_icon="📚",
)

class StudyMate:
    """Back end of the StudyMate app: PDF extraction plus LLM-driven helpers.

    The class loads an IBM Granite instruct model once per process, extracts
    text and heading-like "sections" from an uploaded PDF, and turns that text
    into answers, study plans, flashcards, challenges and section summaries.
    Results that must survive Streamlit reruns live in ``st.session_state``.
    """

    MODEL_NAME = "ibm-granite/granite-3.3-2b-instruct"

    # Only this many leading characters of the context are sent to the model.
    MAX_CONTEXT_CHARS = 2000

    # Instruction appended to every prompt; also used to cut an echoed prompt
    # out of plain-string pipeline output.
    _ANSWER_INSTRUCTION = (
        "Please provide a helpful and accurate answer based on the context "
        "provided. Be concise but informative."
    )

    # A line starting with "1." / "2.3" style numbering is treated as a heading.
    _HEADING_NUMBER_RE = re.compile(r'^\d+\.')
    _HEADING_KEYWORDS = ('chapter', 'section', 'introduction', 'conclusion')

    # "Label: value" with optional markdown decoration, e.g. "**Term:** x",
    # "1. Term: x", "- **Term**: x".
    _FIELD_RE = re.compile(
        r'^(?:[-*•#>]+\s*)?(?:\d+[.)]\s*)?\**\s*'
        r'(?P<label>[A-Za-z]+)\s*\**\s*:\s*\**\s*(?P<value>.*)$'
    )

    def __init__(self):
        # Warm the model cache up front so the first question is not the one
        # that pays for the model load.
        self.setup_model()
        self.initialize_session_state()

    @staticmethod
    @st.cache_resource(
        show_spinner="Loading AI model... This may take a moment on first run."
    )
    def _load_model():
        """Load tokenizer, model and generation pipeline; raise on failure.

        Kept separate from ``setup_model`` so that a failed load raises out of
        the cached function instead of returning a value that would be cached.
        """
        use_cuda = torch.cuda.is_available()
        tokenizer = AutoTokenizer.from_pretrained(StudyMate.MODEL_NAME)
        model = AutoModelForCausalLM.from_pretrained(
            StudyMate.MODEL_NAME,
            torch_dtype=torch.float16 if use_cuda else torch.float32,
            device_map="auto" if use_cuda else None,
        )
        pipe = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            max_new_tokens=512,
            do_sample=True,
            temperature=0.7,
            pad_token_id=tokenizer.eos_token_id,
        )
        return tokenizer, model, pipe

    def setup_model(self):
        """Return the cached ``(tokenizer, model, pipeline)`` triple.

        Returns:
            The loaded objects, or ``(None, None, None)`` when loading failed.
            A failure is reported with ``st.error`` and is NOT cached, so a
            later call retries the load.
        """
        try:
            return self._load_model()
        except Exception as e:  # UI boundary: report instead of crashing the app
            st.error(f"Error loading model: {str(e)}")
            return None, None, None

    def initialize_session_state(self):
        """Create every session-state entry the app relies on, if missing."""
        defaults = {
            'pdf_content': "",
            'pdf_sections': [],
            'flashcards': [],
            'study_plan': {},
            'challenges': [],
            'qa_history': [],
        }
        for key, default in defaults.items():
            if key not in st.session_state:
                st.session_state[key] = default

    @classmethod
    def _looks_like_heading(cls, line: str) -> bool:
        """Heuristic: is this stripped line plausibly a section heading?"""
        if not 10 < len(line) < 100:
            return False
        if line.isupper() or cls._HEADING_NUMBER_RE.match(line):
            return True
        lowered = line.lower()
        return any(keyword in lowered for keyword in cls._HEADING_KEYWORDS)

    @classmethod
    def _find_sections(cls, text: str, page_number: int) -> List[Dict]:
        """Return heading records (title, page, preview) found in one page."""
        found = []
        lines = text.split('\n')
        for i, raw_line in enumerate(lines):
            title = raw_line.strip()
            if not cls._looks_like_heading(title):
                continue
            # Up to four following non-blank lines serve as a short preview.
            following = (candidate.strip() for candidate in lines[i + 1:i + 5])
            context = ' '.join(piece for piece in following if piece)
            found.append({
                'title': title,
                'page': page_number,
                'preview': context[:200] + "..." if len(context) > 200 else context,
            })
        return found

    def extract_pdf_content(self, pdf_file) -> Tuple[str, List[Dict]]:
        """Extract text and heading-like sections from a PDF using PyMuPDF.

        Args:
            pdf_file: Binary file-like object containing the PDF.

        Returns:
            ``(full_text, sections)``. ``full_text`` is every page's text, each
            prefixed with a ``--- Page N ---`` marker. ``sections`` is a list
            of dicts with ``title``, ``page`` and ``preview`` keys. On any
            failure the error is shown with ``st.error`` and ``("", [])`` is
            returned.
        """
        try:
            # The stream may already have been consumed by an earlier read.
            if hasattr(pdf_file, "seek"):
                pdf_file.seek(0)
            pdf_bytes = pdf_file.read()

            page_texts = []
            sections = []
            # The context manager closes the document even if a page fails.
            with fitz.open(stream=pdf_bytes, filetype="pdf") as pdf_document:
                for page_index in range(pdf_document.page_count):
                    page_number = page_index + 1
                    text = pdf_document[page_index].get_text()
                    page_texts.append(f"\n--- Page {page_number} ---\n{text}")
                    sections.extend(self._find_sections(text, page_number))

            return "".join(page_texts), sections

        except Exception as e:
            st.error(f"Error extracting PDF content: {str(e)}")
            return "", []

    @classmethod
    def _extract_answer(cls, generated) -> str:
        """Pull the assistant's text out of a pipeline ``generated_text`` value."""
        if isinstance(generated, list):
            # Chat-style output: the last message is the assistant's reply.
            return generated[-1]['content']
        text = str(generated)
        # Plain-string output may echo the prompt; keep only what follows the
        # final instruction sentence.
        _, marker, tail = text.rpartition(cls._ANSWER_INSTRUCTION)
        return tail if marker else text

    def generate_ai_response(self, prompt: str, context: str = "", max_tokens: int = 400) -> Tuple[str, float]:
        """Answer ``prompt`` with the Granite model and estimate confidence.

        Args:
            prompt: The user's question or task description.
            context: Supporting text; only the first ``MAX_CONTEXT_CHARS``
                characters are sent to the model.
            max_tokens: Upper bound on newly generated tokens.

        Returns:
            ``(answer, confidence)`` with confidence in ``[0, 1]``. When the
            model is unavailable or generation fails, an ``"Error..."``
            message and ``0.0`` are returned instead of raising.
        """
        _, _, pipe = self.setup_model()

        if pipe is None:
            return "Error: AI model not available", 0.0

        try:
            visible_context = context[:self.MAX_CONTEXT_CHARS]
            full_prompt = (
                f"Context: {visible_context}\n\n"
                f"User Question: {prompt}\n\n"
                f"{self._ANSWER_INSTRUCTION}"
            )
            messages = [{"role": "user", "content": full_prompt}]

            response = pipe(messages, max_new_tokens=max_tokens, do_sample=True, temperature=0.3)
            answer = self._extract_answer(response[0]['generated_text']).strip()

            # Score against the context the model actually saw, not the part
            # that was truncated away.
            confidence = self.estimate_confidence(answer, visible_context, prompt)

            return answer, confidence

        except Exception as e:
            return f"Error generating response: {str(e)}", 0.0

    def estimate_confidence(self, answer: str, context: str, question: str) -> float:
        """Heuristically score how trustworthy ``answer`` is.

        Starts at 0.5, then adds 0.2 for grounding phrases in the answer, 0.1
        for a length between 50 and 300 characters, subtracts 0.1 for answers
        under 20 or over 500 characters, and adds up to 0.2 in proportion to
        the share of question words that occur in ``context``.

        Returns:
            A score clamped to ``[0.0, 1.0]``.
        """
        grounding_phrases = ('specifically', 'according to', 'as stated', 'the document mentions')
        confidence_score = 0.5

        answer_lower = answer.lower()
        if any(phrase in answer_lower for phrase in grounding_phrases):
            confidence_score += 0.2

        answer_length = len(answer)
        if 50 < answer_length < 300:
            confidence_score += 0.1
        if answer_length < 20 or answer_length > 500:
            confidence_score -= 0.1

        question_words = set(question.lower().split())
        if question_words:
            context_words = set(context.lower().split())
            overlap = len(question_words & context_words) / len(question_words)
            confidence_score += overlap * 0.2

        return max(0.0, min(1.0, confidence_score))

    def create_time_based_study_plan(self, days: int, content: str) -> Dict:
        """Spread the PDF's detected sections over ``days`` consecutive dates.

        Sections are read from ``st.session_state.pdf_sections`` and handed
        out in order. Every section is assigned: when the count does not
        divide evenly, the earliest days receive one extra section.

        Args:
            days: Number of days to plan, starting today.
            content: Unused; kept for interface compatibility.

        Returns:
            A dict keyed by ``YYYY-MM-DD`` whose values hold ``day``,
            ``sections`` and ``focus``; ``{"error": ...}`` when no sections
            are known; an empty dict when ``days`` is less than 1.
        """
        sections = st.session_state.pdf_sections

        if not sections:
            return {"error": "No sections identified in the PDF"}
        if days < 1:
            return {}

        per_day, leftover = divmod(len(sections), days)

        study_plan = {}
        first_date = datetime.now()
        cursor = 0

        for day in range(days):
            # The first `leftover` days absorb the remainder so nothing is dropped.
            count = per_day + (1 if day < leftover else 0)
            day_sections = sections[cursor:cursor + count]
            cursor += count

            date_str = (first_date + timedelta(days=day)).strftime("%Y-%m-%d")
            study_plan[date_str] = {
                'day': day + 1,
                'sections': day_sections,
                # The last two days are reserved for revision.
                'focus': 'Review and understand' if day < days - 2 else 'Final review and practice',
            }

        return study_plan

    @classmethod
    def _parse_field(cls, line: str, labels) -> Tuple:
        """Split a ``Label: value`` line, tolerating markdown decoration.

        Returns:
            ``(label, value)`` with a lower-cased label when the line starts
            with one of ``labels``; otherwise ``(None, stripped_line)``.
        """
        text = line.strip()
        match = cls._FIELD_RE.match(text)
        if match:
            label = match.group('label').lower()
            if label in labels:
                # Drop bold markers left at the end, e.g. "**Term: x**".
                return label, match.group('value').strip().rstrip('*').strip()
        return None, text

    def generate_flashcards(self, content: str) -> List[Dict]:
        """Ask the model for term/definition pairs and parse them.

        Args:
            content: Document text; the first 3000 characters are used.

        Returns:
            A list of ``{"term": ..., "definition": ...}`` dicts. Empty when
            the response contains no recognizable pairs.
        """
        prompt = f"""Based on the following content, identify important terms, concepts, and definitions that would be good for flashcards. Format each as "Term: Definition" pairs.

Content: {content[:3000]}

Please provide 5-10 flashcard pairs in this format:
Term: [term]
Definition: [definition]
"""

        # The prompt already embeds the content; passing it again as context
        # would only duplicate it in the model input.
        response, _ = self.generate_ai_response(prompt, "", max_tokens=500)

        flashcards = []
        current_term = ""
        current_definition = ""

        for line in response.split('\n'):
            label, value = self._parse_field(line, ("term", "definition"))
            if label == "term":
                if current_term and current_definition:
                    flashcards.append({"term": current_term, "definition": current_definition})
                current_term = value
                current_definition = ""
            elif label == "definition":
                current_definition = value
            elif current_term and not current_definition:
                # Unlabelled line right after a term: treat it as the definition.
                current_definition = value

        if current_term and current_definition:
            flashcards.append({"term": current_term, "definition": current_definition})

        return flashcards

    def generate_learning_challenges(self, content: str) -> List[Dict]:
        """Ask the model for study challenges and parse them.

        Args:
            content: Document text; the first 2000 characters are used.

        Returns:
            A list of dicts with a ``description`` key and, when the model
            supplied them, ``type`` and ``difficulty`` keys.
        """
        prompt = f"""Based on this academic content, create 5 engaging daily/weekly study challenges that are fun and educational. Each challenge should test different aspects of learning.

Content: {content[:2000]}

Format each challenge as:
Challenge: [challenge description]
Type: [daily/weekly]
Difficulty: [easy/medium/hard]
"""

        # The prompt already embeds the content; see generate_ai_response.
        response, _ = self.generate_ai_response(prompt, "", max_tokens=400)

        challenges = []
        current_challenge = {}

        for line in response.split('\n'):
            label, value = self._parse_field(line, ("challenge", "type", "difficulty"))
            if label == "challenge":
                if current_challenge:
                    challenges.append(current_challenge)
                current_challenge = {"description": value}
            elif label and current_challenge:
                # Type/Difficulty lines only count once a challenge has started.
                current_challenge[label] = value

        if current_challenge:
            challenges.append(current_challenge)

        return challenges

    @staticmethod
    def _section_context(section: Dict, content: str, limit: int = 1000) -> str:
        """Return up to ``limit`` characters of ``content`` starting at the section.

        The title is searched for from the section's ``--- Page N ---`` marker
        onward. If it cannot be found, the first ``limit`` characters of the
        document are returned instead.
        """
        title = section.get('title', '')
        page_marker = f"\n--- Page {section.get('page')} ---\n"
        page_start = content.find(page_marker)
        search_from = page_start if page_start != -1 else 0
        title_start = content.find(title, search_from) if title else -1
        if title_start == -1:
            return content[:limit]
        return content[title_start:title_start + limit]

    def summarize_sections(self, sections: List[Dict], content: str) -> List[Dict]:
        """Generate a bullet-point summary for each of the first 10 sections.

        Args:
            sections: Section dicts with ``title`` and ``page`` keys and an
                optional ``preview``.
            content: Full document text, used to find each section's own text.

        Returns:
            A list of dicts with ``title``, ``page``, ``summary`` and
            ``confidence`` keys, in the same order as ``sections``.
        """
        summaries = []

        for section in sections[:10]:  # Cap the number of model calls
            prompt = f"""Summarize the following section in 3-5 bullet points. Be concise and focus on key concepts.

Section: {section['title']}
Content: {section.get('preview', '')}

Provide a clear summary in bullet points."""

            # Give the model the text around this section rather than the
            # opening of the document.
            summary, confidence = self.generate_ai_response(
                prompt,
                self._section_context(section, content),
                max_tokens=200,
            )

            summaries.append({
                'title': section['title'],
                'page': section['page'],
                'summary': summary,
                'confidence': confidence,
            })

        return summaries

def _confidence_markup(label: str, confidence: float) -> str:
    """Return Streamlit markdown showing ``confidence`` as a colored percentage."""
    if confidence > 0.7:
        color = "green"
    elif confidence > 0.4:
        color = "orange"
    else:
        color = "red"
    return f"**{label}:** :{color}[{confidence:.0%}]"


def _process_upload(study_mate, uploaded_file) -> None:
    """Extract a newly uploaded PDF once and store the result in session state."""
    # Streamlit reruns the script on every interaction; fingerprint the file
    # so the PDF is only parsed when its bytes actually change.
    file_key = hashlib.sha256(uploaded_file.getvalue()).hexdigest()

    if st.session_state.get("pdf_file_key") != file_key:
        with st.spinner("Processing PDF..."):
            uploaded_file.seek(0)
            content, sections = study_mate.extract_pdf_content(uploaded_file)

        st.session_state.pdf_content = content
        st.session_state.pdf_sections = sections

        if not content:
            # Extraction failed and already reported the error; forget the
            # fingerprint so the next rerun tries again.
            st.session_state.pop("pdf_file_key", None)
            return

        st.session_state.pdf_file_key = file_key
        # Anything derived from the previous document is now stale.
        st.session_state.qa_history = []
        st.session_state.flashcards = []
        st.session_state.study_plan = {}
        st.session_state.challenges = []
        st.session_state.section_summaries = []

    st.success(f"PDF processed! Found {len(st.session_state.pdf_sections)} sections.")


def _render_qa_chat(study_mate) -> None:
    """Show the chat history and answer a newly entered question."""
    st.header("💬 Ask Questions About Your PDF")

    for qa in st.session_state.qa_history:
        with st.chat_message("user"):
            st.write(qa["question"])
        with st.chat_message("assistant"):
            st.write(qa["answer"])
            # `is not None` so that a confidence of exactly 0 is still shown.
            if qa.get("confidence") is not None:
                st.markdown(_confidence_markup("Confidence", qa["confidence"]))

    user_question = st.chat_input("Ask a question about your PDF content...")
    if not user_question:
        return

    with st.chat_message("user"):
        st.write(user_question)

    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            answer, confidence = study_mate.generate_ai_response(
                user_question,
                st.session_state.pdf_content
            )
            st.write(answer)
            st.markdown(_confidence_markup("AI Confidence", confidence))

            st.session_state.qa_history.append({
                "question": user_question,
                "answer": answer,
                "confidence": confidence
            })


def _render_study_planner(study_mate) -> None:
    """Let the user build and browse a day-by-day study schedule."""
    st.header("📅 Time-Based Study Planner")

    col1, col2 = st.columns([1, 1])
    with col1:
        days_until_exam = st.number_input("Days until exam:", min_value=1, max_value=365, value=7)
    with col2:
        if st.button("Create Study Plan", type="primary"):
            with st.spinner("Creating your personalized study plan..."):
                st.session_state.study_plan = study_mate.create_time_based_study_plan(
                    days_until_exam,
                    st.session_state.pdf_content
                )

    study_plan = st.session_state.study_plan
    if not study_plan:
        return

    # The planner reports failure as {"error": message}; surface it.
    if "error" in study_plan:
        st.warning(study_plan["error"])
        return

    st.subheader("📋 Your Personalized Study Schedule")

    for date, plan in study_plan.items():
        if isinstance(plan, dict) and 'sections' in plan:
            with st.expander(f"📅 Day {plan['day']} - {date}"):
                st.markdown(f"**Focus:** {plan['focus']}")
                st.markdown("**Sections to cover:**")
                for section in plan['sections']:
                    st.markdown(f"- {section['title']} (Page {section['page']})")
                    if section.get('preview'):
                        st.markdown(f"  *Preview: {section['preview'][:100]}...*")


def _render_challenges(study_mate) -> None:
    """Generate and list gamified learning challenges."""
    st.header("🎮 Gamified Learning Challenges")

    if st.button("Generate New Challenges", type="primary"):
        with st.spinner("Creating fun learning challenges..."):
            st.session_state.challenges = study_mate.generate_learning_challenges(
                st.session_state.pdf_content
            )

    if not st.session_state.challenges:
        return

    st.subheader("🏆 Your Learning Challenges")

    for i, challenge in enumerate(st.session_state.challenges, 1):
        with st.container():
            st.markdown(f"### Challenge {i}")
            st.markdown(f"**📋 Task:** {challenge.get('description', 'No description available')}")

            col1, col2 = st.columns(2)
            with col1:
                challenge_type = challenge.get('type', 'daily')
                if 'daily' in challenge_type.lower():
                    st.markdown("⏰ **Type:** Daily")
                else:
                    st.markdown("📅 **Type:** Weekly")

            with col2:
                difficulty = challenge.get('difficulty', 'medium').lower()
                if 'easy' in difficulty:
                    st.markdown("🟢 **Difficulty:** Easy")
                elif 'hard' in difficulty:
                    st.markdown("🔴 **Difficulty:** Hard")
                else:
                    st.markdown("🟡 **Difficulty:** Medium")

            st.markdown("---")


def _render_summaries(study_mate) -> None:
    """Generate and show bullet-point summaries of the detected sections."""
    st.header("📝 Section Summaries")

    if st.button("Generate Summaries", type="primary"):
        with st.spinner("Creating section summaries..."):
            st.session_state.section_summaries = study_mate.summarize_sections(
                st.session_state.pdf_sections,
                st.session_state.pdf_content
            )
        if not st.session_state.section_summaries:
            st.warning("No sections were identified in this PDF to summarize.")

    summaries = st.session_state.get('section_summaries')
    if not summaries:
        return

    st.subheader("📋 PDF Section Summaries")

    for summary in summaries:
        with st.expander(f"📖 {summary['title']} (Page {summary['page']})"):
            st.markdown(summary['summary'])
            st.markdown(_confidence_markup("Summary Confidence", summary.get('confidence', 0.5)))


def _render_flashcards(study_mate) -> None:
    """Generate, display and export flashcards."""
    st.header("🎴 Flashcard Creator")

    if st.button("Generate Flashcards", type="primary"):
        with st.spinner("Creating flashcards from your PDF..."):
            st.session_state.flashcards = study_mate.generate_flashcards(
                st.session_state.pdf_content
            )

    flashcards = st.session_state.flashcards
    if not flashcards:
        return

    st.subheader("🃏 Generated Flashcards")

    for i, card in enumerate(flashcards, 1):
        with st.container():
            st.markdown(f"### Card {i}")

            col1, col2 = st.columns([1, 2])
            with col1:
                st.markdown("**🏷️ Term:**")
                st.info(card.get('term', 'No term'))

            with col2:
                st.markdown("**📝 Definition:**")
                st.success(card.get('definition', 'No definition'))

            st.markdown("---")

    st.subheader("📤 Export Flashcards")
    col1, col2 = st.columns(2)

    with col1:
        # Tab-separated, one card per line. Collapse embedded tabs/newlines so
        # a field cannot break the row structure.
        anki_text = "".join(
            f"{' '.join(card.get('term', '').split())}\t"
            f"{' '.join(card.get('definition', '').split())}\n"
            for card in flashcards
        )

        st.download_button(
            label="📥 Download for Anki",
            data=anki_text,
            file_name="studymate_flashcards.txt",
            mime="text/plain"
        )

    with col2:
        json_data = json.dumps(flashcards, indent=2, ensure_ascii=False)
        st.download_button(
            label="📥 Download as JSON",
            data=json_data,
            file_name="studymate_flashcards.json",
            mime="application/json"
        )


def main():
    """Render the StudyMate app: sidebar upload/feature picker and the chosen feature."""
    st.title("📚 StudyMate - AI-Powered PDF Q&A Assistant")
    st.markdown("Upload your study materials and get intelligent answers, study plans, and more!")

    study_mate = StudyMate()

    # Feature label -> renderer; the dict order is the order shown in the picker.
    renderers = {
        "💬 Q&A Chat": _render_qa_chat,
        "📅 Study Planner": _render_study_planner,
        "🎮 Learning Challenges": _render_challenges,
        "📝 Section Summaries": _render_summaries,
        "🎴 Flashcard Creator": _render_flashcards,
    }

    with st.sidebar:
        st.header("📁 Upload Your Study Material")
        uploaded_file = st.file_uploader("Choose a PDF file", type="pdf")

        if uploaded_file is not None:
            _process_upload(study_mate, uploaded_file)

        st.header("🎯 Features")
        selected_feature = st.selectbox("Choose a feature:", list(renderers))

    # Nothing to work with until a PDF has been extracted.
    if not st.session_state.pdf_content:
        st.info("👆 Please upload a PDF file to get started!")
        st.markdown("""
        ### Features Available:
        - **💬 Q&A Chat**: Ask questions about your PDF content
        - **📅 Study Planner**: Create time-based study schedules
        - **🎮 Learning Challenges**: Get gamified learning activities
        - **📝 Section Summaries**: Get bullet-point summaries of sections
        - **🎴 Flashcard Creator**: Generate flashcards for key terms
        """)
        return

    renderers[selected_feature](study_mate)

# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()


ModuleNotFoundError: No module named 'streamlit'

In [None]:
%pip install streamlit