In [1]:
import os

# Load the Groq API key from the environment instead of committing it to the
# notebook. The key that used to be hardcoded on this line must be treated as
# leaked and revoked.
# `os.getenv` returns None when the GROQ_API_KEY variable is not set.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

## 1. Smart Highlights

In [2]:
from groq import Groq

# Module-level Groq client, built once from GROQ_API_KEY and reused by
# get_groq_mistral_response for every request.
client = Groq(api_key=GROQ_API_KEY)
def get_groq_mistral_response(prompt: str) -> str:
    """Send a single-turn prompt to Groq and return the reply text.

    Args:
        prompt: Text sent as the only ``user`` message of the conversation.

    Returns:
        The content of the first completion choice (an empty string when the
        reply carries no content), or a fixed warning string when the request
        or the reading of the response fails.
    """
    try:
        response = client.chat.completions.create(
            model="mistral-saba-24b",
            messages=[{"role": "user", "content": prompt}],
            temperature=1,
            max_completion_tokens=1024,
            top_p=1,
            stream=False,
            stop=None,
        )
        # Read the reply inside the `try` so that a malformed response (for
        # example an empty `choices` list) is reported the same way as a
        # failed request instead of escaping as an uncaught IndexError.
        content = response.choices[0].message.content
    except Exception as e:
        # Best-effort by design: log the failure and hand back a placeholder
        # so the calling cell keeps running.
        print(f"Groq Error: {e}")
        return "⚠️ Error from Groq/Mistral API."

    # Guard against a missing (None) content so the declared `str` return
    # type always holds.
    return content or ""

In [None]:
def smart_highlight_prompt(text: str) -> str:
    """Build the annotation prompt for a single highlighted passage.

    The prompt asks the model to classify the highlight type and the
    sentence type, and to write a short note, answering in a fixed
    three-bullet format.

    Args:
        text: The highlighted passage; it is embedded verbatim between
            triple double quotes in the prompt.

    Returns:
        The complete prompt string.
    """
    return f"""
    You are an advanced document annotator with expertise in analyzing and classifying highlighted text. Your task is to provide a **precise and insightful annotation** for the given highlight.

    1. **Classify the Highlight Type**:  
       Choose from the following categories:
       - **Concept** (A single idea, definition, or principle)  
       - **Process** (Step-by-step explanation or structured reasoning)  
       - **Fact** (A verified statement, research finding, or statistic)  
       - **Task** (A to-do item or action-based instruction)  
       - **Formula** (A structured or mathematical representation)  

    2. **Classify the Sentence Type** (Choose the best match):  
       - **Definition**  
       - **Explanation**  
       - **Quote**  
       - **Important Fact**  
       - **Recommendation**  
       - **Author's Opinion**  
       - **Analogy**  
       - **Other (Specify if needed)**  

    3. **Generate a Short Note**:  
       - Clearly **explain the significance** of the highlight in 1-2 sentences.  
       - Avoid generic phrases like "This sentence describes..."  
       - Use **engaging and direct** phrasing: "It highlights...", "It defines...", "It recommends...".  

    **Highlighted Sentence:**  
    \"\"\"{text}\"\"\"

    **Respond ONLY in this format (no extra text):**  
    - **Highlight Type**: [One of Concept, Process, Fact, Task, Formula]  
    - **Sentence Type**: [One of Definition, Explanation, Quote, etc.]  
    - **Short Note**: [A brief, professional annotation]
    """

In [5]:
# Example 1: a single-sentence highlight describing a concept.
# Builds the annotation prompt for it and prints the model's reply.
highlight1 = "Reinforcement learning uses reward feedback to teach agents how to act."
prompt = smart_highlight_prompt(highlight1)
print(get_groq_mistral_response(prompt))

- **Highlight Type**: Concept
- **Sentence Type**: Definition
- **Short Note**: It defines reinforcement learning as a method that uses reward feedback to train agents on performing tasks.


In [6]:
# Example 2: a multi-sentence paragraph passed as one highlight.
# Builds the annotation prompt for it and prints the model's reply.
highlight2 = "Float16 can only represent numbers up to 65504, whilst bfloat16 can represent huge numbers up to 10^38! But notice both number formats use only 16bits! This is because float16 allocates more bits so it can represent smaller decimals better, whilst bfloat16 cannot represent fractions well. But why float16? Let's just use float32! But unfortunately float32 in GPUs is very slow for matrix multiplications - sometimes 4 to 10x slower! So we cannot do this."

prompt = smart_highlight_prompt(highlight2)
print(get_groq_mistral_response(prompt))

- **Highlight Type**: Concept
- **Sentence Type**: Explanation
- **Short Note**: It highlights the trade-offs and efficiency considerations between float16 and bfloat16 compared to float32 for representing numerical values, especially in the context of GPU performance during matrix multiplications.


## 2. Semantic summary prompt

In [None]:
def semantic_summary_prompt(highlights: list) -> str:
    """Build the prompt that condenses several highlights into one paragraph.

    Args:
        highlights: Highlight strings to summarize; each one is rendered
            as its own ``- `` bullet line in the prompt.

    Returns:
        The complete prompt string.
    """
    # Render one bullet per highlight. Interpolating the list object directly
    # would embed its Python repr (brackets, quotes, escape sequences) in the
    # prompt text. The join separator keeps every bullet at the same indent
    # as the surrounding prompt lines.
    bullets = "\n    ".join(f"- {item}" for item in highlights)
    return f"""
    You are an AI-powered summarization expert. Your task is to analyze the provided highlights and generate a **concise study memory** in a well-structured, high-impact paragraph.

    ### **Instructions:**
    1. **Capture the key insights** from the highlights without repeating information.  
    2. **Group related ideas** for a logical flow (e.g., learning techniques, AI advancements, safety concerns).  
    3. **Make it clear, precise, and engaging**, avoiding unnecessary filler words.  
    4. **Ensure natural readability** – the summary should feel polished and professional.

    ### **Highlights to Summarize:**  
    {bullets}

    ### **Respond in the following format:**  
    - **Compressed Study Memory:**  
      - [A single, structured paragraph summarizing the highlights effectively]
    """

In [None]:
# Sample highlights for the summary demo.
# Note: highlight1 and highlight2 rebind names already used by the earlier
# smart-highlight examples.
highlight1 = "Backpropagation is a supervised learning algorithm used to train neural networks by adjusting weights based on error feedback."
highlight2 = "The activation function in a neural network determines whether a neuron should be activated based on weighted inputs."
highlight3 = "Reinforcement learning allows an agent to learn by receiving rewards or penalties, optimizing future decisions based on past actions." 
highlight4 = "Gradient descent is an optimization algorithm that adjusts model parameters by computing the gradient of the loss function."
highlight5 = "As Geoffrey Hinton once said, 'Deep learning is the first technology in history that can solve perceptual problems better than humans.'"
highlight6 = "Elon Musk argues that AI safety is humanity’s biggest challenge, requiring careful regulation to prevent catastrophic outcomes." 
highlight7 = "Neural networks with more than three hidden layers are commonly referred to as deep neural networks."
highlight8 = "In 2023, OpenAI's ChatGPT reached 100 million users within just two months, making it the fastest-growing app in history."
highlight9 = "To improve model generalization, always use dropout regularization to prevent overfitting in deep learning models."
highlight10 = "For high-performance AI models, use a combination of transfer learning and fine-tuning on domain-specific datasets."

# Collect the highlights, build the summary prompt, and print the model's reply.
highlights = [highlight1, highlight2, highlight3, highlight4, highlight5, highlight6, highlight7, highlight8, highlight9, highlight10]
prompt = semantic_summary_prompt(highlights)
print(get_groq_mistral_response(prompt))

- **Compressed Study Memory:**
  Machine learning encompasses various techniques including backpropagation, which trains neural networks through error feedback and weight adjustments, and reinforcement learning, where models learn by receiving rewards or penalties. Activation functions determine neuron activation in neural networks, while gradient descent optimizes model parameters. Deep learning, surpassing human capabilities in perceptual tasks, involves deep neural networks with multiple hidden layers. Ensuring model generalizability requires dropout regularization to prevent overfitting, and combining transfer learning with fine-tuning on domain-specific datasets enhances performance. Notable milestones include OpenAI's ChatGPT hitting 100 million users in two months, highlighting AI's rapid growth. Safety remains a pressing concern; Elon Musk emphasizes stringent regulation to mitigate potential catastrophic risks.


## 3. Cross-Referencing (Multi-doc magic)

In [1]:
from sentence_transformers import SentenceTransformer
import chromadb
from chromadb.config import Settings
from chromadb.utils import embedding_functions
import uuid

# Sentence-embedding model used to turn note text into vectors.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# ChromaDB client with persistent storage under data/chroma_db.
# NOTE(review): because storage is persistent, re-running the add_note cells
# presumably stores the same notes again — confirm and reset if needed.
chroma_client = chromadb.PersistentClient(path="data/chroma_db")

# Fetch the 'notes' collection, creating it if it does not exist yet.
collection = chroma_client.get_or_create_collection(name="notes")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# ✅ Add new note function
def add_note(text, metadata=None):
    """Embed a note and store it in the module-level `collection`.

    Args:
        text: Note content; stored as the document and passed to
            `embedding_model.encode` to produce its embedding.
        metadata: Optional dict stored alongside the note. When it is
            missing or empty, no metadata is sent.

    Returns:
        A `(note_id, embedding)` tuple: the generated UUID4 string and the
        embedding computed for `text`.
    """
    note_id = str(uuid.uuid4())  # Unique ID
    embedding = embedding_model.encode(text)
    collection.add(
        documents=[text],
        embeddings=[embedding],
        ids=[note_id],
        # ChromaDB's metadata validation rejects an empty dict, so omit the
        # metadata entirely when the caller supplied none.
        metadatas=[metadata] if metadata else None,
    )
    return note_id, embedding

In [3]:
# ✅ Similar notes search
def find_similar_notes(query_embedding, current_note_id=None, top_k=3):
    """Return the documents of the notes closest to `query_embedding`.

    Args:
        query_embedding: Embedding used as the query against `collection`.
        current_note_id: ID of a note to leave out of the results.
        top_k: Number of documents after which collection stops.

    Returns:
        A list of document strings, in the order returned by the query,
        with the note matching `current_note_id` left out.
    """
    # Ask for one extra hit so that dropping the current note can still
    # leave `top_k` results.
    response = collection.query(
        query_embeddings=[query_embedding],
        n_results=top_k + 1,
    )
    candidates = zip(response["ids"][0], response["documents"][0])

    matches = []
    for candidate_id, candidate_text in candidates:
        is_other_note = candidate_id != current_note_id
        if is_other_note:
            matches.append(candidate_text)
        # Stop as soon as enough documents have been gathered.
        if len(matches) >= top_k:
            break

    return matches


# Example usage: store a first note and keep its ID and embedding.
new_note = "GANs generate realistic images."
note_id, embedding = add_note(new_note, metadata={"type": "note", "topic": "AI"})


In [4]:
# Store a second note. This rebinds `note_id` and `embedding`, which the
# similarity search cell below uses as its query.
new_note = "VAEs also generate images, but their latent space is continuous."
note_id, embedding = add_note(new_note, metadata={"type": "note", "topic": "AI"})

In [None]:
# Store two more notes; their return values are not kept, so `note_id` and
# `embedding` are left unchanged.
add_note("Transformers revolutionized NLP tasks.", metadata={"type": "note", "topic": "AI"}) 
add_note("GANs are useful for data augmentation.", metadata={"type": "note", "topic": "AI"})

In [8]:
# Query with the current `embedding`, excluding the note identified by
# `note_id` from the results.
similar = find_similar_notes(embedding, current_note_id=note_id)

# Print the matches as a 1-based numbered list.
print("🔍 Similar Notes:")
for i, note in enumerate(similar):
    print(f"{i+1}. {note}")


🔍 Similar Notes:
1. GANs generate realistic images.
2. GANs are useful for data augmentation.
3. Transformers revolutionized NLP tasks.
