<a href="https://colab.research.google.com/github/zabi076/Amazon-Clone/blob/main/Copy_of_TutorAI_633_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install langchain-chroma
!pip install langchain-community
!pip install PyPDF2
!pip install cohere
!pip install langchain-community --upgrade
!pip install langchain
!pip install langchain-google-genai
!pip install --upgrade google-generativeai
!pip install -q google-generativeai





Collecting langchain-chroma
  Downloading langchain_chroma-0.1.4-py3-none-any.whl.metadata (1.6 kB)
Collecting chromadb!=0.5.4,!=0.5.5,<0.6.0,>=0.4.0 (from langchain-chroma)
  Downloading chromadb-0.5.20-py3-none-any.whl.metadata (6.8 kB)
Collecting fastapi<1,>=0.95.2 (from langchain-chroma)
  Downloading fastapi-0.115.5-py3-none-any.whl.metadata (27 kB)
Collecting build>=1.0.3 (from chromadb!=0.5.4,!=0.5.5,<0.6.0,>=0.4.0->langchain-chroma)
  Downloading build-1.2.2.post1-py3-none-any.whl.metadata (6.5 kB)
Collecting chroma-hnswlib==0.7.6 (from chromadb!=0.5.4,!=0.5.5,<0.6.0,>=0.4.0->langchain-chroma)
  Downloading chroma_hnswlib-0.7.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (252 bytes)
Collecting uvicorn>=0.18.3 (from uvicorn[standard]>=0.18.3->chromadb!=0.5.4,!=0.5.5,<0.6.0,>=0.4.0->langchain-chroma)
  Downloading uvicorn-0.32.0-py3-none-any.whl.metadata (6.6 kB)
Collecting posthog>=2.4.0 (from chromadb!=0.5.4,!=0.5.5,<0.6.0,>=0.4.0->langchain-chroma)
  Do

In [None]:
# Cell 1: Setup and Imports

import sqlite3
import requests
from bs4 import BeautifulSoup
from langchain_chroma import Chroma
from langchain.embeddings import CohereEmbeddings
from uuid import uuid4
import PyPDF2
import io
from google.colab import userdata
import google.generativeai as genai
from langchain_google_genai import GoogleGenerativeAI

# List of past paper websites.
# scrape_and_store_papers() requests each of these pages and looks for
# <a> links whose href contains ".pdf".
websites = [
    "https://papacambridge.com",
    "https://pastpapers.co",
    "https://www.cambridgeinternational.org",
    "https://pastpapers.papacambridge.com",
    "https://www.cienotes.com",
    "https://bestexamhelp.com",
    "https://papers.gceguide.cc",
]

# Path of the SQLite database file used by every function below that calls
# sqlite3.connect(db_path). It is a relative path.
db_path = "past_papers.db"

def initialize_papers_db():
    """Create the ``past_papers`` table in the SQLite file at ``db_path``.

    The table has an auto-incrementing ``id``, a ``paper_name`` and the
    extracted ``content``. ``CREATE TABLE IF NOT EXISTS`` makes repeated calls
    harmless. The connection is always closed, even if table creation fails.
    """
    conn = sqlite3.connect(db_path)
    try:
        # `with conn` commits on success and rolls back if the statement raises.
        with conn:
            conn.execute('''
                CREATE TABLE IF NOT EXISTS past_papers (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    paper_name TEXT,
                    content TEXT
                )
            ''')
    finally:
        conn.close()

# Scrape past papers and store content in SQLite database
def scrape_and_store_papers():
    """Scrape each site in ``websites`` for PDF links and store the PDFs' text.

    For every site, the page is downloaded and each ``<a>`` element whose
    ``href`` contains ".pdf" is treated as a past paper. The link is resolved
    against the page URL, its text is extracted with ``fetch_pdf_content`` and,
    when that returns something truthy, saved with ``save_paper_in_db`` under
    the last path segment of the URL.

    A site that cannot be fetched is reported and skipped, so one unreachable
    site does not abort the whole run.
    """
    # Local import keeps this function self-contained with respect to the
    # notebook's import cell.
    from urllib.parse import urljoin

    for site in websites:
        try:
            # A timeout prevents a stalled server from hanging the notebook.
            response = requests.get(site, timeout=30)
            response.raise_for_status()
        except requests.RequestException as e:
            print(f"Error fetching {site}: {e}")
            continue

        soup = BeautifulSoup(response.content, "html.parser")

        # Assuming papers are in PDF links
        pdf_links = soup.find_all("a", href=lambda href: href and ".pdf" in href)

        for link in pdf_links:
            # urljoin returns absolute hrefs unchanged and resolves relative
            # ones ("papers/a.pdf", "/a.pdf", "//cdn/a.pdf") against the page
            # URL; plain string concatenation produced URLs such as
            # "https://example.compapers/a.pdf".
            pdf_url = urljoin(response.url, link.get("href"))

            # Fetch PDF content
            pdf_content = fetch_pdf_content(pdf_url)
            if pdf_content:
                paper_name = pdf_url.split("/")[-1]
                save_paper_in_db(paper_name, pdf_content)
                print(f"Stored {paper_name}")

def fetch_pdf_content(pdf_url):
    """Download the PDF at ``pdf_url`` and return its extracted text.

    Args:
        pdf_url: Absolute URL of the PDF to download.

    Returns:
        The text of all pages, each followed by a newline, or ``None`` if the
        download or the PDF parsing fails. Failures are printed, not raised,
        so a single bad link does not stop a scraping run.
    """
    try:
        # A timeout prevents a stalled server from hanging the notebook;
        # raise_for_status stops error pages from being parsed as PDFs.
        response = requests.get(pdf_url, timeout=30)
        response.raise_for_status()
        with io.BytesIO(response.content) as file:
            reader = PyPDF2.PdfReader(file)
            # `or ""` is defensive: a page that yields no text must not make
            # the whole document fail on `None + "\n"`.
            return "".join((page.extract_text() or "") + "\n" for page in reader.pages)
    except Exception as e:
        print(f"Error fetching PDF: {e}")
        return None

def save_paper_in_db(paper_name, content):
    """Insert one paper into the ``past_papers`` table of the DB at ``db_path``.

    Args:
        paper_name: Name stored in the ``paper_name`` column.
        content: Extracted text stored in the ``content`` column.

    The values are bound as SQL parameters. The connection is always closed,
    even if the insert fails (for example because the table does not exist).
    """
    conn = sqlite3.connect(db_path)
    try:
        # `with conn` commits on success and rolls back if the insert raises.
        with conn:
            conn.execute('''
                INSERT INTO past_papers (paper_name, content)
                VALUES (?, ?)
            ''', (paper_name, content))
    finally:
        conn.close()

# Initialize the database and scrape papers.
# Both calls run when this cell executes; the scrape requests every URL in
# `websites`, so this step needs network access.
initialize_papers_db()
scrape_and_store_papers()

# Initialize embedding with Cohere API key
cohere_api_key = userdata.get('cohere_api_key')  # Read from the Colab secret named 'cohere_api_key'
embedding_function = CohereEmbeddings(
    model="embed-english-light-v2.0",
    cohere_api_key=cohere_api_key,
    user_agent="my-app"  # Required user-agent
)

# Initialize Chroma for vector storage.
# NOTE(review): no persist_directory is passed, so the collection presumably
# is not kept across kernel restarts — confirm if persistence is wanted.
vector_db = Chroma(
    collection_name="past_papers_collection",
    embedding_function=embedding_function
)

# Store papers in vector database
def store_papers_in_vector_db():
    """Embed every paper stored in SQLite and add it to ``vector_db``.

    Reads all ``(paper_name, content)`` rows from ``past_papers`` in the
    database at ``db_path`` and adds each non-empty text to the Chroma
    collection, with the paper name attached as ``{"title": ...}`` metadata.

    Rows whose content is NULL or whitespace-only are skipped because there is
    nothing to embed.
    """
    conn = sqlite3.connect(db_path)
    try:
        rows = conn.execute("SELECT paper_name, content FROM past_papers").fetchall()
    finally:
        conn.close()

    # Add past papers to Chroma vector database
    for paper_name, content in rows:
        if not content or not content.strip():
            continue
        # add_texts takes `metadatas`: a list with one dict per text. The
        # former `metadata=` keyword is not a parameter of add_texts, so the
        # title was never attached to the document.
        vector_db.add_texts([content], metadatas=[{"title": paper_name}])

# Populate the Chroma collection from the rows currently in the SQLite table
store_papers_in_vector_db()

# Cell 3: Paper Generation and Interaction with LLM

def generate_exam_paper(user_prompt, k=5, temperature=0.7):
    """Generate an exam paper on ``user_prompt`` using Gemini plus retrieved context.

    The ``k`` stored papers most similar to ``user_prompt`` are fetched from
    ``vector_db`` and appended to the instruction prompt as context before the
    model is called.

    Args:
        user_prompt: Topic or requirements for the exam paper.
        k: Number of similar documents to retrieve as context.
        temperature: Sampling temperature for the Gemini model.

    Returns:
        The generated text, or "No response generated." if the model returns
        an empty response.
    """
    # Define the prompt for paper generation
    prompt = f"""You are an expert exam paper creator. Generate questions based on the topic "{user_prompt}".
    Include both short answer questions (concise answers, 2-4 lines) and long answer questions (detailed answers, 5+ lines).
    Format each question as follows:

    Short Question: [Question Text]
    Answer Guide: [Expected Short Answer]

    Long Question: [Question Text]
    Answer Guide: [Detailed Answer Expectation]"""

    # Retrieve context based on prompt similarity
    relevant_docs = vector_db.similarity_search(user_prompt, k=k)
    context = " ".join(doc.page_content for doc in relevant_docs)
    prompt_with_context = f"{prompt}\n\nContext: {context}"

    # Initialize the model with Google Generative AI
    GEMINI_API_KEY = userdata.get("GEMINI_API_key")

    # Not needed by the LangChain wrapper below (it receives the key
    # explicitly); kept so any direct use of `genai` elsewhere in the notebook
    # still finds the key configured.
    genai.configure(api_key=GEMINI_API_KEY)

    # Temperature is a model setting, so it is set on the constructor.
    llm = GoogleGenerativeAI(
        google_api_key=GEMINI_API_KEY,
        model="gemini-1.5-flash",
        temperature=temperature,
    )

    # invoke() on a LangChain LLM returns the completion as a plain string.
    # The earlier code treated it like a dict with a 'candidates' key, so the
    # membership test was a substring check and the fallback message was
    # returned for almost every real completion.
    response = llm.invoke(prompt_with_context)

    # Return the generated text
    return response if response else "No response generated."

# Example usage: ask for a topic, generate a paper for it, and print the result.
# NOTE: input() waits for the user, so "Run all" pauses at this point.
user_prompt = input("Enter your exam topic or requirements: ")
generated_paper = generate_exam_paper(user_prompt)
print(generated_paper)

Enter your exam topic or requirements: chemistry periodic table
No response generated.
