<a href="https://colab.research.google.com/github/umersajid11/RAGAJKTourisam/blob/main/hackathonepecversion1_01.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install gradio faiss-cpu sentence-transformers groq duckduckgo-search PyPDF2

Collecting gradio
  Downloading gradio-5.9.1-py3-none-any.whl.metadata (16 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.9.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)
Collecting groq
  Downloading groq-0.13.1-py3-none-any.whl.metadata (14 kB)
Collecting duckduckgo-search
  Downloading duckduckgo_search-7.1.1-py3-none-any.whl.metadata (17 kB)
Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.5.2 (from gradio)
  Downloading gradio_client-1.5.2-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x8

In [3]:
!pip install --upgrade duckduckgo-search



In [7]:
# Install Required Libraries
#!pip install gradio faiss-cpu sentence-transformers groq duckduckgo-search PyPDF2

# Import Required Libraries
import getpass
import os

import faiss
import gradio as gr
import numpy as np
import PyPDF2
import requests
from bs4 import BeautifulSoup
from groq import Groq
from sentence_transformers import SentenceTransformer
#from duckduckgo_search import DuckDuckGoSearch

# Set API Key for Groq
# The key must never be stored in the notebook: it is read from the
# GROQ_API_KEY environment variable, and only when that is missing is the
# user prompted for it (input is hidden and not saved in the cell output).
# NOTE: the key that used to be hard-coded here was published with the
# notebook and should be revoked in the Groq console.
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass.getpass("Enter your Groq API key: ")
client = Groq(api_key=os.environ["GROQ_API_KEY"])

# Load and Preprocess Dataset
def extract_text_from_pdf(pdf_path):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        pdf_path: Location of the PDF, passed straight to ``PyPDF2.PdfReader``.

    Returns:
        The text of all pages concatenated in page order, with no separator
        inserted between pages. Pages that yield no text contribute nothing.
    """
    pdf_reader = PyPDF2.PdfReader(pdf_path)
    # ``or ""`` keeps a page whose extraction returns None or an empty string
    # from raising TypeError; ``join`` avoids re-copying the growing string
    # once per page.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)

# Path to the dataset PDF. Override it by setting the DATASET_PATH environment
# variable; the default is the location used when the file is uploaded to Colab.
DATASET_PATH = os.environ.get("DATASET_PATH", "/content/Dataset.pdf")
pdf_text = extract_text_from_pdf(DATASET_PATH)

# Chunk the dataset into manageable pieces
def chunk_text(text, chunk_size=512, overlap=0):
    """Split ``text`` into consecutive fixed-size character windows.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk; must be positive.
        overlap: Number of characters each chunk shares with the previous
            one. The default of 0 gives back-to-back, non-overlapping chunks.
            Must satisfy ``0 <= overlap < chunk_size``.

    Returns:
        A list of substrings in order of appearance. The last chunk may be
        shorter than ``chunk_size`` (and, when ``overlap`` is non-zero, may
        consist only of characters already present in the previous chunk).
        An empty ``text`` yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` or ``overlap`` is out of range.
    """
    # A negative size would otherwise make range() empty and silently
    # return no chunks at all.
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")
    step = chunk_size - overlap
    return [text[i:i + chunk_size] for i in range(0, len(text), step)]

chunks = chunk_text(pdf_text)
# Fail early with a clear message: a PDF with no extractable text would
# otherwise only surface further down as an obscure shape/indexing error.
if not chunks:
    raise ValueError("No text could be extracted from the dataset PDF; cannot build the search index.")

# Generate embeddings for the chunks
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
# FAISS works on float32 matrices, so make the dtype explicit once here.
chunk_embeddings = np.asarray(embedding_model.encode(chunks), dtype="float32")

# Create a FAISS index for vector search
dimension = chunk_embeddings.shape[1]  # embedding width = number of columns
index = faiss.IndexFlatL2(dimension)
index.add(chunk_embeddings)

# Groq Llama-3-70B Query Function
def query_llama(prompt, model="llama3-70b-8192"):
    """Send a single-turn prompt to a Groq-hosted chat model and return its reply.

    Args:
        prompt: Text sent as the only ``user`` message of the conversation.
        model: Groq model identifier. Defaults to the model this notebook
            was written against; pass another id to switch models without
            editing this function.

    Returns:
        The message content of the first completion choice.
    """
    response = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model=model,
        stream=False,  # wait for the complete answer instead of streaming tokens
    )
    return response.choices[0].message.content

# Search Engine Fallback with DuckDuckGo, Focused on AJK Tourism
def search_duckduckgo(query):
    """Look up web snippets for ``query`` via DuckDuckGo's HTML endpoint.

    The query is suffixed with "Azad Jammu and Kashmir tourism", and only
    result snippets whose text contains "Azad Jammu and Kashmir" are kept.

    Args:
        query: The user's question.

    Returns:
        Up to three matching snippets joined by single spaces, or the string
        "No relevant results found." when nothing matches or the request
        fails (network error, timeout, or HTTP error status).
    """
    ajk_query = f"{query} Azad Jammu and Kashmir tourism"
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        # Passing the query through ``params`` lets requests URL-encode it, so
        # characters such as "&", "#" or "+" in the question cannot corrupt
        # the URL. The timeout keeps a stalled connection from hanging the UI.
        response = requests.get(
            "https://html.duckduckgo.com/html/",
            params={"q": ajk_query},
            headers=headers,
            timeout=10,
        )
        response.raise_for_status()
    except requests.RequestException:
        # Treat any fetch failure like an empty result so the caller's
        # existing "no results" path handles it instead of crashing.
        return "No relevant results found."
    soup = BeautifulSoup(response.text, "html.parser")
    results = soup.find_all("a", class_="result__snippet")
    snippets = [result.text for result in results if "Azad Jammu and Kashmir" in result.text]
    return " ".join(snippets[:3]) if snippets else "No relevant results found."

# Retrieve Context from Dataset
def retrieve_context(query, threshold=0.7):
    """Return the dataset chunk nearest to ``query`` if it is close enough.

    The query is embedded with ``embedding_model`` and its single nearest
    neighbour is looked up in ``index``.

    Args:
        query: The question text to match against the dataset.
        threshold: Exclusive upper bound on the distance reported by the
            index search; smaller distances mean closer matches.

    Returns:
        The entry of ``chunks`` at the matched position, or None when the
        nearest neighbour's distance is not below ``threshold``.
    """
    query_vector = np.array(embedding_model.encode([query]))
    distances, indices = index.search(query_vector, 1)
    best_distance, best_position = distances[0][0], indices[0][0]
    # Note this is a distance, not a similarity: lower is better.
    if not best_distance < threshold:
        return None
    return chunks[best_position]

# Combined Retrieval-Augmented Generation Logic
def chatbot_response(query):
    """Answer ``query`` from the dataset when possible, otherwise from the web.

    Args:
        query: The user's question.

    Returns:
        The model's answer, built from a dataset chunk when
        ``retrieve_context`` finds one and from ``search_duckduckgo``
        snippets otherwise. If the web search also comes back empty, a fixed
        apology string is returned and the model is not called.
    """
    dataset_context = retrieve_context(query)
    if dataset_context:
        return query_llama(f"Context: {dataset_context}\nQuestion: {query}\nAnswer:")

    # Nothing usable in the dataset: fall back to DuckDuckGo.
    web_context = search_duckduckgo(query)
    if web_context != "No relevant results found.":
        return query_llama(
            f"Using this information from the web: {web_context}\nQuestion: {query}\nAnswer:"
        )
    return "I'm sorry, I couldn't find relevant information about Azad Jammu and Kashmir tourism. Please try rephrasing your query."

# Gradio Interface for the Chatbot
def chat_interface(user_query, history=None):
    """Answer one user message and return the updated conversation.

    Args:
        user_query: Text typed into the question box.
        history: The conversation so far for this browser session, as a list
            of (user, bot) pairs. None or an empty list starts a new
            conversation.

    Returns:
        A new list holding the previous pairs followed by the
        ``(user_query, bot_response)`` pair. A blank query returns the
        conversation unchanged.
    """
    # Work on a copy so the caller's list is never mutated.
    conversation = list(history or [])
    if not user_query or not user_query.strip():
        return conversation  # nothing to ask; leave the chat as it is
    bot_response = chatbot_response(user_query)
    conversation.append((user_query, bot_response))
    return conversation

with gr.Blocks() as demo:
    with gr.Row():
        gr.Markdown("### AJK Tourism Chatbot (Focused on Azad Jammu and Kashmir Tourism)")
    chatbot = gr.Chatbot(label="Chatbot Interaction")
    query = gr.Textbox(label="Ask Your Question about AJK Tourism")
    submit_button = gr.Button("Submit")
    # The chatbot's own value is fed back in as the history, so each session
    # keeps its own conversation. A module-level list would be shared by
    # every visitor of the public share link and would grow without bound.
    submit_button.click(chat_interface, inputs=[query, chatbot], outputs=chatbot)

demo.launch()




Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://6dd8baabe7244419b6.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


