In [None]:
!pip install -U --force-reinstall \
  torch torchvision torchaudio \
  transformers \
  sentence-transformers \
  langchain \
  langchain-community \
  chromadb \
  accelerate \
  einops \
  xformers \
  bitsandbytes \
  beautifulsoup4 \
  requests \
  selenium


**1. Library Imports**
This block imports all the necessary libraries and modules. It includes standard Python libraries (like *time* and *requests*), HTML parsing tools (*BeautifulSoup*), Selenium for browser automation, various components from LangChain for building RAG systems, and modules from Hugging Face and PyTorch for model loading and inference.

In [2]:
import time
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate

import transformers
from transformers import AutoTokenizer
import torch
from torch import cuda, bfloat16

**2. Loading the LLM, Tokenizer, and Pipeline**
This block sets up the language model by loading the pre-trained LLM from a specified directory. It configures the model with 4-bit quantization for efficiency and prepares the tokenizer and inference pipeline. The *HuggingFacePipeline* wrapper adapts the inference pipeline so that it can be used with LangChain.

In [3]:
# Load LLM + tokenizer + pipeline
model_id = "/kaggle/input/llama-3/transformers/8b-chat-hf/1"
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16
)

model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id, 
    trust_remote_code=True, 
    quantization_config=bnb_config, 
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

query_pipeline = transformers.pipeline(
    "text-generation", 
    model=model, 
    tokenizer=tokenizer, 
    max_new_tokens=256,
    torch_dtype=torch.float16, 
    device_map="auto"
)
llm = HuggingFacePipeline(pipeline=query_pipeline)

2025-04-10 22:22:53.211987: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1744323773.241322    2052 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1744323773.250145    2052 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Device set to use cuda:0
  llm = HuggingFacePipeline(pipeline=query_pipeline)


**3. Initializing the Embeddings**
Here we initialize the sentence transformer embeddings using a Hugging Face model. These embeddings are later used for document vectorization in the RAG (Retrieval-Augmented Generation) chain.

In [4]:
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
    model_kwargs={"device": device}
)

  embeddings = HuggingFaceEmbeddings(


**4. Prompt Template for Structured Output**
This block creates a prompt template that guides the LLM to generate optimized search queries for different websites. The template asks for formatted queries for GameRules.com, WikiHow.com, and Pagat.com(*yet to be implmented*). This structured output helps in later scraping and information retrieval.

In [5]:
query_prompt_template = """
You are helping route user queries to the right search format for specific websites.

User's original question: "{user_question}"

Based on this, generate optimized search phrases for these websites:

1. GameRules.com → Only works well with short exact game names like "UNO", "Cricket", "Monopoly".
2. WikiHow.com → Handles natural queries like "how to play UNO", "rules for chess", etc.
3. Pagat.com → Focuses on formal card game names, like "UNO card game", "Texas Holdem rules", etc.

Return the output in **exactly** this format:
GameRules Search: <query>
WikiHow Search: <query>
Pagat Search: <query>
"""
query_prompt = PromptTemplate.from_template(query_prompt_template)


**5. Custom Query Generation with Retry Handler**
This function uses the above prompt template to generate site-specific search queries from the user’s original question. It retries up to a specified number of times if the output is not formatted as expected. The function parses the LLM response and extracts the queries for each website.

In [6]:
import re

def generate_custom_queries(user_query, max_retries=3):
    formatted_prompt = query_prompt.format(user_question=user_query)
    
    for attempt in range(max_retries):
        response = llm.invoke(formatted_prompt).strip()
        
        # Extract text inside triple backticks if present
        match = re.search(r"```(.*?)```", response, re.DOTALL)
        content = match.group(1).strip() if match else response
        
        lines = [line for line in content.split("\n") if ":" in line]
        
        try:
            game = next(line.split(":", 1)[1].strip() for line in lines if "GameRules Search" in line)
            how = next(line.split(":", 1)[1].strip() for line in lines if "WikiHow Search" in line)
            pagat = next(line.split(":", 1)[1].strip() for line in lines if "Pagat Search" in line)
            return game, how, pagat
        except Exception:
            print(f"⚠️ Retry {attempt+1}: Failed to parse LLM response:\n{response}")
    
    raise ValueError("❌ Could not extract site-specific queries from LLM response after retries.")


**6. Web Scraping Functions for GameRules, WikiHow, and Pagat**
This section includes several helper functions for scraping game rules from different sources:

GameRules.com Functions:
*search_game_on_gamerules* locates the URL for the game rules page based on the query, and *fetch_game_rules_gamerules* retrieves the rules content.

WikiHow Functions:
*get_search_candidates_selenium* uses Selenium to simulate browser behavior and get search result candidates from WikiHow.
*choose_best_candidate* selects the best match from the candidates.
*fetch_article_content* retrieves the text content from the selected WikiHow article.

Pagat.com Fallback Function:
*fetch_rules_from_pagat* currently returns a fallback string as an example.

In [7]:
def search_game_on_gamerules(game_name):
    search_url = f"https://gamerules.com/browse-all-rules/?_search={game_name}"
    headers = {"User-Agent": "Mozilla/5.0"}
    response = requests.get(search_url, headers=headers)
    if response.status_code != 200:
        return None
    soup = BeautifulSoup(response.text, 'html.parser')
    results = soup.select("div.pt-cv-content-item")
    for item in results:
        title_tag = item.select_one(".pt-cv-title a")
        if title_tag and game_name.upper() in title_tag.text.strip().upper():
            return title_tag['href']
    return None

def fetch_game_rules_gamerules(url):
    headers = {"User-Agent": "Mozilla/5.0"}
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')
    article = soup.find('div', class_='entry-content')
    return article.get_text(separator='\n', strip=True) if article else "No rules content found."

def get_search_candidates_selenium(query):
    base_url = "https://www.wikihow.com"
    search_url = f"{base_url}/wikiHowTo?search={query.replace(' ', '+')}&Search="
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--disable-gpu")
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--user-data-dir=/tmp/chrome-user-data")
    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get(search_url)
        container = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "searchresults_list"))
        )
        container_html = container.get_attribute("innerHTML")
    except Exception:
        container_html = driver.page_source
    finally:
        driver.quit()
    soup = BeautifulSoup(container_html, "html.parser")
    candidate_tags = soup.find_all("a", class_="result_link")
    candidates = []
    for tag in candidate_tags:
        href = tag.get("href", "")
        if not href.startswith("http"):
            href = base_url + href
        title_div = tag.find("div", class_="result_title")
        title = title_div.get_text(separator=" ", strip=True) if title_div else ""
        candidates.append({"title": title, "href": href})
    return candidates

def choose_best_candidate(candidates, query):
    query_words = query.lower().split()
    for candidate in candidates:
        title = candidate["title"].lower()
        if all(word in title for word in query_words):
            return candidate
    return candidates[0] if candidates else None

def fetch_article_content(url):
    headers = {"User-Agent": "Mozilla/5.0"}
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, "html.parser")
    content_div = soup.find("div", class_="entry-content") or soup.find("div", id="bodyContent")
    return content_div.get_text(separator="\n", strip=True) if content_div else "Article content not found."

def fetch_rules_from_pagat(query):
    return f"No fallback rule found on pagat.com for query: {query}"


**7. RAG Chain Setup**
This block defines the prompt used by the RAG chain and sets up a retrieval-based chain. The prompt instructs the assistant on how to use contextual information to answer the user's query. The helper function *build_rag_chain* splits the provided full text into manageable chunks, constructs a vector database using Chroma, and then creates a retrieval QA chain.

In [8]:
# ==== RAG Chain Setup ====
guided_prompt_template = """
You are a helpful assistant that provides detailed and structured game rules.

Use the following pieces of context to answer the question at the end. If you don't know the answer, just say you don't know — don't make anything up.

Context:
{context}

Question:
{question}

Answer:
"""
prompt = PromptTemplate(template=guided_prompt_template, input_variables=["context", "question"])

def build_rag_chain(full_text):
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    docs = splitter.create_documents([full_text])
    vectordb = Chroma.from_documents(docs, embeddings, persist_directory="chroma_db")
    retriever = vectordb.as_retriever()
    return RetrievalQA.from_chain_type(
        llm=llm, retriever=retriever,
        chain_type="stuff", chain_type_kwargs={"prompt": prompt},
        verbose=True
    )


**8. Main Runner Function**
This final block ties everything together. The *run_final_rag* function:

Generates custom search queries based on the user's question.

Searches for game rules on GameRules.com.

Uses Selenium to retrieve WikiHow search results and selects the best candidate.

Attempts to get fallback rules from Pagat.com.

Combines the scraped content and builds the RAG chain.

Finally, the RAG chain is used to generate an answer to the original user query.

In [9]:
def run_final_rag(user_question):
    game, wikihow, pagat = generate_custom_queries(user_question)
    print(f"\n🔎 Search Queries:\nGameRules → {game}\nWikiHow → {wikihow}\nPagat → {pagat}")

    rules_text = ""
    
    url = search_game_on_gamerules(game)
    if url:
        print("✅ Found on GameRules")
        rules_text += fetch_game_rules_gamerules(url) + "\n\n"
    else:
        print("❌ Not found on GameRules")

    candidates = get_search_candidates_selenium(wikihow)
    if candidates:
        best = choose_best_candidate(candidates, wikihow)
        rules_text += fetch_article_content(best["href"]) + "\n\n"
        print("✅ Found on WikiHow")
    else:
        print("❌ Not found on WikiHow")

    rules_text += fetch_rules_from_pagat(pagat) + "\n\n"
    
    print("\n🔧 Building RAG chain...")
    rag_chain = build_rag_chain(rules_text)

    print("\n🤖 Answering the original question...")
    response = rag_chain.run(user_question)
    print(f"\n📜 Final Answer:\n{response}")


# Example Run
run_final_rag("How to bat in Cricket?")


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



🔎 Search Queries:
GameRules → Cricket
WikiHow → how to bat in cricket
Pagat → cricket card game
✅ Found on GameRules
✅ Found on WikiHow

🔧 Building RAG chain...


  response = rag_chain.run(user_question)
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.



🤖 Answering the original question...


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m

📜 Final Answer:

You are a helpful assistant that provides detailed and structured game rules.

Use the following pieces of context to answer the question at the end. If you don't know the answer, just say you don't know — don't make anything up.

Context:
Move to meet the ball.
Watch
3
Try to hit the ball before it spins.
If the bowler is bowling spin, you can advance forward and hit the ball before it spins. You can play a front foot shot for a spin bowler; this gives you the advantage of a full-toss delivery and the chance to face the ball before it bounces and spins.
Watch
4
Swing the bat properly.
When the ball is pitched, roll the shoulder facing the bowler down slightly and swing the bat backwards in a straight line. Use your top hand for control as you bring the bat forward to meet the ball.
[3]
X
Research source
The back-swing provides the power for the shot; a good 