<a href="https://colab.research.google.com/github/vlad-pirvu/LLM_testing/blob/main/D%26D_Spells.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### I.Dependencies & Imports

In [1]:
# Install dependencies
!pip install -q requests beautifulsoup4 sentence-transformers faiss-cpu gradio

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m16.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m24.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m27.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m13.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
!pip install tqdm



In [3]:
!pip install transformers sentence-transformers



In [4]:
# Import libraries
import requests
from bs4 import BeautifulSoup
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import gradio as gr
from tqdm import tqdm
import pandas as pd

### II. Scrape D&D 5e Spells from D&D Wiki

In [5]:
# Step 2: Scrape D&D 5e Spells
BASE_URL = "https://dnd5e.wikidot.com"

In [6]:
def get_spell_links():
    res = requests.get(f"{BASE_URL}/spells")
    soup = BeautifulSoup(res.text, 'html.parser')
    links = [a['href'] for a in soup.select('div#page-content a') if a['href'].startswith('/spell:')]
    return list(set(links))

In [7]:
def scrape_spell(link):
    base_url = "https://dnd5e.wikidot.com"
    if not link.startswith("http"):
        url = base_url + link
    else:
        url = link

    res = requests.get(url)
    soup = BeautifulSoup(res.text, 'html.parser')

    # Try finding the spell name in multiple ways
    title_tag = soup.find('div', {'id': 'page-title'})
    if not title_tag:
        title_tag = soup.find('div', class_='page-title page-header')
    if title_tag:
        # If using class selector, the name is in a <span> inside the div
        if title_tag.name == 'div' and 'page-header' in title_tag.get('class', []):
            name = title_tag.find('span').text.strip()
        else:
            name = title_tag.text.strip()
    else:
        print(f"⚠️ Skipping {url}: no title found.")
        return None

    # Skip Unearthed Arcana spells
    if name.endswith(" UA") or name.endswith(" (UA)"):
        return None

    # Get the spell content
    content_tag = soup.find('div', {'id': 'page-content'})
    if not content_tag:
        print(f"⚠️ Skipping {url}: no page-content found.")
        return None
    content = content_tag.text.strip()

    return {'Url': url, 'Name': name, 'content': content}


In [8]:
spell_links = get_spell_links()
print(f"Found {len(spell_links)} spells. Scraping...")

# Collect spells
spells_data = []
for link in tqdm(spell_links, desc="Scraping spells"):
#    full_url = BASE_URL + link
    spell = scrape_spell(link)
    if spell:
        spells_data.append(spell)

Found 580 spells. Scraping...


Scraping spells: 100%|██████████| 580/580 [01:30<00:00,  6.43it/s]


### III. Format Spells DataFrame

In [9]:
# Convert to DataFrame
spells_df = pd.DataFrame(spells_data)
print(f"✅ Collected {len(spells_df)} spells.")
spells_df.head()

✅ Collected 530 spells.


Unnamed: 0,Url,Name,content
0,https://dnd5e.wikidot.com/spell:prayer-of-healing,Prayer of Healing,Source: Player's Handbook\n2nd-level evocation...
1,https://dnd5e.wikidot.com/spell:thorn-whip,Thorn Whip,Source: Player's Handbook\nTransmutation cantr...
2,https://dnd5e.wikidot.com/spell:arcane-eye,Arcane Eye,Source: Player's Handbook\n4th-level divinatio...
3,https://dnd5e.wikidot.com/spell:summon-lesser-...,Summon Lesser Demons,Source: Xanathar's Guide to Everything\n3rd-le...
4,https://dnd5e.wikidot.com/spell:speak-with-dead,Speak with Dead,Source: Player's Handbook\n3rd-level necromanc...


In [10]:
spells_df.shape

(530, 3)

In [11]:
def split_spell_content(row):
    #print("------")
    #print(row.name)
    #print(row['name'])

    lines = row['content'].split('\n')
    lines = [line.strip() for line in lines if line.strip()]  # Clean up empty lines and whitespace

    # Safe defaults
    source = lines[0].replace("Source", "").replace("source","").lstrip(',.:;-') if len(lines) > 0 else ''
    spell_level_type = lines[1].replace("Level", "").replace("level", "").replace("-","").replace("st","").replace("nd","").replace("rd","").replace("th","").lstrip(',.:;-') if len(lines) > 1 else ''
    casting_time = lines[2].replace("Casting Time", "").replace("Casting time","").replace("casting time","").lstrip(',.:;-') if len(lines) > 2 else ''
    spell_range = lines[3].replace("Range", "").replace("range","").lstrip(',.:;-') if len(lines) > 3 else ''
    components = lines[4].replace("Components", "").replace("components","").lstrip(',.:;-') if len(lines) > 4 else ''
    duration = lines[5].replace("Duration", "").replace("duration","").lstrip(',.:;-') if len(lines) > 5 else ''

    # Split spell_level_type into level and type
    if 'cantrip' in spell_level_type.lower():
        level, school = '0', spell_level_type.replace('cantrip', '').strip()
    elif ' ' in spell_level_type:
        level, school = spell_level_type.split(' ', 1)
    else:
        level, school = '', spell_level_type


    #Extract description
    mashed = row['content'].split('Duration',1)[1]
    mashed = mashed.split('\n',1)[1].strip().lower()

    if 'at higher levels' in mashed:
        description = mashed.split('at higher levels')[0].strip()
        upcastingMashed = mashed.split('at higher levels')[1].lstrip(',.:;-').strip()
        upcasting = upcastingMashed.split('spell lists')[0].strip()
        spellLists = upcastingMashed.split('spell lists')[1].lstrip(',.:;-').strip()
    else:
        description = mashed.split('spell lists')[0].strip()
        mashed = mashed.replace(":",".")
        spellLists = mashed.split('spell lists')[1].lstrip(',.:;-').strip()
        upcasting = ""

    return pd.Series({
        'Source': source,
        'Level': level,
        'School': school,
        'Casting Time': casting_time,
        'Range': spell_range,
        'Components': components,
        'Duration': duration,
        'Description': description,
        'Upcasting': upcasting,
        'Spell Lists': spellLists
    })


In [12]:
# Apply the split function to the spells DataFrame
spell_df_split = spells_df.join(spells_df.apply(split_spell_content, axis=1))

In [13]:
spell_df_split.head()

Unnamed: 0,Url,Name,content,Source,Level,School,Casting Time,Range,Components,Duration,Description,Upcasting,Spell Lists
0,https://dnd5e.wikidot.com/spell:prayer-of-healing,Prayer of Healing,Source: Player's Handbook\n2nd-level evocation...,Player's Handbook,2,evocation,10 minutes,30 feet,V,Instantaneous,up to six creatures of your choice that you ca...,when you cast this spell using a spell slot of...,"cleric, paladin (optional)"
1,https://dnd5e.wikidot.com/spell:thorn-whip,Thorn Whip,Source: Player's Handbook\nTransmutation cantr...,Player's Handbook,0,Transmutation,1 action,30 feet,"V, S, M (the stem of a plant with thorns)",Instantaneous,"you create a long, vine-like whip covered in t...",this spell’s damage increases by 1d6 when you ...,"artificer, druid"
2,https://dnd5e.wikidot.com/spell:arcane-eye,Arcane Eye,Source: Player's Handbook\n4th-level divinatio...,Player's Handbook,4,divination,1 action,30 feet,"V, S, M (a bit of bat fur)","Concentration, up to 1 hour","you create an invisible, magical eye within ra...",,"artificer, wizard"
3,https://dnd5e.wikidot.com/spell:summon-lesser-...,Summon Lesser Demons,Source: Xanathar's Guide to Everything\n3rd-le...,Xanathar's Guide to Everything,3,conjuration,1 action,60 feet,"V, S, M (a vial of blood from a humanoid kill...","Concentration, up to 1 hour","you utter foul words, summoning demons from th...",when you cast this spell using a spell slot of...,"warlock, wizard"
4,https://dnd5e.wikidot.com/spell:speak-with-dead,Speak with Dead,Source: Player's Handbook\n3rd-level necromanc...,Player's Handbook,3,necromancy,1 action,10 feet,"V, S, M (burning incense)",10 minutes,you grant the semblance of life and intelligen...,,"bard, cleric, wizard (optional)"


In [14]:
spell_df_split.describe()

Unnamed: 0,Url,Name,content,Source,Level,School,Casting Time,Range,Components,Duration,Description,Upcasting,Spell Lists
count,530,530,530,530,530,530,530,530,530,530,530,530.0,530
unique,530,530,530,17,10,34,20,37,276,32,530,156.0,161
top,https://dnd5e.wikidot.com/spell:detect-evil-an...,Detect Evil and Good,Source: Player's Handbook\n1st-level divinatio...,Player's Handbook,2,evocation,1 action,60 feet,"V, S",Instantaneous,"for the duration, you know if there is an aber...",,wizard
freq,1,1,1,361,88,90,395,96,161,148,1,329.0,41


In [15]:
non_numeric_levels = spell_df_split[~spell_df_split['Level'].str.isdigit()]['Level'].unique()
print("Non-numeric values in the 'Level' column:", non_numeric_levels)

Non-numeric values in the 'Level' column: []


In [16]:
spell_df_split = spell_df_split.drop('content', axis=1)

In [17]:
spell_df_split = spell_df_split.rename(columns={'name': 'Name', 'Url':"URL"})
print(spell_df_split.columns)

Index(['URL', 'Name', 'Source', 'Level', 'School', 'Casting Time', 'Range',
       'Components', 'Duration', 'Description', 'Upcasting', 'Spell Lists'],
      dtype='object')


In [18]:
desired_column_order = ['Name', 'Level', 'School', 'Casting Time', 'Range', 'Components', 'Duration', 'Description', 'Upcasting', 'Spell Lists', 'Source', 'URL']

In [19]:
finalSpells = spell_df_split[desired_column_order]

In [20]:
finalSpells.head()

Unnamed: 0,Name,Level,School,Casting Time,Range,Components,Duration,Description,Upcasting,Spell Lists,Source,URL
0,Prayer of Healing,2,evocation,10 minutes,30 feet,V,Instantaneous,up to six creatures of your choice that you ca...,when you cast this spell using a spell slot of...,"cleric, paladin (optional)",Player's Handbook,https://dnd5e.wikidot.com/spell:prayer-of-healing
1,Thorn Whip,0,Transmutation,1 action,30 feet,"V, S, M (the stem of a plant with thorns)",Instantaneous,"you create a long, vine-like whip covered in t...",this spell’s damage increases by 1d6 when you ...,"artificer, druid",Player's Handbook,https://dnd5e.wikidot.com/spell:thorn-whip
2,Arcane Eye,4,divination,1 action,30 feet,"V, S, M (a bit of bat fur)","Concentration, up to 1 hour","you create an invisible, magical eye within ra...",,"artificer, wizard",Player's Handbook,https://dnd5e.wikidot.com/spell:arcane-eye
3,Summon Lesser Demons,3,conjuration,1 action,60 feet,"V, S, M (a vial of blood from a humanoid kill...","Concentration, up to 1 hour","you utter foul words, summoning demons from th...",when you cast this spell using a spell slot of...,"warlock, wizard",Xanathar's Guide to Everything,https://dnd5e.wikidot.com/spell:summon-lesser-...
4,Speak with Dead,3,necromancy,1 action,10 feet,"V, S, M (burning incense)",10 minutes,you grant the semblance of life and intelligen...,,"bard, cleric, wizard (optional)",Player's Handbook,https://dnd5e.wikidot.com/spell:speak-with-dead


In [21]:
finalSpells = finalSpells.sort_values(by='Level')

In [22]:
finalSpells.head(50)

Unnamed: 0,Name,Level,School,Casting Time,Range,Components,Duration,Description,Upcasting,Spell Lists,Source,URL
358,Mind Sliver,0,Enchantment,1 action,60 feet,V,1 round,you drive a disorienting spike of psychic ener...,this spell’s damage increases by 1d6 when you ...,"sorcerer (optional), warlock (optional), wizar...",Tasha's Cauldron of Everything,https://dnd5e.wikidot.com/spell:mind-sliver
148,Green-Flame Blade,0,Evocation,1 action,Self (5-foot radius),"S, M (a melee weapon worth at least 1 sp)",Instantaneous,you brandish the weapon used in the spell’s ca...,"at 5th level, the melee attack deals an extra ...","artificer, sorcerer (optional), warlock (optio...",Tasha's Cauldron of Everything/Sword Coast Ad...,https://dnd5e.wikidot.com/spell:green-flame-blade
464,Decompose (HB),0,Necromancy,1 action,Touch,"V, S",1 minute,you reach out and touch the corpse of a creatu...,,cleric,Critical Role (Twitter),https://dnd5e.wikidot.com/spell:decompose
40,Encode Thoughts,0,Enchantment,1 action,Self,S,8 hours,"you pull a memory, an idea, or a message from ...",,wizard,Guildmaster's Guide to Ravnica,https://dnd5e.wikidot.com/spell:encode-thoughts
41,True Strike,0,Divination,1 action,30 feet,S,"Concentration, up to 1 round",you extend your hand and point a finger at a t...,,"bard, sorcerer, warlock, wizard",Player's Handbook,https://dnd5e.wikidot.com/spell:true-strike
462,Blade Ward,0,Abjuration,1 action,Self,"V, S",1 round,you extend your hand and trace a sigil of ward...,,"bard, sorcerer, warlock, wizard",Player's Handbook,https://dnd5e.wikidot.com/spell:blade-ward
88,Frostbite,0,Evocation,1 action,60 feet,"V, S",Instantaneous,you cause numbing frost to form on one creatur...,the spell’s damage increases by 1d6 when you r...,"druid, sorcerer, warlock, wizard, artificer",Xanathar's Guide to Everything,https://dnd5e.wikidot.com/spell:frostbite
459,Control Flames,0,Transmutation,1 action,60 feet,S,Instantaneous or 1 hour,you choose nonmagical flame that you can see w...,,"druid, sorcerer, wizard",Xanathar's Guide to Everything,https://dnd5e.wikidot.com/spell:control-flames
274,Dancing Lights,0,Evocation,1 action,120 feet,"V, S, M (a bit of phosphorus or wychwood, or ...","Concentration, up to 1 minute",you create up to four torch-sized lights withi...,,"artificer, bard, sorcerer, wizard",Player's Handbook,https://dnd5e.wikidot.com/spell:dancing-lights
277,Thunderclap,0,Evocation,1 action,Self (5-foot radius),S,Instantaneous,"you create a burst of thunderous sound, which ...",the spell’s damage increases by 1d6 when you r...,"bard, druid, sorcerer, warlock, wizard, artificer",Xanathar's Guide to Everything,https://dnd5e.wikidot.com/spell:thunderclap


### IV. The real deal

In [23]:
#Step IV.1 - Prepare the data to feed to the model

from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

# Create a 'searchable_text' column
finalSpells['searchable_text'] = finalSpells.apply(lambda row: f"{row['Name']} {row['Level']} {row['School']} {row['Casting Time']} {row['Range']} {row['Components']} {row['Duration']} {row['Description']} {row['Spell Lists']} {row['Source']}", axis=1)

# Show example
print(finalSpells[['Name', 'searchable_text']].head())


                  Name                                    searchable_text
358        Mind Sliver  Mind Sliver 0 Enchantment  1 action  60 feet  ...
148  Green-Flame Blade  Green-Flame Blade 0 Evocation  1 action  Self ...
464     Decompose (HB)  Decompose (HB) 0 Necromancy  1 action  Touch  ...
40     Encode Thoughts  Encode Thoughts 0 Enchantment  1 action  Self ...
41         True Strike  True Strike 0 Divination  1 action  30 feet  S...


In [24]:
#Step IV.2 - Embed the searchable text

# Load the embedding model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Generate embeddings for all spells
embeddings = model.encode(finalSpells['searchable_text'].tolist(), show_progress_bar=True)

# Convert to numpy array for FAISS
embeddings_np = np.array(embeddings)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/17 [00:00<?, ?it/s]

In [25]:
#Step IV.3 - Create FAISS index

index = faiss.IndexFlatL2(embeddings_np.shape[1])
index.add(embeddings_np)

In [26]:
#Step IV.4 v2 - Create a function to query the model

def hybrid_search(query, level=None, spell_class=None, k=5):
    filtered_df = finalSpells.copy()

    # Optional filter for level
    if level is not None:
        print(f"Filtering for level {level}")
        filtered_df = filtered_df[filtered_df['Level'] == str(level)]
        print(f"Remaining after level filter: {len(filtered_df)}")

    # Optional filter for class (case-insensitive)
    if spell_class is not None:
        print(f"Filtering for class {spell_class}")
        mask = filtered_df['Spell Lists'].str.lower().str.contains(spell_class.lower(), na=False)
        filtered_df = filtered_df[mask]
        print(f"Remaining after class filter: {len(filtered_df)}")

    if filtered_df.empty:
        return pd.DataFrame()  # Return empty DataFrame for consistency

    # Step 2: Embed the filtered subset
    filtered_texts = filtered_df['searchable_text'].tolist()
    filtered_embeddings = model.encode(filtered_texts, convert_to_numpy=True)

    d = filtered_embeddings.shape[1]
    index = faiss.IndexFlatL2(d)
    index.add(filtered_embeddings)

    query_embedding = model.encode([query], convert_to_numpy=True)

    D, I = index.search(query_embedding, min(k, len(filtered_df)))

    results = filtered_df.iloc[I[0]].reset_index(drop=True)
    return results


In [27]:
results = hybrid_search("burn enemies", level=3, spell_class="sorcerer", k=5)

if not results.empty:
    print(results[['Name', 'Level','Description']])
else:
    print("No spells found for the given level and class.")


Filtering for level 3
Remaining after level filter: 75
Filtering for class sorcerer
Remaining after class filter: 34
                    Name Level  \
0               Fireball     3   
1           Flame Arrows     3   
2             Antagonize     3   
3    Ashardalon's Stride     3   
4  Melf's Minute Meteors     3   

                                         Description  
0  a bright streak flashes from your pointing fin...  
1  you touch a quiver containing arrows or bolts....  
2  you whisper magical words that antagonize one ...  
3  the billowing flames of a dragon blast from yo...  
4  you create six tiny meteors in your space. the...  


In [28]:
results.head()

Unnamed: 0,Name,Level,School,Casting Time,Range,Components,Duration,Description,Upcasting,Spell Lists,Source,URL,searchable_text
0,Fireball,3,evocation,1 action,150 feet,"V, S, M (a tiny ball of bat guano and sulfur)",Instantaneous,a bright streak flashes from your pointing fin...,when you cast this spell using a spell slot of...,"sorcerer, wizard",Player's Handbook,https://dnd5e.wikidot.com/spell:fireball,"Fireball 3 evocation 1 action 150 feet V, S..."
1,Flame Arrows,3,transmutation,1 action,Touch,"V, S","Concentration, up to 1 hour",you touch a quiver containing arrows or bolts....,when you cast this spell using a spell slot of...,"druid, ranger, sorcerer, wizard, artificer",Xanathar's Guide to Everything,https://dnd5e.wikidot.com/spell:flame-arrows,Flame Arrows 3 transmutation 1 action Touch ...
2,Antagonize,3,Enchantment,1 action,30 feet,"V, S, M (a playing card depicting a rogue)",Instantaneous,you whisper magical words that antagonize one ...,when you cast this spell using a spell slot of...,"bard, sorcerer, warlock, wizard",The Book of Many Things,https://dnd5e.wikidot.com/spell:antagonize,Antagonize 3 Enchantment 1 action 30 feet V...
3,Ashardalon's Stride,3,Transmutation,1 bonus action,Self,"V,S","Concentration, up to 1 minute",the billowing flames of a dragon blast from yo...,when you cast this spell using a spell slot of...,"artificer, ranger, sorcerer, wizard",Fizban's Treasury of Dragons,https://dnd5e.wikidot.com/spell:ashardalons-st...,Ashardalon's Stride 3 Transmutation 1 bonus a...
4,Melf's Minute Meteors,3,evocation,1 action,Self,"V, S, M (niter, sulfur, and pine tar formed i...","Concentration, up to 10 minutes",you create six tiny meteors in your space. the...,when you cast this spell using a spell slot of...,"sorcerer, wizard",Xanathar's Guide to Everything,https://dnd5e.wikidot.com/spell:melfs-minute-m...,Melf's Minute Meteors 3 evocation 1 action S...


In [29]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Load model and tokenizer once
llm_model_name = "google/flan-t5-small"
tokenizer = AutoTokenizer.from_pretrained(llm_model_name)
model_llm = AutoModelForSeq2SeqLM.from_pretrained(llm_model_name)

def llm_rewrite_query_hf(user_query):
    prompt = f"Rewrite this D&D spell search query to focus it for filtering a spell database:\n\n{user_query}\n\nRewritten query:"
    inputs = tokenizer(prompt, return_tensors="pt", max_length=128, truncation=True)
    outputs = model_llm.generate(**inputs, max_length=50)
    rewritten = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return rewritten


tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/308M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

In [30]:
def hybrid_search_with_llm_hf(query, level=None, spell_class=None, k=5):
    rewritten_query = llm_rewrite_query_hf(query)
    print(f"Original query: {query}")
    print(f"Rewritten query: {rewritten_query}")

    level = str(level) if level is not None else None
    spell_class = spell_class if spell_class != "" else None

    filtered_df = finalSpells.copy()
    if level:
        filtered_df = filtered_df[filtered_df['Level'] == level]
    if spell_class:
        filtered_df = filtered_df[filtered_df['Spell Lists'].str.contains(spell_class, case=False, na=False)]

    if filtered_df.empty:
        return f"No spells found for level={level} and class={spell_class}"

    filtered_texts = filtered_df['searchable_text'].tolist()
    filtered_embeddings = model.encode(filtered_texts, convert_to_numpy=True)

    d = filtered_embeddings.shape[1]
    index = faiss.IndexFlatL2(d)
    index.add(filtered_embeddings)

    query_embedding = model.encode([rewritten_query], convert_to_numpy=True)
    D, I = index.search(query_embedding, min(k, len(filtered_df)))

    results = filtered_df.iloc[I[0]].reset_index(drop=True)
    return results


In [31]:
def summarize_spells_with_llm(spells_df, user_query, max_spells=5):
    # Format the spells info into a plain text summary prompt
    spells_summary = ""
    for i, row in spells_df.head(max_spells).iterrows():
        spells_summary += f"- {row['Name']} (Level {row['Level']} {row['School']}): {row['Description'][:150].strip()}...\n"

    prompt = f"User query: {user_query}\n\nBased on the following D&D spells, provide a friendly summary highlighting the best options:\n\n{spells_summary}\nSummary:"

    inputs = tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True)
    outputs = model_llm.generate(**inputs, max_length=150, do_sample=True, top_p=0.95, temperature=0.7)
    summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return summary


In [32]:
def hybrid_search_and_summarize(query, level=None, spell_class=None, k=5):
    results_df = hybrid_search_with_llm_hf(query, level, spell_class, k)

    if isinstance(results_df, str):
        # means "No spells found" message or error string
        return results_df

    summary = summarize_spells_with_llm(results_df, query, max_spells=k)
    return summary


In [33]:
summary = hybrid_search_and_summarize("burn enemies", level=2, spell_class="sorcerer", k=5)
print(summary)


Original query: burn enemies
Rewritten query: Burn enemies is a list of all the spells that are in the database.
Scorching Ray (Level 2 evocation): you create three rays of fire and hurl them at targets within range. you can hurl them at one target or several. make a ranged spell attack for each r...


In [50]:
import gradio as gr

def gradio_spell_search(query, level, spell_class, k):
    level = str(level) if level is not None else None
    spell_class = spell_class if spell_class != "" else None

    results = hybrid_search(query, level=level, spell_class=spell_class, k=int(k))

    if isinstance(results, str):  # No results found
        return results
    else:
        # Format results nicely
        return results[['Name', 'Level', 'Spell Lists', 'Description', 'URL']].to_markdown(index=False)

# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## D&D 5e Spell Search")
    gr.Markdown("Search for spells based on a natural language query, with optional level and class filtering.")
    with gr.Column():
        query_input = gr.Textbox(label="Search Query", placeholder="Describe what you're looking for...")
        level_input = gr.Number(label="Spell Level (optional)", value=None)
        class_input = gr.Textbox(label="Class Filter (optional)", placeholder="e.g. wizard, sorcerer...")
        k_input = gr.Number(label="Top K Results", value=5)
        search_button = gr.Button("Search")
    with gr.Row(): # Use a Row to control width
        output_markdown = gr.Markdown(label="Matching Spells")

    search_button.click(
        fn=gradio_spell_search,
        inputs=[query_input, level_input, class_input, k_input],
        outputs=output_markdown
    )

demo.launch()

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://5f0578ca1ddf02addc.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


