In [1]:
!pip install -q langchain sqlalchemy pandas transformers

In [2]:
from google.colab import files

uploaded = files.upload()

KeyboardInterrupt: 

In [9]:
# This script normalizes restaurant CSV data and loads it into an SQLite database

import pandas as pd
import sqlite3
import os

from google.colab import files

uploaded = files.upload()

dfs = []  # list to hold all dataframes

for csv_filename in uploaded.keys():
    temp_df = pd.read_csv(csv_filename)
    dfs.append(temp_df)

# Concatenate all into one big dataframe
df = pd.concat(dfs, ignore_index=True)

# Now df has data from ALL files
print(df.head())


# Step 1: Setup SQLite DB
db_name = "normalized_restaurant_data.db"
if os.path.exists(db_name):
    os.remove(db_name)

conn = sqlite3.connect(db_name)
cur = conn.cursor()

# Step 2: Create tables for normalized schema
cur.executescript("""
CREATE TABLE IF NOT EXISTS restaurants (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT UNIQUE,
    contact_info TEXT
);

CREATE TABLE IF NOT EXISTS locations (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    restaurant_id INTEGER,
    address TEXT,
    FOREIGN KEY (restaurant_id) REFERENCES restaurants(id)
);

CREATE TABLE IF NOT EXISTS categories (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT UNIQUE
);

CREATE TABLE IF NOT EXISTS menu_items (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    restaurant_id INTEGER,
    category_id INTEGER,
    item_name TEXT,
    description TEXT,
    options TEXT,
    tags TEXT,
    FOREIGN KEY (restaurant_id) REFERENCES restaurants(id),
    FOREIGN KEY (category_id) REFERENCES categories(id)
);
""")

# Step 3: Populate normalized tables
def get_or_create_id(table, column, value):
    cur.execute(f"SELECT id FROM {table} WHERE {column} = ?", (value,))
    result = cur.fetchone()
    if result:
        return result[0]
    cur.execute(f"INSERT INTO {table} ({column}) VALUES (?)", (value,))
    return cur.lastrowid

for _, row in df.iterrows():
    # Restaurant
    cur.execute("SELECT id FROM restaurants WHERE name = ?", (row["restaurant"],))
    res = cur.fetchone()
    if res:
        restaurant_id = res[0]
    else:
        cur.execute("INSERT INTO restaurants (name, contact_info) VALUES (?, ?)",
                    (row["restaurant"], row["contact_info"]))
        restaurant_id = cur.lastrowid

    # Normalize address and state
    # address = str(row["address"]).strip().lower()

    # Check if location already exists for this restaurant
    cur.execute("""
                SELECT id FROM locations
                WHERE restaurant_id = ? AND LOWER(address) = ?
                """, (restaurant_id, address))
    loc_res = cur.fetchone()
    if not cur.fetchone():
      cur.execute("""
                  INSERT INTO locations (restaurant_id, address)
                  VALUES (?, ?)
                  """, (restaurant_id, row["address"]))

    # Category
    category_id = get_or_create_id("categories", "name", row["category"])

    # Menu Item
    cur.execute("""INSERT INTO menu_items
        (restaurant_id, category_id, item_name, description, options, tags)
        VALUES (?, ?, ?, ?, ?, ?)""",
        (restaurant_id, category_id, row["item_name"], row["description"], row["options"], row["tags"]))

# Step 4: Finalize
conn.commit()


# Step 5: View tables
def view_table(table_name):
    df = pd.read_sql_query(f"SELECT * FROM {table_name} LIMIT 10", conn)
    print(f"--- {table_name} ---")
    display(df)

tables = ["restaurants", "locations", "categories", "menu_items"]
for table in tables:
    view_table(table)

conn.close()

"Database created and normalized data loaded successfully."


Saving formatted_rasa_data_flat.csv to formatted_rasa_data_flat (3).csv
Saving formatted_products_spicezone.csv to formatted_products_spicezone (3).csv
Saving formatted_products_nasha.csv to formatted_products_nasha (3).csv
Saving formatted_products_chaiatacos.csv to formatted_products_chaiatacos (3).csv
Saving formatted_products_cafespice.csv to formatted_products_cafespice (3).csv
  restaurant            category                item_name  \
0       Rasa  CHEF CURATED BOWLS  Tikka Chance On Me Bowl   
1       Rasa  CHEF CURATED BOWLS  Tikka Chance On Me Bowl   
2       Rasa  CHEF CURATED BOWLS  Tikka Chance On Me Bowl   
3       Rasa  CHEF CURATED BOWLS  Tikka Chance On Me Bowl   
4       Rasa  CHEF CURATED BOWLS  Tikka Chance On Me Bowl   

                                         description options tags  \
0  chicken tikka | tomato garlic sauce | basmati ...     NaN  NaN   
1  chicken tikka | tomato garlic sauce | basmati ...     NaN  NaN   
2  chicken tikka | tomato garlic sauce |

Unnamed: 0,id,name,contact_info
0,1,Rasa,https://www.rasa.co/
1,2,Spice Zone,
2,3,Nasha,
3,4,Chaia Tacos,
4,5,Cafe Spice,


--- locations ---


Unnamed: 0,id,restaurant_id,address
0,1,1,"1247 First Street SE, Washington, D.C, 20003"
1,2,1,"485 K Street NW, Washington, D.C, 20001"
2,3,1,"12033 Rockville Pike, Rockville, MD 20852"
3,4,1,"2200 Crystal Drive, Ste F, Arlington, VA, 22202"
4,5,1,"2905 District Avenue #160, Fairfax, VA, 22031"
5,6,1,"1247 First Street SE, Washington, D.C, 20003"
6,7,1,"485 K Street NW, Washington, D.C, 20001"
7,8,1,"12033 Rockville Pike, Rockville, MD 20852"
8,9,1,"2200 Crystal Drive, Ste F, Arlington, VA, 22202"
9,10,1,"2905 District Avenue #160, Fairfax, VA, 22031"


--- categories ---


Unnamed: 0,id,name
0,1,CHEF CURATED BOWLS
1,2,BUILD YOUR OWN BOWL
2,3,Wraps + Burritos
3,4,sides and sweets
4,5,DRINKS
5,6,LIBATIONS (21+)
6,7,Appetizers
7,8,Soup & Salad
8,9,Traditional Offerings
9,10,Please Choose your Sauce from this section to ...


--- menu_items ---


Unnamed: 0,id,restaurant_id,category_id,item_name,description,options,tags
0,1,1,1,Tikka Chance On Me Bowl,chicken tikka | tomato garlic sauce | basmati ...,,
1,2,1,1,Tikka Chance On Me Bowl,chicken tikka | tomato garlic sauce | basmati ...,,
2,3,1,1,Tikka Chance On Me Bowl,chicken tikka | tomato garlic sauce | basmati ...,,
3,4,1,1,Tikka Chance On Me Bowl,chicken tikka | tomato garlic sauce | basmati ...,,
4,5,1,1,Tikka Chance On Me Bowl,chicken tikka | tomato garlic sauce | basmati ...,,
5,6,1,1,lamb kebab Bowl,lamb kebab | peanut sesame sauce | basmati ric...,,
6,7,1,1,lamb kebab Bowl,lamb kebab | peanut sesame sauce | basmati ric...,,
7,8,1,1,lamb kebab Bowl,lamb kebab | peanut sesame sauce | basmati ric...,,
8,9,1,1,lamb kebab Bowl,lamb kebab | peanut sesame sauce | basmati ric...,,
9,10,1,1,lamb kebab Bowl,lamb kebab | peanut sesame sauce | basmati ric...,,


'Database created and normalized data loaded successfully.'

In [10]:
pip install -U langchain-community

Collecting langchain-community
  Downloading langchain_community-0.3.21-py3-none-any.whl.metadata (2.4 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.9.1-py3-none-any.whl.metadata (3.8 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting python-dotenv>=0.21.0 (from pydantic-settings<3.0.0,>=2.4.0->langchain-community)
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB

In [11]:
from langchain.chains import create_sql_query_chain
from langchain.sql_database import SQLDatabase


# Setup LangChain SQL connection
db = SQLDatabase.from_uri(f"sqlite:///{db_name}")

In [12]:
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from langchain.llms import HuggingFacePipeline

model_name = "google/flan-t5-large"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

hf_pipeline = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=512,
    temperature=0.5,
    repetition_penalty=1.3
)

llm = HuggingFacePipeline(pipeline=hf_pipeline)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/662 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Device set to use cpu
  llm = HuggingFacePipeline(pipeline=hf_pipeline)


In [13]:
pip install langchain-experimental

Collecting langchain-experimental
  Downloading langchain_experimental-0.3.4-py3-none-any.whl.metadata (1.7 kB)
Downloading langchain_experimental-0.3.4-py3-none-any.whl (209 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m209.2/209.2 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: langchain-experimental
Successfully installed langchain-experimental-0.3.4


In [14]:
from langchain.prompts import PromptTemplate
from langchain_experimental.sql import SQLDatabaseChain

custom_prompt = PromptTemplate.from_template("""
Given an input question, create a syntactically correct SQL query to run against a restaurant database.

Only use the following tables:
- restaurants(id, name, contact_info)
- locations(id, restaurant_id, address, state)
- categories(id, name)
- menu_items(id, restaurant_id, category_id, item_name, description, options, tags)

Use only necessary columns to answer the question.
Do not use SELECT *.

Question: {input}
SQL Query:
""")

sql_chain = SQLDatabaseChain.from_llm(
    llm=llm,
    db=db,
    prompt=custom_prompt,
    return_intermediate_steps=True,
    verbose=True,
)


In [15]:
!pip show langchain

Name: langchain
Version: 0.3.23
Summary: Building applications with LLMs through composability
Home-page: 
Author: 
Author-email: 
License: MIT
Location: /usr/local/lib/python3.11/dist-packages
Requires: langchain-core, langchain-text-splitters, langsmith, pydantic, PyYAML, requests, SQLAlchemy
Required-by: langchain-community


In [21]:
user_query = "Names of all restaurants"

# Step 1: Generate SQL and intermediate steps
response = sql_chain(user_query)

# Step 2: Inspect the response structure
print("Intermediate Steps Response:\n", response['intermediate_steps'])

# Ensure generated_sql is a string
generated_sql = next(
    (step for step in response['intermediate_steps'] if isinstance(step, str) and 'SELECT' in step.upper()),
    ''
)

print("Generated SQL Query:\n", generated_sql)

# Step 3: Run the SQL manually on your db
from sqlalchemy import text

with db._engine.connect() as connection:
    sql_result = connection.execute(text(generated_sql)).fetchall()
    result_as_list = [dict(row._mapping) for row in sql_result]

print("SQL Query Result:\n", result_as_list)

# Step 4: Feed the result back into Flan-T5 for natural text answer
result_prompt = f"""
You are given the result of a SQL query:
{result_as_list}

Based on this result, write a human-readable answer to the original question:
"{user_query}"

Give a non repititive human like answer.
Answer:
"""

# Generate text
final_answer = llm.invoke(result_prompt)
print(final_answer)




[1m> Entering new SQLDatabaseChain chain...[0m
Names of all restaurants
SQLQuery:



[32;1m[1;3mSELECT name FROM restaurants[0m
SQLResult: [33;1m[1;3m[('Cafe Spice',), ('Chaia Tacos',), ('Nasha',), ('Rasa',), ('Spice Zone',)][0m
Answer:



[32;1m[1;3mSELECT name FROM restaurants[0m
[1m> Finished chain.[0m
Intermediate Steps Response:
 [{'input': 'Names of all restaurants\nSQLQuery:', 'top_k': '5', 'dialect': 'sqlite', 'table_info': '\nCREATE TABLE categories (\n\tid INTEGER, \n\tname TEXT, \n\tPRIMARY KEY (id), \n\tUNIQUE (name)\n)\n\n/*\n3 rows from categories table:\nid\tname\n1\tCHEF CURATED BOWLS\n2\tBUILD YOUR OWN BOWL\n3\tWraps + Burritos\n*/\n\n\nCREATE TABLE locations (\n\tid INTEGER, \n\trestaurant_id INTEGER, \n\taddress TEXT, \n\tPRIMARY KEY (id), \n\tFOREIGN KEY(restaurant_id) REFERENCES restaurants (id)\n)\n\n/*\n3 rows from locations table:\nid\trestaurant_id\taddress\n1\t1\t1247 First Street SE, Washington, D.C, 20003\n2\t1\t485 K Street NW, Washington, D.C, 20001\n3\t1\t12033 Rockville Pike, Rockville, MD 20852\n*/\n\n\nCREATE TABLE menu_items (\n\tid INTEGER, \n\trestaurant_id INTEGER, \n\tcategory_id INTEGER, \n\titem_name TEXT, \n\tdescription TEXT, \n\toptions TEXT, \n\ttags TEXT, \n\tPRIMARY KEY



['name': 'Cafe Spice', 'name': 'Chaia Tacos', 'name': 'Nasha', 'name': 'Rasa', 'name': 'Spice Zone']


In [17]:
!pip install gradio


Collecting gradio
  Downloading gradio-5.25.2-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.6-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 (

In [29]:
import gradio as gr
from sqlalchemy import text

def chatbot(user_query):
    try:
        # Step 1: Generate SQL and intermediate steps
        response = sql_chain(user_query)

        # Step 2: Inspect the response structure
        print("Intermediate Steps Response:\n", response['intermediate_steps'])

        # Ensure generated_sql is a string
        generated_sql = next(
            (step for step in response['intermediate_steps'] if isinstance(step, str) and 'SELECT' in step.upper()),
            ''
        )

        print("Generated SQL Query:\n", generated_sql)

        # Step 3: Run the SQL manually on your db
        with db._engine.connect() as connection:
            sql_result = connection.execute(text(generated_sql)).fetchall()
            result_as_list = [dict(row._mapping) for row in sql_result]

        print("SQL Query Result:\n", result_as_list)

        # Step 4: Feed the result back into Flan-T5 for natural text answer
        result_prompt = f"""
        You are given the result of a SQL query:
        {result_as_list}

        Based on this result, write a human-readable answer to the original question:
        "{user_query}"

        Give a non-repetitive human-like answer.
        Answer:
        """

        # Generate text
        final_answer = llm.invoke(result_prompt)
        print(final_answer)

        return f"""
Answer: {final_answer}

SQL Query: {generated_sql}
"""

    except Exception as e:
        return f"⚠️ Error: {str(e)}"


# Custom theme setup
custom_theme = gr.themes.Base(
    primary_hue="rose",
    secondary_hue="red",
).set(
    body_background_fill="#FAFAFA",
    button_primary_background_fill="#E23744",
    button_primary_text_color="#FFFFFF",
    button_primary_background_fill_hover="#A6192E",
    input_background_fill="#FFFFFF",
    input_border_color="#E23744",
)

# Gradio interface
gr.Interface(
    fn=chatbot,
    inputs=gr.Textbox(lines=2, placeholder="Ask me about the menu..."),
    outputs="text",
    title="Allergen and Food Info Chatbot",
    description="Ask about categories, items, allergens, vegetarian options, and more!",
    theme=custom_theme,
).launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://27d1db369416cc902b.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


