In [1]:
!pip install -q langchain sqlalchemy pandas transformers

In [None]:
# This script normalizes restaurant CSV data and loads it into an SQLite database

import pandas as pd
import sqlite3
import os
from tkinter import Tk
from tkinter.filedialog import askopenfilename

# Step 0: Pick the CSV file interactively
# NOTE(review): a Tk dialog needs a local display; confirm this cell is meant to
# run on a desktop kernel rather than a hosted one.
root = Tk()
root.withdraw()  # Hide the empty root window; only the file dialog should appear
try:
    csv_filename = askopenfilename(
        title="Select your restaurant CSV file",
        filetypes=[("CSV Files", "*.csv")],
    )
finally:
    root.destroy()  # Release the hidden root window once the dialog is done

# A cancelled dialog yields an empty value; fail here with a clear message
# instead of letting pd.read_csv choke on an empty path.
if not csv_filename:
    raise FileNotFoundError("No CSV file selected.")

# Step 1: Load CSV file
df = pd.read_csv(csv_filename)

# Step 2: Setup SQLite DB
# Any previous database file is deleted first, so every run starts from an
# empty database and re-running the cell never duplicates rows.
db_name = "normalized_restaurant_data.db"
if os.path.exists(db_name):
    os.remove(db_name)
conn = sqlite3.connect(db_name)
cur = conn.cursor()

# Step 3: Create tables for normalized schema
# Four tables: restaurants and categories hold unique names; locations and
# menu_items point back to them through restaurant_id / category_id.
schema_sql = """
CREATE TABLE IF NOT EXISTS restaurants (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT UNIQUE,
    contact_info TEXT
);

CREATE TABLE IF NOT EXISTS locations (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    restaurant_id INTEGER,
    address TEXT,
    state TEXT,
    FOREIGN KEY (restaurant_id) REFERENCES restaurants(id)
);

CREATE TABLE IF NOT EXISTS categories (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT UNIQUE
);

CREATE TABLE IF NOT EXISTS menu_items (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    restaurant_id INTEGER,
    category_id INTEGER,
    item_name TEXT,
    description TEXT,
    options TEXT,
    tags TEXT,
    FOREIGN KEY (restaurant_id) REFERENCES restaurants(id),
    FOREIGN KEY (category_id) REFERENCES categories(id)
);
"""
# Run all CREATE TABLE statements in one call.
cur.executescript(schema_sql)

# Step 4: Populate normalized tables
def get_or_create_id(table, column, value):
    """Return the id of the row in `table` whose `column` equals `value`.

    If no such row exists, a new row holding only that column value is
    inserted and its freshly assigned id is returned. Uses the notebook-level
    cursor `cur`. `table` and `column` are interpolated into the SQL text, so
    they must be trusted identifiers; only `value` is bound as a parameter.
    """
    lookup_sql = f"SELECT id FROM {table} WHERE {column} = ?"
    found = cur.execute(lookup_sql, (value,)).fetchone()
    if not found:
        # Nothing matched: create the row and hand back its new id.
        cur.execute(f"INSERT INTO {table} ({column}) VALUES (?)", (value,))
        return cur.lastrowid
    return found[0]

# Load every CSV row inside a single transaction: `with conn` commits when the
# loop finishes and rolls back if any row fails; the connection is closed in
# both cases so a failed run does not leave the database file locked/open.
try:
    with conn:
        for _, row in df.iterrows():
            # Empty CSV cells are read as NaN; store them as SQL NULL.
            rec = {col: (None if pd.isna(val) else val) for col, val in row.items()}

            # Restaurant: reuse the row for this name, otherwise insert it.
            # `IS` is used instead of `=` because `=` never matches NULL, which
            # would insert a fresh duplicate for every row with a missing value.
            cur.execute("SELECT id FROM restaurants WHERE name IS ?", (rec["restaurant"],))
            res = cur.fetchone()
            if res:
                restaurant_id = res[0]
            else:
                cur.execute("INSERT INTO restaurants (name, contact_info) VALUES (?, ?)",
                            (rec["restaurant"], rec["contact_info"]))
                restaurant_id = cur.lastrowid

            # Location: insert each (restaurant, address, state) combination once.
            cur.execute("""SELECT id FROM locations WHERE restaurant_id = ? AND address IS ? AND state IS ?""",
                        (restaurant_id, rec["address"], rec["state"]))
            if not cur.fetchone():
                cur.execute("""INSERT INTO locations (restaurant_id, address, state) VALUES (?, ?, ?)""",
                            (restaurant_id, rec["address"], rec["state"]))

            # Category: look up by name, creating the category on first sight.
            category_id = get_or_create_id("categories", "name", rec["category"])

            # Menu Item: one row per CSV line, linked to its restaurant and category.
            cur.execute("""INSERT INTO menu_items
                (restaurant_id, category_id, item_name, description, options, tags)
                VALUES (?, ?, ?, ?, ?, ?)""",
                (restaurant_id, category_id, rec["item_name"], rec["description"],
                 rec["options"], rec["tags"]))
finally:
    conn.close()

print("Database created and normalized data loaded successfully.")


Saving formatted_rasa_data_flat.csv to formatted_rasa_data_flat (5).csv
Saving formatted_products_spicezone.csv to formatted_products_spicezone (5).csv
Saving formatted_products_nasha.csv to formatted_products_nasha (5).csv
Saving formatted_products_chaiatacos.csv to formatted_products_chaiatacos (5).csv
Saving formatted_products_cafespice.csv to formatted_products_cafespice (5).csv


'Database created and normalized data loaded successfully.'

In [4]:
pip install -U langchain-community



In [5]:
from langchain.chains import create_sql_query_chain
from langchain.sql_database import SQLDatabase


# Setup LangChain SQL connection
# Build the SQLAlchemy-style URI for the SQLite file named by `db_name`,
# then hand it to LangChain's SQLDatabase wrapper.
sqlite_uri = f"sqlite:///{db_name}"
db = SQLDatabase.from_uri(sqlite_uri)

In [6]:
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from langchain.llms import HuggingFacePipeline

# Load the FLAN-T5 checkpoint and its tokenizer from the Hugging Face Hub.
model_name = "google/flan-t5-large"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Text-to-text generation pipeline used to turn questions into SQL.
# Decoding is explicitly non-sampling so the same question yields the same
# query. The previous `temperature=0.5` was dropped: temperature only applies
# when sampling is enabled, so without `do_sample=True` it had no effect.
hf_pipeline = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=512,
    do_sample=False,
    repetition_penalty=1.3,
)

# Wrap the pipeline so LangChain chains can call it as an LLM.
llm = HuggingFacePipeline(pipeline=hf_pipeline)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Device set to use cpu
  llm = HuggingFacePipeline(pipeline=hf_pipeline)


In [7]:
pip install langchain-experimental



In [11]:
from langchain.prompts import PromptTemplate
from langchain_experimental.sql import SQLDatabaseChain

# Prompt text for the SQL chain: it spells out the four tables and their
# columns and asks the model for a query answering {input}.
sql_prompt_text = """
Given an input question, create a syntactically correct SQL query to run against a restaurant database.

Only use the following tables:
- restaurants(id, name, contact_info)
- locations(id, restaurant_id, address, state)
- categories(id, name)
- menu_items(id, restaurant_id, category_id, item_name, description, options, tags)

Use only necessary columns to answer the question.
Do not use SELECT *.

Question: {input}
SQL Query:
"""
custom_prompt = PromptTemplate.from_template(sql_prompt_text)

# Build the chain from the LLM, the database and the prompt. Intermediate
# steps are returned so the generated SQL can be inspected afterwards, and
# verbose mode prints the chain's progress while it runs.
chain_options = {
    "llm": llm,
    "db": db,
    "prompt": custom_prompt,
    "return_intermediate_steps": True,
    "verbose": True,
}
sql_chain = SQLDatabaseChain.from_llm(**chain_options)


In [9]:
!pip show langchain

Name: langchain
Version: 0.3.23
Summary: Building applications with LLMs through composability
Home-page: 
Author: 
Author-email: 
License: MIT
Location: /usr/local/lib/python3.11/dist-packages
Requires: langchain-core, langchain-text-splitters, langsmith, pydantic, PyYAML, requests, SQLAlchemy
Required-by: langchain-community


In [17]:
from sqlalchemy import text

user_query = "Names of all restaurants"

# Step 1: Generate SQL and intermediate steps
# `.invoke` matches how the chain is called elsewhere in this notebook.
response = sql_chain.invoke(user_query)

# Step 2: Inspect the response structure
print("Intermediate Steps Response:\n", response['intermediate_steps'])

# The intermediate steps mix dicts and strings; take the first string that
# looks like a SELECT statement as the generated SQL.
generated_sql = next(
    (step for step in response['intermediate_steps'] if isinstance(step, str) and 'SELECT' in step.upper()),
    ''
)

# Stop early with a clear message rather than sending an empty statement to
# the database when the model produced no usable SQL.
if not generated_sql:
    raise ValueError("The chain did not produce a SELECT statement; nothing to execute.")

print("Generated SQL Query:\n", generated_sql)

# Step 3: Run the SQL manually on your db
# NOTE(review): `_engine` is a private attribute of the LangChain wrapper and
# may change between versions.
with db._engine.connect() as connection:
    sql_result = connection.execute(text(generated_sql)).fetchall()
    # Convert each row to a plain dict keyed by column name.
    result_as_list = [dict(row._mapping) for row in sql_result]

print("SQL Query Result:\n", result_as_list)

# Step 4: Feed the result back into Flan-T5 for natural text answer
result_prompt = f"""
You are given the result of a SQL query:
{result_as_list}

Based on this result, write a human-readable answer to the original question:
"{user_query}"

Answer:
"""

# Generate text
final_answer = llm.invoke(result_prompt)
print("\nFinal Text Answer:\n", final_answer)




[1m> Entering new SQLDatabaseChain chain...[0m
Names of all restaurants
SQLQuery:



[32;1m[1;3mSELECT name FROM restaurants[0m
SQLResult: [33;1m[1;3m[('Rasa',)][0m
Answer:



[32;1m[1;3mSELECT name FROM restaurants[0m
[1m> Finished chain.[0m
Intermediate Steps Response:
 [{'input': 'Names of all restaurants\nSQLQuery:', 'top_k': '5', 'dialect': 'sqlite', 'table_info': '\nCREATE TABLE categories (\n\tid INTEGER, \n\tname TEXT, \n\tPRIMARY KEY (id), \n\tUNIQUE (name)\n)\n\n/*\n3 rows from categories table:\nid\tname\n1\tCHEF CURATED BOWLS\n2\tBUILD YOUR OWN BOWL\n3\tWraps + Burritos\n*/\n\n\nCREATE TABLE locations (\n\tid INTEGER, \n\trestaurant_id INTEGER, \n\taddress TEXT, \n\tstate TEXT, \n\tPRIMARY KEY (id), \n\tFOREIGN KEY(restaurant_id) REFERENCES restaurants (id)\n)\n\n/*\n3 rows from locations table:\nid\trestaurant_id\taddress\tstate\n1\t1\t1247 First Street SE, Washington, D.C, 20003\tDC\n2\t1\t485 K Street NW, Washington, D.C, 20001\tDC\n3\t1\t12033 Rockville Pike, Rockville, MD 20852\tmaryland\n*/\n\n\nCREATE TABLE menu_items (\n\tid INTEGER, \n\trestaurant_id INTEGER, \n\tcategory_id INTEGER, \n\titem_name TEXT, \n\tdescription TEXT, \n\topt




Final Text Answer:
 name = 'Rasa'


In [18]:
!pip install gradio




In [19]:
import gradio as gr

def chatbot(query):
    """Answer a question through the SQL chain and format it for the UI.

    Args:
        query: The user's natural-language question.

    Returns:
        A text block with the chain's answer followed by a "Sources" section
        listing the SQL statement(s) found in the chain's intermediate steps.
    """
    result = sql_chain.invoke(query)
    final_answer = result["result"]

    # The SQL chain returns "intermediate_steps" (a mix of dicts and strings),
    # not "source_documents"; reading the latter raised KeyError on every call.
    # Show the distinct SQL statements found in those steps as the sources.
    sql_statements = []
    for step in result.get("intermediate_steps") or []:
        if isinstance(step, str) and "SELECT" in step.upper():
            statement = step.strip()
            if statement not in sql_statements:
                sql_statements.append(statement)

    sources = "\n".join(sql_statements) if sql_statements else "(no SQL query captured)"
    return f"🍽️ Answer:\n{final_answer}\n\n📋 Sources:\n{sources}"


# Red/rose theme for the chatbot UI: start from Gradio's Base theme and
# override the page background, primary button and input colours.
# (Stray traceback text that had been pasted into the argument list, making
# the cell a syntax error, has been removed.)
custom_theme = gr.themes.Base(
    primary_hue="rose",
    secondary_hue="red",
).set(
    body_background_fill="#FAFAFA",
    button_primary_background_fill="#E23744",
    button_primary_text_color="#FFFFFF",
    button_primary_background_fill_hover="#A6192E",
    input_background_fill="#FFFFFF",
    input_border_color="#E23744",
)

# Build a single-textbox UI around `chatbot` and start the server.
# NOTE: share=True publishes the app on a temporary public URL, so anyone
# with the link can query the database through the chatbot.
gr.Interface(
    fn=chatbot,
    inputs=gr.Textbox(lines=2, placeholder="Ask me about the menu..."),
    outputs="text",
    title="Allergen and Food Info Chatbot",  # fixed typo: was "Alergen"
    description="Ask about categories, items, allergens, vegetarian options and more!",
    theme=custom_theme,
).launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://e363120cdf4ca38a33.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


