In [1]:
import pandas as pd
import dbio

In [2]:
import os
import openai
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file

openai.api_key  = os.getenv('OPENAI_API_KEY')

In [3]:
from langchain.prompts.prompt import PromptTemplate
from langchain import SQLDatabase, SQLDatabaseChain
from langchain.chat_models import ChatOpenAI

# First pass:

Put everything in one table for the initial prototype.

In [None]:
# columns = Item type, Prices (Large, Medium, Small), Description
menu = {'Pepperoni pizza': ['pizza', '12.95, 10, 7', 'Our most popular'],
        'Cheese pizza':  ['pizza', '10.95, 9.25, 6.50', 'Extreme cheesiness'],
        'Eggplant pizza':  ['pizza', '11.95, 9.75, 6.75', 'A contemporary classic'],
        'Fries': ['side item', '4.50, 3.50', 'Available for a limited time'],
        'Greek salad': ['side item', '7.25', 'Positively pantheonic'],
        'Extra cheese': ['topping', '2.00', ''],
        'Mushrooms': ['topping', '1.50', 'Non-hallucigenic'],
        'Sausage': ['topping', '3.00', 'Spicy with a hint of sweet'],
        'Canadian bacon': ['topping', '3.50', ''],
        'AI sauce': ['topping', '1.50', 'No human would have made it this way'],
        'Peppers': ['topping', '1.00', ''],
        'Coke': ['drink', '3.00, 2.00, 1.00', '3 Liter, 2 Liter, 16 ounces'],
        'Sprite': ['drink', '3.00, 2.00, 1.00', '3 Liter, 2 Liter, 16 ounces'],
        'Bottled water': ['drink', '5.00', '20 ounces'],
        }

menu_df = pd.DataFrame(menu).T.reset_index()
menu_df.columns = ['Item', 'Type', 'Prices (Large, Medium, Small)', 'Description']
menu_df

In [None]:
# store table in pizza_v1.db
cobj = dbio.connectors.SQLite('pizza_v1.db')
cobj.write(menu_df, 'menu', if_exists='replace')

cobj.read('select count(*) from menu')

In [None]:
from langchain import OpenAI, SQLDatabase, SQLDatabaseChain

db = SQLDatabase.from_uri("sqlite:///./pizza_v1.db")
llm = OpenAI(model_name="gpt-3.5-turbo"temperature=0, verbose=True)

In [None]:
# database chain

db_chain = SQLDatabaseChain.from_llm(llm, db, verbose=True)

In [None]:
# query

db_chain.run("How many kinds of pizza do you offer?")

In [None]:
# query - drinks

db_chain.run("What drinks can I order")

In [None]:
# query - bottled water

db_chain.run("lease tell me how many sizes of bottled water \
                       you have. Do you know how big they are?")

Try again with updated prompt:
 - check in greater 1 field
 - lower case text

In [None]:
from langchain.prompts.prompt import PromptTemplate

_DEFAULT_TEMPLATE = """Given an input question, first create a syntactically correct {dialect} query to run, 
then look at the results of the query and return the answer.
Use the following format:

Question: "Question here"
SQLQuery: "SQL Query to run"
SQLResult: "Result of the SQLQuery"
Answer: "Final answer here"

Only use the following tables:

{table_info}

When looking up information on products, be sure to check the Item and Type columns. Also lowercase the user
input and fields in the tables to ensure a match. Use the LIKE operator to allow for partial string matches.

In the WHERE CLAUSE combine mutliple filters together with a logical OR operator.

Question: {input}"""
PROMPT = PromptTemplate(
    input_variables=["input", "table_info", "dialect"], template=_DEFAULT_TEMPLATE
)

In [None]:
from langchain.prompts.prompt import PromptTemplate

_DEFAULT_TEMPLATE = """Step 1:
Given an input question, write the syntactically correct {dialect} query that meets the following guidelines:
  a. Be sure to check the the Item, Type, and Description columns.
  b. Combine multiple WHERE filters with a logical OR
  c. Lowercase the user input

Step 2: Run the query.

Step 3: Examine the results and return the answer.

Use the following format:
Question: "Question here"
SQLQuery: "SQL Query to run"
SQLResult: "Result of the SQLQuery"
Answer: "Final answer here"

Only use the following tables:

{table_info}

Table schema explanation:
Item - the product name
Type - kind of item (pizza, side item, topping, drink)
Price - comma separate list of prices for large, medium, small.
Description - extra information including sizes

Question: {input}"""
PROMPT = PromptTemplate(
    input_variables=["input", "table_info", "dialect"], template=_DEFAULT_TEMPLATE
)

In [None]:
db_chain = SQLDatabaseChain.from_llm(llm, db, prompt=PROMPT, verbose=True)

In [None]:
db_chain.run("Please tell me how many sizes of water containing items do\
                       you have. Do you know how big they are?")

In [None]:
db_chain.run("How much is a large cheese pizza?")

# try again with tables for each kind of item

In [5]:
# columns = Large, Medium, Small, Description
menu_pizzas = {'Pepperoni pizza': ['12.95', '10', '7', 'Our most popular'],
        'Cheese pizza':  ['10.95', '9.25', '6.50', 'Extreme cheesiness'],
        'Eggplant pizza':  ['11.95', '9.75', '6.75', 'A contemporary classic'],
        }

menu_sides = { # Item, Large, Medium, Small, Description
        'Fries': ['4.50', '3.50', '', 'Available for a limited time'],
        'Greek salad': ['7.25', '', '', 'Positively pantheonic'],
        }

menu_toppings = { # Topping price
        'Extra cheese': ['2.00', ''],
        'Mushrooms': [ '1.50', 'Non-hallucigenic'],
        'Sausage': ['3.00', 'Spicy with a hint of sweet'],
        'Canadian bacon': ['3.50', ''],
        'AI sauce': ['1.50', 'No human would have made it this way'],
        'Peppers': ['1.00', ''],
        }

menu_drinks = { # Price_large, Price_medium, Price_small, Description
        'Coke': ['3.00', '2.00', '1.00', 'Large = 3 Liter, Medium = 2 Liter, Smal = 16 ounces'],
        'Sprite': ['3.00', '2.00', '1.00', 'Large = 3 Liter, Medium = 2 Liter, Smal = 16 ounces'],
        'Bottled water': ['5.00', None, None, 'Single size = 20 ounces'],
        }

menu_pizzas_df = pd.DataFrame(menu_pizzas).T.reset_index()
menu_pizzas_df.columns = ['Item', 'Price_Large', 'Price_Medium', 'Price_Small', 'Description']

menu_sides_df = pd.DataFrame(menu_sides).T.reset_index()
menu_sides_df.columns = ['Item', 'Price_Large', 'Price_Medium', 'Price_Small', 'Description']

menu_drinks_df = pd.DataFrame(menu_drinks).T.reset_index()
menu_drinks_df.columns = ['Item', 'Price_Large', 'Price_Medium', 'Price_Small', 'Description']

menu_toppings_df = pd.DataFrame(menu_toppings).T.reset_index()
menu_toppings_df.columns = ['Item', 'Price', 'Description']

display(menu_pizzas_df.head(2))
display(menu_sides_df.head(2))
display(menu_toppings_df.head(2))
display(menu_drinks_df.head(2))

Unnamed: 0,Item,Price_Large,Price_Medium,Price_Small,Description
0,Pepperoni pizza,12.95,10.0,7.0,Our most popular
1,Cheese pizza,10.95,9.25,6.5,Extreme cheesiness


Unnamed: 0,Item,Price_Large,Price_Medium,Price_Small,Description
0,Fries,4.5,3.5,,Available for a limited time
1,Greek salad,7.25,,,Positively pantheonic


Unnamed: 0,Item,Price,Description
0,Extra cheese,2.0,
1,Mushrooms,1.5,Non-hallucigenic


Unnamed: 0,Item,Price_Large,Price_Medium,Price_Small,Description
0,Coke,3.0,2.0,1.0,"Large = 3 Liter, Medium = 2 Liter, Smal = 16 o..."
1,Sprite,3.0,2.0,1.0,"Large = 3 Liter, Medium = 2 Liter, Smal = 16 o..."


In [6]:
# store table in pizza_v1.db
cobj = dbio.connectors.SQLite('pizza_v2.db')
cobj.write(menu_pizzas_df, 'pizzas', if_exists='replace')
cobj.write(menu_toppings_df, 'toppings', if_exists='replace')
cobj.write(menu_sides_df, 'sides', if_exists='replace')
cobj.write(menu_drinks_df, 'drinks', if_exists='replace')

cobj.read('select count(*) from pizzas')

INFO:root:time taken to write 3 rows (15 elements): 0.0s (62.5 rows/s)
INFO:root:time taken to write 6 rows (18 elements): 0.0s (284.3 rows/s)
INFO:root:time taken to write 2 rows (10 elements): 0.0s (77.8 rows/s)
INFO:root:time taken to write 3 rows (15 elements): 0.0s (85.7 rows/s)


Unnamed: 0,count(*)
0,3


In [7]:
_DEFAULT_TEMPLATE = """Given an input question, first create a syntactically correct {dialect} query to run, 
then look at the results of the query and return the answer.
Use the following format:

Question: "Question here"
SQLQuery: "SQL Query to run"
SQLResult: "Result of the SQLQuery"
Answer: "Final answer here"

Only use the following tables:

{table_info}

When looking up information on products, be sure to check the Item and Type columns. Also lowercase the user
input and fields in the tables to ensure a match. Use the LIKE operator to allow for partial string matches.

In the WHERE CLAUSE combine mutliple filters together with a logical OR operator.

Question: {input}"""
PROMPT = PromptTemplate(
    input_variables=["input", "table_info", "dialect"], template=_DEFAULT_TEMPLATE
)

In [8]:
db = SQLDatabase.from_uri("sqlite:///./pizza_v2.db")
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, verbose=True)

In [9]:
db_chain = SQLDatabaseChain.from_llm(llm, db, prompt=PROMPT, verbose=True)

In [11]:
db_chain.run("How many sizes of pizza do you have?  Name them.")



[1m> Entering new SQLDatabaseChain chain...[0m
How many sizes of pizza do you have?  Name them.
SQLQuery:[32;1m[1;3mSELECT DISTINCT Price_Large, Price_Medium, Price_Small FROM pizzas[0m
SQLResult: [33;1m[1;3m[('12.95', '10', '7'), ('10.95', '9.25', '6.50'), ('11.95', '9.75', '6.75')][0m
Answer:[32;1m[1;3mThere are three sizes of pizza: Large, Medium, and Small. The prices for each size are as follows: Large - $12.95, $10.95, $11.95; Medium - $10, $9.25, $9.75; Small - $7, $6.50, $6.75.[0m
[1m> Finished chain.[0m


'There are three sizes of pizza: Large, Medium, and Small. The prices for each size are as follows: Large - $12.95, $10.95, $11.95; Medium - $10, $9.25, $9.75; Small - $7, $6.50, $6.75.'

In [12]:
# query - drinks

db_chain.run("What drinks can I order?")



[1m> Entering new SQLDatabaseChain chain...[0m
What drinks can I order?
SQLQuery:[32;1m[1;3mSELECT Item FROM drinks[0m
SQLResult: [33;1m[1;3m[('Coke',), ('Sprite',), ('Bottled water',)][0m
Answer:[32;1m[1;3mYou can order Coke, Sprite, and Bottled water.[0m
[1m> Finished chain.[0m


'You can order Coke, Sprite, and Bottled water.'

In [None]:
# query - bottled water

db_chain.run("lease tell me how many sizes of bottled water \
                       you have. Do you know how big they are?")

scratchwork

In [None]:
cobj.read("select * from menu where Item like '%ater%'")