In [1]:
import pandas as pd
import dbio

In [2]:
import os
import openai
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv()) # read local .env file

openai.api_key  = os.getenv('OPENAI_API_KEY')

In [3]:
from langchain.prompts.prompt import PromptTemplate
from langchain import SQLDatabase, SQLDatabaseChain
from langchain.chat_models import ChatOpenAI

# First pass:

Put everything in one table for the initial prototype.

In [None]:
# columns = Item type, Prices (Large, Medium, Small), Description
menu = {'Pepperoni pizza': ['pizza', '12.95, 10, 7', 'Our most popular'],
        'Cheese pizza':  ['pizza', '10.95, 9.25, 6.50', 'Extreme cheesiness'],
        'Eggplant pizza':  ['pizza', '11.95, 9.75, 6.75', 'A contemporary classic'],
        'Fries': ['side item', '4.50, 3.50', 'Available for a limited time'],
        'Greek salad': ['side item', '7.25', 'Positively pantheonic'],
        'Extra cheese': ['topping', '2.00', ''],
        'Mushrooms': ['topping', '1.50', 'Non-hallucinogenic'],
        'Sausage': ['topping', '3.00', 'Spicy with a hint of sweet'],
        'Canadian bacon': ['topping', '3.50', ''],
        'AI sauce': ['topping', '1.50', 'No human would have made it this way'],
        'Peppers': ['topping', '1.00', ''],
        'Coke': ['drink', '3.00, 2.00, 1.00', '3 Liter, 2 Liter, 16 ounces'],
        'Sprite': ['drink', '3.00, 2.00, 1.00', '3 Liter, 2 Liter, 16 ounces'],
        'Bottled water': ['drink', '5.00', '20 ounces'],
        }

menu_df = pd.DataFrame(menu).T.reset_index()
menu_df.columns = ['Item', 'Type', 'Prices (Large, Medium, Small)', 'Description']
menu_df

In [None]:
# store table in pizza_v1.db
cobj = dbio.connectors.SQLite('pizza_v1.db')
cobj.write(menu_df, 'menu', if_exists='replace')

cobj.read('select count(*) from menu')

In [None]:
from langchain import OpenAI, SQLDatabase, SQLDatabaseChain

db = SQLDatabase.from_uri("sqlite:///./pizza_v1.db")
llm = OpenAI(model_name="gpt-3.5-turbo"temperature=0, verbose=True)

In [None]:
# database chain

db_chain = SQLDatabaseChain.from_llm(llm, db, verbose=True)

In [None]:
# query

db_chain.run("How many kinds of pizza do you offer?")

In [None]:
# query - drinks

db_chain.run("What drinks can I order")

In [None]:
# query - bottled water

db_chain.run("lease tell me how many sizes of bottled water \
                       you have. Do you know how big they are?")

Try again with updated prompt:
 - check in greater 1 field
 - lower case text

In [None]:
from langchain.prompts.prompt import PromptTemplate

_DEFAULT_TEMPLATE = """Given an input question, first create a syntactically correct {dialect} query to run, 
then look at the results of the query and return the answer.
Use the following format:

Question: "Question here"
SQLQuery: "SQL Query to run"
SQLResult: "Result of the SQLQuery"
Answer: "Final answer here"

Only use the following tables:

{table_info}

When looking up information on products, be sure to check the Item and Type columns. Also lowercase the user
input and fields in the tables to ensure a match. Use the LIKE operator to allow for partial string matches.

In the WHERE CLAUSE combine mutliple filters together with a logical OR operator.

Question: {input}"""
PROMPT = PromptTemplate(
    input_variables=["input", "table_info", "dialect"], template=_DEFAULT_TEMPLATE
)

In [None]:
from langchain.prompts.prompt import PromptTemplate

_DEFAULT_TEMPLATE = """Step 1:
Given an input question, write the syntactically correct {dialect} query that meets the following guidelines:
  a. Be sure to check the the Item, Type, and Description columns.
  b. Combine multiple WHERE filters with a logical OR
  c. Lowercase the user input

Step 2: Run the query.

Step 3: Examine the results and return the answer.

Use the following format:
Question: "Question here"
SQLQuery: "SQL Query to run"
SQLResult: "Result of the SQLQuery"
Answer: "Final answer here"

Only use the following tables:

{table_info}

Table schema explanation:
Item - the product name
Type - kind of item (pizza, side item, topping, drink)
Price - comma separate list of prices for large, medium, small.
Description - extra information including sizes

Question: {input}"""
PROMPT = PromptTemplate(
    input_variables=["input", "table_info", "dialect"], template=_DEFAULT_TEMPLATE
)

In [None]:
db_chain = SQLDatabaseChain.from_llm(llm, db, prompt=PROMPT, verbose=True)

In [None]:
db_chain.run("Please tell me how many sizes of water containing items do\
                       you have. Do you know how big they are?")

In [None]:
db_chain.run("How much is a large cheese pizza?")

# try again with tables for each kind of item

In [4]:
# columns = Large, Medium, Small, Description
menu_pizzas = {'Pepperoni pizza': ['12.95', '10', '7', 'Our most popular'],
        'Cheese pizza':  ['10.95', '9.25', '6.50', 'Extreme cheesiness'],
        'Eggplant pizza':  ['11.95', '9.75', '6.75', 'A contemporary classic'],
        }

menu_sides = { # Item, Large, Medium, Small, Description
        'Fries': ['4.50', '3.50', '', 'Available for a limited time'],
        'Greek salad': ['7.25', '', '', 'Positively pantheonic'],
        }

menu_toppings = { # Topping price
        'Extra cheese': ['2.00', ''],
        'Mushrooms': [ '1.50', 'Non-hallucinogenic'],
        'Sausage': ['3.00', 'Spicy with a hint of sweet'],
        'Canadian bacon': ['3.50', ''],
        'AI sauce': ['1.50', 'No human would have made it this way'],
        'Peppers': ['1.00', ''],
        }

menu_drinks = { # Price_large, Price_medium, Price_small, Description
        'Coke': ['3.00', '2.00', '1.00', 'Large = 3 Liter, Medium = 2 Liter, Smal = 16 ounces'],
        'Sprite': ['3.00', '2.00', '1.00', 'Large = 3 Liter, Medium = 2 Liter, Smal = 16 ounces'],
        'Bottled water': ['5.00', None, None, 'Single size = 20 ounces'],
        }

menu_pizzas_df = pd.DataFrame(menu_pizzas).T.reset_index()
menu_pizzas_df.columns = ['Item', 'Price_Large', 'Price_Medium', 'Price_Small', 'Description']

menu_sides_df = pd.DataFrame(menu_sides).T.reset_index()
menu_sides_df.columns = ['Item', 'Price_Large', 'Price_Medium', 'Price_Small', 'Description']

menu_drinks_df = pd.DataFrame(menu_drinks).T.reset_index()
menu_drinks_df.columns = ['Item', 'Price_Large', 'Price_Medium', 'Price_Small', 'Description']

menu_toppings_df = pd.DataFrame(menu_toppings).T.reset_index()
menu_toppings_df.columns = ['Item', 'Price', 'Description']

display(menu_pizzas_df.head(2))
display(menu_sides_df.head(2))
display(menu_toppings_df.head(2))
display(menu_drinks_df.head(2))

Unnamed: 0,Item,Price_Large,Price_Medium,Price_Small,Description
0,Pepperoni pizza,12.95,10.0,7.0,Our most popular
1,Cheese pizza,10.95,9.25,6.5,Extreme cheesiness


Unnamed: 0,Item,Price_Large,Price_Medium,Price_Small,Description
0,Fries,4.5,3.5,,Available for a limited time
1,Greek salad,7.25,,,Positively pantheonic


Unnamed: 0,Item,Price,Description
0,Extra cheese,2.0,
1,Mushrooms,1.5,Non-hallucinogenic


Unnamed: 0,Item,Price_Large,Price_Medium,Price_Small,Description
0,Coke,3.0,2.0,1.0,"Large = 3 Liter, Medium = 2 Liter, Smal = 16 o..."
1,Sprite,3.0,2.0,1.0,"Large = 3 Liter, Medium = 2 Liter, Smal = 16 o..."


In [5]:
# store table in pizza_v1.db
cobj = dbio.connectors.SQLite('pizza_v2.db')
cobj.write(menu_pizzas_df, 'pizzas', if_exists='replace')
cobj.write(menu_toppings_df, 'toppings', if_exists='replace')
cobj.write(menu_sides_df, 'sides', if_exists='replace')
cobj.write(menu_drinks_df, 'drinks', if_exists='replace')

cobj.read('select count(*) from pizzas')

INFO:root:time taken to write 3 rows (15 elements): 0.0s (88.4 rows/s)
INFO:root:time taken to write 6 rows (18 elements): 0.0s (141.0 rows/s)
INFO:root:time taken to write 2 rows (10 elements): 0.0s (71.4 rows/s)
INFO:root:time taken to write 3 rows (15 elements): 0.0s (127.6 rows/s)


Unnamed: 0,count(*)
0,3


In [6]:
_DEFAULT_TEMPLATE = """Given an input question, first create a syntactically correct {dialect} query to run, 
then look at the results of the query and return the answer.
Use the following format:

Question: "Question here"
SQLQuery: "SQL Query to run"
SQLResult: "Result of the SQLQuery"
Answer: "Final answer here"

Only use the following tables:

{table_info}

When looking up information on products, be sure to check the Item and Type columns. Also lowercase the user
input and fields in the tables to ensure a match. Use the LIKE operator to allow for partial string matches.

In the WHERE CLAUSE combine mutliple filters together with a logical OR operator.

Question: {input}"""
PROMPT = PromptTemplate(
    input_variables=["input", "table_info", "dialect"], template=_DEFAULT_TEMPLATE
)

In [7]:
db = SQLDatabase.from_uri("sqlite:///./pizza_v2.db")
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0, verbose=True)

In [8]:
db_chain = SQLDatabaseChain.from_llm(llm, db, prompt=PROMPT, verbose=True)

In [36]:
result = db_chain.run("How many sizes of pizza do you have?  Name them.")
result



[1m> Entering new SQLDatabaseChain chain...[0m
How many sizes of pizza do you have?  Name them.
SQLQuery:[32;1m[1;3mSELECT DISTINCT Item, Description FROM pizzas WHERE Price_Large IS NOT NULL OR Price_Medium IS NOT NULL OR Price_Small IS NOT NULL[0m
SQLResult: [33;1m[1;3m[('Pepperoni pizza', 'Our most popular'), ('Cheese pizza', 'Extreme cheesiness'), ('Eggplant pizza', 'A contemporary classic')][0m
Answer:[32;1m[1;3mThere are three sizes of pizza: Large, Medium, and Small. The pizza types are Pepperoni, Cheese, and Eggplant.[0m
[1m> Finished chain.[0m


'There are three sizes of pizza: Large, Medium, and Small. The pizza types are Pepperoni, Cheese, and Eggplant.'

In [10]:
# query - drinks

db_chain.run("What drinks can I order?")



[1m> Entering new SQLDatabaseChain chain...[0m
What drinks can I order?
SQLQuery:[32;1m[1;3mSELECT Item FROM drinks[0m
SQLResult: [33;1m[1;3m[('Coke',), ('Sprite',), ('Bottled water',)][0m
Answer:INFO:openai:error_code=None error_message='Rate limit reached for default-gpt-3.5-turbo in organization org-xv1rthCl7lUSO5JPFfmTuZtK on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False
INFO:openai:error_code=None error_message='Rate limit reached for default-gpt-3.5-turbo in organization org-xv1rthCl7lUSO5JPFfmTuZtK on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have

'You can order Coke, Sprite, and Bottled water.'

In [11]:
# query - bottled water

db_chain.run("Please tell me how many sizes of bottled water \
                       you have. Do you know how big they are?")



[1m> Entering new SQLDatabaseChain chain...[0m
Please tell me how many sizes of bottled water                        you have. Do you know how big they are?
SQLQuery:[32;1m[1;3mSELECT COUNT(*) FROM drinks WHERE Item LIKE '%bottled water%'[0m
SQLResult: [33;1m[1;3m[(1,)][0m
Answer:INFO:openai:error_code=None error_message='Rate limit reached for default-gpt-3.5-turbo in organization org-xv1rthCl7lUSO5JPFfmTuZtK on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False
INFO:openai:error_code=None error_message='Rate limit reached for default-gpt-3.5-turbo in organization org-xv1rthCl7lUSO5JPFfmTuZtK on requests per min. Limit: 3 / min. Please try again 

'Final answer here: There is one size of bottled water available, which is 20 ounces.'

In [14]:
db_chain.run("Are the mushrooms hallucinogenic?")



[1m> Entering new SQLDatabaseChain chain...[0m
Are the mushrooms hallucinogenic?
SQLQuery:[32;1m[1;3mSELECT Description FROM toppings WHERE Item LIKE '%mushrooms%'[0m
SQLResult: [33;1m[1;3m[('Non-hallucinogenic',)][0m
Answer:[32;1m[1;3mNo, the mushrooms are not hallucinogenic according to the Description field in the toppings table.[0m
[1m> Finished chain.[0m


'No, the mushrooms are not hallucinogenic according to the Description field in the toppings table.'

In [31]:
db_chain.run("What is the price of a cheese pizza with sausage?")



[1m> Entering new SQLDatabaseChain chain...[0m
What is the price of a cheese pizza with sausage?
SQLQuery:[32;1m[1;3mSELECT pizzas.Price_Large + toppings.Price AS Total_Price
FROM pizzas, toppings
WHERE pizzas.Item LIKE '%cheese%' AND toppings.Item LIKE '%sausage%'[0m
SQLResult: [33;1m[1;3m[(13.95,)][0m
Answer:[32;1m[1;3mThe price of a cheese pizza with sausage is $13.95.[0m
[1m> Finished chain.[0m


'The price of a cheese pizza with sausage is $13.95.'

In [32]:
db_chain.run("What is the price of a cheese pizza topped with sausage and mushroom?")



[1m> Entering new SQLDatabaseChain chain...[0m
What is the price of a cheese pizza topped with sausage and mushroom?
SQLQuery:[32;1m[1;3mSELECT pizzas.Price_Large + toppings.Price + toppings.Price AS Total_Price
FROM pizzas
JOIN toppings ON pizzas.Item LIKE '%cheese%' AND toppings.Item LIKE '%sausage%' OR toppings.Item LIKE '%mushroom%'
WHERE pizzas.Item LIKE '%cheese%'[0m
SQLResult: [33;1m[1;3m[(13.95,), (16.95,)][0m
Answer:INFO:openai:error_code=None error_message='Rate limit reached for default-gpt-3.5-turbo in organization org-xv1rthCl7lUSO5JPFfmTuZtK on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error received' stream_error=False
INFO:openai:error_code=None error_messa

'The price of a cheese pizza topped with sausage and mushroom is either $13.95 for a small or $16.95 for a large.'

In [44]:
db_chain.run("What is the price of a small pizza topped with both sausage and mushroom?")



[1m> Entering new SQLDatabaseChain chain...[0m
What is the price of a small pizza topped with both sausage and mushroom?
SQLQuery:[32;1m[1;3mSELECT pizzas.Price_Small + toppings.Price AS Total_Price
FROM pizzas
JOIN toppings ON pizzas.Item LIKE '%sausage%' AND toppings.Item LIKE '%mushroom%'
WHERE pizzas.Item LIKE '%pizza%' AND toppings.Item LIKE '%sausage%' AND toppings.Item LIKE '%mushroom%'[0m
SQLResult: [33;1m[1;3m[][0m
Answer:[32;1m[1;3mThere is no answer to this question as the SQL query did not return any results.[0m
[1m> Finished chain.[0m


'There is no answer to this question as the SQL query did not return any results.'

In [47]:
db_chain.run("What is the diameter of each pizza size?")



[1m> Entering new SQLDatabaseChain chain...[0m
What is the diameter of each pizza size?
SQLQuery:[32;1m[1;3mSELECT "Item", "Description", 
CASE 
	WHEN "Price_Large" IS NOT NULL THEN "Price_Large" 
	WHEN "Price_Medium" IS NOT NULL THEN "Price_Medium" 
	ELSE "Price_Small" 
END AS "Price",
CASE 
	WHEN "Price_Large" IS NOT NULL THEN "Description" 
	WHEN "Price_Medium" IS NOT NULL THEN "Description" 
	ELSE "Description" 
END AS "Size"
FROM pizzas[0m
SQLResult: [33;1m[1;3m[('Pepperoni pizza', 'Our most popular', '12.95', 'Our most popular'), ('Cheese pizza', 'Extreme cheesiness', '10.95', 'Extreme cheesiness'), ('Eggplant pizza', 'A contemporary classic', '11.95', 'A contemporary classic')][0m
Answer:[32;1m[1;3mThe large pizza size has a diameter of 12 inches, the medium pizza size has a diameter of 10 inches, and the small pizza size has a diameter of 7 inches. However, this information is not available in the given tables.[0m
[1m> Finished chain.[0m


'The large pizza size has a diameter of 12 inches, the medium pizza size has a diameter of 10 inches, and the small pizza size has a diameter of 7 inches. However, this information is not available in the given tables.'

In [43]:
# try again with sequential db chain - is this also using a query checker by default?
# not using query checker, but something is causing the sql to be much more accurate and
# the answers are correct

from langchain.chains import SQLDatabaseSequentialChain
#db = SQLDatabase.from_uri("sqlite:///./pizza_v2.db")
seq_chain = SQLDatabaseSequentialChain.from_llm(llm, db, verbose=True)
seq_chain.run("What is the price of a small pizza topped with both sausage and mushroom?")



[1m> Entering new SQLDatabaseSequentialChain chain...[0m
Table names to use:
[33;1m[1;3m['pizzas', 'toppings'][0m

[1m> Entering new SQLDatabaseChain chain...[0m
What is the price of a small pizza topped with both sausage and mushroom?
SQLQuery:[32;1m[1;3mSELECT pizzas.Price_Small + toppings.Price AS Total_Price
FROM pizzas
JOIN toppings ON pizzas.Item = 'Cheese pizza' AND toppings.Item = 'Mushrooms' OR toppings.Item = 'Sausage'
LIMIT 1[0m
SQLResult: [33;1m[1;3m[(10.0,)][0m
Answer:INFO:openai:error_code=None error_message='Rate limit reached for default-gpt-3.5-turbo in organization org-xv1rthCl7lUSO5JPFfmTuZtK on requests per min. Limit: 3 / min. Please try again in 20s. Contact us through our help center at help.openai.com if you continue to have issues. Please add a payment method to your account to increase your rate limit. Visit https://platform.openai.com/account/billing to add a payment method.' error_param=None error_type=requests message='OpenAI API error receiv

'The price of a small pizza topped with both sausage and mushroom is $10.'

In [46]:
seq_chain.run("What is the diameter of each pizza size?")



[1m> Entering new SQLDatabaseSequentialChain chain...[0m
Table names to use:
[33;1m[1;3m['pizzas'][0m

[1m> Entering new SQLDatabaseChain chain...[0m
What is the diameter of each pizza size?
SQLQuery:[32;1m[1;3mSELECT Price_Large, Price_Medium, Price_Small FROM pizzas[0m
SQLResult: [33;1m[1;3m[('12.95', '10', '7'), ('10.95', '9.25', '6.50'), ('11.95', '9.75', '6.75')][0m
Answer:[32;1m[1;3mThe query returned the prices for each pizza size, not the diameter. The answer to the question cannot be determined from the given table.[0m
[1m> Finished chain.[0m

[1m> Finished chain.[0m


'The query returned the prices for each pizza size, not the diameter. The answer to the question cannot be determined from the given table.'

In [41]:
cobj.read('''
SELECT *, pizzas.Price_Large + toppings.Price + toppings.Price AS Total_Price
FROM pizzas, toppings
WHERE pizzas.Item = 'Cheese pizza' AND toppings.Item IN ('Sausage', 'Mushrooms')
''')

Unnamed: 0,Item,Price_Large,Price_Medium,Price_Small,Description,Price,Total_Price
0,Mushrooms,10.95,9.25,6.5,Non-hallucinogenic,1.5,13.95
1,Sausage,10.95,9.25,6.5,Spicy with a hint of sweet,3.0,16.95


scratchwork

In [None]:
cobj.read("select * from menu where Item like '%ater%'")

In [34]:
cobj.read('''
SELECT *, pizzas.Price_Large + toppings.Price + toppings.Price AS Total_Price
FROM pizzas
JOIN toppings ON pizzas.Item LIKE '%cheese%' AND toppings.Item LIKE '%sausage%' OR toppings.Item LIKE '%mushroom%'
WHERE pizzas.Item LIKE '%cheese%'
''')

Unnamed: 0,Item,Price_Large,Price_Medium,Price_Small,Description,Price,Total_Price
0,Mushrooms,10.95,9.25,6.5,Non-hallucinogenic,1.5,13.95
1,Sausage,10.95,9.25,6.5,Spicy with a hint of sweet,3.0,16.95


sqlDatabaseChain notes:
 - you can specify return_direct=True in the SQLDatabaseChain initialization to directly return the output of the SQL query without any additional formatting. This prevents the LLM from seeing any contents within the database. Note, however, the LLM still has access to the database scheme (i.e. dialect, table and key names) by default.
 - SQLDatabaseChain.from_llm(llm, db, verbose=True, use_query_checker=True) for query checking
 - Return intermediate steps instead of just the final answer: SQLDatabaseChain.from_llm(llm, db, prompt=PROMPT, verbose=True, use_query_checker=True, return_intermediate_steps=True)