In [1]:
import warnings
# Silence every Python warning for the rest of the session so the cell outputs
# below stay readable. NOTE(review): this also hides deprecation warnings from
# the libraries used here - remove it while debugging.
warnings.filterwarnings('ignore')

### Prepare LLM

In [2]:
import json
import os
from crewai import Agent, Task, Crew
from semantic_search_tools import SemanticSearchTool

# Configure the OpenAI credentials and model through environment variables.
# A key that is already exported in the environment is respected; the
# placeholder is only written when no key is set, so running this cell no
# longer overwrites a real credential with a dummy value.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = 'your-openai-api-key'
# The model is pinned explicitly so the experiment always uses the same LLM.
os.environ["OPENAI_MODEL_NAME"] = 'gpt-3.5-turbo'

# Schema search tool handed to the search agent and the search task below.
# NOTE(review): n_results=10 presumably caps the number of schema hits returned
# per search - confirm against semantic_search_tools.
db_schema_search_tool = SemanticSearchTool(n_results=10)

### Prepare experiment data

In [3]:
from spider_env import SpiderEnv

# Draw one question from the Spider environment together with its reference
# (gold) SQL query and the result of executing that query.
spider = SpiderEnv()
observation, info = spider.reset()
question = observation["instruction"]

# Echo the sampled question and the gold answer so later cells can be compared
# against them.
for label, value in (
    ('question:', question),
    ('gold query:', info['gold_query']),
    ('gold result:', info['gold_result']),
):
    print(label, value)

Loading cached Spider dataset from /Users/bearpig/.cache/spider
Schema file not found for /Users/bearpig/.cache/spider/spider/database/icfp_1
Schema file not found for /Users/bearpig/.cache/spider/spider/database/small_bank_1
Schema file not found for /Users/bearpig/.cache/spider/spider/database/epinions_1
Schema file not found for /Users/bearpig/.cache/spider/spider/database/company_1
Schema file not found for /Users/bearpig/.cache/spider/spider/database/flight_4
Schema file not found for /Users/bearpig/.cache/spider/spider/database/chinook_1
Schema file not found for /Users/bearpig/.cache/spider/spider/database/twitter_1
question: Find the famous titles of artists that do not have any volume.
gold query: SELECT Famous_Title FROM artist WHERE Artist_ID NOT IN(SELECT Artist_ID FROM volume)
gold result: [('Antievangelistical Process (re-release)',), ('Antithesis of All Flesh',)]


### Create Agents

In [4]:
# Load the agent definitions (one JSON object per agent role) and build the
# four agents from them. The file is read explicitly as UTF-8 so that parsing
# does not depend on the platform's default locale encoding.
with open('agents.json', encoding='utf-8') as f:
    agent_config = json.load(f)

# Only the search agent is given the schema search tool; the remaining agents
# are built from their JSON settings alone.
search_agent = Agent(**agent_config['assistant_dba'], tools=[db_schema_search_tool])
sql_writer_agent = Agent(**agent_config['senior_sql_writer'])
qa_agent = Agent(**agent_config['senior_qa_engineer'])
dba_agent = Agent(**agent_config['chief_dba'])

### Create Tasks

In [5]:
# Step 1 - schema discovery: the search agent uses the schema search tool to
# collect candidate databases/tables for the question.
search_task = Task(
    agent=search_agent,
    tools=[db_schema_search_tool],
    description=(
        'Using search tools to find related database/table candidates, '
        'and consolidate all relevant tables that belong the same database '
        'for information needed to answer the following question:\n{question}'
    ),
    expected_output="Return relevant database and tables' schema.",
)
# Step 2 - query drafting: the SQL writer agent turns the question into a
# first SQL query based on the schemas found in the previous step.
sql_writer_task = Task(
    agent=sql_writer_agent,
    description=(
        'Based on the searched database/table schemas, '
        'generate a SQL query to answer the following question:\n{question}'
    ),
    expected_output='MUST BE ONLY a valid databse sql script.',
)
# Step 3 - query review: the QA agent checks and refines the drafted query.
# An explicit `context` restricts what the task receives to the outputs of the
# listed tasks, so the SQL writer's draft must be listed alongside the schema
# search; otherwise the reviewer never sees the query it is asked to check.
qa_task = Task(
    description='Check the given sql query carefully, and refine the sql query to make it fulfill the needs of the following question:\n{question}',
    expected_output='The refined sql query. MUST BE ONLY a valid databse sql script.',
    agent=qa_agent,
    context=[search_task, sql_writer_task]
)
# Step 4 - final sign-off: the chief DBA agent validates the refined query and
# reports it together with the database name.
# The QA-refined query is passed in explicitly next to the schema search
# output; with only `search_task` in `context` the reviewer would receive the
# schemas but not the query it is supposed to look over.
dba_task = Task(
    description='You will look over the sql query to insure that it is complete and does the job that it is supposed to do which is generating a SQL query to answer the following question:\n{question}',
    expected_output='The final sql query. MUST BE ONLY a valid databse sql script and its database name.',
    agent=dba_agent,
    context=[search_task, qa_task]
)

### Set up a Crew

In [6]:
# Bundle the four agents and their tasks into one crew. The tasks are listed
# in pipeline order: search -> write -> QA -> final DBA review.
text2sql_crew = Crew(
    agents=[search_agent, sql_writer_agent, qa_agent, dba_agent],
    tasks=[search_task, sql_writer_task, qa_task, dba_task],
    verbose=True,
)

### Prepare inputs (question)

In [7]:
# Value substituted for the `{question}` placeholder in the task descriptions.
text2sql_inputs = dict(question=question)

### Kickoff crew actions

In [8]:
# Run the crew on the sampled question; `result` holds the crew's final output
# (printed in the next cell).
result = text2sql_crew.kickoff(inputs=text2sql_inputs)

[1m[93m [DEBUG]: == Working Agent: Assistant Search Engineer[00m
[1m[93m [INFO]: == Starting Task: Using search tools to find related database/table candidates, and consolidate all relevant tables that belong the same database for information needed to answer the following question:
Find the famous titles of artists that do not have any volume.[00m


[1m> Entering new CrewAgentExecutor chain...[0m
[32;1m[1;3mI should start by using the search database and table schemas tool to find relevant databases and tables related to famous titles of artists without any volume.

Action: Search database and table schemas tool
Action Input: {"question": "famous titles of artists without any volume"}[0m[93m 

{"database": "theme_gallery", "table": "artist", "schema": "CREATE TABLE \"artist\" (\n\"Artist_ID\" int,\n\"Name\" text,\n\"Country\" text,\n\"Year_Join\" int,\n\"Age\" int,\nPRIMARY KEY (\"Artist_ID\")\n)"}

{"database": "music_4", "table": "artist", "schema": "CREATE TABLE \"artis

In [9]:
# Show the crew's final answer: the SQL query and the database it targets.
print(result)

```sql
SELECT a.Famous_Title
FROM artist a
LEFT JOIN volume v ON a.Artist_ID = v.Artist_ID
WHERE v.Artist_ID IS NULL
```
Database Name: music_4


### Compare with gold result

In [10]:
import sqlite3

# Execute the crew's final query against the Spider SQLite database it named
# (music_4) so the rows can be compared with the gold result.
con = sqlite3.connect('spider/database/music_4/music_4.sqlite')
try:
    cursor = con.cursor()
    cursor.execute("""SELECT a.Famous_Title
FROM artist a
LEFT JOIN volume v ON a.Artist_ID = v.Artist_ID
WHERE v.Artist_ID IS NULL""")
    results = cursor.fetchall()
finally:
    # Always release the database handle, even if the query raises.
    con.close()
print(results)

[('Antievangelistical Process (re-release)',), ('Antithesis of All Flesh',)]


In [11]:
# Gold (reference) rows for the sampled question, printed for a visual
# comparison with the rows returned by the generated query.
print(info['gold_result'])

[('Antievangelistical Process (re-release)',), ('Antithesis of All Flesh',)]
