In [10]:
#sql database related
from langchain.agents import create_sql_agent
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
from langchain.sql_database import SQLDatabase

#agent related 
from langchain.agents import AgentExecutor
from langchain.agents.agent_types import AgentType

#for openai
#from langchain.llms.openai import OpenAI
#from langchain.chat_models import ChatOpenAI

#for tongyi-api
#!pip install dashscope --upgrade
from langchain.llms import Tongyi
from langchain.chat_models.tongyi import ChatTongyi


# adding Folder_2 to the system path
import sys
sys.path.insert(0, '../')
from common.utils import Utils


In [3]:

#db = SQLDatabase.from_uri("mysql+mysqlconnector://xxx:xxx@localhost:3306/sys")
db = SQLDatabase.from_uri('postgresql+psycopg2://test:test@localhost/test')


In [7]:
llm = ChatTongyi(temperature=0, dashscope_api_key = Utils.get_tongyi_key())
toolkit = SQLDatabaseToolkit(db=db, llm=llm)

In [13]:
from langchain.chains import create_sql_query_chain

In [17]:
from langchain.schema import AIMessage, HumanMessage, SystemMessage

messages = [
    SystemMessage(
        content="You are a helpful assistant that translates English to French."
    ),
    HumanMessage(
        content="Translate this sentence from English to French. I love programming."
    ),
]
llm(messages)

AIMessage(content="J'aime programmer.")

In [9]:

agent_executor_2 = create_sql_agent(
    llm=ChatTongyi(temperature=0, dashscope_api_key = Utils.get_tongyi_key()),
    toolkit=toolkit,
    verbose=True,
    agent_type=AgentType.OPENAI_FUNCTIONS,
    suffix="""When giving the final answer, also append the final SQL and explain it."""
)


ValidationError: 1 validation error for OpenAIFunctionsAgent
__root__
  Only supported with ChatOpenAI models. (type=value_error)

In [None]:

agent_executor_2.run("How many Countries in the database?")

In [5]:

agent_executor_2.run("How many employees's departments are at america")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `sql_db_list_tables` with ``


[0m[38;5;200m[1;3mcountries, departments, employees, locations, t_stock[0m[32;1m[1;3m
Invoking: `sql_db_schema` with `countries, departments, employees, locations`


[0m[33;1m[1;3m
CREATE TABLE countries (
	country_id CHAR(2) NOT NULL, 
	country_name VARCHAR(40), 
	region_id INTEGER, 
	CONSTRAINT country_c_id_pk PRIMARY KEY (country_id)
)

/*
3 rows from countries table:
country_id	country_name	region_id
AR	Argentina	2
AU	Australia	3
BE	Belgium	1
*/


CREATE TABLE departments (
	department_id INTEGER NOT NULL, 
	department_name VARCHAR(30) NOT NULL, 
	manager_id INTEGER, 
	location_id INTEGER, 
	CONSTRAINT dept_id_pk PRIMARY KEY (department_id)
)

/*
3 rows from departments table:
department_id	department_name	manager_id	location_id
10	Administration	200	1700
20	Marketing	201	1800
30	Purchasing	114	1700
*/


CREATE TABLE employees (
	employee_id INTEGER NOT NULL, 
	first_name

"There are no employees whose departments are located in America.\n\nThe SQL query used to find this is:\n```sql\nSELECT COUNT(DISTINCT e.employee_id) \nFROM employees e \nJOIN departments d ON e.department_id = d.department_id \nJOIN locations l ON d.location_id = l.location_id \nJOIN countries c ON l.country_id = c.country_id \nWHERE c.country_name = 'America'\n```\nThis query works by joining the employees, departments, locations, and countries tables together. It then counts the number of unique employee IDs where the country name is 'America'."

In [None]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.schema import Document

embeddings = OpenAIEmbeddings()

few_shotd = few_shots = {'Find all employees whose departments are in USA': "SELECT COUNT(*) FROM employees e JOIN departments d ON e.department_id = d.department_id WHERE d.location_id IN (SELECT location_id FROM locations WHERE country_id = 'US');"
                        }
few_shot_docs = [
    Document(page_content=question, metadata={"sql_query": few_shots[question]})
    for question in few_shots.keys()
]
vector_db = FAISS.from_documents(few_shot_docs, embeddings)
retriever = vector_db.as_retriever()

from langchain.agents.agent_toolkits import create_retriever_tool

tool_description = """
This tool will help you understand similar examples to adapt them to the user question.
Input to this tool should be the user question.
"""

retriever_tool = create_retriever_tool(
    retriever, name="sql_get_similar_examples", description=tool_description
)
custom_tool_list = [retriever_tool]

custom_suffix = """
I should first get the similar examples I know.
If the examples are enough to construct the query, I can build it.
Otherwise, I can then look at the tables in the database to see what I can query.
Then I should query the schema of the most relevant tables
"""


In [6]:
import ast
import re


def run_query_save_results(db, query):
    res = db.run(query)
    res = [el for sub in ast.literal_eval(res) for el in sub if el]
    res = [re.sub(r"\b\d+\b", "", string).strip() for string in res]
    return res


country_name = run_query_save_results(db, "SELECT country_name From countries")
country_id = run_query_save_results(db, "SELECT country_id From countries")
department_name = run_query_save_results(db, "SELECT  department_name FROM departments")

In [8]:
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.7.4-cp39-cp39-macosx_10_9_x86_64.whl (6.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.5/6.5 MB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.7.4


In [9]:
from langchain.agents.agent_toolkits import create_retriever_tool
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS


texts = country_name + country_id + department_name

embeddings = OpenAIEmbeddings(openai_api_key = Utils.get_openai_key())
vector_db = FAISS.from_texts(texts, embeddings)
retriever = vector_db.as_retriever()

retriever_tool = create_retriever_tool(
    retriever,
    name="name_search",
    description="use to learn how a piece of data is actually written, can be from names, surnames addresses etc",
)

custom_tool_list = [retriever_tool]

In [10]:
from langchain.agents import create_sql_agent, AgentType
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
from langchain.utilities import SQLDatabase
from langchain.chat_models import ChatOpenAI

# db = SQLDatabase.from_uri("sqlite:///Chinook.db")
llm = ChatOpenAI(model_name="gpt-4", temperature=0, openai_api_key = Utils.get_openai_key())

toolkit = SQLDatabaseToolkit(db=db, llm=llm)

custom_suffix = """
If a user asks for me to filter based on proper nouns, I should first check the spelling using the name_search tool.
Otherwise, I can then look at the tables in the database to see what I can query.
Then I should query the schema of the most relevant tables
"""

agent = create_sql_agent(
    llm=llm,
    toolkit=toolkit,
    verbose=True,
    agent_type=AgentType.OPENAI_FUNCTIONS,
    extra_tools=custom_tool_list,
    suffix=custom_suffix,
)

In [11]:
agent.run("How many employees's departments are at america")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `name_search` with `america`


[0m[33;1m[1;3m[Document(page_content='United States of America'), Document(page_content='US'), Document(page_content='Canada'), Document(page_content='Australia')][0m[32;1m[1;3m
Invoking: `sql_db_list_tables` with ``
responded: The term "America" could refer to the "United States of America", "US", "Canada", or "Australia". I will assume the user is referring to the "United States of America" or "US". Now, I will check the tables in the database.

[0m[38;5;200m[1;3mcountries, departments, employees, locations, t_stock[0m[32;1m[1;3m
Invoking: `sql_db_schema` with `countries, departments, employees, locations`


[0m[33;1m[1;3m
CREATE TABLE countries (
	country_id CHAR(2) NOT NULL, 
	country_name VARCHAR(40), 
	region_id INTEGER, 
	CONSTRAINT country_c_id_pk PRIMARY KEY (country_id)
)

/*
3 rows from countries table:
country_id	country_name	region_id
AR	Argentina	2
AU	Aust

'There are 69 employees whose departments are located in the United States of America or US.'

In [18]:
agent.run("Accroding to the database, How many employees are working in the IT or IT-related Department")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `name_search` with `IT`


[0m[33;1m[1;3m[Document(page_content='IT'), Document(page_content='IT'), Document(page_content='IL'), Document(page_content='IN')][0m[32;1m[1;3m
Invoking: `sql_db_list_tables` with ``


[0m[38;5;200m[1;3mcountries, departments, employees, locations, t_stock[0m[32;1m[1;3m
Invoking: `sql_db_schema` with `departments, employees`


[0m[33;1m[1;3m
CREATE TABLE departments (
	department_id INTEGER NOT NULL, 
	department_name VARCHAR(30) NOT NULL, 
	manager_id INTEGER, 
	location_id INTEGER, 
	CONSTRAINT dept_id_pk PRIMARY KEY (department_id)
)

/*
3 rows from departments table:
department_id	department_name	manager_id	location_id
10	Administration	200	1700
20	Marketing	201	1800
30	Purchasing	114	1700
*/


CREATE TABLE employees (
	employee_id INTEGER NOT NULL, 
	first_name VARCHAR(20), 
	last_name VARCHAR(25) NOT NULL, 
	email VARCHAR(25) NOT NULL, 
	phone_number VARCHAR(20), 
	hi

'According to the database, there are 5 employees working in the IT or IT-related departments.'