## Prerequisites

To run the code in this notebook, you need to install:

1. GridDB C Client
2. GridDB Python client

Follow the instructions on the [GridDB Python Package Index (Pypi)](https://pypi.org/project/griddb-python/) page to install these clients.

You will also need to install the LangChain, NumPy, Pandas, and Seaborn libraries.

The cells below install the LangChain packages and import the libraries you will need to run the code in this notebook.

In [None]:
!pip install langchain
!pip install langchain-core
!pip install langchain-openai

In [1]:
import os
from typing import List, Dict

import griddb_python as griddb
import pandas as pd
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field

## Creating a Connection With GridDB

In [2]:
factory = griddb.StoreFactory.get_instance()

# Connection settings used by factory.get_store() below.
DB_HOST = "127.0.0.1:10001"
DB_CLUSTER = "myCluster"
DB_USER = "admin"
DB_PASS = "admin"

try:
    # `gridstore` is the handle every later cell uses to reach GridDB.
    gridstore = factory.get_store(
        notification_member = DB_HOST,
        cluster_name = DB_CLUSTER,
        username = DB_USER,
        password = DB_PASS
    )

    # Probe for a container. A missing container comes back as None and is
    # only reported; it is not treated as a connection failure, so the
    # success message is printed either way.
    container1 = gridstore.get_container("container1")
    if container1 is None:
        print("Container does not exist")
    print("Successfully connected to GridDB")

except griddb.GSException as e:
    # A GSException carries a stack of errors; print every entry.
    for i in range(e.get_error_stack_size()):
        print("[", i, "]")
        print(e.get_error_code(i))
        print(e.get_location(i))
        print(e.get_message(i))


Container does not exist
Successfully connected to GridDB


## Inserting Data with Natural Language Commands Using LangChain

In [3]:
# Read the key from the OPENAI_API_KEY environment variable so the real secret
# never has to be pasted into (and saved with) the notebook. The placeholder
# is used only when the variable is not set.
llm = ChatOpenAI(
    api_key=os.environ.get("OPENAI_API_KEY", "YOUR_OPENAI_API_KEY"),
    temperature=0,
    model_name="gpt-4o",
)

In [4]:
# Schema of the structured output that `insert_chain` requests from the LLM
# for an insert request.
# NOTE(review): documented with `#` comments only, on the assumption that a
# class docstring would become part of the schema sent to the model - confirm
# before adding one.
class InsertData(BaseModel):
    # Column names; insert_records pairs them positionally with column_types.
    column_names: List[str]= Field(description="All the column names from the structured data")
    # Type names as strings (the system prompt asks for STRING, LONG, FLOAT, INTEGER).
    column_types: List[str] = Field(description="All the column types from the structured data")
    # One inner list per record; every value arrives as a string.
    column_values: List[List[str]] = Field(description="All the column values from the structured data")
    # Target container named in the user's request.
    container_name: str = Field(description="Name of container extracted from the user query")

In [5]:

# System instruction for the insert chain: extract the container name, the
# column names and the column types from the user's request.
system_command = """ You are an expert who extracts structure from natural language queries that must be converted to database queries.
Parse the user input query and extract the container name and column names along with their types from the user query and the user records.
The types should be parsed as STRING, LONG, FLOAT, INTEGER
"""

# Two-message prompt: the fixed system instruction, then the user's text
# substituted for {input}.
user_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_command),
        ("user", "{input}"),
    ]
)

# Pipe the prompt into the model, constrained to return an InsertData object.
insert_chain = user_prompt | llm.with_structured_output(InsertData)

In [6]:
# Sample natural-language insert request: three student records aimed at the
# student_data container.
user_query = """

Insert the following student records into the student_data container.

Name = Michael
Age = 10
Gender = Male
Grade = A


Name = Sara
Age = 11
Gender = Female
Grade = C

Name = Nick
Age = 9
Gender = Male
Grade = B

"""

# Run the chain once to inspect what the model extracted.
user_data = insert_chain.invoke({"input": user_query})

# One field per line, in the same order as separate print() calls would give.
print(
    user_data.column_names,
    user_data.column_types,
    user_data.column_values,
    user_data.container_name,
    sep="\n",
)


['Name', 'Age', 'Gender', 'Grade']
['STRING', 'INTEGER', 'STRING', 'STRING']
[['Michael', '10', 'Male', 'A'], ['Sara', '11', 'Female', 'C'], ['Nick', '9', 'Male', 'B']]
student_data


In [7]:
# Lookup table from the type names the LLM is asked to emit to the matching
# griddb.Type members. Each key is also the attribute name on griddb.Type.
# Add other types as needed.
str_to_griddb_type = {
    type_name: getattr(griddb.Type, type_name)
    for type_name in ("LONG", "INTEGER", "STRING", "FLOAT")
}

# Function to convert a list of string types to GridDB types
def convert_list_of_types(type_list):
    """Translate type names into the GridDB type constants in `str_to_griddb_type`.

    The names come from an LLM, so they are matched case-insensitively and
    with surrounding whitespace ignored (e.g. "integer" or " STRING ").

    Args:
        type_list: iterable of type names such as "STRING" or "INTEGER".

    Returns:
        A list of GridDB type constants, in the same order as `type_list`.

    Raises:
        ValueError: if a name has no entry in `str_to_griddb_type`.
    """
    griddb_types = []
    for type_str in type_list:
        lookup_key = str(type_str).strip().upper()
        if lookup_key not in str_to_griddb_type:
            # Report the name exactly as received, not the normalised form.
            raise ValueError(f"Unsupported type string: {type_str}")
        griddb_types.append(str_to_griddb_type[lookup_key])
    return griddb_types

def try_convert_to_int(value):
    """Return `value` as an int when possible, otherwise return it unchanged.

    Args:
        value: any object; typically a string taken from the LLM output.

    Returns:
        int(value) when the conversion succeeds; the original `value` when it
        does not (non-numeric text, None, lists, ...).
    """
    try:
        return int(value)
    except (TypeError, ValueError):
        # ValueError: text that is not an integer literal.
        # TypeError: objects int() cannot accept at all, e.g. None.
        return value


def _coerce_value(raw, type_name):
    """Convert one LLM-provided value to the Python type its column declares."""
    kind = str(type_name).strip().upper()
    try:
        if kind in ("INTEGER", "LONG"):
            return int(raw)
        if kind == "FLOAT":
            return float(raw)
    except (TypeError, ValueError):
        # Leave unparseable values untouched so GridDB reports the mismatch.
        return raw
    # STRING (and anything else) is stored exactly as received.
    return raw


def _coerce_row(row, column_types):
    """Apply `_coerce_value` to a row, pairing values with types by position."""
    typed_row = [_coerce_value(value, type_name)
                 for value, type_name in zip(row, column_types)]
    # Keep surplus values instead of silently dropping them, so a malformed
    # row still reaches GridDB's own validation.
    typed_row.extend(row[len(column_types):])
    return typed_row


def insert_records(query):
    """Parse a natural-language insert request and store its rows in GridDB.

    The request is sent through `insert_chain`, which returns the container
    name, column names, column types and row values. The container is created
    (or fetched) with `gridstore.put_container` and each row is written to it.

    Values are converted according to the declared type of their column
    rather than by guessing from their text: a STRING column keeps "10" as
    text, and a FLOAT column receives a float.

    Args:
        query: natural-language text describing the records and the container.

    Returns:
        None. Progress and GridDB errors are printed.

    Raises:
        ValueError: if the LLM returns a type name that is not supported
            (raised by `convert_list_of_types`).
    """
    user_data = insert_chain.invoke({"input": query})

    container_name = user_data.container_name
    column_names = user_data.column_names
    column_types = user_data.column_types

    griddb_type = convert_list_of_types(column_types)
    column_values = [_coerce_row(row, column_types) for row in user_data.column_values]

    container_columns = [[column_name, dtype]
                         for column_name, dtype in zip(column_names, griddb_type)]

    # NOTE(review): the trailing True looks like the row-key flag, which would
    # make the first column the key - confirm against the GridDB client docs.
    container_info = griddb.ContainerInfo(container_name,
                                          container_columns,
                                          griddb.ContainerType.COLLECTION, True)

    try:
        cont = gridstore.put_container(container_info)
        for row in column_values:
            cont.put(row)
        print("All rows have been successfully stored in the GridDB container.")

    except griddb.GSException as e:
        # A GSException carries a stack of errors; print every entry.
        for i in range(e.get_error_stack_size()):
            print("[", i, "]")
            print(e.get_error_code(i))
            print(e.get_location(i))
            print(e.get_message(i))


In [13]:
# Run the full insert pipeline on the sample request. insert_records invokes
# insert_chain on the query itself, then writes the parsed rows to GridDB.
insert_records(user_query)

All rows have been successfully stored in the GridDB container.


## Selecting Data

In [9]:
# Schema of the structured output that `select_chain` requests from the LLM.
# NOTE(review): documented with `#` comments only, on the assumption that a
# class docstring would become part of the schema sent to the model - confirm.
class SelectData(BaseModel):
    # Container to query; select_records passes it to gridstore.get_container.
    container_name: str = Field(description="the container name from the user query")
    # Query text produced by the model; select_records runs it on the container.
    query:str = Field(description="natural language converted to SELECT query")



# System instruction for the select chain.
system_command = """ 
Convert user commands into SQL queries for Griddb.
"""

# Two-message prompt: the fixed system instruction, then the user's text
# substituted for {input}.
user_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_command),
        ("user", "{input}"),
    ]
)

# Pipe the prompt into the model, constrained to return a SelectData object.
select_chain = user_prompt | llm.with_structured_output(SelectData)


In [14]:
def select_records(query):
    """Run a natural-language SELECT request against GridDB.

    The request is sent through `select_chain`, which returns the container
    name and the query text; that query is then executed on the container.

    Args:
        query: natural-language description of the records to fetch,
            including the container name.

    Returns:
        The result of `RowSet.fetch_rows()` for the generated query (displayed
        as a table with one column per container column in the recorded
        outputs of this notebook).

    Raises:
        ValueError: if the container named by the LLM does not exist.
    """
    # Pass the input as a dict, matching how insert_chain is invoked.
    select_data = select_chain.invoke({"input": query})
    container_name = select_data.container_name
    select_query = select_data.query

    result_container = gridstore.get_container(container_name)
    if result_container is None:
        # get_container returns None for an unknown container; fail with a
        # clear message instead of an AttributeError on the next line.
        raise ValueError(f"Container '{container_name}' does not exist")

    # Separate name so the `query` parameter is not shadowed.
    container_query = result_container.query(select_query)
    rs = container_query.fetch()
    result_data = rs.fetch_rows()
    return result_data


select_records("Give me student records from student_data container where Age is greater than or equal to 10")

Unnamed: 0,Name,Age,Gender,Grade
0,Michael,10,Male,A
1,Sara,11,Female,C


## Update Data

In [20]:
# Schema of the structured output that `update_chain` requests from the LLM.
# NOTE(review): documented with `#` comments only, on the assumption that a
# class docstring would become part of the schema sent to the model - confirm.
class UpdateData(BaseModel):
    # Container holding the rows to update.
    container_name: str = Field(description="the container name from the user query")
    # Query that selects the rows the update applies to.
    select_query:str = Field(description="natural language converted to SELECT query")
    # Column whose value is replaced.
    column_name: str = Field(description="name of the column to be updated")
    # New value; always delivered as a string, so the caller must convert it.
    column_value: str = Field(description="Column value to be updated")


# System instruction for the update chain: the model returns the four pieces
# needed to apply an update through a row cursor.
system_command = """ 
Convert user commands into SQL query as follows. If the user enters an Update query, return the following:
1. The name of the container
2. A SELECT query to query records in the update statement.
3. The name of the column to be updated. 
4. The new value for the column. 
"""

# Two-message prompt: the fixed system instruction, then the user's text
# substituted for {input}.
user_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_command),
        ("user", "{input}"),
    ]
)

# Pipe the prompt into the model, constrained to return an UpdateData object.
update_chain = user_prompt | llm.with_structured_output(UpdateData)


container_name='student_data' select_query='SELECT * FROM student_data WHERE Age >= 10;' column_name='Age' column_value='11'


In [16]:
def _coerce_like(current, raw):
    """Cast the LLM-provided string `raw` to the type of the value it replaces."""
    # bool is checked first because bool is a subclass of int.
    if isinstance(current, bool):
        return str(raw).strip().lower() in ("true", "1", "yes")
    if isinstance(current, int):
        return int(raw)
    if isinstance(current, float):
        return float(raw)
    # Strings, and values whose type cannot be inferred (e.g. None), are
    # written as received.
    return raw


def update_records(query):
    """Apply a natural-language UPDATE request to a GridDB container.

    The request is sent through `update_chain`, which returns the container
    name, a SELECT query for the affected rows, the column to change and the
    new value. Every row returned by that query is updated, and the change is
    committed once at the end.

    Args:
        query: natural-language description of the update, including the
            container name, the column, the new value and the condition.

    Returns:
        None. A confirmation message is printed after the commit.

    Raises:
        ValueError: if the container does not exist, the column is not part
            of the container, or the new value cannot be converted to the
            column's numeric type.
    """
    update_data = update_chain.invoke({"input": query})

    result_container = gridstore.get_container(update_data.container_name)
    if result_container is None:
        raise ValueError(f"Container '{update_data.container_name}' does not exist")

    # Read the column layout from the container metadata, so no second LLM
    # call or full-table fetch is needed just to find the column position.
    container_info = gridstore.get_container_info(update_data.container_name)
    column_names = [column[0] for column in container_info.column_info_list]
    if update_data.column_name not in column_names:
        raise ValueError(
            f"Column '{update_data.column_name}' not found in container "
            f"'{update_data.container_name}'"
        )
    column_index = column_names.index(update_data.column_name)

    result_container.set_auto_commit(False)
    # fetch(True) opens the row set for update so rs.update() is allowed.
    rs = result_container.query(update_data.select_query).fetch(True)

    # Walk the whole result set: every matching row must be updated, not
    # only the first one.
    while rs.has_next():
        data = rs.next()
        data[column_index] = _coerce_like(data[column_index], update_data.column_value)
        rs.update(data)

    result_container.commit()
    print("record updated successfully.")


update_records("Update the age of the students in the student_data container to 11 where Age is greater than or equal to 10")

record updated successfully.


In [18]:
# Show the container contents after the update.
select_records("Select all records from student_data container")

Unnamed: 0,Name,Age,Gender,Grade
0,Michael,11,Male,A
1,Sara,11,Female,C
2,Nick,9,Male,B


## Delete Data

In [30]:
# Schema of the structured output that `delete_chain` requests from the LLM.
# NOTE(review): documented with `#` comments only, on the assumption that a
# class docstring would become part of the schema sent to the model - confirm.
class DeleteData(BaseModel):
    # Query that selects the rows the user wants removed.
    select_query:str = Field(description="natural language converted to SELECT query")
    # Container holding those rows.
    container_name: str = Field(description="the container name from the user query")


# System instruction for the delete chain: the model returns a SELECT that
# identifies the rows to remove, not a DELETE statement.
system_command = """ 
Given a user natural language query, return an SQL select statement which selects the records that user wants to delete
"""

# Two-message prompt: the fixed system instruction, then the user's text
# substituted for {input}.
user_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_command),
        ("user", "{input}"),
    ]
)

# Pipe the prompt into the model, constrained to return a DeleteData object.
delete_chain = user_prompt | llm.with_structured_output(DeleteData)

# Quick check of what the chain extracts for a sample delete request.
result_chain = delete_chain.invoke(
    "Delete all records from student_data container whose Age is greater than 10"
)
print(result_chain)


select_query='SELECT * FROM student_data WHERE Age > 10' container_name='student_data'


In [31]:
def delete_records(query):
    """Delete the rows described by a natural-language request from GridDB.

    The request is sent through `delete_chain`, which returns the container
    name and a SELECT query identifying the rows to remove. Each selected row
    is removed through the row cursor and the change is committed once at the
    end.

    Args:
        query: natural-language description of the rows to delete, including
            the container name.

    Returns:
        None. A confirmation message is printed after the commit.

    Raises:
        ValueError: if the container named by the LLM does not exist.
    """
    # Use the delete chain: its DeleteData schema has exactly the two fields
    # needed here. The update chain would also demand a column name and a new
    # value, which a delete request does not contain.
    delete_data = delete_chain.invoke({"input": query})

    result_container = gridstore.get_container(delete_data.container_name)
    if result_container is None:
        raise ValueError(f"Container '{delete_data.container_name}' does not exist")

    result_container.set_auto_commit(False)

    # fetch(True) opens the row set for update so rs.remove() is allowed.
    rs = result_container.query(delete_data.select_query).fetch(True)

    while rs.has_next():
        # Advance the cursor onto the row; its contents are not needed.
        rs.next()
        rs.remove()

    result_container.commit()
    print("Records deleted successfully")

delete_records("Delete all records from student_data container whose Age is greater than 10")

Records deleted successfully


In [32]:
# Show the container contents after the delete.
select_records("Select all records from student_data container")

Unnamed: 0,Name,Age,Gender,Grade
0,Nick,9,Male,B
