In [1]:
#%pip install -U langchain_community tiktoken langchain-openai langchainhub chromadb langchain langgraph faiss-cpu
import json
import os

# Never commit a real key. Prefer an OPENAI_API_KEY that is already exported in
# the environment; fall back to the placeholder only when nothing is configured,
# so running this cell can no longer clobber a working key with "yours".
key = os.environ.get("OPENAI_API_KEY", "yours")
os.environ["OPENAI_API_KEY"] = key

In [11]:
from bs4 import BeautifulSoup as Soup
from langchain_community.document_loaders.recursive_url_loader import RecursiveUrlLoader

# Primary corpus: crawl the LCEL documentation tree and keep only page text.
url = "https://python.langchain.com/docs/expression_language/"
loader = RecursiveUrlLoader(
    url=url,
    max_depth=20,
    extractor=lambda html: Soup(html, "html.parser").text,
)
docs = loader.load()

# Extra page: LCEL with PydanticOutputParser (lives outside the LCEL tree).
url = "https://python.langchain.com/docs/modules/model_io/output_parsers/quick_start"
loader = RecursiveUrlLoader(
    url=url,
    max_depth=1,
    extractor=lambda html: Soup(html, "html.parser").text,
)
docs_pydantic = loader.load()

# Extra page: LCEL with Self Query (also outside the LCEL tree).
url = "https://python.langchain.com/docs/modules/data_connection/retrievers/self_query/"
loader = RecursiveUrlLoader(
    url=url,
    max_depth=1,
    extractor=lambda html: Soup(html, "html.parser").text,
)
docs_sq = loader.load()

# Fold the two extra pages into the main list (in place).
docs += docs_pydantic + docs_sq

# Order by source URL, then flip so the list runs in descending URL order.
d_sorted = sorted(docs, key=lambda document: document.metadata["source"])
d_reversed = d_sorted[::-1]

# Single context string: every page's text joined by a visible separator.
concatenated_content = "\n\n\n --- \n\n\n".join(
    document.page_content for document in d_reversed
)

In [12]:
from typing import Any, Dict, TypedDict


class GraphState(TypedDict):
    """
    Represents the state of our graph.

    Attributes:
        keys: A dictionary where each key is a string and each value may be
            of any type (the nodes store e.g. the question, the generation,
            the error text and the iteration counter in it).
    """

    # `typing.Any`, not the builtin function `any`: the latter is a callable,
    # not a type, so type checkers reject it and it documents nothing.
    keys: Dict[str, Any]

In [13]:
from operator import itemgetter

from langchain.output_parsers.openai_tools import PydanticToolsParser
from langchain.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.runnables import RunnablePassthrough
from langchain_core.utils.function_calling import convert_to_openai_tool
from langchain_openai import ChatOpenAI


def generate(state: GraphState):
    """
    Produce (or re-produce) a code solution for the user's question.

    The model is forced to answer through the `code` tool, so the reply is
    parsed into structured prefix / imports / code fields. When the incoming
    state carries an "error" key, the previous solution and that error text
    are appended to the prompt as feedback.

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): "keys" holding the parsed generation, the question and
        the iteration counter incremented by one

    NOTE(review): the returned dict has no "error" key, so error text from
    earlier attempts is presumably not available to later nodes - confirm
    against how the graph merges state.
    """

    ## State
    graph_keys = state["keys"]
    question = graph_keys["question"]
    attempts_so_far = graph_keys["iterations"]

    ## Data model
    class code(BaseModel):
        """Code output"""

        prefix: str = Field(description="Description of the problem and approach")
        imports: str = Field(description="Code block import statements")
        code: str = Field(description="Code block not including import statements")

    ## LLM
    model = ChatOpenAI(temperature=0, model="gpt-4-0125-preview", streaming=True)

    # Tool schema derived from the data model
    code_tool_oai = convert_to_openai_tool(code)

    # Bind the tool and force the model to call it
    llm_with_tool = model.bind(
        tools=[code_tool_oai],
        tool_choice={"type": "function", "function": {"name": "code"}},
    )

    # Parser turning the tool call back into `code` instances
    parser_tool = PydanticToolsParser(tools=[code])

    ## Prompt
    template = """You are a coding assistant with expertise in LCEL, LangChain expression language. \n 
        Here is a full set of LCEL documentation: 
        \n ------- \n
        {context} 
        \n ------- \n
        Answer the user question based on the above provided documentation. \n
        Ensure any code you provide can be executed with all required imports and variables defined. \n
        Structure your answer with a description of the code solution. \n
        Then list the imports. And finally list the functioning code block. \n
        Here is the user question: \n --- --- --- \n {question}"""

    ## Generation
    # Pieces shared by the first attempt and by every retry; the retry branch
    # below only extends them.
    prompt_variables = ["context", "question"]
    chain_inputs = {
        "context": lambda _: concatenated_content,
        "question": itemgetter("question"),
    }
    payload = {"question": question}

    if "error" not in graph_keys:
        print("---GENERATE SOLUTION---")
    else:
        print("---RE-GENERATE SOLUTION w/ ERROR FEEDBACK---")

        previous_error = graph_keys["error"]
        previous_solution = graph_keys["generation"]

        # Update prompt with the failed attempt and its error
        addendum = """  \n --- --- --- \n You previously tried to solve this problem. \n Here is your solution:  
                    \n --- --- --- \n {generation}  \n --- --- --- \n  Here is the resulting error from code 
                    execution:  \n --- --- --- \n {error}  \n --- --- --- \n Please re-try to answer this. 
                    Structure your answer with a description of the code solution. \n Then list the imports. 
                    And finally list the functioning code block. Structure your answer with a description of 
                    the code solution. \n Then list the imports. And finally list the functioning code block. 
                    \n Here is the user question: \n --- --- --- \n {question}"""
        template = template + addendum

        prompt_variables += ["generation", "error"]
        chain_inputs["generation"] = itemgetter("generation")
        chain_inputs["error"] = itemgetter("error")
        payload["generation"] = str(previous_solution[0])
        payload["error"] = previous_error

    # Prompt -> forced tool call -> parsed `code` objects
    prompt = PromptTemplate(template=template, input_variables=prompt_variables)
    chain = chain_inputs | prompt | llm_with_tool | parser_tool
    code_solution = chain.invoke(payload)

    return {
        "keys": {
            "generation": code_solution,
            "question": question,
            "iterations": attempts_so_far + 1,
        }
    }


def check_code_imports(state: GraphState):
    """
    Execute only the import statements of the generated solution.

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): "keys" with generation, question and iterations passed
        through, plus "error": the string "None" on success, otherwise the
        failure text (appended to any "error" already present in the state)
    """

    ## State
    print("---CHECKING CODE IMPORTS---")
    graph_keys = state["keys"]
    question = graph_keys["question"]
    code_solution = graph_keys["generation"]
    import_block = code_solution[0].imports
    attempts = graph_keys["iterations"]

    try:
        # Run the import block as-is; anything it raises counts as a failure
        exec(import_block)
    except Exception as exc:
        print("---CODE IMPORT CHECK: FAILED---")
        # Covers ImportError, SyntaxError and every other Exception subclass
        error = f"Execution error: {exc}"
        if "error" in graph_keys:
            # Keep the history of earlier failures in front of the newest one
            error = "\n --- Most recent run error --- \n".join(
                [graph_keys["error"], error]
            )
    else:
        print("---CODE IMPORT CHECK: SUCCESS---")
        # The literal string "None" is the success sentinel the edges test for
        error = "None"

    checked_keys = {
        "generation": code_solution,
        "question": question,
        "error": error,
        "iterations": attempts,
    }
    return {"keys": checked_keys}


def check_code_execution(state: GraphState):
    """
    Check that the generated imports plus code block execute without error.

    Args:
        state (dict): The current graph state

    Returns:
        state (dict): "keys" with generation, question and iterations passed
        through, the solution's prefix / imports / code unpacked, and "error":
        the string "None" on success, otherwise the failure text (appended to
        any "error" already present in the state)
    """

    ## State
    print("---CHECKING CODE EXECUTION---")
    state_dict = state["keys"]
    question = state_dict["question"]
    code_solution = state_dict["generation"]
    prefix = code_solution[0].prefix
    imports = code_solution[0].imports
    code = code_solution[0].code
    code_block = imports + "\n" + code
    iterations = state_dict["iterations"]

    try:
        # Execute in ONE namespace dict. A bare exec() inside a function uses
        # separate globals and locals, so names bound at the snippet's top
        # level (its imports, helpers, variables) are invisible to functions,
        # lambdas and comprehensions the snippet itself defines, and correct
        # code fails with NameError. A copy of globals() keeps every notebook
        # name readable while stopping the snippet from rebinding them.
        exec(code_block, dict(globals()))
    except Exception as e:
        print("---CODE BLOCK CHECK: FAILED---")
        # Catch any error during execution (e.g., ImportError, SyntaxError)
        error = f"Execution error: {e}"
        if "error" in state_dict:
            error_prev_runs = state_dict["error"]
            error = error_prev_runs + "\n --- Most recent run error --- \n" + error
    else:
        print("---CODE BLOCK CHECK: SUCCESS---")
        # The literal string "None" is the success sentinel the edges test for
        error = "None"

    return {
        "keys": {
            "generation": code_solution,
            "question": question,
            "error": error,
            "prefix": prefix,
            "imports": imports,
            "iterations": iterations,
            "code": code,
        }
    }


### Edges


def decide_to_check_code_exec(state: GraphState):
    """
    Route after the import check: run the full code, or regenerate.

    Args:
       state (dict): The current graph state

    Returns:
        str: "check_code_execution" when the state's error is the "None"
        sentinel, otherwise "generate"
    """

    print("---DECIDE TO TEST CODE EXECUTION---")
    import_error = state["keys"]["error"]

    if import_error != "None":
        # The import check reported a failure: ask for a new solution
        print("---DECISION: RE-TRY SOLUTION---")
        return "generate"

    # Imports ran cleanly: move on to executing the whole code block
    print("---DECISION: TEST CODE EXECUTION---")
    return "check_code_execution"


def decide_to_finish(state: GraphState):
    """
    Determines whether to finish or to re-try generation (at most 3 iterations).

    Args:
        state (dict): The current graph state

    Returns:
        str: "end" when the code ran cleanly or the iteration budget is
        used up, otherwise "generate"
    """

    print("---DECIDE TO FINISH---")
    state_dict = state["keys"]
    error = state_dict["error"]
    iterations = state_dict["iterations"]
    max_iterations = 3

    # ">=" rather than "==": failed import checks loop back to `generate`
    # without passing through this edge, so the counter can already be past
    # 3 the first time it is inspected here. An equality test would then
    # never stop the retries.
    if error == "None" or iterations >= max_iterations:
        print("---DECISION: FINISH---")
        return "end"

    # Execution failed and there is budget left: regenerate with feedback
    print("---DECISION: RE-TRY SOLUTION---")
    return "generate"

In [14]:
from langgraph.graph import END, StateGraph

# State machine over GraphState: generate -> check imports -> check execution,
# with conditional edges looping back to `generate` on failure.
workflow = StateGraph(GraphState)

# Define the nodes (each node is one of the functions defined above)
workflow.add_node("generate", generate)  # produce / re-produce a solution
workflow.add_node("check_code_imports", check_code_imports)  # run the import block only
workflow.add_node("check_code_execution", check_code_execution)  # run imports + code

# Build graph
workflow.set_entry_point("generate")
workflow.add_edge("generate", "check_code_imports")
# The mapping keys are the strings returned by the routing function.
# NOTE(review): decide_to_check_code_exec does not read the iteration counter,
# so repeated import failures loop generate -> check_code_imports with no cap
# visible in this notebook - confirm that is intended.
workflow.add_conditional_edges(
    "check_code_imports",
    decide_to_check_code_exec,
    {
        "check_code_execution": "check_code_execution",
        "generate": "generate",
    },
)
# After execution: stop (END) or go back to `generate`, as decide_to_finish says.
workflow.add_conditional_edges(
    "check_code_execution",
    decide_to_finish,
    {
        "end": END,
        "generate": "generate",
    },
)

# Compile into a runnable app
app = workflow.compile()

In [15]:
from langchain_core.runnables import RunnableLambda


## Data model
# Structured answer schema for the baseline chain. It is passed to
# convert_to_openai_tool and PydanticToolsParser below; the class name "code"
# is the function name forced through `tool_choice`.
class code(BaseModel):
    """Code output"""

    # Natural-language explanation of the approach
    prefix: str = Field(description="Description of the problem and approach")
    # Import statements, kept separate from the body
    imports: str = Field(description="Code block import statements")
    # The solution body without its imports
    code: str = Field(description="Code block not including import statements")


## LLM
model = ChatOpenAI(temperature=0, model="gpt-4-0125-preview", streaming=True)

# Tool schema derived from the `code` data model
code_tool_oai = convert_to_openai_tool(code)

# LLM with tool and enforce invocation.
# `code_tool_oai` is already in OpenAI tool format, so it is bound directly
# (as `generate` does) instead of being run through the converter a second time.
llm_with_tool = model.bind(
    tools=[code_tool_oai],
    tool_choice={"type": "function", "function": {"name": "code"}},
)

# Parser turning the forced tool call back into `code` instances
parser_tool = PydanticToolsParser(tools=[code])

# Baseline prompt (no error-feedback addendum). Its text matches the base
# template built inside `generate`. {context} receives the concatenated docs
# and {question} the user question.
prompt = PromptTemplate(
    template="""You are a coding assistant with expertise in LCEL, LangChain expression language. \n 
        Here is a full set of LCEL documentation: 
        \n ------- \n
        {context} 
        \n ------- \n
        Answer the user question based on the above provided documentation. \n
        Ensure any code you provide can be executed with all required imports and variables defined. \n
        Structure your answer with a description of the code solution. \n
        Then list the imports. And finally list the functioning code block. \n
        Here is the user question: \n --- --- --- \n {question}""",
    input_variables=["question", "context"],
)


def parse_answer_to_dict(x):
    """Return the first parsed tool call in `x` as a plain dict (via its `.dict()`)."""
    first_answer = x[0]
    return first_answer.dict()


# Single-shot baseline without the retry graph: the raw input string becomes
# {question}, the concatenated docs become {context}, the model is forced
# through the `code` tool, and the first parsed answer is returned as a dict.
chain_base_case = (
    {
        "context": lambda _: concatenated_content,  # input ignored; always the full docs
        "question": RunnablePassthrough(),  # the invoke() argument, unchanged
    }
    | prompt
    | llm_with_tool
    | parser_tool
    | RunnableLambda(parse_answer_to_dict)
)

In [22]:
# Run the baseline chain once on the design question; `answer` is the dict
# produced by parse_answer_to_dict (keys: prefix, imports, code).
answer = chain_base_case.invoke("I'm working with multi-agent teams to complete tasks with langchain. In that paradigm the agents are given tools they can interact with on the host system that allow them to complete the tasks by executing the tools on the host system. In dynamic problem solving it is difficult to know what tools they should have a-priori. I want to develop a set of tools and new paradigm that allows them to create tools as needed and update a tool bank that they can use. This probably requires some technical work on the backend for updating a tool bank, retrieving tool descriptions for them on request to include the new tools created, and interacting with the agents requests to use the tools whenever they request them and send their inputs. I'm also thinking that I should use a RAG system to allow them to search for existing tools in the database by semantic similarity based on the tool description field and their description of the type of tool they want. If the tool they want doesn't exist then they send a request to a tool creator agent team that builds the tool according to the requirements requested, and they build test cases and save the tool out into the tool bank whenever it's completed and let the requester know they can access the tool. The complicated part will be how can I allow the other agents have access to these tools as they get dynamically created. I'm thinking they might have to have a tool retriever tool that loads up the new tool on request and runs it. ")

In [17]:
import pprint
pprint.pprint(answer)

{'code': '# Initialize the tool bank\n'
         'def initialize_system():\n'
         '    create_tool_bank()\n'
         '    for tool in example_tools:\n'
         "        add_tool(tool['name'], tool['description'], "
         "tool['command'])\n"
         '\n'
         '# Example usage\n'
         'initialize_system()\n'
         '\n'
         '# Search for a tool based on a description\n'
         "searched_tool = search_tools('generates reports from data')\n"
         "print(f'Searched tool: {searched_tool}')\n"
         '\n'
         '# Execute the searched tool\n'
         "execute_tool(searched_tool['command'])\n",
 'imports': 'from typing import List, Dict\n'
            'import sqlite3\n'
            'import subprocess\n'
            'import json\n'
            'from sklearn.feature_extraction.text import TfidfVectorizer\n'
            'from sklearn.metrics.pairwise import cosine_similarity\n'
            '\n'
            '# Assuming a simple SQLite database for the tool ba

In [39]:
from langchain.pydantic_v1 import BaseModel, Field
from langchain.tools import BaseTool, StructuredTool, tool

# Demo tool. The @tool decorator exposes .name, .description and .args
# (printed below); the description is built from the signature and docstring.
@tool
def multiply(a: int, b: int) -> int:
    """Multiply two numbers."""
    return a * b

print(multiply.name)
print(multiply.description)
print(multiply.args)

multiply
multiply(a: int, b: int) -> int - Multiply two numbers.
{'a': {'title': 'A', 'type': 'integer'}, 'b': {'title': 'B', 'type': 'integer'}}


In [64]:
from typing import List, Dict
import sqlite3
import subprocess
import json
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Assuming a simple SQLite database for the tool bank
db_connection = sqlite3.connect('tool_bank.db')

def create_tool_bank():
    """Create the `tools` table in the tool-bank database unless it already exists."""
    schema = '''CREATE TABLE IF NOT EXISTS tools (
        id INTEGER PRIMARY KEY,
        name TEXT NOT NULL,
        description TEXT NOT NULL,
        args TEXT NOT NULL
    )'''
    db_connection.cursor().execute(schema)
    db_connection.commit()

# Example tool descriptions
# Each entry supplies the four values initialize_system passes to add_tool:
# the tool's name, description, argument schema and the callable itself.
example_tools = [
    {'name': multiply.name, 'description': multiply.description, 
     'args': multiply.args, 'function': multiply}
]

# Function to add tools to the tool bank
# def add_tool(name: str, description: str, args: str):
#     cursor = db_connection.cursor()
#     cursor.execute('INSERT INTO tools (name, description, args) VALUES (?, ?, ?)', (name, description, str(args)))
#     db_connection.commit()
# Function to add tools to the tool bank
def add_tool(name: str, description: str, args: dict, function):
    """
    Register a tool in the database and make its callable reachable by name.

    Args:
        name: Tool name; also the global name the callable is stored under.
        description: Text that search_tools matches queries against.
        args: Argument schema; stored as its str() representation.
        function: The callable executed by execute_tool.
    """
    cursor = db_connection.cursor()
    # Update first and insert only when no row was touched, so re-running the
    # notebook does not pile up duplicate rows for the same tool.
    cursor.execute('UPDATE tools SET description = ?, args = ? WHERE name = ?', (description, str(args), name))
    if cursor.rowcount == 0:
        cursor.execute('INSERT INTO tools (name, description, args) VALUES (?, ?, ?)', (name, description, str(args)))
    db_connection.commit()
    # Dynamically declare the tool function in the runtime global namespace
    globals()[name] = function
    
# Function to search for tools based on description similarity
def search_tools(description: str) -> Dict:
    """
    Return the stored tool whose description is most similar to `description`.

    Similarity is the cosine similarity between TF-IDF vectors fitted on the
    stored descriptions.

    Args:
        description: Free-text description of the wanted tool.

    Returns:
        A dict with the best match's 'name' and its stored 'args' string.

    Raises:
        ValueError: if the tool bank holds no tools.
    """
    cursor = db_connection.cursor()
    cursor.execute('SELECT name, description, args FROM tools')
    tools = cursor.fetchall()
    if not tools:
        # Fail with a clear message instead of the vectorizer's opaque error
        raise ValueError("Tool bank is empty; add a tool before searching.")
    tool_descriptions = [row[1] for row in tools]
    vectorizer = TfidfVectorizer()
    # Fit and transform the corpus in one pass rather than transforming twice
    tool_vecs = vectorizer.fit_transform(tool_descriptions)
    query_vec = vectorizer.transform([description])
    similarity = cosine_similarity(query_vec, tool_vecs)
    # `similarity` has one row (the query); argmax picks the best column/tool.
    # If the query shares no term with any description every score is 0 and
    # the first stored tool is returned.
    best_name, _, best_args = tools[int(similarity.argmax())]
    return {'name': best_name, 'args': best_args}

# Function to execute a tool command
# def execute_tool(command: str):
#     process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
#     output, error = process.communicate()
#     if error:
#         print(f'Error executing tool: {error}')
#     else:
#         print(f'Tool output: {output.decode()}')
# Function to execute a tool command
# def execute_tool(tool_name: str, args: str):
#     cursor = db_connection.cursor()
#     cursor.execute('SELECT args FROM tools WHERE name=?', (tool_name,))
#     tool_args = cursor.fetchone()
#     if tool_args:
#         # Construct the command with tool name and arguments
#         command = f"{tool_name} {args}"
#         process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
#         output, error = process.communicate()
#         if error:
#             print(f'Error executing tool: {error}')
#         else:
#             print(f'Tool output: {output.decode()}')
#     else:
#         print("Tool not found in the tool bank.")
# Function to execute a tool command
# def execute_tool(tool_name: str, tool_args: str):
#     cursor = db_connection.cursor()
#     cursor.execute('SELECT args FROM tools WHERE name=?', (tool_name,))
#     stored_tool_args = cursor.fetchone()
#     if stored_tool_args:
#         # Construct the command with tool name and arguments
#         command = f"{tool_name} {tool_args}"
#         # Execute the tool command
#         process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
#         output, error = process.communicate()
#         if error:
#             print(f'Error executing tool: {error}')
#         else:
#             print(f'Tool output: {output.decode()}')
#     else:
#         print("Tool not found in the tool bank.")
# Function to execute a tool command
def execute_tool(tool_name: str, tool_args: dict):
    """
    Look up a tool by name in the global namespace and call it with `tool_args`.

    The outcome is printed, not returned.

    Args:
        tool_name: Name under which add_tool stored the callable.
        tool_args: Passed to the callable as its single argument.
    """
    # Resolve the tool separately from running it: a KeyError raised inside
    # the tool is an execution error and must not be reported as "not found".
    try:
        tool_function = globals()[tool_name]
    except KeyError:
        print("Tool not found in the tool bank.")
        return

    try:
        # Execute the tool function with provided arguments
        result = tool_function(tool_args)
    except Exception as e:
        print(f'Error executing tool: {e}')
    else:
        print(f'Tool output: {result}')


In [65]:
# Initialize the tool bank
def initialize_system():
    """Create the tool table, then register every entry of `example_tools`."""
    create_tool_bank()
    fields = ('name', 'description', 'args', 'function')
    for entry in example_tools:
        add_tool(*(entry[field] for field in fields))

# Example usage: create the table and register the example tools
initialize_system()

# Search for a tool based on a description
searched_tool = search_tools('multiply two numbers')
print(f'Searched tool: {searched_tool}')

# Execute the searched tool (looked up by name; the dict is its argument)
execute_tool(searched_tool['name'], {'a': 3, 'b': 4})

Searched tool: {'name': 'multiply', 'args': "{'a': {'title': 'A', 'type': 'integer'}, 'b': {'title': 'B', 'type': 'integer'}}"}
Tool output: 12


In [69]:
searched_tool['name']

'multiply'

In [2]:
from typing import List, Dict
import sqlite3
import subprocess
import json
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Assuming a simple SQLite database for the tool bank
db_connection = sqlite3.connect('tool_bank.db')

def create_tool_bank():
    """Ensure the `tools` table exists in the tool-bank database, then commit."""
    create_statement = '''CREATE TABLE IF NOT EXISTS tools (
        id INTEGER PRIMARY KEY,
        name TEXT NOT NULL,
        description TEXT NOT NULL,
        args TEXT NOT NULL
    )'''
    table_cursor = db_connection.cursor()
    table_cursor.execute(create_statement)
    db_connection.commit()

# Function to add tools to the tool bank
def add_tool(name: str, description: str, args: dict, function):
    """
    Register a tool in the database and make its callable reachable by name.

    Args:
        name: Tool name; also the global name the callable is stored under.
        description: Text that search_tools matches queries against.
        args: Argument schema; stored as its str() representation.
        function: The callable executed by execute_tool.
    """
    cursor = db_connection.cursor()
    # Update first and insert only when no row was touched, so re-running the
    # notebook does not pile up duplicate rows for the same tool.
    cursor.execute('UPDATE tools SET description = ?, args = ? WHERE name = ?', (description, str(args), name))
    if cursor.rowcount == 0:
        cursor.execute('INSERT INTO tools (name, description, args) VALUES (?, ?, ?)', (name, description, str(args)))
    db_connection.commit()
    # Dynamically declare the tool function in the runtime global namespace
    globals()[name] = function
    
# Function to search for tools based on description similarity
def search_tools(description: str) -> Dict:
    """
    Return the stored tool whose description is most similar to `description`.

    Similarity is the cosine similarity between TF-IDF vectors fitted on the
    stored descriptions.

    Args:
        description: Free-text description of the wanted tool.

    Returns:
        A dict with the best match's 'name' and its stored 'args' string.

    Raises:
        ValueError: if the tool bank holds no tools.
    """
    cursor = db_connection.cursor()
    cursor.execute('SELECT name, description, args FROM tools')
    tools = cursor.fetchall()
    if not tools:
        # Fail with a clear message instead of the vectorizer's opaque error
        raise ValueError("Tool bank is empty; add a tool before searching.")
    tool_descriptions = [row[1] for row in tools]
    vectorizer = TfidfVectorizer()
    # Fit and transform the corpus in one pass rather than transforming twice
    tool_vecs = vectorizer.fit_transform(tool_descriptions)
    query_vec = vectorizer.transform([description])
    similarity = cosine_similarity(query_vec, tool_vecs)
    # `similarity` has one row (the query); argmax picks the best column/tool.
    # If the query shares no term with any description every score is 0 and
    # the first stored tool is returned.
    best_name, _, best_args = tools[int(similarity.argmax())]
    return {'name': best_name, 'args': best_args}

# Function to execute a tool command
def execute_tool(tool_name: str, tool_args: dict):
    """
    Look up a tool by name in the global namespace and call it with `tool_args`.

    The outcome is printed, not returned.

    Args:
        tool_name: Name under which add_tool stored the callable.
        tool_args: Passed to the callable as its single argument.
    """
    # Resolve the tool separately from running it: a KeyError raised inside
    # the tool is an execution error and must not be reported as "not found".
    try:
        tool_function = globals()[tool_name]
    except KeyError:
        print("Tool not found in the tool bank.")
        return

    try:
        # Execute the tool function with provided arguments
        result = tool_function(tool_args)
    except Exception as e:
        print(f'Error executing tool: {e}')
    else:
        print(f'Tool output: {result}')

from langchain.tools import tool

# Demo tool. Its .name, .description and .args are stored in the tool bank
# below; the docstring is part of the description that search_tools matches.
@tool
def multiply(a: int, b: int) -> int:
    """Multiply two numbers."""
    return a * b

# Second demo tool, so the similarity search has more than one candidate.
@tool
def concat(a: str, b: str) -> str:
    """Concatenate two strings."""
    return a + b

print("Tool name: ", multiply.name)
print("Tool description: ", multiply.description)
print("Tool argument schema: ", multiply.args)
'''
multiply
multiply(a: int, b: int) -> int - Multiply two numbers.
{'a': {'title': 'A', 'type': 'integer'}, 'b': {'title': 'B', 'type': 'integer'}}
'''

# Example tool descriptions
# Each entry supplies the four values initialize_system passes to add_tool:
# the tool's name, description, argument schema and the callable itself.
example_tools = [
    {'name': multiply.name, 'description': multiply.description, 
     'args': multiply.args, 'function': multiply},
    {'name': concat.name, 'description': concat.description, 
     'args': concat.args, 'function': concat}
    
]


# Initialize the tool bank
def initialize_system():
    """Set up the tool table and register each tool listed in `example_tools`."""
    create_tool_bank()
    for spec in example_tools:
        tool_name, tool_description = spec['name'], spec['description']
        tool_schema, tool_callable = spec['args'], spec['function']
        add_tool(tool_name, tool_description, tool_schema, tool_callable)

# Example usage: create the table and register both example tools
initialize_system()

tool_search_query1 = 'I need a tool that allows me to take the product of two numbers'

# Search for a tool based on a description
searched_tool = search_tools(tool_search_query1)
print(f'Searched tool: {searched_tool}')

# Execute the searched tool (looked up by name; the dict is its argument)
execute_tool(searched_tool['name'], {'a': 3, 'b': 4})

tool_search_query2 = 'I need a tool that allows me to concatenate two strings'

# Same flow with a query aimed at the second tool
searched_tool = search_tools(tool_search_query2)
print(f'Searched tool: {searched_tool}')

# Execute the searched tool
execute_tool(searched_tool['name'], {'a': 'hello', 'b': 'world'})

Tool name:  multiply
Tool description:  multiply(a: int, b: int) -> int - Multiply two numbers.
Tool argument schema:  {'a': {'title': 'A', 'type': 'integer'}, 'b': {'title': 'B', 'type': 'integer'}}
Searched tool: {'name': 'multiply', 'args': "{'a': {'title': 'A', 'type': 'integer'}, 'b': {'title': 'B', 'type': 'integer'}}"}
Tool output: 12
Searched tool: {'name': 'concat', 'args': "{'a': {'title': 'A', 'type': 'string'}, 'b': {'title': 'B', 'type': 'string'}}"}
Tool output: helloworld


In [3]:
class Tool:
    """A named callable bundled with its description and argument schema."""

    def __init__(self, name: str, description: str, args: dict, function):
        self.name, self.description = name, description
        self.args, self.function = args, function

    def validate_args(self, **kwargs):
        """Validation hook for `kwargs` against `self.args`; currently a no-op."""
        # Implement validation logic based on self.args
        return None

    def execute(self, **kwargs):
        """Run the validation hook, then call the wrapped function with `kwargs`."""
        self.validate_args(**kwargs)
        outcome = self.function(**kwargs)
        return outcome

class ToolBank:
    """
    SQLite-backed tool registry with an in-memory cache of live Tool objects.

    The database stores name / description / JSON args. The callables exist
    only in `self.tools`, so only tools added in this session can be executed.
    """

    def __init__(self, db_path='tool_bank.db'):
        self.db_path = db_path
        self.db_connection = sqlite3.connect(db_path)
        self.tools = {}  # Cache tools in memory for quicker access
        self.initialize_db()

    def initialize_db(self):
        """Create the `tools` table if it does not exist yet."""
        cursor = self.db_connection.cursor()
        cursor.execute('''CREATE TABLE IF NOT EXISTS tools (
            id INTEGER PRIMARY KEY,
            name TEXT NOT NULL,
            description TEXT NOT NULL,
            args TEXT NOT NULL
        )''')
        self.db_connection.commit()

    def add_tool(self, tool: "Tool"):
        """Persist the tool's metadata and cache the tool object by name."""
        cursor = self.db_connection.cursor()
        serialized_args = json.dumps(tool.args)
        # Update first and insert only when no row was touched, so registering
        # the same tool again (e.g. on a notebook re-run) adds no duplicate row.
        cursor.execute('UPDATE tools SET description = ?, args = ? WHERE name = ?',
                       (tool.description, serialized_args, tool.name))
        if cursor.rowcount == 0:
            cursor.execute('INSERT INTO tools (name, description, args) VALUES (?, ?, ?)',
                           (tool.name, tool.description, serialized_args))
        self.db_connection.commit()
        self.tools[tool.name] = tool  # Cache the tool

    def search_tools(self, description: str) -> "Tool":
        """
        Return the cached Tool whose stored description best matches `description`.

        Raises:
            KeyError: if no stored tool has a live object in the cache.
        """
        cursor = self.db_connection.cursor()
        cursor.execute('SELECT name, description, args FROM tools')
        # The database outlives the process but the cache does not: rows left
        # over from earlier sessions have no callable, so rank only rows that
        # can actually be returned instead of failing on the cache lookup.
        rows = [row for row in cursor.fetchall() if row[0] in self.tools]
        if not rows:
            raise KeyError("Tool not found.")
        tool_descriptions = [row[1] for row in rows]
        vectorizer = TfidfVectorizer()
        tool_vecs = vectorizer.fit_transform(tool_descriptions)
        query_vec = vectorizer.transform([description])
        similarity = cosine_similarity(query_vec, tool_vecs)
        best_name = rows[int(similarity.argmax())][0]
        return self.tools[best_name]

    def execute_tool(self, tool_name: str, **kwargs):
        """Run the cached tool `tool_name` with `kwargs`; KeyError if unknown."""
        tool = self.tools.get(tool_name)
        if tool:
            return tool.execute(**kwargs)
        else:
            raise KeyError("Tool not found.")

    async def execute_tool_async(self, tool_name: str, **kwargs):
        """
        Async variant of execute_tool.

        Awaits the result only when the tool produced an awaitable, so both
        coroutine-based and plain synchronous tools work.
        """
        import inspect

        tool = self.tools.get(tool_name)
        if tool:
            result = tool.execute(**kwargs)
            if inspect.isawaitable(result):
                return await result
            return result
        else:
            raise KeyError("Tool not found.")

In [4]:
import json




class ToolBank:
    """
    JSON-backed tool registry.

    Each stored entry carries the tool's source code. Loading or adding a tool
    exec()s that code into the module globals and caches the resulting
    function together with its description and argument schema.
    """

    def __init__(self, json_path='tool_bank.json'):
        self.json_path = json_path
        self.tools = {}  # Cache tools in memory for quicker access
        self.load_tools()

    def _read_data(self):
        """Return the parsed JSON document, or an empty bank if the file does not exist yet."""
        try:
            with open(self.json_path, 'r') as file:
                return json.load(file)
        except FileNotFoundError:
            return {"tools": []}

    def _register(self, name, description, args, code):
        """Define the tool from its source and cache it under `name`."""
        # SECURITY: exec() runs arbitrary code with full interpreter
        # privileges. Only load tool banks whose contents are trusted.
        exec(code, globals())
        self.tools[name] = {
            "function": globals()[name],
            "args": args,
            "description": description
        }

    def load_tools(self):
        """Define and cache every tool stored in the JSON file (none if it is missing)."""
        for tool_data in self._read_data()["tools"]:
            self._register(
                tool_data["name"],
                tool_data["description"],
                tool_data["args"],
                tool_data["code"],
            )

    def add_tool(self, name: str, description: str, args: dict, code: str):
        """Define a new tool from `code`, cache it, and persist it to the JSON file."""
        tool_data = {
            "name": name,
            "description": description,
            "args": args,
            "code": code
        }
        # Update the tools dictionary
        self._register(name, description, args, code)
        # Save to JSON
        self.save_to_json(tool_data)

    def save_to_json(self, tool_data):
        """Write `tool_data` into the JSON file, replacing any entry with the same name."""
        data = self._read_data()
        data["tools"] = [
            entry for entry in data["tools"] if entry.get("name") != tool_data["name"]
        ]
        data["tools"].append(tool_data)
        # Mode 'w' truncates first. Rewriting in place with 'r+' and seek(0)
        # leaves the tail of the old content behind whenever the new dump is
        # shorter, which corrupts the file.
        with open(self.json_path, 'w') as file:
            json.dump(data, file, indent=4)

    def execute_tool(self, tool_name: str, **kwargs):
        """Call the cached tool `tool_name` with `kwargs`; KeyError if unknown."""
        tool = self.tools.get(tool_name)
        if tool:
            return tool["function"](**kwargs)
        else:
            raise KeyError("Tool not found.")


In [5]:
tool_bank = ToolBank()

In [6]:
tool_bank.tools

{'multiply': {'function': <function __main__.multiply(a, b)>,
  'args': {'a': {'title': 'A', 'type': 'integer'},
   'b': {'title': 'B', 'type': 'integer'}},
  'description': 'Multiply two numbers.'},
 'concat': {'function': <function __main__.concat(a, b)>,
  'args': {'a': {'title': 'A', 'type': 'string'},
   'b': {'title': 'B', 'type': 'string'}},
  'description': 'Concatenate two strings.'}}

In [None]:
# !pip install transformers
# !pip install torch
# !pip install PyYAML


# from langchain.tools import tool

# @tool
# def multiply(a: int, b: int) -> int:
#     """Multiply two numbers."""
#     return a * b

# @tool
# def concat(a: str, b: str) -> str:
#     """Concatenate two strings."""
#     return a + b

# print("Tool name: ", multiply.name)
# print("Tool description: ", multiply.description)
# print("Tool argument schema: ", multiply.args)

# multiply
# multiply(a: int, b: int) -> int - Multiply two numbers.
# {'a': {'title': 'A', 'type': 'integer'}, 'b': {'title': 'B', 'type': 'integer'}}


# # Example tool descriptions
# example_tools = [
#     {'name': multiply.name, 'description': multiply.description, 
#      'args': multiply.args, 'function': multiply},
#     {'name': concat.name, 'description': concat.description, 
#      'args': concat.args, 'function': concat}
    
# ]




''' tool_bank.yaml
tools:
  - name: multiply
    description: Multiply two numbers.
    args:
      a:
        title: A
        type: integer
      b:
        title: B
        type: integer
    code: |
      def multiply(a, b): return a * b
  
  - name: concat
    description: Concatenate two strings.
    args:
      a:
        title: A
        type: string
      b:
        title: B
        type: string
    code: |
      def concat(a, b): return a + b
'''

from transformers import AutoTokenizer, AutoModel
import torch
import numpy as np

# Module-level copy of the embedding model and tokenizer.
# NOTE(review): ToolBank.__init__ below loads its own model/tokenizer from the
# same default name and nothing visible in this notebook reads these two
# globals afterwards - presumably redundant; confirm before removing.
model_name = 'sentence-transformers/all-MiniLM-L6-v2'
model = AutoModel.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)


import yaml
import torch
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

from transformers import AutoTokenizer, AutoModel


class ToolBank:
    """
    YAML-backed tool registry with embedding-based search.

    Tools are stored with their source code, exec()'d into the module globals
    on load/add, and ranked against a query by cosine similarity between
    mean-pooled transformer embeddings of the descriptions.
    """

    def __init__(self, yaml_path='tool_bank.yaml', model_name='sentence-transformers/all-MiniLM-L6-v2'):
        self.yaml_path = yaml_path
        self.model = AutoModel.from_pretrained(model_name)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.tools = {}
        self.load_tools()
        self.embeddings = self.generate_embeddings()

    def _read_data(self):
        """Return the parsed YAML document; an empty bank if the file is missing or empty."""
        try:
            with open(self.yaml_path, 'r') as file:
                return yaml.safe_load(file) or {"tools": []}
        except FileNotFoundError:
            return {"tools": []}

    def _register(self, name, description, args, code):
        """Define the tool from its source and cache it under `name`."""
        # Assuming security is managed and the code comes from a trusted source:
        # exec() runs the stored code with full interpreter privileges.
        exec(code, globals())
        self.tools[name] = {
            "function": globals()[name],
            "args": args,
            "description": description
        }

    def _embed(self, text):
        """Return the mean-pooled last-hidden-state embedding of `text` as a numpy array."""
        inputs = self.tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)
        with torch.no_grad():
            outputs = self.model(**inputs)
        return outputs.last_hidden_state.mean(dim=1).squeeze().cpu().numpy()

    def load_tools(self):
        """Define and cache every tool stored in the YAML file (none if it is missing or empty)."""
        for tool_data in self._read_data()["tools"]:
            self._register(
                tool_data["name"],
                tool_data["description"],
                tool_data["args"],
                tool_data["code"],
            )

    def add_tool(self, name: str, description: str, args: dict, code: str):
        """Define a new tool from `code`, cache it, refresh the search index and persist it."""
        tool_data = {
            "name": name,
            "description": description,
            "args": args,
            "code": code
        }
        # Update the tools dictionary
        self._register(name, description, args, code)
        # Keep the embedding matrix aligned with self.tools; otherwise the new
        # tool is never searchable and result indices point at the wrong names.
        self.embeddings = self.generate_embeddings()
        # Save to YAML
        self.save_to_yaml(tool_data)

    def save_to_yaml(self, tool_data):
        """Write `tool_data` into the YAML file, replacing any entry with the same name."""
        data = self._read_data()
        data["tools"] = [
            entry for entry in data["tools"] if entry.get("name") != tool_data["name"]
        ]
        data["tools"].append(tool_data)
        with open(self.yaml_path, 'w') as file:
            yaml.safe_dump(data, file)

    def execute_tool(self, tool_name: str, **kwargs):
        """Call the cached tool `tool_name` with `kwargs`; KeyError if unknown."""
        tool = self.tools.get(tool_name)
        if tool:
            return tool["function"](**kwargs)
        else:
            raise KeyError("Tool not found.")

    def generate_embeddings(self):
        """Embed every cached tool description, in the insertion order of self.tools."""
        return np.array([self._embed(tool["description"]) for tool in self.tools.values()])

    def search_tools(self, query, top_k: int = 1):
        """
        Return the names of the `top_k` tools most similar to `query`.

        Args:
            query: Free-text description of the wanted tool.
            top_k: How many names to return, best first (default 1).

        Returns:
            A list of tool names; empty when the bank holds no tools.
        """
        if not self.tools:
            # Nothing to rank; also avoids comparing against an empty matrix
            return []
        query_embedding = self._embed(query)
        similarity_scores = cosine_similarity([query_embedding], self.embeddings)[0]
        most_similar_indices = similarity_scores.argsort()[::-1]
        tool_names = list(self.tools)
        # Return the names of the top matching tools
        return [tool_names[i] for i in most_similar_indices[:top_k]]

tb = ToolBank()

tb.search_tools('I need a tool that concat two strings')