# Yield's Question Answering (QA) bot

### High-level approach
#### Pre-processing
- Load files from the repo, chunk them based on language-specific separators to maintain enough context
- Categorize each chunk as 'general' or 'technical' based on content type to allow semantic search to filter on query's underlying intent (general usage vs coding/technical)
- Add chunks to vector DB


#### Query time
- Call LLM to categorize the query as 'general' or 'technical' based on intent
- Run semantic search with category filter on vector db to get relevant chunks for LLM context
- Call LLM to answer user query with context


### Improvements
#### Parameters to explore to improve quality
- chunk size
- top-k count
- LLMs with larger context window (GPT-4 vs GPT-3.5 16k)
- Alternative Code-optimized LLMs such as BigCode https://huggingface.co/bigcode

#### Code suggestions
Code suggestions could be improved by indexing [COOKBOOK](https://github.com/yieldprotocol/addendum-docs/blob/main/COOKBOOK.md), as the V2 documentation does not have extensive code examples

In [None]:
# Install dependencies
!pip install langchain openai chromadb GitPython ipython tiktoken

In [9]:
import os
from langchain.document_loaders import GitLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter, Language
from langchain.text_splitter import MarkdownHeaderTextSplitter
from langchain.vectorstores import Chroma
from langchain.llms import OpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.chains import RetrievalQAWithSourcesChain, RetrievalQA
from IPython.display import display
from IPython.display import Markdown
from getpass import getpass
from pathlib import Path
from langchain.callbacks import StdOutCallbackHandler
from langchain.prompts import PromptTemplate
from langchain.callbacks import get_openai_callback
from langchain import LLMChain
from langchain.chat_models import ChatOpenAI
import json

# LangChain callback handler that writes chain events to stdout.
# NOTE(review): not referenced by any other cell shown in this notebook - confirm it is still needed.
stdout_handler = StdOutCallbackHandler() 

In [10]:
# Prompt for the OpenAI API key interactively so it is never stored in the notebook source.
OPENAI_API_KEY = getpass()

········


In [11]:
# Export the key through the process environment.
# NOTE(review): presumably this is where the OpenAI-backed LangChain classes used below pick it up - confirm.
os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY

## Common Functions

### Load contents from repo

In [12]:
def load_repo(remote_repo_url, local_repo_path, branch, file_filter=None):
    """Load the files of a git repository as documents using ``GitLoader``.

    Args:
        remote_repo_url: URL of the remote repository. It is only handed to the
            loader (as ``clone_url``) when ``local_repo_path`` is not an
            existing directory.
        local_repo_path: Local directory of the repository checkout.
        branch: Branch to load files from.
        file_filter: Optional callable taking a file path and returning whether
            the file should be loaded.

    Returns:
        Whatever ``GitLoader.load()`` returns for the selected files.
    """
    loader_kwargs = {
        "repo_path": local_repo_path,
        "branch": branch,
        "file_filter": file_filter,
    }

    # An existing local directory is reused as-is; otherwise give the loader
    # the remote URL to fetch the repository from.
    if not Path(local_repo_path).is_dir():
        loader_kwargs["clone_url"] = remote_repo_url

    return GitLoader(**loader_kwargs).load()

### Split documents into chunks based on programming language separators/syntax
* Split on programming language separators/syntax
* Categorize each chunk as 'general' or 'technical' based on content type

In [13]:
def split_docs(docs, language, chunk_size, chunk_overlap):
    """Split documents into chunks and tag every chunk with a category.

    Each document's text is split with a ``RecursiveCharacterTextSplitter``
    configured for ``language``. A chunk is categorized as ``"technical"`` when
    the source file path contains ``"cookbook"`` (case-insensitive) and as
    ``"general"`` otherwise.

    Args:
        docs: Iterable of documents exposing ``page_content`` (str) and
            ``metadata`` (dict, normally containing ``file_path``).
        language: Language passed to
            ``RecursiveCharacterTextSplitter.from_language``.
        chunk_size: Chunk size passed to the splitter.
        chunk_overlap: Chunk overlap passed to the splitter.

    Returns:
        A dict with two parallel lists: ``'all_splits'`` (chunk texts) and
        ``'all_metadatas'`` (one metadata dict per chunk).
    """
    text_splitter = RecursiveCharacterTextSplitter.from_language(
        language=language, chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )

    all_splits = []
    all_metadatas = []
    for doc in docs:
        splits = text_splitter.split_text(doc.page_content)

        # A missing file path is treated as a non-cookbook (general) document
        # instead of raising KeyError.
        file_path = str(doc.metadata.get('file_path', ''))
        category = "technical" if 'cookbook' in file_path.lower() else "general"

        all_splits.extend(splits)
        # Build an independent dict per chunk: the caller's document metadata is
        # left untouched and chunks never share one mutable dict.
        all_metadatas.extend(
            {**doc.metadata, 'category': category} for _ in splits
        )

    return {
        'all_splits': all_splits,
        'all_metadatas': all_metadatas
    }
    

## QA Workflow

In [14]:
# Yield Protocol V2 documentation repository and its local checkout location
remote_repo_docsV2_url = "https://github.com/yieldprotocol/docs-v2"
local_repo_docsV2_path = "/tmp/yield_docs_v2_repo"

# Addendum repository (cookbook) and its local checkout location
remote_repo_addendum_url = "https://github.com/yieldprotocol/addendum-docs"
local_repo_addendum_path = "/tmp/yield_addendum-docs"

# Branch loaded from both repositories
branch = "main"


def file_filter(file_path):
    """Return True for Markdown files, i.e. paths ending in ".md"."""
    return file_path.endswith(".md")

In [15]:
# Chunking parameters (in characters) used when splitting the loaded documents
chunk_size_chars = 2000
chunk_overlap_chars = 0

# Load the Markdown files of the documentation repository
documentation_docs = load_repo(
    remote_repo_url=remote_repo_docsV2_url,
    local_repo_path=local_repo_docsV2_path,
    branch=branch,
    file_filter=file_filter,
)

# Load the Markdown files of the addendum (cookbook) repository
addendum_docs = load_repo(
    remote_repo_url=remote_repo_addendum_url,
    local_repo_path=local_repo_addendum_path,
    branch=branch,
    file_filter=file_filter,
)

In [16]:
# Split both document sets on Markdown separators using the same chunk settings
documentation_splits = split_docs(
    docs=documentation_docs,
    language=Language.MARKDOWN,
    chunk_size=chunk_size_chars,
    chunk_overlap=chunk_overlap_chars,
)
addendum_splits = split_docs(
    docs=addendum_docs,
    language=Language.MARKDOWN,
    chunk_size=chunk_size_chars,
    chunk_overlap=chunk_overlap_chars,
)

In [17]:
# Vector store whose embeddings are computed with OpenAI's embedding model
vector_db = Chroma(embedding_function=OpenAIEmbeddings())

# Index every chunk together with its metadata (which carries the category tag)
documentation_splits_ids = vector_db.add_texts(
    texts=documentation_splits['all_splits'],
    metadatas=documentation_splits['all_metadatas'],
)
addendum_splits_ids = vector_db.add_texts(
    texts=addendum_splits['all_splits'],
    metadatas=addendum_splits['all_metadatas'],
)

In [18]:
# Few-shot prompt that asks the LLM to label a user query as 'general',
# 'technical' or 'na'. The only template variable is {query}.
categorization_prompt_template = """
You are a Web3 expert who is able to answer any user query on Yield protocol's documentation, code, whitepapers and many other such topics.

# INSTRUCTIONS
- Classify the user's query into one of these categories - 'general' or 'technical' or 'na'
- Use the examples below as reference, do not make up any categories.
- Always return the category in plain text format.
- If you are unable to process the query, just return 'na'

QUERY: How do I borrow
ANSWER: general

QUERY: How do I borrow using code
ANSWER: technical

QUERY: How do I integrate
ANSWER: technical

QUERY: How is lending rate calculated
ANSWER: general

QUERY: {query}
ANSWER:"""

# Prompt object used by the categorization step of ask()
QUERY_CATEGORIZATION_PROMPT = PromptTemplate(
    template=categorization_prompt_template, input_variables=["query"]
)

In [19]:
# Prompt for the answering step: the retrieved chunks are substituted for
# {context} and the user's query for {question} (wrapped in triple back ticks).
final_answer_prompt_template = """
You are a Web3 expert who is able to answer any user query on Yield protocol's documentation, code, whitepapers and many other such topics.

# INSTRUCTIONS
- The user's query will be wrapped in triple back ticks
- Only answer the query using the provided context below which may include general information and code, do not make up any information.
- If the query is related to code or integration, provide a step by step explanation on the process, use code suggestions whereever relevant and always use markdown format annotated with the language to show the code.
- For code suggestions, use comments to explain each important concept, class, variable, function and parameter.
- Yield Protocol has no JS SDK so always use ethers package for JS code suggestions.

# CONTEXT
{context}

# QUERY
```
{question}
```
"""
# Prompt object passed to the retrieval QA chain in ask()
FINAL_ANSWER_PROMPT = PromptTemplate(
    template=final_answer_prompt_template, input_variables=["context", "question"]
)

In [29]:
def ask(query, show_sources=False):
    """Answer a user query from the indexed documentation and display the result.

    The query is first classified by an LLM (gpt-3.5-turbo) using
    ``QUERY_CATEGORIZATION_PROMPT``. The five most similar chunks are then
    retrieved from ``vector_db`` and passed, together with the query, to a
    second LLM (gpt-4) using ``FINAL_ANSWER_PROMPT``. The query, its category,
    the final answer and the OpenAI usage of both calls are rendered/printed
    in the notebook.

    Args:
        query: The user's question.
        show_sources: When True, also display the retrieved source chunks and
            their file paths.

    Returns:
        None. All output is displayed as a side effect.
    """
    display(Markdown(f"### Query\n{query}"))

    # 1. Call LLM to categorize query based on intent
    categorization_llm_chain = LLMChain(
        llm=ChatOpenAI(temperature=0, model="gpt-3.5-turbo"),
        prompt=QUERY_CATEGORIZATION_PROMPT
    )

    with get_openai_callback() as cb:
        result = categorization_llm_chain.run(query=query)
        # Normalize the raw completion: surrounding whitespace, quotes or a
        # different case would otherwise never match the stored category tags.
        category = result.strip().strip("'\"").strip().lower()
        display(Markdown(f"### Query Category\n**{category}**"))
        print(cb)

    # Chunks are only ever tagged 'general' or 'technical'. For any other label
    # (e.g. 'na') a category filter would match nothing and leave the answer
    # LLM without context, so the search is run unfiltered instead.
    search_kwargs = {'k': 5}
    if category in ('general', 'technical'):
        search_kwargs['filter'] = {'category': category}

    # 2. Call LLM to answer user's query
    chain_type_kwargs = {"prompt": FINAL_ANSWER_PROMPT}
    qa_chain = RetrievalQA.from_chain_type(
        llm=ChatOpenAI(temperature=0, model="gpt-4"),
        chain_type="stuff",
        retriever=vector_db.as_retriever(search_kwargs=search_kwargs),
        chain_type_kwargs=chain_type_kwargs,
        return_source_documents=True
    )

    with get_openai_callback() as cb:
        answer = qa_chain({'query': query})
        display(Markdown("### Final Answer"))
        display(Markdown(answer['result']))
        print(cb)

        if show_sources:
            display(Markdown("### Sources"))
            for i, d in enumerate(answer['source_documents']):
                display(Markdown(f"**[Source {i+1}]**"))
                display(Markdown(d.page_content))
                display(Markdown(f"*File path: {d.metadata['file_path']}*"))


In [30]:
# Example query.
# NOTE(review): the recorded output below includes a "Sources" section, which ask()
# only renders when show_sources=True - confirm which call produced this output.
ask("how do i borrow using code")

### Query
how do i borrow using code

### Query Category
**technical**

Tokens Used: 171
	Prompt Tokens: 170
	Completion Tokens: 1
Successful Requests: 1
Total Cost (USD): $0.000257


### Final Answer

To borrow using code, you can use the `serveAction` function provided by the Yield Protocol's Ladle contract. This function allows you to borrow fyToken from an existing vault, which can then be exchanged for the underlying token in a YieldSpace pool.

Here is a step-by-step guide on how to do it:

1. Import the ethers library and initialize a provider and a signer. The signer is the Ethereum account you will be using to interact with the Ethereum network.

```javascript
const ethers = require('ethers');

// Use default provider (mainnet)
const provider = ethers.getDefaultProvider();

// Use a private key
const signer = new ethers.Wallet('YOUR_PRIVATE_KEY');
```

2. Define the contract addresses and the ABI for the Ladle contract. The ABI is a JSON representation of the smart contract, including all of its functions and variables.

```javascript
const ladleAddress = 'LADLE_CONTRACT_ADDRESS'; // Replace with the actual contract address

// Ladle contract ABI
const ladleABI = [
  // ... ladle contract ABI
];
```

3. Initialize a contract instance using the ethers library.

```javascript
const ladleContract = new ethers.Contract(ladleAddress, ladleABI, provider).connect(signer);
```

4. Define the parameters for the `serveAction` function. These include the vault ID, the receiver address, the amount to borrow, and the maximum debt.

```javascript
const vaultId = 'VAULT_ID'; // Replace with your vault ID
const receiver = 'RECEIVER_ADDRESS'; // Replace with the receiver address
const borrowed = ethers.utils.parseEther('10'); // Borrow 10 fyTokens
const maximumDebt = ethers.utils.parseEther('100'); // Maximum debt of 100 fyTokens
```

5. Call the `serveAction` function using the parameters defined above.

```javascript
const tx = await ladleContract.serveAction(vaultId, receiver, 0, borrowed, maximumDebt);
```

6. Wait for the transaction to be mined.

```javascript
const receipt = await tx.wait();
console.log('Transaction mined:', receipt.transactionHash);
```

This code will borrow fyTokens from the specified vault and send them to the receiver address. The amount of debt added to the vault is equal to the amount of fyTokens borrowed. The maximum debt parameter is used to prevent the debt from exceeding a certain limit.

Tokens Used: 2172
	Prompt Tokens: 1674
	Completion Tokens: 498
Successful Requests: 1
Total Cost (USD): $0.0801


### Sources

**[Source 1]**

### Provide liquidity by borrowing, using only underlying

This batch relies on creating a vault where the underlying is used as collateral to borrow the fyToken of the same underlying.

With this vault built, an amount of underlying is used to provide liquidity. That amount is split into the same proportions as the pool reserves, and the portion in the same proportion as the pool fyToken reserves put as collateral in a vault, to borrow fyToken into the pool.

```
  await ladle.batch([
    ladle.buildAction(seriesId, baseId, 0),
    ladle.forwardPermitAction(
      base, ladle, totalBase, deadline, v, r, s
    ),
    ladle.transferAction(base, baseJoin, baseToFYToken),
    ladle.transferAction(base, pool, baseToPool),
    ladle.pourAction(0, pool, baseToFYToken, baseToFYToken),
    ladle.routeAction(pool, ['mint', [receiver, receiver, minRatio, maxRatio]),
  ])

*File path: COOKBOOK.md*

**[Source 2]**

[Liquidity Providing](#liquidity-providing)
  - [Provide liquidity by borrowing](#provide-liquidity-by-borrowing)
  - [Provide liquidity by borrowing, using only underlying](#provide-liquidity-by-borrowing-using-only-underlying)
  - [Provide liquidity by buying](#provide-liquidity-by-buying)
  - [Remove liquidity and repay](#remove-liquidity-and-repay)
  - [Remove liquidity, repay and sell](#remove-liquidity-repay-and-sell)
  - [Remove liquidity and redeem](#remove-liquidity-and-redeem)
  - [Remove liquidity and sell](#remove-liquidity-and-sell)
  - [Roll liquidity before maturity](#roll-liquidity-before-maturity)

 [Strategies](#strategies)
  - [Provide liquidity to strategy by borrowing](#provide-liquidity-to-strategy-by-borrowing)
  - [Provide liquidity to strategy by buying](#provide-liquidity-to-strategy-by-buying)
  - [Remove liquidity from strategy](#remove-liquidity-from-strategy)
  - [Remove liquidity from deprecated strategy](#remove-liquidity-from-deprecated-strategy)

[Ether](#ether)
  - [Post Ether as collateral](#post-ether-as-collateral)
  - [Withdraw Ether collateral](#withdraw-ether-collateral)
  - [Redeem fyETH](#redeem-fyeth)
  - [Provide Ether as liquidity (borrowing)](#provide-ether-as-liquidity-borrowing)
  - [Provide Ether as liquidity (buying)](#provide-ether-as-liquidity-buying)
  - [Remove liquidity from Ether pools](#remove-liquidity-from-ether-pools)

[ERC1155](#erc1155)
  - [Post ERC1155 collateral (Ladle Approval)](#post-erc1155-collateral-ladle-approval)
  - [Withdraw ERC1155 collateral](#withdraw-erc1155-collateral)

*File path: COOKBOOK.md*

**[Source 3]**

### Provide liquidity by borrowing

When providing liquidity by borrowing, the user borrows an amount of fyToken to provide to the pool, along with underlying in the same proportion as the pool reserves.

Prepend this batch with actions to create a vault or provide collateral if necessary.

An option can be shown to the user where an amount of underlying is taken to provide liquidity. That amount is then split into the same proportions as the pool reserves, and the portion in the same proportion as the pool fyToken reserves put as collateral in a vault, to borrow fyToken into the pool.

```
  await ladle.batch([
    ladle.forwardPermitAction(
      base, ladle, baseToPool, deadline, v, r, s
    ),
    ladle.transferAction(base, pool, baseToPool),
    ladle.pourAction(vaultId, pool, 0, fyTokenBorrowed),
    ladle.routeAction(pool, ['mint', [receiver, receiver, minRatio, maxRatio]),
  ])
```
|Param  | Description|
|--------------|------------------------------------------------------------------------------------|
| `  base  `   | Contract for the underlying tokens.      |
| `  ladle  `   | Ladle for Yield v2.      |
| ` pool  `   | Contract YieldSpace pool trading base and the fyToken for the series.      |
| `  baseToPool  `   | Amount of underlying that the user will provide liquidity with.      |
| `  vaultId  `   | Vault to add the debt to. Set to 0 if the vault was created as part of this same batch.      |
| `  0  `   | Collateral change, zero in this case.      |
| `  fyTokenBorrowed  `   | Amount of fyToken that the user will borrow and provide liquidity with.      |
| ` receiver  `   | Receiver for the LP tokens.      |
| `  true  `   | Make any rounding surplus to be fyToken, left in the pool.      |
| `  minRatio  `   | Minimum base/fyToken ratio accepted in the pool reserves.      |
| `  maxRatio  `   | Maximum base/fyToken ratio accepted in the pool reserves.      |

*File path: COOKBOOK.md*

**[Source 4]**

### Borrow underlying

This action borrows fyToken from an existing vault, which is then exchanged for underlying in a YieldSpace pool. The amount of underlying obtained is an exact number provided as a parameter, and the debt incurred in the vault is variable but within provided limits. It can be combined with previous actions that create vaults and post collateral, among others.

```
  await ladle.batch([
    ladle.serveAction(vaultId, receiver, 0, borrowed, maximumDebt),
  ])
```

|Param  | Description|
|--------------|------------------------------------------------------------------------------------|
| `  vaultId  `   | Vault to add the collateral to. Set to 0 if the vault was created as part of this same batch.      |
| `  receiver  `   | Receiver of the collateral.      |
| `  0  `   | Collateral change, zero in this case      |
| `  borrowed  `   | Amount of debt to add to the vault, and fyTokens to send to the receiver.      |
| `  ladle  `   | Maximum debt to accept for the vault in fyToken terms.      |

*File path: COOKBOOK.md*

**[Source 5]**

```
                                        __________________   __________________
                                    .-/|                  \ /                  |\-.
                                    ||||                   |                   ||||
                                    ||||                   |                   ||||
                                    ||||                   |                   ||||
                                    ||||      Yield        |   "Recipes        ||||
                                    ||||                   |   made with love  ||||
                                    ||||     COOKBOOK      |   just like mama  ||||
                                    ||||                   |   used to make"   ||||
                                    ||||                   |                   ||||
                                    ||||                   |                   ||||
                                    ||||                   |                   ||||
                                    ||||__________________ | __________________||||
                                    ||/===================\|/===================\||
                                    `--------------------~___~-------------------''
```

*File path: COOKBOOK.md*

## Question Bank

In [21]:
# Question Bank
# Prompt that asks the LLM to generate 5 questions about a document chunk and
# return them as JSON of the form {"questions": [...]}. The doubled braces in
# the template are literal braces; the only template variable is {document}.
question_bank_prompt_template = """
You are a Web3 user who wants to learn about the Yield protocol and its inner workings, concepts and methods to integrate using both client-side and smart contract code.

# INSTRUCTIONS
- Generate 5 questions to ask about the protocol from the document provided below, always ensure the questions are related to the document text and do not make up any information.
- The result should be in JSON format like {{"questions": []}}

# DOCUMENT
{document}

# RESULT"""
QUESTION_BANK_PROMPT = PromptTemplate(
    template=question_bank_prompt_template, input_variables=["document"]
)

# Chain used by gen_questions() to produce questions for one chunk at a time
question_bank_llm_chain = LLMChain(
    llm=ChatOpenAI(temperature=0, model="gpt-3.5-turbo"),
    prompt=QUESTION_BANK_PROMPT
)

In [22]:
def gen_questions(texts):
    """Generate questions for every text with ``question_bank_llm_chain``.

    Each chain result is parsed as JSON and the entries of its ``"questions"``
    key are collected. Progress and the accumulated OpenAI usage are printed.

    Args:
        texts: Sequence of document chunks to generate questions from.

    Returns:
        A flat list of the questions generated across all texts.
    """
    collected = []
    print(f"total texts: {len(texts)}")
    with get_openai_callback() as usage:
        for position, text in enumerate(texts, start=1):
            print(f"Working on text #{position} ...")
            raw_result = question_bank_llm_chain.run(document=text)
            parsed = json.loads(raw_result)
            collected += parsed["questions"]
        print(usage)
    return collected

# Gets saved in the "question_answering" directory, resolved relative to the
# current working directory
def save_questions(file_name, questions):
    """Write the questions to ``./question_answering/<file_name>``, one per line.

    The target directory is created when it does not exist yet, so the call no
    longer fails with ``FileNotFoundError`` on a fresh checkout. An existing
    file with the same name is overwritten.

    Args:
        file_name: Name of the file to create inside ``./question_answering``.
        questions: Iterable of questions; each is written followed by a newline.
    """
    output_dir = Path("./question_answering")
    output_dir.mkdir(parents=True, exist_ok=True)

    with open(output_dir / file_name, 'w') as file:
        file.writelines(f"{q}\n" for q in questions)

In [23]:
#documentation_questions = gen_questions(documentation_splits['all_splits'])
#save_questions('yield_documentation_qs.txt', documentation_questions)

In [24]:
#addendum_questions = gen_questions(addendum_splits['all_splits'])
#save_questions("yield_addendum_cookbook_qs.txt", addendum_questions)

In [25]:
def read_questions(file_name):
    """Read a question file and return its lines.

    Args:
        file_name: Path of the text file to read.

    Returns:
        A list with one entry per line of the file, each stripped of leading
        and trailing whitespace (blank lines become empty strings).
    """
    with open(file_name, 'r') as file:
        return list(map(str.strip, file))

In [26]:
# Load the previously generated documentation questions.
# NOTE(review): this path is relative to the working directory, whereas save_questions()
# writes into ./question_answering/ - confirm the file location.
documentation_questions = read_questions('yield_documentation_qs.txt')

In [27]:
# Load the previously generated cookbook questions.
# NOTE(review): this path is relative to the working directory, whereas save_questions()
# writes into ./question_answering/ - confirm the file location.
addendum_questions = read_questions('yield_addendum_cookbook_qs.txt')

In [None]:
# for q in documentation_questions[:5]:
#     ask(q)

In [None]:
# for q in addendum_questions[:5]:
#     ask(q)