In [2]:
import os
import sys
# Base directory used below to locate the Chroma store and the local model file.
# NOTE(review): sys.path[0] depends on how the kernel/interpreter was launched —
# confirm it resolves to the project folder in your environment.
root_dir = sys.path[0]

## Prepare Documents
This only needs to be run once; the results then persist in memory.

In [3]:
from langchain.document_loaders import TextLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Load the files matching *.txt from the relative "texts" directory, showing a progress bar.
loader = DirectoryLoader("texts", glob="*.txt", show_progress=True)
docs = loader.load()
# Display how many documents were loaded.
len(docs)

100%|██████████| 1/1 [00:03<00:00,  3.30s/it]


1

In [4]:
# Concatenate the text of every loaded document into one string, skipping documents
# whose page_content is empty. No separator is inserted between documents.
raw_text = ''.join(doc.page_content for doc in docs if doc.page_content)

# Split the combined text into chunks of at most 500 characters (measured with len),
# with 100 characters of overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100,
    length_function=len,
    is_separator_regex=False,
)
texts = text_splitter.split_text(raw_text)
# Display the number of chunks produced.
len(texts)

53

In [5]:
texts[0]

'A Appendix: Instructions This appendix provides the English translations of the instructions for the 4I treatment.'

## Prepare Database

In [6]:
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.vectorstores.chroma import Chroma
import torch  # only used to check whether a CUDA device is available

create_db = False # set True to create a new database
model_name = "BAAI/bge-small-en-v1.5"
encode_kwargs = {'normalize_embeddings': True} # set True to compute cosine similarity

# Run the embedding model on the GPU when one is available; otherwise fall back to
# the CPU so the notebook still runs on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

embedding_function = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs={'device': device},
    encode_kwargs=encode_kwargs,
)

In [7]:
# Directory under root_dir where the Chroma collection is stored on disk.
db_dir = os.path.join(root_dir, "chroma_db")
if not create_db:
    # Open the collection stored in db_dir (presumably written by an earlier run
    # with create_db=True).
    db = Chroma(embedding_function=embedding_function, persist_directory=db_dir)
else:
    # Embed the text chunks into a new collection and persist it to db_dir.
    db = Chroma.from_texts(texts, embedding_function, persist_directory=db_dir)

In [8]:
### Retrieve the 5 chunks most similar to the query
query = "When can you accept trades?"

db.similarity_search(query, k=5)

[Document(page_content='Another trader can now accept the new Ask of 70 that you have just made by clicking on the ‘Immediately BUY’ button at the bottom of the screen. The transaction price will then appear in the middle column ‘Transaction Details’. Because you sold the asset the Buy/Sell column will indicate ‘Sell’ so that you can keep track of what you are buying and selling. As a result of the transaction, the Ask will not be available anymore in the ‘Outstanding Ask’ column.'),
 Document(page_content='Another trader can now accept the new Bid of 50 that you have just made by clicking on the ‘Immediately SELL’ button at the bottom of the screen. The transaction price will then appear in the middle column ‘Transaction Details’ (see image below). Because you bought the asset the Buy/Sell column will indicate ‘Buy’ so that you can keep track of what you are buying and selling.'),
 Document(page_content='Because the Ask of 70 has been accepted, you have sold one unit of Asset Y at 70 

In [9]:
# Create an MMR (maximal marginal relevance) retriever: fetch 20 candidates and
# return the 5 that best balance relevance and diversity.
# k and fetch_k must be passed through search_kwargs; keyword arguments given
# directly to as_retriever() are not forwarded to the search call, so the
# retriever would silently fall back to its defaults.
retriever = db.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 5, "fetch_k": 20},
)

# Show the second document returned for the sample query.
retriever.get_relevant_documents(query)[1]

Document(page_content='4. You can submit Bids and Asks or accept available offers from other traders to trade units. 5. The amount units pay to their owners at the end of a market period is based on the state (A, B, or C) for Type I and Type II as follows. All States are equally likely.')

## Create Chat

In [10]:
import textwrap
def wrap_text(text, width=90):
    """Wrap ``text`` to ``width`` columns while keeping its existing line breaks.

    Each newline-separated line is wrapped on its own with ``textwrap.fill`` and
    the wrapped pieces are joined back together with newlines, so blank lines in
    the input stay where they are.
    """
    return '\n'.join(
        textwrap.fill(segment, width=width) for segment in text.split('\n')
    )

In [11]:
from langchain.llms import GPT4All
from langchain.schema.runnable import RunnableLambda, RunnablePassthrough
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser



# Prompt that tells the model to answer only from the supplied context.
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# Path of the local model file under <root_dir>/model/.
model_path = os.path.join(root_dir, "model", "mistral-7b-instruct-v0.1.Q4_0.gguf")
model = GPT4All(model=model_path)

# The retriever fills {context}; the chain input is passed through unchanged as
# {question}. The model output is then parsed to a plain string.
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt | model | StrOutputParser()
)

In [12]:
# Run the chain on a sample question and print the reply wrapped by wrap_text.
text_reply = chain.invoke("Tell me about auctions. When can you accept trades?")

print(wrap_text(text_reply))

Answer: Auctions are a type of trading where buyers and sellers come together to trade
units based on their perceived value. In an auction, bids and asks are submitted by
traders for each asset available in the market. The lowest (best) ask and the highest
(best) bid appear at the top of the list, indicating that they have been accepted by other
traders. To accept a trade, you can click on the ‘Immediately BUY’ button if you want to
buy units or the ‘Immediately SELL’ button if you want to sell units. However, you cannot
highlight one of your own bids (asks) and click these buttons as it will result in an
error message.


## Normal Retrieval

In [13]:
from langchain.schema.output_parser import StrOutputParser
from langchain.prompts import SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain.prompts import ChatMessagePromptTemplate, PromptTemplate

In [14]:
# Prompt that asks the LLM to expand one user question into several search queries.
# The only placeholder in the templates is {question}, so that is the single input
# variable declared at every level (it is also the key the chain is invoked with).
# Note: this rebinds the name `prompt`.
prompt = ChatPromptTemplate(
    input_variables=["question"],
    messages=[
        SystemMessagePromptTemplate(
            prompt=PromptTemplate(
                input_variables=[],
                template="You are a helpful assistant that generates multiple search queries based on a single input query.",
            )
        ),
        HumanMessagePromptTemplate(
            prompt=PromptTemplate(
                input_variables=["question"],
                template="Generate multiple search queries related to: {question} \n OUTPUT (4 queries):",
            )
        ),
    ],
)

In [15]:
# Instantiate the local GPT4All model from model_path.
# NOTE(review): this repeats the GPT4All(model=model_path) construction from an
# earlier cell with identical arguments — confirm that a second load is intended.
model = GPT4All(
    model=model_path
)


In [16]:
# Rebuild `chain` from the `prompt` and `model` currently bound in the kernel.
# NOTE(review): `chain` does not appear to be referenced again in the cells that
# follow — confirm whether this cell is still needed.
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

In [17]:
# prompt -> LLM -> plain string -> list of query strings (one per output line).
# Blank lines in the completion are dropped and each line is stripped, so empty
# strings are never sent to the retriever as queries.
generate_queries = (
    prompt
    | model
    | StrOutputParser()
    | (lambda x: [line.strip() for line in x.split("\n") if line.strip()])
)

In [18]:
# Sample user query passed to the RAG-fusion chain below.
original_query = "how to bid"

In [19]:
from langchain.load import dumps, loads


def reciprocal_rank_fusion(results: list[list], k=60):
    """Fuse several ranked result lists into one list using reciprocal rank fusion.

    Args:
        results: One ranked list of documents per query, each assumed to be sorted
            from most to least relevant.
        k: Smoothing constant added to the zero-based rank; a document at rank
            ``r`` in a list contributes ``1 / (r + k)`` to its fused score.

    Returns:
        A list of ``(document, fused_score)`` tuples sorted by descending score.
        Documents are identified by their ``dumps`` serialisation, so the same
        document appearing in several lists has its contributions summed.
    """
    fused_scores = {}
    for docs in results:
        # Assumes the docs are returned in sorted order of relevance
        for rank, doc in enumerate(docs):
            # The serialised form is used as the dictionary key for the document.
            doc_str = dumps(doc)
            fused_scores[doc_str] = fused_scores.get(doc_str, 0) + 1 / (rank + k)

    ranked = sorted(fused_scores.items(), key=lambda item: item[1], reverse=True)
    return [(loads(doc_str), score) for doc_str, score in ranked]

In [20]:
# Generate several queries, run the retriever on each one (retriever.map()),
# then fuse the per-query result lists with reciprocal_rank_fusion.
ragfusion_chain = generate_queries | retriever.map() | reciprocal_rank_fusion

In [21]:
# Inspect the input schema the chain expects.
ragfusion_chain.input_schema.schema()

{'title': 'PromptInput',
 'type': 'object',
 'properties': {'question': {'title': 'Question', 'type': 'string'}}}

In [22]:
# Run the fusion chain on the sample query; the result is a list of (document, score) tuples.
ragfusion_chain.invoke({"question": original_query})

  warn_beta(


[(Document(page_content='A.1 Introduction This is an experiment in the economics of market decision-making. You will be compensated at the end of the experiment based on your decisions. So, it is important that you understand the instructions completely. We will ask you a 7-question quiz at the end of the instructions. If you answer these 7 questions correctly, you will earn an extra bonus of 5 CNYs. If you have a question during the experiment, please raise your hand, and a monitor will approach you. Otherwise, you'),
  0.0803975388851116),
 (Document(page_content='bids or asks. You will receive an error message if you highlight one of your own bids (asks) and click the ‘Immediately SELL’ (‘Immediately BUY’) button.'),
  0.06584699453551913),
 (Document(page_content='A.12 Trading (Order Book) Several Bids or Asks can be available at the same time for each Asset. In the example screen below, there are three Bids (285, 280, 270) and three Asks (340, 350, 355) available for Asset X. Note

In [23]:
from langchain.schema.runnable import RunnablePassthrough
# Prompt that tells the model to answer only from the supplied context.
template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

# The chain is invoked with a dict such as {"question": "..."}.
#   - "context":  the whole input goes to ragfusion_chain, whose fused
#                 (document, score) list is passed to the prompt as-is.
#   - "question": only the question string is extracted, so the prompt renders
#                 the question text rather than the repr of the input dict.
#                 A plain-string input is passed through unchanged.
full_rag_fusion_chain = (
    {
        "context": ragfusion_chain,
        "question": lambda inputs: inputs["question"] if isinstance(inputs, dict) else inputs,
    }
    | prompt
    | model
    | StrOutputParser()
)

In [24]:
# Run the full RAG-fusion chain on a sample question and print the reply wrapped by wrap_text.
response = full_rag_fusion_chain.invoke({"question": "Tell me about auctions. When can you accept trades?"})
print(wrap_text(response))

Answer: Auctions are a type of trading where buyers and sellers come together to trade
units at an agreed-upon price. In this experiment, traders can submit Bids (Offers to Buy)
or Asks (Offers to Sell) for Asset X and Asset Y units from one another during market
periods. To create a Bid for Asset X (Asset Y), you have to type in the price you would
like to buy in the Bid box on the left side of the trading screen for Asset X (Asset Y).
Note that you will not be able to create a Bid with a higher price than your available
cash, because you would not have enough money to buy the unit. You may view how much cash
you have available for Bids by looking at the Cash amount in the ‘Available’ row of the
‘Your Current Holdings’ table. The image below shows a case where you have no cash
available to buy units. You have no cash available because the cash you own (500 in the
Cash column for ‘Total Owned’ in the image below) has already been used to place Bids (see
500 in the Cash column). During 