In [2]:
from langchain.document_loaders import UnstructuredPDFLoader, OnlinePDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

### Load your data

In [3]:
?UnstructuredPDFLoader

[0;31mInit signature:[0m
[0mUnstructuredPDFLoader[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mfile_path[0m[0;34m:[0m [0mstr[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmode[0m[0;34m:[0m [0mstr[0m [0;34m=[0m [0;34m'single'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0;34m**[0m[0munstructured_kwargs[0m[0;34m:[0m [0mAny[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m      Loader that uses unstructured to load PDF files.
[0;31mInit docstring:[0m Initialize with file path.
[0;31mFile:[0m           /opt/homebrew/anaconda3/envs/ds2023/lib/python3.8/site-packages/langchain/document_loaders/pdf.py
[0;31mType:[0m           ABCMeta
[0;31mSubclasses:[0m     

In [4]:
# Build a loader for the local PDF that the rest of the notebook queries.
# To read a remote PDF instead, swap in:
#   OnlinePDFLoader("https://wolfpaulus.com/wp-content/uploads/2017/05/field-guide-to-data-science.pdf")
loader = UnstructuredPDFLoader(file_path="../data/curve-stablecoin.pdf")

In [5]:
# Run the loader; `data` is the list of loaded documents used by the cells below.
data = loader.load()

In [6]:
# Summarise what the loader returned. The character count is summed over every
# loaded document, so the cell also works when the loader yields several
# documents (or none at all) instead of exactly one; previously only data[0]
# was inspected, which raises IndexError on an empty result.
total_chars = sum(len(doc.page_content) for doc in data)
print(f'You have {len(data)} document(s) in your data')
print(f'There are {total_chars} characters in your document')

You have 1 document(s) in your data
There are 11829 characters in your document


### Chunk your data up into smaller documents

In [8]:
# Split the loaded document(s) into chunks of at most 1000 characters with no
# overlap between neighbouring chunks; `texts` holds the resulting chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=1000,
)
texts = text_splitter.split_documents(data)

In [9]:
# Report how many chunks the splitter produced.
print(f'Now you have {len(texts)} documents')

Now you have 14 documents


### Create embeddings of your documents to get ready for semantic search

In [10]:
from langchain.vectorstores import Chroma, Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
import pinecone

In [11]:
import os
from getpass import getpass

# SECURITY: this cell used to contain the OpenAI and Pinecone API keys in plain
# text. Any key that was ever saved in the notebook must be treated as leaked:
# revoke/rotate it with the provider. Secrets are now read from the environment
# and, if missing, requested interactively so they never land in the file.


def _read_secret(env_name: str) -> str:
    """Return the secret stored in environment variable ``env_name``.

    Falls back to a hidden interactive prompt when the variable is unset or
    empty.

    Raises:
        ValueError: if no value is available from either source.
    """
    value = os.environ.get(env_name) or getpass(f"{env_name}: ")
    if not value:
        raise ValueError(f"{env_name} is required but was not provided")
    return value


OPENAI_API_KEY = _read_secret("OPENAI_API_KEY")
PINECONE_API_KEY = _read_secret("PINECONE_API_KEY")
# The Pinecone environment name is not a secret; keep the previous value as the
# default but allow it to be overridden without editing the notebook.
PINECONE_API_ENV = os.environ.get("PINECONE_API_ENV", "asia-southeast1-gcp")

In [12]:
# OpenAI embedding client, authenticated with OPENAI_API_KEY.
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

In [13]:
# Connect the Pinecone client. Both values come from the Pinecone console
# (app.pinecone.io): the environment name is shown next to the API key.
pinecone.init(
    environment=PINECONE_API_ENV,
    api_key=PINECONE_API_KEY,
)

# Name of the Pinecone index that stores the chunk embeddings.
index_name = "langchain2"

In [14]:
import hashlib

# Embed every chunk and upload it to the Pinecone index.
#
# Each vector gets a deterministic ID built from the chunk's position and a
# SHA-256 digest of its text. Re-running this cell therefore upserts the same
# IDs again (overwriting in place) instead of inserting a second copy of every
# chunk, which would otherwise fill the similarity-search results with
# duplicates.
# NOTE(review): vectors uploaded by earlier runs without explicit IDs are not
# removed by this change -- clear the index once if duplicates already exist.
chunk_texts = [t.page_content for t in texts]
chunk_ids = [
    f"{position}-{hashlib.sha256(text.encode('utf-8')).hexdigest()}"
    for position, text in enumerate(chunk_texts)
]
docsearch = Pinecone.from_texts(chunk_texts, embeddings, ids=chunk_ids, index_name=index_name)

In [15]:
# First retrieval test: fetch the chunks most similar to the question.
query = "What are the key concepts in design of this stablecoin"
docs = docsearch.similarity_search(
    query=query,
    include_metadata=True,
)

In [16]:
# Display the second retrieved chunk (index 1) to eyeball what the search returned.
docs[1]

Document(page_content='Overview\n\nThe design of the stablecoin has few concepts: lending-liquidating amm algo- rithm (LLAMMA), PegKeeper, Monetary Policy are the most important ones. But the main idea is in LLAMMA: replacing liquidations with a special-purpose AMM.\n\n\n\nFigure 1: Overall schematic\n\nLosses during a dip range_size=0.20 range_size=0.05 8 6 4 2 0 40 30 20 10 0 ]%[ ssol evitaleR\n\nFigure 2: Dependence of the loss on the price shift relative to the liquidation theshold. Time window for the observation is 3 days\n\nIn this design, if someone borrows against collateral, even at liquidation threshold, and the price of collateral dips and bounces - no significant loss hap- pen. For example, according to simulations using historic data for ETH/USD since Sep 2017, if one leaves the CDP unattended for 3 days and during this time the price drop of 10% below the liquidation theshold happened - only 1% of collateral gets lost.\n\nAMM for continuous liquidation/deliquidation (LLA

### Query those docs to get your answer back

In [17]:
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain

In [18]:
# Completion model with temperature 0, authenticated with OPENAI_API_KEY.
llm = OpenAI(openai_api_key=OPENAI_API_KEY, temperature=0)
# Question-answering chain of type "stuff" (per the LangChain docs, this type
# places all supplied documents into a single prompt).
chain = load_qa_chain(llm=llm, chain_type="stuff")

In [19]:
# Same question and retrieval as the earlier search test, repeated so the
# question-answering section has its own `query` and `docs`.
query = "What are the key concepts in design of this stablecoin"
docs = docsearch.similarity_search(
    query=query,
    include_metadata=True,
)

In [20]:
# Ask the chain to answer `query` from the retrieved chunks; the answer is the cell output.
chain.run(question=query, input_documents=docs)

' The key concepts in the design of this stablecoin are Lending-Liquidating AMM Algorithm, PegKeeper, and Monetary Policy.'

In [23]:
# Follow-up question about how the three named components relate to each other.
query = "How Lending-Liquidating AMM Algorithm, PegKeeper, and Monetary Policy connect each other, what they are relationships?"
docs = docsearch.similarity_search(
    query=query,
    include_metadata=True,
)

In [24]:
# Answer the current `query` from the chunks retrieved for it.
chain.run(question=query, input_documents=docs)

" I don't know."

In [25]:
# Retrieve the chunks relevant to a definition of LLAMMA.
query = "What is LLAMMA"
docs = docsearch.similarity_search(
    query=query,
    include_metadata=True,
)

In [26]:
# Answer the current `query` from the chunks retrieved for it.
chain.run(question=query, input_documents=docs)

' LLAMMA is a lending-liquidating automated market maker algorithm that replaces liquidations with a special-purpose AMM.'

In [27]:
# Retrieve the chunks relevant to how LLAMMA operates.
query = "How LLAMMA works?"
docs = docsearch.similarity_search(
    query=query,
    include_metadata=True,
)

In [28]:
# Answer the current `query` from the chunks retrieved for it.
chain.run(question=query, input_documents=docs)

" LLAMMA works by converting between collateral (for example, ETH) and the stablecoin (let's call it USD here). If the price of collateral is high, a user has deposits all in ETH, but as it goes lower, it converts to USD. This is different from traditional AMM designs where one has USD on top and ETH on the bottom instead. LLAMMA uses an external price oracle and a bonding curve (a piece of hyperbola) to calculate the amounts of stablecoin and collateral x and y if the current price moves to the current price p. It then looks at how much stablecoin or collateral is obtained if the price adiabatically changes to either the lowest price of the lowest band, or the highest price of the highest band respectively. This way, it can get a measure of how much stablecoin is obtained which is not dependent on the current instantaneous price, which is important for sandwich attack resistance."

In [29]:
# Ask for an explanation of one sentence quoted from the previous answer.
query = "How to understand the word \"It then looks at how much stablecoin or collateral is obtained if the price adiabatically changes to either the lowest price of the lowest band, or the highest price of the highest band respectively.\""
docs = docsearch.similarity_search(
    query=query,
    include_metadata=True,
)

In [30]:
# Answer the current `query` from the chunks retrieved for it.
chain.run(question=query, input_documents=docs)

' This means that the amount of stablecoin or collateral obtained is calculated by looking at how much would be obtained if the price of the asset slowly changed to the lowest price of the lowest band, or the highest price of the highest band.'

In [31]:
# Retrieve supporting chunks for the question, then let the chain answer it.
query = "What the benifits of this stablecoin design?"
docs = docsearch.similarity_search(
    query=query,
    include_metadata=True,
)
chain.run(question=query, input_documents=docs)

" The benefits of this stablecoin design are that it converts between collateral (for example, ETH) and the stablecoin (let's call it USD here) and it uses an external price oracle, a PegKeeper contract, and a Monetary Policy to help maintain the peg. Additionally, losses during a dip are minimized, as simulations using historic data for ETH/USD since Sep 2017 show that if the price drops 10% below the liquidation threshold, only 1% of collateral is lost."

In [33]:
# Retrieve supporting chunks for the question, then let the chain answer it.
query = "Where PegKeeper contract get the external data from?"
docs = docsearch.similarity_search(
    query=query,
    include_metadata=True,
)
chain.run(question=query, input_documents=docs)

' The PegKeeper contract gets the external data from the current price of the collateral.'

In [34]:
# Retrieve supporting chunks for the question, then let the chain answer it.
query = "What is Automatic Stabilizer and Monetary Policy?"
docs = docsearch.similarity_search(
    query=query,
    include_metadata=True,
)
chain.run(question=query, input_documents=docs)

' Automatic Stabilizer and Monetary Policy is a mechanism used to keep the price of a stablecoin at a peg of 1. It works by minting uncollateralized stablecoin and depositing it to a stableswap pool when the price is higher than 1, and withdrawing and burning the stablecoin when the price is lower than 1. The amount of stablecoin minted to the stabilizer (debt) is denoted as d. The mechanism also uses a "slow" mechanism of stabilization via the borrow rate, which is adjusted depending on the change in price.'

In [35]:
# Retrieve supporting chunks for the question, then let the chain answer it.
# NOTE(review): the recorded answer expands the acronym "PSM" in a way that may
# be model-invented rather than quoted from the paper -- verify it against the
# source PDF before relying on it.
query = "What is PSM?"
docs = docsearch.similarity_search(
    query=query,
    include_metadata=True,
)
chain.run(question=query, input_documents=docs)

' PSM stands for peg-keeping reserve mechanism. It is a reserve of funds used to keep the price of a stablecoin pegged to a reference asset.'

In [36]:
# Retrieve supporting chunks for the question, then let the chain answer it.
query = "What type of the coin could be used as collateral? "
docs = docsearch.similarity_search(
    query=query,
    include_metadata=True,
)
chain.run(question=query, input_documents=docs)

' Cryptocurrency, for example, ETH.'

In [37]:
# Retrieve supporting chunks for the question, then let the chain answer it.
query = "How is the rate associated with LLAMMA "
docs = docsearch.similarity_search(
    query=query,
    include_metadata=True,
)
chain.run(question=query, input_documents=docs)

' The rate associated with LLAMMA is reflected in the AMM by adjusting all the grid of prices. When a stablecoin charges interest rate r, all the grid of prices in the AMM shifts upwards with the same rate r which is done via a base_price multiplier.'

In [38]:
# Retrieve supporting chunks for the question, then let the chain answer it.
query = "What the consequence if the borrowers can not pay off the interest rate "
docs = docsearch.similarity_search(
    query=query,
    include_metadata=True,
)
chain.run(question=query, input_documents=docs)

' If borrowers can not pay off the interest rate, the grid of prices in the AMM will shift upwards with the same rate, which will increase the amount of stablecoin and collateral needed to cover the loan.'

In [39]:
# Retrieve supporting chunks for the question, then let the chain answer it.
query = "What if the collateral devalued? "
docs = docsearch.similarity_search(
    query=query,
    include_metadata=True,
)
chain.run(question=query, input_documents=docs)

' If the collateral devalued, the LLAMMA smart contract would start converting the collateral to the stablecoin. If the coverage is too close to dangerous limits, an external liquidator may be involved.'