# WILMA Chains demo

## Step 0 - Install the package

In [1]:
# Uncomment the line below to install the package into the running kernel's
# environment (skip it when the package is already installed).
# %pip install chatur-chains

## Step 1 - Set up the environment variables

In [2]:
import os
from pathlib import Path

# Location of the RESEARCHPAPER vector database, reached through the parent directory
vdb_dir = Path("..", "testing", "data", "RESEARCHPAPER")

# Read the LLM endpoint URL and the API keys from the environment.
# Each value is None when the corresponding variable is not set.
llm_url = os.getenv('LLM_URL')
api_key = os.getenv('LLM_API_KEY')
openai_api_key = os.getenv('OPENAI_API_KEY')


## Step 2 - Build an LLM proxy

In [3]:
from chains.llm_proxy import build_llm_proxy

# Build the LLM proxy that the following cells call through `llm.invoke(...)`.
# The endpoint URL and the API key are the values read from the environment in Step 1.
# NOTE(review): temperature=0.9 presumably makes the generated text vary between
# runs, so re-running the notebook will not reproduce the saved outputs exactly.
llm = build_llm_proxy(
    model="Mistral-7B-Instruct-v0.2",
    url=llm_url,
    engine="OpenAI",
    temperature=0.9,
    api_key=api_key,
)

In [4]:
# Send a single prompt through the proxy and print only the text of the reply
message = llm.invoke("Compose a haiku about the beauty of Tucson")
print(message.content)

 Saguaro's reach skyward,

Desert bloom, Cactus castle shines,
Tucson's beauty ignites.


In [5]:
# Print the complete object returned by invoke(), not just its `content` text
from pprint import pprint
pprint(llm.invoke("Compose a rap about the constitution of the USA"))

AIMessage(content=" (Verse 1)\nYo, gather 'round, it's time to drop some knowledge,\nAbout the foundation that keeps our freedom unfoldable,\nThe United States Constitution, a legal masterpiece,\nCreated by the people, for the people, to reach peace\n\nBorn in 1787, in Philadelphia's hall,\nOpened up the debate, no time to stall,\nFifty-five delegates, from thirteen colonies strong,\nCame together, to give form to a new, long-lasting song\n\n(Chorus)\nWe the people, we the power,\nCreating laws, that will endower,\nThe land of the free, the home of the brave,\nWith checks and balances, on which we'll pave\n\n(Verse 2)\nFirst, the Articles of Confederation, gave us a try,\nBut the union was weak, about to dry,\nSo they called for a convention, to draft a new design,\nTo strengthen the nation, and redefine\n\nThe three branches of government, keeping power in line,\nThe legislative, executive, and the judicial, fine\nA checks-and-balances system, preventing a monopoly,\nEnsuring fairness

## Step 3-1: Custom application - PDF denoising

In [6]:
from chains.pdf_fixer import build_fix_pdf_chain

# Build the PDF-fixing chain on the same model name, endpoint URL and API key
# that were used for the LLM proxy in Step 2.
fixer = build_fix_pdf_chain(
    model_name="Mistral-7B-Instruct-v0.2",
    llm_host=llm_url,
    llm_engine="OpenAI",
    api_key=api_key,
)

# Sample input for the fixer: text in which the whitespace between words is missing.
# The literal is deliberately left exactly as is, since the noise is the point of the demo.
noisy_text = """1.Framestate-of-the-artAImethodologiesinyourareaofexpertiseIntheareaoflargelanguagemodel,themostpopularAImethodologiesinclude,butnotlimitto:a.
RecurrentNeuralNetworkMotivation:preservecontextProblem:sequentialprocessingofinformationisslow,weneedparalleltrainingb.
TransformerKeyingredientistheattentionblockHighlyparallelizableProblem:finitewindow;quadraticscalingc.
StateSpaceModelLinearscalingMambablockwithselectivestatespacesisprovedtobeasperformantastransformersundercertainconditions2.IdentifykeybarriersinachievinggreaterimpactofAILackofhighqualitytrainingdataExtremerequirementformemoryandcomputingpowerfortrainingHighcostforinferenceLackofexplainability3.OutlinekeyenablerstoachievinggreaterimpactofAI.HighqualitymodelsthatachieveshighperformancewithlowcostTrainingtechniquesthatenableshighefficiencytrainingAccesstohighqualitydatabasefortrainingAccesstohighperformancecomputingresourcesBio:SikanLiisaresearchassociateattheTexasAdvancedComputingCenter(TACC)’sScalableComputationalIntelligence(SCI)group.
Herworkfocusesondevelopingmachinelearninganddataminingtechniquestoanalyzelarge-scale,complexdatasets.
She’spublishedseveralpapersinthisfieldandactivelycontributestoresearch,development,andsupportinitiativesinvolvingbigdata,statisticalanalysis,andmachinelearningatTACC.SikanLi"""

# Run the noisy text through the fixer chain and show the result
print(fixer.invoke(noisy_text))

 1. Frame state-of-the-art AI methodologies in your area of expertise in the field of large language models, the most popular AI methodologies include, but are not limited to:
   a. Recurrent Neural Network
      Motivation: preserve context
      Problem: sequential processing of information is slow. We need parallel training.
   b. Transformer
      Key ingredient is the attention block.
      Highly parallelizable.
      Problem: finite window; quadratic scaling.
   c. State Space Model (Mamba block with selective state spaces)
      Is proven to be as performant as transformers under certain conditions.

  Identify key barriers in achieving greater impact of AI:
  1. Lack of high-quality training data
  2. Extreme requirement for memory and computing power for training
  3. High cost for inference
  4. Lack of explainability

  Outline key enablers to achieving greater impact of AI:
  1. High-quality models that achieve high performance with low cost
  2. Training techniques that e

## Step 4: Dense Passage Retriever

In [7]:
from chains.retrieval import build_retriever

# Build a retriever on top of the local vector store selected in Step 1
retriever = build_retriever(
    vector_store=str(vdb_dir),        # Can also be a Weaviate database
    top_k=10,
    embeddings_engine="HuggingFace",  # Can also be OpenAI, or GPT4All
)

docs = retriever.invoke("pine trees non native to Arizona")

# Show the retrieved documents as a numbered list, with a blank line after each entry
for rank, doc in enumerate(docs, start=1):
    print(f"{rank}. ", doc, end="\n\n")

1.  page_content='29 The University of Arizona Cooperative ExtensionSection 2. Non-native pine trees suited to Arizona climate 1. Pinus canariensis:\nCanary Island pine\n2. Pinus eldarica:\nAfghan pine\n3. Pinus halepensis:  Aleppo pine\n4. Pinus nigra:  Austrian black pine\n5. Pinus mugo mugo:  Mugo pine\n6. Pinus pinea:  Italian stone pine\n7. Pinus thunbergii:  Japanese black pine\n8. Pinus roxburghii:  Chir pine' metadata={'page': 28, 'source': 'az1584.pdf'}

2.  page_content='9 The University of Arizona Cooperative ExtensionSection 1. Pine trees native to Arizona 1. Pinus aristata:  Rocky Mountain bristlecone pine\n2. Pinus cembroides:  Mexican pinyon pine\n3. Pinus edulis:  Pinyon pine\n4. Pinus engelmannii:  Apache pine\n5. Pinus flexilis:  Limber pine\n6. Pinus leiophylla var.\nchihuahuana:  Chihuahua pine\n7. Pinus monophylla:  One-needled pinyon pine\n8. Pinus ponderosa var.\nscopulorum : Ponderosa pine\nAlso Pinus ponderosa var.\narizonica : Arizona pine 9. Pinus strobiformi

## Step 5-1: RAG using CyVerse LLM

In [10]:
from chains.rag import build_rag_chain

# RAG chain answered by the LLM endpoint taken from the environment in Step 1,
# over the same vector store and embeddings engine as the retriever in Step 4.
cyverse_rag = build_rag_chain(
    llm_host=llm_url,
    model_name="Mistral-7B-Instruct-v0.2",
    llm_engine="openai",
    api_key=api_key,
    org_id=None,
    vector_store=str(vdb_dir),
    embeddings_engine="HuggingFace",
    embeddings_model=None,
)



In [13]:
# The question is kept in a variable because the OpenAI-backed chain in Step 5-2
# is asked the same thing. The chat history is empty: this is a single-turn query.
question = "How can you treat wood rot on lemon trees?"
print(cyverse_rag.invoke({"input":question, "chat_history":[]}))

 To treat wood rot on lemon trees, you should prune infected limbs only when the trees are dormant, cutting back to at least one foot below the canker (```Since sunburned bark is the primary infection site, large limbs should be pruned only when trees are dormant. When removing infected limbs, cut back to at least one foot below the canker.```). The cut area and pruning tools should be treated with a solution of one part household bleach and nine parts water, and the pruning wounds should be painted with a copper fungicide to prevent infection (```The cut area and pruning tools should be treated with a solution of one part household bleach and nine parts water. Pruning wounds should be painted with a copper fungicide to prevent infection. Reapply the copper compound to the wound each spring to ensure adequate protection against infection.```). Maintaining tree vigor through proper fertilization and deep watering on a regular schedule is also important for wound healing and minimizing d

## Step 5-2: RAG using OpenAI

In [15]:
# Same kind of RAG chain as in Step 5-1, but answered by the OpenAI API with the
# OpenAI key read from the environment in Step 1.
# NOTE(review): the other cells open this vector store with the "HuggingFace"
# embeddings engine, while this one uses "GPT4All" - confirm that both engines
# are compatible with the vectors stored in the database.
openai_rag = build_rag_chain(
    llm_host="https://api.openai.com/v1/",
    model_name="gpt-4o",
    llm_engine="openai",
    api_key=openai_api_key,
    org_id=None,
    vector_store=str(vdb_dir),
    embeddings_engine="GPT4All",
    embeddings_model=None,
)



In [16]:
# Ask the OpenAI-backed chain the same `question` that was defined in Step 5-1
print(openai_rag.invoke({"input":question, "chat_history":[]}))

### Treating Wood Rot on Lemon Trees

To treat wood rot on lemon trees, follow these steps:

1. **Pruning Infected Limbs**: 
   - "Broken or dying branches should be removed with a flush, clean cut close to the main branch to minimize development of wood rot at wound sites."
   - "When removing infected limbs, cut back to at least one foot below the canker."

2. **Disinfecting Tools and Wounds**:
   - "The cut area and pruning tools should be treated with a solution of one part household bleach and nine parts water."
   - "Pruning wounds should be painted with a copper fungicide to prevent infection."

3. **Preventing Further Infection**:
   - "Reapply the copper compound to the wound each spring to ensure adequate protection against infection."
   - "Whitewash, applied to exposed lower trunk areas, will reduce the possibilities of infection. This material reflects radiation and reduces bark temperature."

4. **Maintaining Tree Vigor**:
   - "Tree vigor should be maintained through pro

## Step 6: DIY chain - _Unit Testing LLMs_

In [19]:
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate


def build_rag_tester(llm_proxy):
    """Assemble a chain in which an LLM grades an LLM-generated answer against a reference.

    The chain is composed as ``prompt | llm_proxy | JsonOutputParser()``. The prompt
    has two placeholders, ``reference`` and ``answer``, and asks the model to reply
    with a JSON object holding a ``correct`` flag and, when the answer is judged
    incorrect, an ``explanation``.

    Args:
        llm_proxy: The component placed between the prompt and the JSON output parser.

    Returns:
        The composed chain.
    """
    # The doubled braces keep the JSON example literal; only {reference} and
    # {answer} are template placeholders.
    grading_instructions = HumanMessagePromptTemplate.from_template(
            """You are a testing the generation capabilities of a large language model. You will get a passage 
            generated by an LLM and the reference provided by a human. Your job is to decide whether the llm's 
            passage is paraphrasing the reference. Consider the following criteria to determine that the llm's 
            answer is correct: - None of the information asked for in the question is missing in the answer - All the 
            key concepts mentioned in the reference are contained in the answer - None of the elements in the 
            reference are missing in the answer - The answer totally agrees with the reference The output is json 
            object that states if the llm answered correctly and if and only if it didn't, also include an 
            explanation of why it didn't. Use the following structure:
                
                ```{{
                    "correct": false,
                    "explanation": "The response is partially correct but is missing information from the reference"
                }}```
                
            
            Reference: ```{reference}```
            
            LLM answer: ```{answer}```
            """
    )
    grading_prompt = ChatPromptTemplate.from_messages([grading_instructions])
    json_parser = JsonOutputParser()

    return grading_prompt | llm_proxy | json_parser

# The grader reuses the `llm` proxy built in Step 2 (same model, temperature=0.9)
rag_tester = build_rag_tester(llm)

In [20]:
# Ask the chain from Step 5-1 a question and show its answer, followed by a blank line
rag_response = cyverse_rag.invoke({"input": "How do you know if bark is slipping?", "chat_history": []})
print(rag_response, end="\n\n")

# Reference passage that the grader compares the generated answer against
reference = (
    "[For a citrus tree] it is a good idea to test the tree to see if the bark slips. Simply "
    "score the bark with a knife, and see if it peels away easily."
)

# Let the LLM-based tester judge the answer and pretty-print its verdict
pprint(rag_tester.invoke({"reference": reference, "answer": rag_response}))

 To determine if bark is slipping, you can test the tree by scoring the bark with a knife and observing if it peels away easily. According to the context passage, "Before budding, it is a good idea to test the tree to see if the bark slips. Simply score the bark with a knife, and see if it peels away easily. Bark slipping occurs in the spring and resumes in the fall in Arizona." (emphasis added) Therefore, if the bark peels away easily after scoring it with a knife, it is considered to be slipping.

{'correct': True,
 'explanation': "The LLM's passage paraphrases the reference by providing "
                'additional context and clarification without missing any key '
                'concepts or information from the reference.'}
