""" Fast Start Example #4 - RAG with Text Query
This example shows a basic RAG recipe using text query combined with LLM prompt.
We will show two different ways to achieve this basic recipe:
-- Example 4A - this will integrate Library + Prompt - and is the most scalable general solution
-- Example 4B - this will illustrate another capability of the Prompt class to add sources "inline"
without necessarily a library in-place. It is another useful tool when you want to be able to quickly
pick up a document and start asking questions to it.
Note: both of the examples are designed to achieve the same output.
"""
import os
import re
from llmware.prompts import Prompt, HumanInTheLoop
from llmware.setup import Setup
from llmware.configs import LLMWareConfig
from llmware.retrieval import Query
from llmware.library import Library


def example_4a_contract_analysis_from_library(model_name, verbose=False):

    """ Example #4a: Main general case to run a RAG workflow from a Library """

    # Load the llmware sample files
    print(f"\n > Loading the llmware sample files...")
    sample_files_path = Setup().load_sample_files()
    contracts_path = os.path.join(sample_files_path, "Agreements")

    contracts_lib = Library().create_new_library("example4_library")
    contracts_lib.add_files(contracts_path)
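
    # note: add_files runs the parser over every file in the folder - extracting the text,
    # chunking it, and indexing the chunks in the active database (sqlite here, per the
    # config set in __main__ below)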

    # questions that we want to ask each contract
    question_list = [{"topic": "executive employment agreement", "llm_query": "What are the names of the two parties?"},
                     {"topic": "base salary", "llm_query": "What is the executive's base salary?"},
                     {"topic": "governing law", "llm_query": "What is the governing law?"}]

    print(f"\n > Loading model {model_name}...")

    q = Query(contracts_lib)

    # get a list of all of the unique documents in the library

    # doc id list
    doc_list = q.list_doc_id()
    print("update: document id list - ", doc_list)

    # filename list
    fn_list = q.list_doc_fn()
    print("update: filename list - ", fn_list)

    prompter = Prompt().load_model(model_name)

    for i, doc_id in enumerate(doc_list):

        print("\nAnalyzing contract: ", str(i+1), doc_id, fn_list[i])
        print("LLM Responses")

        for question in question_list:

            query_topic = question["topic"]
            llm_question = question["llm_query"]

            doc_filter = {"doc_ID": [doc_id]}
            query_results = q.text_query_with_document_filter(query_topic, doc_filter, result_count=5, exact_mode=True)
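
            # the doc_ID filter restricts retrieval to the current contract; result_count=5 caps
            # the number of passages returned, and exact_mode=True requests exact (rather than
            # fuzzy) matching of the query terms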

            if verbose:
                # this will display the query results from the query above
                for j, qr in enumerate(query_results):
                    print("update: querying document - ", query_topic, j, doc_filter, qr)

            source = prompter.add_source_query_results(query_results)

            # *** this is the call to the llm with the source packaged in the context automatically ***
            responses = prompter.prompt_with_source(llm_question, prompt_name="default_with_context", temperature=0.3)
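
            # prompt_with_source returns a list of response dicts, each with an "llm_response" key
            # (unpacked below) - the low temperature keeps the model focused on factual extraction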

            # unpacking the results from the LLM
            for r, response in enumerate(responses):
                print("update: llm response - ", llm_question, re.sub("[\n]", " ", response["llm_response"]).strip())

            # We're done with this contract - clear the source from the prompt
            prompter.clear_source_materials()

    # Save a jsonl report of the interaction to the /prompt_history folder
    print("\nPrompt state saved at: ", os.path.join(LLMWareConfig.get_prompt_path(), prompter.prompt_id))
    prompter.save_state()

    # Save a csv report that includes the model, response, prompt, and evidence for human-in-the-loop review
    csv_output = HumanInTheLoop(prompter).export_current_interaction_to_csv()
    print("\nCSV output saved at: ", csv_output)

    return 0


def example_4b_contract_analysis_direct_from_prompt(model_name, verbose=False):

    """ Example #4b: Alternative implementation using prompt in-line capabilities without using a library """

    # Load the llmware sample files
    print(f"\n > Loading the llmware sample files...")
    sample_files_path = Setup().load_sample_files()
    contracts_path = os.path.join(sample_files_path, "Agreements")

    # questions that we want to ask each contract
    question_list = [{"topic": "executive employment agreement", "llm_query": "What are the names of the two parties?"},
                     {"topic": "base salary", "llm_query": "What is the executive's base salary?"},
                     {"topic": "governing law", "llm_query": "What is the governing law?"}]

    print(f"\n > Loading model {model_name}...")
    prompter = Prompt().load_model(model_name)

    for i, contract in enumerate(os.listdir(contracts_path)):

        # exclude a potential mac os file artifact (.DS_Store) in the samples folder path
        if contract != ".DS_Store":

            print("\nAnalyzing contract: ", str(i + 1), contract)
            print("LLM Responses")

            for question in question_list:

                query_topic = question["topic"]
                llm_question = question["llm_query"]

                # introducing "add_source_document"
                # this will perform 'inline' parsing, text chunking and query filtering on a document
                # input is a file folder path, file name, and an optional query filter
                # the source is automatically packaged into the prompt object
                source = prompter.add_source_document(contracts_path, contract, query=query_topic)
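
                # note: unlike Example 4A, there is no persistent library here - the document is
                # re-parsed for each question, which is convenient for quick one-off analysis of a
                # handful of files, but less efficient than indexing once in a Library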

                if verbose:
                    print("update: document created source - ", source)

                # calling the LLM with 'source' information from the contract automatically packaged into the prompt
                responses = prompter.prompt_with_source(llm_question, prompt_name="default_with_context",
                                                        temperature=0.3)

                # unpacking the LLM responses
                for r, response in enumerate(responses):
                    print("update: llm response: ", llm_question, re.sub("[\n]", " ",
                                                                         response["llm_response"]).strip())

                # We're done with this contract - clear the source from the prompt
                prompter.clear_source_materials()

    # Save a jsonl report of the interaction to the /prompt_history folder
    print("\nupdate: Prompt state saved at: ", os.path.join(LLMWareConfig.get_prompt_path(), prompter.prompt_id))
    prompter.save_state()

    # Save a csv report that includes the model, response, prompt, and evidence for human-in-the-loop review
    csv_output = HumanInTheLoop(prompter).export_current_interaction_to_csv()
    print("\nupdate: CSV output saved at - ", csv_output)

    return 0


if __name__ == "__main__":

    # you can pick any model from the ModelCatalog
    # we list a few representative good choices below

    LLMWareConfig().set_active_db("sqlite")

    example_models = ["bling-phi-3-gguf",
                      "llmware/bling-1b-0.1",
                      "llmware/bling-tiny-llama-v0",
                      "llmware/dragon-yi-6b-gguf"]

    # to swap in a gpt-4 openai model - uncomment these two lines and `pip3 install openai`
    # model_name = "gpt-4"
    # os.environ["USER_MANAGED_OPENAI_API_KEY"] = "<insert-your-openai-key>"

    # use local cpu model
    model_name = example_models[0]

    # two good recipes to address the use case

    # first, let's look at the main way of retrieving and analyzing from a library
    example_4a_contract_analysis_from_library(model_name)

    # second, uncomment the line below to run the "in-line" prompt approach
    # example_4b_contract_analysis_direct_from_prompt(model_name)