In [1]:
%load_ext dotenv
%dotenv .env

In [None]:
from redbox.app import Redbox
from redbox.models.settings import Settings, get_settings
from redbox.models.chain import RedboxQuery, RedboxState, AISettings, ChatLLMBackend
from langfuse.callback import CallbackHandler
from uuid import uuid4
from langchain_core.runnables.graph import MermaidDrawMethod
import langchain

from uuid import UUID

In [14]:
from langchain_community.chat_models import BedrockChat
from deepeval.models.base_model import DeepEvalBaseLLM

class AWSBedrock(DeepEvalBaseLLM):
    """DeepEval judge-model adapter around a chat model served by AWS Bedrock.

    ``generate`` and ``a_generate`` forward a prompt string to the wrapped chat
    model's ``invoke`` / ``ainvoke`` and return the ``content`` of the reply.
    """

    def __init__(
        self,
        model
    ):
        """Store the chat model that all calls are delegated to.

        Args:
            model: Chat model exposing ``invoke`` and ``ainvoke`` whose results
                carry a ``content`` attribute (a ``BedrockChat`` in this notebook).
        """
        self.model = model

    def load_model(self):
        """Return the wrapped chat model."""
        return self.model

    def generate(self, prompt: str) -> str:
        """Synchronously send ``prompt`` to the chat model and return the reply content."""
        chat_model = self.load_model()
        return chat_model.invoke(prompt).content

    async def a_generate(self, prompt: str) -> str:
        """Asynchronously send ``prompt`` to the chat model and return the reply content."""
        chat_model = self.load_model()
        res = await chat_model.ainvoke(prompt)
        return res.content

    def get_model_name(self):
        """Return a display name identifying the judge model.

        The name previously read "Custom Azure OpenAI Model", a leftover from a
        copied example that mislabelled this Bedrock-backed model.
        """
        # Include the Bedrock model id when the wrapped model exposes one.
        model_id = getattr(self.model, "model_id", None)
        return f"AWS Bedrock ({model_id})" if model_id else "AWS Bedrock"

# Judge model used by the DeepEval metrics: Mixtral 8x7B Instruct on Bedrock in
# eu-west-2, authenticated with the "default" AWS credentials profile.
custom_model = BedrockChat(
    credentials_profile_name="default",
    region_name="eu-west-2",
    # Plain string: the previous f-string prefix had no placeholders to fill.
    endpoint_url="https://bedrock-runtime.eu-west-2.amazonaws.com",
    model_id="mistral.mixtral-8x7b-instruct-v0:1",
    model_kwargs={"temperature": 0.4},
)

# Wrap the chat model so it can be passed to DeepEval metrics as `model=`.
aws_bedrock = AWSBedrock(model=custom_model)

In [3]:
# S3 keys of the documents under evaluation.
documents = [
    "richie.bird@digital.trade.gov.uk/associated-british-ports-financial-statements-2023.pdf",
]

# Prompt lists: the last entry of a list is sent as the question and any earlier
# entries are sent as chat history (see get_state).
user_prompt_summarise = [
    "summarise the document",
]
user_prompt_search = [
    "what are the expansion plans of ABP?",
]
# Alternative search prompt with an explicit "@search" prefix:
# user_prompt_search = ["@search what are the expansion plans of ABP?"]

In [None]:
def get_state(user_uuid, prompts, documents, ai_setting):
    """Build the initial ``RedboxState`` for one run of the Redbox app.

    Args:
        user_uuid: Identifier of the user making the request.
        prompts: Non-empty sequence of prompts. The last one becomes the
            question; any earlier ones are passed as ``chat_history``.
        documents: S3 keys to query; the same keys are passed as the
            permitted keys.
        ai_setting: AI settings object forwarded on the request.

    Returns:
        A ``RedboxState`` whose ``request`` is the constructed ``RedboxQuery``.

    Raises:
        ValueError: If ``prompts`` is empty (previously a bare ``IndexError``).
    """
    if not prompts:
        raise ValueError("prompts must contain at least one prompt to use as the question")

    # NOTE(review): earlier prompts are passed through unchanged as chat_history —
    # confirm RedboxQuery accepts plain strings there when more than one prompt is used.
    q = RedboxQuery(
        question=str(prompts[-1]),
        s3_keys=documents,
        user_uuid=user_uuid,
        chat_history=prompts[:-1],
        ai_settings=ai_setting,
        permitted_s3_keys=documents,
    )

    return RedboxState(request=q)

env = get_settings()
env = env.model_copy(update={"elastic_root_index": 'redbox-data-integration'})
env = env.model_copy(update={"elastic_chunk_alias": 'redbox-data-integration-chunk-current'})

ai_setting = AISettings(chat_backend=ChatLLMBackend(name="anthropic.claude-3-sonnet-20240229-v1:0", provider="bedrock"))
app = Redbox(debug=False, env=env)

x = get_state(uuid4(), prompts = user_prompt_summarise, documents = documents, ai_setting = ai_setting)
final_state_summarise = await app.run(x)

In [None]:
# Show the content of the first message in the summarise run's final state.
final_state_summarise.messages[0].content

In [None]:
# Show the content of the second message; this is the text later passed to the
# DeepEval test cases as `actual_output`.
final_state_summarise.messages[1].content

In [9]:
from langchain_community.document_loaders import PyPDFLoader

file_path = "/Users/richiebird/Downloads/associated-british-ports-financial-statements-2023.pdf"

loader = PyPDFLoader(file_path)
pages = []
content = []
async for page in loader.alazy_load():
    pages.append(page)
    content.append(page.page_content)

content_subset = " ".join(content[0:5]) # cant use all the pages, context window not large enough and langchain errors/hangs

In [None]:
# Number of page texts collected from the PDF loader.
len(content)

In [None]:
from deepeval import evaluate
from deepeval.metrics import HallucinationMetric
from deepeval.test_case import LLMTestCase

# Hallucination check: judge the Redbox summary against the text of the first
# five PDF pages, using the Bedrock-hosted judge model.
metric = HallucinationMetric(model=aws_bedrock, threshold=0.3)
test_case = LLMTestCase(
    actual_output=final_state_summarise.messages[1].content,
    context=content[:5],
    input=user_prompt_summarise[0],
)

# Standalone alternative to evaluate():
# metric.measure(test_case)
# print(metric.score, metric.reason)

hallucination = evaluate(metrics=[metric], test_cases=[test_case])

In [None]:
# Show the value returned by evaluate() for the hallucination test case.
hallucination

In [None]:
from deepeval.metrics import SummarizationMetric

# Summarisation check: compare the Redbox summary with the joined text of the
# first five PDF pages.
test_case = LLMTestCase(input=content_subset, actual_output=final_state_summarise.messages[1].content)

# The assessment questions previously passed here were copied from the DeepEval
# documentation example. They ask about the coverage score itself, not about the
# ABP document, so they could not measure how well this summary covers its source.
# With `assessment_questions` unset, DeepEval generates the questions from the
# test case input at evaluation time.
metric = SummarizationMetric(
    threshold=0.5,
    model=aws_bedrock,
)

# To run metric as a standalone
# metric.measure(test_case)
# print(metric.score, metric.reason)

evaluate(test_cases=[test_case], metrics=[metric])

In [None]:
# Evaluating RAG and our retrieved chunks

# `client` is no longer imported from opensearchpy: that name is bound to the
# OpenSearch connection just below, so the imported module was shadowed at once.
from opensearchpy import OpenSearch, RequestsHttpConnection

# Connection to the OpenSearch instance on localhost.
# NOTE(review): admin/admin are presumably local development defaults — do not
# reuse this cell against a shared cluster without moving them to configuration.
client = OpenSearch(
    hosts=[{"host": "localhost", "port": "9200"}],
    http_auth=("admin", "admin"),
    use_ssl=False,  # to run locally, changed from True to False
    connection_class=RequestsHttpConnection,
    retry_on_timeout=True,
)

# Match every document in the index, returning up to 1000 hits and tracking the
# total hit count.
query = {
    "size": 1000,
    "track_total_hits": True,
    "query": {
        "match_all": {}
    },
}

response = client.search(index='redbox-data-integration-chunk-current', body=query)
print(response)

# Show the field mapping of the chunk index.
client.indices.get_mapping(index='redbox-data-integration-chunk-current')

In [33]:
from pydantic import BaseModel
# NOTE(review): this class reuses the name `AISettings` that is imported from
# redbox.models.chain near the top of the notebook. Once this cell has run, the
# imported class is shadowed for any cell executed afterwards — consider renaming.
class AISettings(BaseModel):
    """Prompts and other AI settings

    Notebook-local settings model with defaults only; the instance created below
    (`ai_settings`) supplies `rag_k` and `rag_num_candidates` to the kNN query.
    """

    # LLM settings
    context_window_size: int = 128_000
    llm_max_tokens: int = 1024

    # Prompts and LangGraph settings
    max_document_tokens: int = 1_000_000
    self_route_enabled: bool = False
    map_max_concurrency: int = 128
    stuff_chunk_context_ratio: float = 0.75
    recursion_limit: int = 50

    # Elasticsearch RAG and boost values
    # rag_k is used as the query "size" and rag_num_candidates as the knn "k"
    # in the kNN search cell.
    rag_k: int = 1000
    rag_num_candidates: int = 1000
    rag_gauss_scale_size: int = 3
    rag_gauss_scale_decay: float = 0.5
    rag_gauss_scale_min: float = 1.1
    rag_gauss_scale_max: float = 2.0
    elbow_filter_enabled: bool = False
    match_boost: float = 1.0
    match_name_boost: float = 2.0
    match_description_boost: float = 0.5
    match_keywords_boost: float = 0.5
    knn_boost: float = 2.0
    similarity_threshold: float = 0.7

    # this is also the azure_openai_model
    # (chat backend field is disabled in this local copy)
    #chat_backend: ChatLLMBackend = ChatLLMBackend()

    # settings for tool call
    tool_govuk_retrieved_results: int = 100
    tool_govuk_returned_results: int = 5
    
# Instance with all defaults; distinct from `ai_setting` (singular), which is
# built earlier from the imported class and passed to Redbox.
ai_settings = AISettings()

In [None]:
from langchain_community.embeddings import BedrockEmbeddings

# Embed the search prompt with the Titan text-embedding model on Bedrock.
embedding_model = BedrockEmbeddings(region_name='eu-west-2', model_id="amazon.titan-embed-text-v2:0")
query_vector = embedding_model.embed_query(user_prompt_search[0])

# Restrict hits to "normal"-resolution chunks whose file_name or uri metadata
# matches one of the selected S3 keys. The keys come from `documents` rather than
# a duplicated literal, so the filter cannot drift from what is sent to Redbox.
query_filter = [
    {
        "bool": {
            "should": [
                {"terms": {"metadata.file_name.keyword": list(documents)}},
                {"terms": {"metadata.uri.keyword": list(documents)}},
            ]
        }
    },
    {"term": {"metadata.chunk_resolution.keyword": "normal"}},
]

# kNN search on `vector_field`, keeping only hits scoring at least 0.6.
knn_final_query = {
    "size": ai_settings.rag_k,
    "min_score": 0.6,
    "query": {
        "bool": {
            "must": [
                {
                    "knn": {
                        "vector_field": {
                            "vector": query_vector,
                            "k": ai_settings.rag_num_candidates,
                            # "boost": ai_settings.knn_boost,
                        }
                    }
                },
            ],
            "filter": query_filter,
        }
    },
}

response_knn = client.search(index='redbox-data-integration-chunk-current', body=knn_final_query)
response_knn

In [None]:
# Collect the text of the top hits and print "rank, page number, score" for each.
retrieved_texts = []
# chunk retrieval limit is 30, so only the first 30 hits are used
for i, hit in enumerate(response_knn['hits']['hits'][:30]):
    page = hit['_source']['metadata']['page_number']
    score = hit['_score']
    retrieved_texts.append(hit['_source']['text'])
    print(f'{i}, {page}, {score}')

In [None]:
x = get_state(uuid4(), prompts = user_prompt_search, documents = documents, ai_setting = ai_setting)
final_state_search = await app.run(x)