In [5]:
import sagemaker
import boto3
import json
import subprocess
from langchain_aws import ChatBedrockConverse
from os import environ
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import OpenSearchVectorSearch
from opensearchpy import RequestsHttpConnection, AWSV4SignerAuth
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from typing import List
from langchain.embeddings.base import Embeddings
from sentence_transformers import SentenceTransformer

In [6]:
# Login AWS
aws_profile = "vrt-analytics-engineer-nonsensitive"
envvars = subprocess.check_output(['aws-vault', 'exec', aws_profile, '--', 'env'])
for envline in envvars.split(b'\n'):
    line = envline.decode('utf8')
    eqpos = line.find('=')
    if eqpos < 4:
        continue
    k = line[0:eqpos]
    v = line[eqpos+1:]
    if k == 'AWS_ACCESS_KEY_ID':
        aws_access_key_id = v
    if k == 'AWS_SECRET_ACCESS_KEY':
        aws_secret_access_key = v
    if k == 'AWS_SESSION_TOKEN':
        aws_session_token = v

session = boto3.Session(
aws_access_key_id, aws_secret_access_key, aws_session_token,region_name="eu-west-1"
)
credentials = session.get_credentials()
sagemaker_session = sagemaker.Session(boto_session=session)
role = sagemaker.get_execution_role(sagemaker_session=sagemaker_session)
auth = AWSV4SignerAuth(credentials, 'eu-west-2', 'aoss')


environ["AWS_ACCESS_KEY_ID"] = aws_access_key_id
environ["AWS_SECRET_ACCESS_KEY"] = aws_secret_access_key
environ["AWS_SESSION_TOKEN"] = aws_session_token

In [17]:
# LLM endpoint
llm = ChatBedrockConverse(
    region_name="eu-west-2",
    model="mistral.mistral-large-2402-v1:0" , # "meta.llama3-70b-instruct-v1:0"
    temperature=0.6,
    top_p=0.6,
    max_tokens=512
)

model_name = "NetherlandsForensicInstitute/robbert-2022-dutch-sentence-transformers"
encode_kwargs = {'normalize_embeddings': False}

class CustomEmbeddings(Embeddings):
    def __init__(self, model_name: str):
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, documents: List[str]) -> List[List[float]]:
        return [self.model.encode(d).tolist() for d in documents]

    def embed_query(self, query: str) -> List[float]:
        return self.model.encode([query])[0].tolist()
    
model_name = "NetherlandsForensicInstitute/robbert-2022-dutch-sentence-transformers"
encode_kwargs = {'normalize_embeddings': False}
embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    encode_kwargs=encode_kwargs
)

embeddings = CustomEmbeddings(model_name=model_name)

# Init OpenSearch client connection
docsearch = OpenSearchVectorSearch(
     index_name="aoss-index",  # TODO: use the same index-name used in the ingestion script
     embedding_function=embeddings,
     opensearch_url="https://epcavlvwitam2ivpwv4k.eu-west-2.aoss.amazonaws.com:443",  # TODO: e.g. use the AWS OpenSearch domain instantiated previously
      http_auth = auth,
      use_ssl = True,
      verify_certs = True,
      connection_class = RequestsHttpConnection,
)

retriever = docsearch.as_retriever(search_kwargs={'k': 5,"vector_field":"vrtmax_catalog_vector"})

In [8]:
def format_docs(docs):
    context = []
    for d in docs:
        program_description = ""
        if d.metadata["mediacontent_page_description_program"]  != "":
            program_description = docs[0].metadata["mediacontent_page_description_program"]
        elif d.metadata["mediacontent_page_editorialtitle_program"]  != "":
            program_description = docs[0].metadata["mediacontent_page_editorialtitle_program"]

        context.append("Het programma met de naam " + d.metadata["mediacontent_pagetitle_program"] +\
                " heeft volgende beschrijving: " + program_description  +\
                " De episode van dit programma heeft als beschrijving: " + d.metadata["mediacontent_page_description"] +\
                " De episode zal online staan tot " + d.metadata["offering_publication_planneduntil"] +\
                " De episode heeft als URL " + d.metadata["mediacontent_pageurl"] +\
                " De episode heeft als foto " + "https:" +d.metadata["mediacontent_imageurl"] 
        )
        
    return "\n\n".join(context)

In [9]:

system_prompt = """
Je bent een hulpvaardige assistent die Nederlands spreekt.
Je krijgt vragen over de catalogus van programma's van het videoplatform VRT MAX. 
Je probeert mensen te helpen om programma's aan te bevelen die aansluiten bij hun vraag.
Daarvoor krijg je een beschrijving van een aantal programma's.
Aan jou om wat relevant is aan te bevelen.
Begin niet met het geven van jouw mening over de programma's. Begin direct met het aanbevelen van relevante content aan de gebruiker.
"""

message = """Beantwoordt de vraag op basis van de volgende context:

{context}

Vraag: {question}
"""
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", message),
    ]
)


from langchain_core.runnables import RunnableParallel

rag_chain_from_docs = (
    RunnablePassthrough.assign(context=(lambda x: format_docs(x["context"])))
    | prompt
    | llm
    | StrOutputParser()
)

rag_chain_with_source = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
).assign(answer=rag_chain_from_docs)

config = {"configurable": {"session_id": "special_key"}}

question = "iets over geld en beleggen"
response = rag_chain_with_source.invoke({"question": question}, config)

In [13]:
import mlflow

In [15]:
%%writefile chain.py
import sagemaker
import boto3
import json
import subprocess
from langchain_aws import ChatBedrockConverse
from os import environ
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import OpenSearchVectorSearch
from opensearchpy import RequestsHttpConnection, AWSV4SignerAuth
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from typing import List
from langchain.embeddings.base import Embeddings
from sentence_transformers import SentenceTransformer


# LLM endpoint
llm = ChatBedrockConverse(
    region_name="eu-west-2",
    model="mistral.mistral-large-2402-v1:0" , # "meta.llama3-70b-instruct-v1:0"
    temperature=0.6,
    top_p=0.6,
    max_tokens=512
)

model_name = "NetherlandsForensicInstitute/robbert-2022-dutch-sentence-transformers"
encode_kwargs = {'normalize_embeddings': False}

class CustomEmbeddings(Embeddings):
    def __init__(self, model_name: str):
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, documents: List[str]) -> List[List[float]]:
        return [self.model.encode(d).tolist() for d in documents]

    def embed_query(self, query: str) -> List[float]:
        return self.model.encode([query])[0].tolist()
    
model_name = "NetherlandsForensicInstitute/robbert-2022-dutch-sentence-transformers"
encode_kwargs = {'normalize_embeddings': False}
embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    encode_kwargs=encode_kwargs
)

embeddings = CustomEmbeddings(model_name=model_name)

# Init OpenSearch client connection
docsearch = OpenSearchVectorSearch(
     index_name="aoss-index",  # TODO: use the same index-name used in the ingestion script
     embedding_function=embeddings,
     opensearch_url="https://epcavlvwitam2ivpwv4k.eu-west-2.aoss.amazonaws.com:443",  # TODO: e.g. use the AWS OpenSearch domain instantiated previously
      http_auth = auth,
      use_ssl = True,
      verify_certs = True,
      connection_class = RequestsHttpConnection,
)

retriever = docsearch.as_retriever(search_kwargs={'k': 5,"vector_field":"vrtmax_catalog_vector"})

def format_docs(docs):
    context = []
    for d in docs:
        program_description = ""
        if d.metadata["mediacontent_page_description_program"]  != "":
            program_description = docs[0].metadata["mediacontent_page_description_program"]
        elif d.metadata["mediacontent_page_editorialtitle_program"]  != "":
            program_description = docs[0].metadata["mediacontent_page_editorialtitle_program"]

        context.append("Het programma met de naam " + d.metadata["mediacontent_pagetitle_program"] +\
                " heeft volgende beschrijving: " + program_description  +\
                " De episode van dit programma heeft als beschrijving: " + d.metadata["mediacontent_page_description"] +\
                " De episode zal online staan tot " + d.metadata["offering_publication_planneduntil"] +\
                " De episode heeft als URL " + d.metadata["mediacontent_pageurl"] +\
                " De episode heeft als foto " + "https:" +d.metadata["mediacontent_imageurl"] 
        )
        
    return "\n\n".join(context)


system_prompt = """
Je bent een hulpvaardige assistent die Nederlands spreekt.
Je krijgt vragen over de catalogus van programma's van het videoplatform VRT MAX. 
Je probeert mensen te helpen om programma's aan te bevelen die aansluiten bij hun vraag.
Daarvoor krijg je een beschrijving van een aantal programma's.
Aan jou om wat relevant is aan te bevelen.
Begin niet met het geven van jouw mening over de programma's. Begin direct met het aanbevelen van relevante content aan de gebruiker.
"""

message = """Beantwoordt de vraag op basis van de volgende context:

{context}

Vraag: {question}
"""
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", message),
    ]
)


from langchain_core.runnables import RunnableParallel

rag_chain_from_docs = (
    RunnablePassthrough.assign(context=(lambda x: format_docs(x["context"])))
    | prompt
    | llm
    | StrOutputParser()
)

rag_chain_with_source = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
).assign(answer=rag_chain_from_docs)

config = {"configurable": {"session_id": "special_key"}}

question = "iets over geld en beleggen"
response = rag_chain_with_source.invoke({"question": question}, config)
mlflow.models.set_model(rag_chain_with_source)

Overwriting chain.py


In [16]:
with mlflow.start_run():
    model_info = mlflow.langchain.log_model(
      lc_model="chain.py",
      artifact_path="chat_agent",
    )


MlflowException: Failed to run user code from /home/willem/Documents/vrt/vrtmax-llm/chain.py. Error: name 'auth' is not defined.Review the stack trace for more information.

In [12]:
import mlflow
mlflow.set_experiment("Test")

with mlflow.start_run():
    logged_model = mlflow.langchain.log_model(rag_chain_from_docs, "langchain_model")

2024/09/05 10:55:19 INFO mlflow.tracking.fluent: Experiment with name 'Test' does not exist. Creating a new experiment.


MlflowException: Failed to save runnable sequence: {'2': 'ChatBedrockConverse -- Cannot save runnable without `save` method.'}.

In [53]:
response

{'context': [Document(metadata={'mediacontent_page_description_program': '', 'mediacontent_page_description': 'Wat is een blockchain? Hét woord van de voorbije jaren: blockchain! En na deze video met wiskundige An Braeken (VUB) snap jij ook perfect wat de blockchain is.', 'mediacontent_page_editorialtitle_program': 'Krijg simpele antwoorden op wetenschappelijke vragen', 'mediacontent_pagetitle_program': 'WetenSNAP', 'mediacontent_pagetitle_season': '2022-2023', 'mediacontent_pagetitle': 'Wat is een blockchain?', 'offering_publication_planneduntil': '2032-12-31 22:59:00.000', 'brand_contentbrand': 'radio1', 'mediacontent_pageurl': 'https://www.vrt.be/vrtnu/a-z/wetensnap/2022-2023/wetensnap-s2022-2023a19/', 'mediacontent_imageurl': '//images.vrt.be/orig/2022/10/26/84c3f8c6-5525-11ed-b07d-02b7b76bf47f.png', 'mediacontent_programimageurl': '//images.vrt.be/orig/2022/09/29/bae33113-400c-11ed-b07d-02b7b76bf47f.jpg'}, page_content="Iets met je bankkaart kopen, geld overschrijven of geld stort

In [47]:
docs = docsearch.similarity_search_with_score(
    "een thuis episode over voetbal",
    k=5,
    vector_field="vrtmax_catalog_vector"
)

In [48]:
docs

[(Document(metadata={'mediacontent_page_description_program': '', 'mediacontent_page_description': 'Ontdek wat er allemaal gebeurde in het 24e seizoen van Thuis.', 'mediacontent_page_editorialtitle_program': 'Vlaanderens populairste dagelijkse fictiereeks', 'mediacontent_pagetitle_program': 'Thuis', 'mediacontent_pagetitle_season': "Extra's", 'mediacontent_pagetitle': 'Seizoen 24 samengevat', 'offering_publication_planneduntil': '2025-12-31 22:59:00.000', 'brand_contentbrand': 'een', 'mediacontent_pageurl': 'https://www.vrt.be/vrtmax/a-z/thuis/extra-s/thuis-extra-recap-s24/', 'mediacontent_imageurl': '//images.vrt.be/orig/2021/08/12/68224bd5-fb52-11eb-b07d-02b7b76bf47f.jpg', 'mediacontent_programimageurl': '//images.vrt.be/orig/2021/11/22/29012acd-4ba2-11ec-b07d-02b7b76bf47f.png'}, page_content='Ontdek wat er allemaal gebeurde in het 24e seizoen van Thuis. Vlaanderens populairste dagelijkse fictiereeks'),
  0.004640531),
 (Document(metadata={'mediacontent_page_description_program': '',

In [6]:
# Generate prompt
system_prompt = """
Je bent een hulpvaardige assistent die Nederlands spreekt.
Je krijgt vragen over de catalogus van programma's van het videoplatform VRT MAX. 
Je probeert mensen te helpen om programma's aan te bevelen die aansluiten bij hun vraag.
Daarvoor krijg je een beschrijving van een aantal programma's.
Aan jou om wat relevant is aan te bevelen.
Begin niet met het geven van jouw mening over de programma's. Begin direct met het aanbevelen van relevante content aan de gebruiker.
"""

user_prompt = ""
for item in out.keys():
    
    user_prompt += "Het programma met de naam " + out[item]["metadata"]["mediacontent_pagetitle_program"] +\
          " heeft volgende beschrijving: " + out[item]["metadata"]["mediacontent_page_description"] 
    
    # + " en in het programma wordt er onder meer het volgende gezegd dat misschien relevant is voor de vraag: " + \out[item]["page_content"]

user_prompt += " Tot zover alle informatie over de programmas."

user_prompt += " Gelieve met die informatie een antwoord te geven op volgende vraag: " + question

In [7]:
import mlflow
import os
import logging

from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.runnables import (
    RunnableParallel,
    RunnableConfig,
    RunnableSerializable,
    ConfigurableField,
)
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser

from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_core.documents import Document
from typing import Any, Iterable, List, Optional, Tuple
from langchain_openai import ChatOpenAI
from langchain.chains import RetrievalQA



from core.secret_utils import get_open_ai_api_key,get_pg_db_connection

logger = logging.getLogger("mlflow")
os.environ['OPENAI_API_KEY']=get_open_ai_api_key()
os.environ['MLFLOW_TRACKING_URI']="http://localhost:5000"

prompt_template = """Use the following pieces of context to compose a comprehensive reply to the question at the end. 

        {context}

        Question: {question}
        Answer :
"""

PROMPT = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"],
)

chain_type_kwargs = {
    "prompt": PROMPT,
}

def get_pg_vector_client(collection_name: str):
    from langchain_postgres.vectorstores import (
        PGVector,
        _LANGCHAIN_DEFAULT_COLLECTION_NAME,
    )
    from langchain_openai import OpenAIEmbeddings

    connection_string = get_pg_db_connection()
    vectorstore = PGVector(
        embeddings=OpenAIEmbeddings(),
        connection=connection_string,
        collection_name=collection_name,
    )
    return vectorstore

class RAGModel(mlflow.pyfunc.PythonModel):

    def __init__(self):

        self.llm = ChatOpenAI(
            streaming=True,
            model_name="gpt-4o-2024-05-13",
            temperature=0.7,
            api_key=get_open_ai_api_key(),
        )


    def predict(self, context, model_input):
        collection_name = f"test_collection"
        vectorstore = get_pg_vector_client(collection_name)
        retriever = vectorstore.as_retriever(
            search_type="similarity_score_threshold",
            search_kwargs={"k": 4, "score_threshold": 0.3},
        )

        qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            retriever=retriever,
            input_key="question",
            return_source_documents=True,
            chain_type="stuff",  # You might need to replace this with the appropriate chain type.
            chain_type_kwargs=chain_type_kwargs,
        )

        for chunk in qa_chain.stream({**input, **{"query": input["question"]}}):
            for key in chunk:
                if key == "result":
                    return chunk[key]

model = RAGModel()
os.environ['MLFLOW_S3_ENDPOINT_URL']="http://localhost:9000"
os.environ['AWS_ACCESS_KEY_ID']="minio"
os.environ['AWS_SECRET_ACCESS_KEY']="minio123"
os.environ['AWS_DEFAULT_REGION']="minio123"
mlflow.set_tracking_uri(os.environ['MLFLOW_TRACKING_URI'])
mlflow.set_experiment("RAG")


with mlflow.start_run() as run:
    model_info = mlflow.pyfunc.log_model(
        python_model=model,
        artifact_path="rag-model",
        pip_requirements=["-r requirements.txt"],
    )
    print(model_info.model_uri)

AIMessageChunk(content=[{'type': 'text', 'text': 'Als je geïnteresseerd bent in programma\'s over geld en beleggen, zou ik je aanbevelen om eens te kijken naar "Universiteit van Vlaanderen" en "Schermtijd". Beide programma\'s behandelen thema\'s rondom beleggen en financiën, waarbij "Universiteit van Vlaanderen" specifiek ingaat op de wereld van bitcoins en virtueel geld, en "Schermtijd" zich richt op veilig beleggen via beleggingsapps.', 'index': 0}], response_metadata={'stopReason': 'end_turn', 'metrics': {'latencyMs': 2199}}, id='run-fff28f28-e1e0-4010-990f-f52bd11f7d5d', usage_metadata={'input_tokens': 572, 'output_tokens': 115, 'total_tokens': 687})

In [12]:
chunk

AIMessageChunk(content=[], response_metadata={'metrics': {'latencyMs': 3010}}, id='run-bbbf8a1d-5a5c-452f-8e5d-bf91b7d08522', usage_metadata={'input_tokens': 572, 'output_tokens': 167, 'total_tokens': 739})

In [13]:
stream

<generator object BaseChatModel.stream at 0x7259ea1d97e0>