In [None]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
import os

# Load the previously saved FAISS vector store from ../data/faiss (relative to the current working directory).
def load_knowledgeBase():
    """Load the FAISS index stored under ``../data/faiss`` and return it.

    The index is opened with a HuggingFace embedding model
    (``NetherlandsForensicInstitute/robbert-2022-dutch-sentence-transformers``)
    configured with ``normalize_embeddings`` set to ``False``.

    Returns:
        The vector store object returned by ``FAISS.load_local``.
    """
    embedder = HuggingFaceEmbeddings(
        model_name="NetherlandsForensicInstitute/robbert-2022-dutch-sentence-transformers",
        encode_kwargs={'normalize_embeddings': False},
    )
    # The index directory is ``data/faiss`` under the parent of the current
    # working directory.
    parent_dir = os.path.normpath(os.getcwd() + os.sep + os.pardir)
    index_dir = parent_dir + '/data/faiss'
    # NOTE(review): the flag name signals that loading deserializes stored
    # objects; only point this at index files you trust.
    return FAISS.load_local(index_dir, embedder, allow_dangerous_deserialization=True)

In [None]:
#Import Dependencies
from langchain.prompts import ChatPromptTemplate

def load_prompt():
        """Build the chat prompt template used for question answering.

        Returns:
                The ``ChatPromptTemplate`` created from a template string that
                has a ``context`` and a ``question`` placeholder.
        """
        template_text = """ You need to answer the question in the sentence as same as in the  pdf content. . 
        Given below is the context and question of the user.
        context = {context}
        question = {question}
        if the answer is not in the pdf , answer "i donot know what the hell you are asking about"
         """
        return ChatPromptTemplate.from_template(template_text)

In [None]:
import sagemaker
import boto3
import json

# AWS login to use sagemaker endpoints
# Credentials come from a named local AWS profile.
AWS_PROFILE_NAME = 'vrt-analytics-engineer-nonsensitive'
session = boto3.Session(profile_name=AWS_PROFILE_NAME)

# SageMaker SDK session layered on the boto3 session, and the execution role
# resolved through it.
sagemaker_session = sagemaker.Session(boto_session=session)
role = sagemaker.get_execution_role(sagemaker_session=sagemaker_session)

# Runtime client later handed to the SagemakerEndpoint wrapper.
client = session.client("sagemaker-runtime")

In [None]:
# Show the value returned by sagemaker.get_execution_role as this cell's output.
role

In [None]:
# Load the LLM served from a SageMaker endpoint
from langchain import SagemakerEndpoint
from langchain.llms.sagemaker_endpoint import LLMContentHandler
from typing import Dict
import json

# NOTE: the triple-quoted string below is a disabled earlier draft of
# HFContentHandler (an "inputs"/"parameters" payload, with the prompt stripped
# from the returned text). As a bare string expression it defines nothing; the
# class statement that follows it is the handler actually in effect.
""" class HFContentHandler(LLMContentHandler):
    content_type = "application/json"
    accepts = "application/json"

    def transform_input(self, prompt: str, model_kwargs: Dict) -> bytes:
        self.len_prompt = len(prompt)
        input_dict = {
            "inputs": prompt,
            "parameters": model_kwargs
        }
        input_str = json.dumps(input_dict)
        print(input_str)
        return input_str.encode('utf-8')

    def transform_output(self, output: bytes) -> str:
        response_json = output.read()
        res = json.loads(response_json)
        print(res)

        # stripping away the input prompt from the returned response
        ans = res[0]['generated_text'][self.len_prompt:]
        ans = ans[:ans.rfind("Human")].strip()
        return ans """

class HFContentHandler(LLMContentHandler):
    """Content handler for a chat-style (``messages``) JSON inference endpoint.

    Requests are sent as a system + user ``messages`` payload merged with the
    model kwargs; responses are parsed from JSON and reduced to the generated
    text so that downstream components receive a plain string.
    """

    content_type = "application/json"
    accepts = "application/json"

    # System message sent ahead of every user prompt.
    system_prompt = "You are an assistant."

    def transform_input(self, prompt: str, model_kwargs: Dict) -> bytes:
        """Serialize ``prompt`` and ``model_kwargs`` into the request body.

        Args:
            prompt: Fully rendered prompt text; sent as the user message.
            model_kwargs: Extra top-level request fields (e.g. sampling
                options); ``None`` is treated as empty.

        Returns:
            UTF-8 encoded JSON request body.
        """
        payload = {
            "messages": [
                {"role": "system", "content": self.system_prompt},
                # Forward the caller's prompt. Previously a fixed demo question
                # was sent here and ``prompt`` was silently ignored.
                {"role": "user", "content": prompt},
            ],
            **(model_kwargs or {}),
        }
        return json.dumps(payload).encode("utf-8")

    def transform_output(self, output: bytes) -> str:
        """Extract the generated text from the endpoint response.

        Args:
            output: Response body; either a stream-like object exposing
                ``read()`` or raw bytes.

        Returns:
            The assistant message text when the response has the
            ``choices[0].message.content`` layout; otherwise the whole
            response re-serialized as a JSON string, so the declared ``str``
            return type always holds.
        """
        raw = output.read() if hasattr(output, "read") else output
        if isinstance(raw, (bytes, bytearray)):
            raw = raw.decode("utf-8")
        response_json = json.loads(raw)
        try:
            # NOTE(review): assumes the OpenAI-compatible chat-completion
            # response layout used by TGI's Messages API — confirm against the
            # deployed container.
            return response_json["choices"][0]["message"]["content"]
        except (KeyError, IndexError, TypeError):
            # Unknown layout: hand back the raw payload as text rather than a dict.
            return json.dumps(response_json)
    
# example parameters
# Passed to the endpoint wrapper as its model kwargs.
parameters = dict(
    model="meta-llama/Meta-Llama-3-8B-Instruct",  # placeholder, needed
    top_p=0.6,
    temperature=0.9,
    max_tokens=512,
    stop=["<|eot_id|>"],
)

# LangChain LLM wrapper around the deployed SageMaker inference endpoint.
llm = SagemakerEndpoint(
    # Where to send requests.
    client=client,
    region_name="eu-west-1",
    endpoint_name="huggingface-pytorch-tgi-inference-2024-07-30-20-23-30-977",
    # How to build requests and read responses.
    content_handler=HFContentHandler(),
    model_kwargs=parameters,
    endpoint_kwargs={"CustomAttributes": 'accept_eula=true'},
)

In [None]:
# Show the configured SagemakerEndpoint object as this cell's output.
llm

In [None]:
#Import Dependencies
import streamlit as sl
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

def format_docs(docs):
    """Concatenate the ``page_content`` of each document, separated by a blank line.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        One string with the contents joined by two newline characters
        (empty string for an empty iterable).
    """
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)

if __name__ == '__main__':
    # Streamlit page: take a question and answer it from the loaded vector store.
    sl.header("welcome to the pdf bot")
    knowledgeBase = load_knowledgeBase()
    prompt = load_prompt()

    query = sl.text_input('Enter some text')

    if query:
        # Retrieve straight from the loaded index. The earlier version first ran
        # a similarity search and then re-embedded the hits into a second FAISS
        # index with OpenAIEmbeddings — a name with no visible import here
        # (NameError at runtime) that also required a hardcoded API key.
        retriever = knowledgeBase.as_retriever()

        # Chain: retrieved chunks + question -> prompt -> llm -> plain string.
        rag_chain = (
            {"context": retriever | format_docs, "question": RunnablePassthrough()}
            | prompt
            | llm
            | StrOutputParser()
        )

        response = rag_chain.invoke(query)
        sl.write(response)

In [None]:

from langchain.prompts import ChatPromptTemplate
from langchain import LLMChain
# Pass-through prompt: the entire input is supplied through the single
# "content" variable.
template = "{content}"
prompt = ChatPromptTemplate.from_template(template)

# Chain pairing the pass-through prompt with the endpoint-backed llm.
llm_chain = LLMChain(llm=llm, prompt=prompt)

In [None]:
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)

from langchain.schema import AIMessage, HumanMessage

# Template strings for the system persona and the final human turn.
template = "Act as an experienced but grumpy high school teacher that teaches {subject}. Always give responses in one sentence with anger."
human_template = "{text}"

# Conversation layout: system persona, one fixed human/AI exchange, then the
# templated question.
chat_prompt = ChatPromptTemplate.from_messages([
    SystemMessagePromptTemplate.from_template(template),
    HumanMessage(content="Hello teacher!"),
    AIMessage(content="Welcome everyone!"),
    HumanMessagePromptTemplate.from_template(human_template),
])

# Fill in both placeholders and print the resulting message list.
template_values = {
    "subject": "Artificial Intelligence",
    "text": "What is the most powerful AI model?",
}
messages = chat_prompt.format_messages(**template_values)
print(messages)
