In [3]:
# create boto3 session for interacting with AWS
import boto3
# Session pinned to the us-east-1 region
session = boto3.Session(region_name="us-east-1")
# Bedrock runtime client obtained from that session
br_client = session.client(service_name="bedrock-runtime")


In [14]:
# Create LLM Model using claude-v2 model and langchain
from langchain.chat_models import BedrockChat
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Reference: https://python.langchain.com/docs/integrations/chat/bedrock
# Chat model wrapper for anthropic.claude-v2, routed through br_client.
chat_model = BedrockChat(
  client=br_client,
  model_id="anthropic.claude-v2",
  # Generation settings passed through to the model as-is.
  model_kwargs=dict(
    max_tokens_to_sample=512,
    temperature=0.9,
    top_p=0.9,
    top_k=50,
  ),
)


In [None]:
import gradio as gr
from operator import itemgetter
from langchain.prompts import MessagesPlaceholder, ChatPromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.schema.runnable import RunnableLambda, RunnablePassthrough

# System instruction placed at the start of every prompt
prompt = "You are a helpful AI assistant"

# Prompt layout: system instruction, stored conversation turns, new user message
chat_prompt = ChatPromptTemplate.from_messages([
  ("system", prompt),
  MessagesPlaceholder(variable_name="history"),
  ("human", "{input}"),
])

# Conversation store; its contents are exposed under the "history" key
memory = ConversationBufferMemory(return_messages=True, memory_key="history")

# Pipeline: add "history" (loaded from memory) to the input dict,
# render the chat prompt, then send it to the chat model.
chain = (
  RunnablePassthrough.assign(
    history=RunnableLambda(memory.load_memory_variables) | itemgetter("history")
  )
  | chat_prompt
  | chat_model
)

def stream_response(input, history):
  """Stream the model's reply to `input` for the Gradio chat interface.

  Each chunk produced by `chain.stream` is appended to the reply, and the
  accumulated text is yielded so the UI can render it incrementally. Once the
  stream completes, the exchange is written to `memory`. The chain reads its
  `history` from `memory`, so without this write the model would never see
  earlier turns.

  Args:
    input: The user's latest message. Nothing is yielded when it is None.
    history: Chat history supplied by Gradio. Unused here; prior turns come
      from `memory` instead.

  Yields:
    The reply text accumulated so far.
  """
  if input is None:
    return

  partial_message = ""
  for response in chain.stream({"input": input}):
    partial_message += response.content
    yield partial_message

  # Persist the completed turn so the next call's prompt includes it.
  # Skipped for an empty reply to avoid storing a blank AI message.
  # NOTE(review): `memory` is a single module-level object, so every browser
  # session talking to this app appears to share one history -- confirm that
  # is acceptable.
  if partial_message:
    memory.save_context({"input": input}, {"output": partial_message})

# Reference: https://www.gradio.app/docs/chatinterface
# Build the chat UI around stream_response, enable queuing, and launch it.
(
  gr.ChatInterface(fn=stream_response, title="Chat with Claude")
  .queue()
  .launch()
)

In [None]:
""