## Test the deployed model by sending it a few queries

**Install the necessary modules**

In [None]:
# Upgrade pip itself first, then install the LangChain OpenAI integration.
# -q reduces installer output; -U upgrades the package if it is already installed.
# NOTE(review): no version is pinned here, so a re-run may pick up a different
# release of langchain-openai — consider pinning once a known-good version is chosen.
%pip install --upgrade pip
%pip install -qU langchain-openai

**Declare the inference endpoint and other variables**

In [None]:
# Base URL of the served model's API; replace the placeholder before running.
inference_endpoint = "<RHOAI_MODEL_ENDPOINT>/v1"
# Chat-completions route derived from the base URL.
predict_endpoint = f"{inference_endpoint}/chat/completions"

# Model name sent with each request.
model_id = "llama-31b"

# Prompt pieces: a fixed system instruction and a human template with a {question} slot.
system_msg = (
  "Do not engage in additional dialog. "
  "Make your answer as concise as possible. "
  "You should only be answering one question at a time. "
)
human_msg = "{question}"

# Sampling temperature handed to the chat model.
temperature = 0.1

**Create required functions**

In [11]:
from typing import Any, Union

from langchain_core.messages import SystemMessage
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, PromptTemplate
from langchain_openai import ChatOpenAI

def create_chat_model(model_id: str, temp: float, inference_endpoint: str, api_key: str):
  """Build a ChatOpenAI client for the given endpoint.

  Args:
    model_id: Passed to ChatOpenAI as ``model``.
    temp: Passed to ChatOpenAI as ``temperature``.
    inference_endpoint: Passed to ChatOpenAI as ``base_url``.
    api_key: Passed to ChatOpenAI as ``api_key``.

  Returns:
    The constructed ChatOpenAI instance.
  """
  client_options = {
    "model": model_id,
    "temperature": temp,
    "base_url": inference_endpoint,
    "api_key": api_key,
  }
  return ChatOpenAI(**client_options)

def create_chat_chain(chat_model: Any, system_msg: str, human_msg: str) -> Any:
  """Compose a prompt-then-model chain.

  Args:
    chat_model: Object the prompt is piped into with ``|``
      (in this notebook, the ChatOpenAI instance).
    system_msg: Text of the system message. It is wrapped in a
      SystemMessage, i.e. a concrete message rather than a template.
    human_msg: Template string for the human turn, handed to
      ``HumanMessagePromptTemplate.from_template``.

  Returns:
    The result of ``prompt | chat_model``.
  """
  # Bind the message objects to new names so the str parameters keep their type.
  system_message = SystemMessage(content=system_msg)
  human_template = HumanMessagePromptTemplate.from_template(human_msg)
  prompt = ChatPromptTemplate.from_messages([system_message, human_template])

  return prompt | chat_model

def invoke_model(chat_chain: Any, input: Union[str, dict]) -> None:
  """Run one query through the chain and print the reply text.

  Args:
    chat_chain: Object exposing ``invoke``; its result must have a
      ``content`` attribute.
    input: Value forwarded unchanged to ``chat_chain.invoke``. The
      notebook passes a bare question string; a dict of template
      variables is accepted as well.
      NOTE(review): a bare string relies on the prompt having exactly
      one input variable — confirm against the LangChain prompt docs.
      The parameter name shadows the builtin ``input`` but is kept so
      keyword callers keep working.

  Returns:
    None. The reply is printed, not returned.
  """
  response = chat_chain.invoke(input)
  print(response.content)


**Create chat model and chain**

In [12]:
# 'DUMMY_KEY' is a placeholder value for the required api_key argument.
# NOTE(review): presumably the endpoint does not validate the key — confirm.
chat_model = create_chat_model(
  model_id=model_id,
  temp=temperature,
  inference_endpoint=inference_endpoint,
  api_key='DUMMY_KEY',
)
chat_chain = create_chat_chain(
  chat_model=chat_model,
  system_msg=system_msg,
  human_msg=human_msg,
)

**Query the model with different questions**

In [None]:
invoke_model(chat_chain, input="What's the capital of France?")

In [None]:
invoke_model(chat_chain, input="What's the capital of Spain?")

In [None]:
invoke_model(chat_chain, input="What's the capital of Canada?")

In [None]:
invoke_model(chat_chain, input="What's the capital of India?")

In [None]:
invoke_model(chat_chain, input="What's the capital of USA?")

In [None]:
invoke_model(chat_chain, input="How's milk made?")

In [None]:
invoke_model(chat_chain, input="How's pizza made?")

In [None]:
invoke_model(chat_chain, input="How's chocolate made?")