### 1. Setup development environment

In [29]:
import sagemaker
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri
import time

# Build the SageMaker session once; the region and role below are reused by later cells.
sagemaker_session = sagemaker.Session()

# IAM role the notebook runs as; it is handed to the model definition further down.
role = sagemaker.get_execution_role()

# AWS region the session is bound to (used to pick the matching container image).
region = sagemaker_session.boto_region_name

print(f"role arn: {role}")

role arn: arn:aws:iam::532805286864:role/AdminRole


### 2. Retrieve the new Hugging Face LLM container image

In [30]:
# Resolve the container image URI for the current region.
# The backend is "huggingface" here; "lmi" is the alternative noted by the original author.
llm_image = get_huggingface_llm_image_uri("huggingface", region=region)

# Show the resolved ECR image URI so the exact container version is visible in the output.
print(f"llm image uri: {llm_image}")

llm image uri: 763104351884.dkr.ecr.ap-northeast-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi1.3.1-gpu-py310-cu121-ubuntu20.04


### 3. Deploy

In [31]:
import json
import os
from getpass import getpass
from sagemaker.huggingface import HuggingFaceModel

# sagemaker config
instance_type = "ml.g5.2xlarge"
number_of_gpu = 1
health_check_timeout = 900  # passed to deploy() as container_startup_health_check_timeout

# Never store the Hugging Face Hub token in the notebook: read it from the
# environment, and fall back to an interactive prompt that does not echo input.
# NOTE(review): a token was previously committed in this cell; treat it as
# compromised and revoke it in the Hugging Face account settings.
hf_token = os.environ.get("HUGGING_FACE_HUB_TOKEN") or getpass("Hugging Face Hub token: ")

# check if token is set
if not hf_token:
  raise ValueError("You have to provide a token.")

# Define Model and Endpoint configuration parameter
hub = {
  'HF_MODEL_ID': 'meta-llama/Llama-2-7b-chat-hf',
  # Keep the container's GPU count in sync with the config value above
  # instead of repeating the literal.
  'SM_NUM_GPUS': json.dumps(number_of_gpu),
  'HUGGING_FACE_HUB_TOKEN': hf_token,
}

# create HuggingFaceModel with the image uri
llm_model = HuggingFaceModel(
  role=role,
  image_uri=llm_image,
  env=hub
)


In [32]:
# Deploy the model definition to a single-instance endpoint, using the
# instance type and startup health-check timeout chosen in the config cell.
llm = llm_model.deploy(
  container_startup_health_check_timeout=health_check_timeout,
  instance_type=instance_type,
  initial_instance_count=1,
)

----------!

In [None]:
def build_llama2_prompt(messages):
  """Render a list of chat messages as a single Llama 2 chat prompt string.

  Args:
    messages: sequence of dicts, each with a 'role' and a 'content' key.
      A message with role "system" is only treated as the system prompt
      when it is the first element; a message with role "user" contributes
      its stripped content; any other message is rendered as a completed
      model turn followed by the opening of the next instruction.

  Returns:
    The prompt, starting with "<s>[INST] " and ending with " [/INST]".
  """
  start_prompt = "<s>[INST] "
  end_prompt = " [/INST]"
  conversation = []
  for index, message in enumerate(messages):
    if message['role'] == "system" and index == 0:
      # The closing delimiter is "<</SYS>>"; the previous "<<</SYS>>" had a
      # stray "<" and did not match the Llama 2 chat template.
      conversation.append(f"<<SYS>>\n{message['content']}\n<</SYS>>\n\n")
    elif message['role'] == "user":
      conversation.append(message['content'].strip())
    else:
      # Close the current instruction, emit the reply, and open the next turn.
      conversation.append(f" [/INST] {message['content'].strip()}</s><s>[INST] ")

  return start_prompt + "".join(conversation) + end_prompt

# Conversation seed: a single system message that sets the assistant persona.
messages = [
  {
    "role": "system",
    "content": (
      "You are a friendly and knowledgeable vacation planning assistant named Clara. "
      "Your goal is to have natural conversations with users to help them plan their perfect vacation. "
    ),
  },
]