# 部署语言模型

## 上传本地代码到 S3

In [None]:
# Run the deploy_s3.sh helper script from the current directory with sh.
# Per the section heading this uploads the local code to S3; the script's
# contents are not shown in this notebook, so check it before running.
!sh ./deploy_s3.sh

## 在 SageMaker 上部署模型

In [1]:
import boto3  
from sagemaker.huggingface.model import HuggingFaceModel


# S3 URI of the packaged model archive, handed to the model as `model_data`.
s3_model = "s3://cloudbeer-llm-models/llm/chatglm2-6b-model.tar.gz"

# Look up the IAM role by name and keep its ARN; it is passed to the model as `role`.
iam_client = boto3.client("iam")
role_response = iam_client.get_role(RoleName="HuggingfaceExecuteRole")
role = role_response["Role"]["Arn"]

# Describe the model: the archive above plus the custom entry point script
# from src/code, pinned to specific transformers / PyTorch / Python versions.
huggingface_model = HuggingFaceModel(
    model_data=s3_model,
    role=role,
    entry_point="inference.py",
    source_dir="src/code",
    transformers_version="4.26",
    pytorch_version="1.13",
    py_version="py39",
)

# Deploy to an endpoint named "llm-models" backed by one ml.g4dn.2xlarge instance.
predictor = huggingface_model.deploy(
    endpoint_name="llm-models",
    instance_type="ml.g4dn.2xlarge",
    initial_instance_count=1,
)

---------!

## 删除模型和终端节点

In [None]:
from sagemaker.huggingface.model import HuggingFacePredictor

# Build a predictor handle bound to the endpoint named "llm-models".
predictor = HuggingFacePredictor(endpoint_name="llm-models")

# Delete the model, then the endpoint. Keep this order.
# NOTE(review): presumably delete_model() needs the endpoint to still exist in
# order to look up its models — confirm against the SageMaker SDK docs.
predictor.delete_model()
predictor.delete_endpoint()