# Sample Notebook on how to run inference using `CodeT5+ 770M`


In [21]:
import transformers
import torch
print(transformers.__version__)
print(torch.__version__)
#assert transformers.__version__ == "4.12.3", f"wrong transformers version: {transformers.__version__}"
#assert "1.9.1" in torch.__version__  , f"wrong torch version: {torch.__version__}"

4.25.1
1.13.0+cu117


## Download the `codet5p_770m` from s3 

In [2]:
"""
import os, boto3
def download_file_from_s3(bucket_name=None, src_loc=None, dest_loc=None):
    s3 = boto3.resource("s3")
    bucket = s3.Bucket(bucket_name)
    for obj in bucket.objects.filter(Prefix=src_loc):
        target = obj.key if dest_loc is None \
            else os.path.join(dest_loc, os.path.relpath(obj.key, src_loc))
        if not os.path.exists(os.path.dirname(target)):
            os.makedirs(os.path.dirname(target))
        if obj.key[-1] == '/':
            continue
        bucket.download_file(obj.key, target)

download_file_from_s3('foundingblock', 'vast.ai/60eps', './codet5p_770m/')
"""

'\nimport os, boto3\ndef download_file_from_s3(bucket_name=None, src_loc=None, dest_loc=None):\n    s3 = boto3.resource("s3")\n    bucket = s3.Bucket(bucket_name)\n    for obj in bucket.objects.filter(Prefix=src_loc):\n        target = obj.key if dest_loc is None             else os.path.join(dest_loc, os.path.relpath(obj.key, src_loc))\n        if not os.path.exists(os.path.dirname(target)):\n            os.makedirs(os.path.dirname(target))\n        if obj.key[-1] == \'/\':\n            continue\n        bucket.download_file(obj.key, target)\n\ndownload_file_from_s3(\'foundingblock\', \'vast.ai/60eps\', \'./codet5p_770m/\')\n'

# Load `codeT5+ 770M` using `torch.load`

loading the model with `torch.load` took 7.7s

In [3]:
from transformers import AutoTokenizer,T5ForConditionalGeneration
import torch


In [None]:
# Load downloaded model and save as torch model
model = T5ForConditionalGeneration.from_pretrained('./codet5p_770m/')
torch.save(model, "./codet5p_770m/model.pt")
tokenizer.save_pretrained('./codet5p_770m/')


In [4]:
model = torch.load("./codet5p_770m/model.pt")
tokenizer = AutoTokenizer.from_pretrained("./codet5p_770m/")

In [5]:
from transformers import pipeline

#tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")
gen = pipeline("text2text-generation",model=model,tokenizer=tokenizer,device=0)

In [6]:

prompt = "question: Show the name and the release year of the song by the youngest singer. schema:  | concert_singer | stadium : stadium_id, location, name, capacity, highest, lowest, average | singer : singer_id, name, country, song_name, song_release_year, age, is_male | concert : concert_id, concert_name, theme, stadium_id, year | singer_in_concert : concert_id, singer_id)"

gen(prompt)

[{'generated_text': 'SELECT song_name ,  song_release_year FROM singer ORDER BY age ASC'}]

# Test text2sql interpreter

In [7]:
import sys, os
sys.path.append('code')
from inference import model_fn
generator = model_fn("./codet5p_770m/")


In [8]:
from text2sql.evaluation_examples import examples
example = examples.examples[0]
db_id, create_table_sql, question, query = list(example.values())
inputs = [db_id, create_table_sql, question]

In [None]:
print(inputs)

In [9]:
generator(inputs)

./station_weather/station_weather.sqlite

            PRAGMA foreign_keys = ON;


            CREATE TABLE "train" (
                "id" int,
                "train_number" int,
                "name" text,
                "origin" text,
                "destination" text,
                "time" text,
                "interval" text,
                primary key ("id")
            );
table "train" already exists
./station_weather/station_weather.sqlite SELECT LOCAL_Authorities ,  LOCAL_Services FROM station
no such column: LOCAL_Authorities


[{'generated_text': 'SELECT LOCAL_Authorities ,  LOCAL_Services FROM station',
  'Result': None}]

# Creating `model.tar.gz` for sagemaker deployment

In [10]:
import tarfile
import os

def compress(tar_dir=None,output_file="model.tar.gz"):
    with tarfile.open(output_file, "w:gz") as tar:
        tar.add(tar_dir, arcname=os.path.sep)
            

import boto3

def upload_file_to_s3(bucket_name=None,file_name="model.tar.gz",key_prefix=""):
    s3 = boto3.resource('s3')
    key_prefix_with_file_name = os.path.join(key_prefix,file_name)
    s3.Bucket(bucket_name).upload_file(file_name, key_prefix_with_file_name)
    return f"s3://{bucket_name}/{key_prefix_with_file_name}"

In [18]:
import os
import shutil 
import tarfile
import torch
from transformers import AutoTokenizer,GPTJForCausalLM

def compress(tar_dir=None,output_file="model.tar.gz"):
    with tarfile.open(output_file, "w:gz") as tar:
        tar.add(tar_dir, arcname=os.path.sep)
            

import boto3

def upload_file_to_s3(bucket_name=None,file_name="model.tar.gz",key_prefix=""):
    s3 = boto3.resource('s3')
    key_prefix_with_file_name = os.path.join(key_prefix,file_name)
    s3.Bucket(bucket_name).upload_file(file_name, key_prefix_with_file_name)
    return f"s3://{bucket_name}/{key_prefix_with_file_name}"

bucket_name="model-stash"
key_prefix = "codet5p_770m"
checkpoint = "./codet5p_770m/"
model_save_dir = f"./tmp_{key_prefix}"
src_inference_script = "code"
dst_inference_script = os.path.join(model_save_dir, "code")

os.makedirs(model_save_dir, exist_ok=True)
#os.makedirs(dst_inference_script, exist_ok=True)

# load model
print("Loading model from `./codet5p_770m/`")
model = T5ForConditionalGeneration.from_pretrained(checkpoint)

print("saving model with `torch.save`")
torch.save(model, os.path.join(model_save_dir, f"model.pt"))

print("saving tokenizer")
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
tokenizer.save_pretrained(model_save_dir)

# copy inference script
print("copying 'code' directory")
shutil.copytree(src_inference_script, dst_inference_script)

# create archive
print("creating `model.tar.gz` archive")
compress(model_save_dir)

# upload to s3
print(
    f"uploading `model.tar.gz` archive to s3://{bucket_name}/{key_prefix}/model.tar.gz"
)
model_uri = upload_file_to_s3(bucket_name=bucket_name, key_prefix=key_prefix)
print(f"Successfully uploaded to {model_uri}")

model_uri


Loading model from `./codet5p_770m/`
saving model with `torch.save`
saving tokenizer
copying inference.py script
creating `model.tar.gz` archive
uploading `model.tar.gz` archive to s3://foundingblock/codet5p_770m/model.tar.gz
Successfully uploaded to s3://foundingblock/codet5p_770m/model.tar.gz


's3://foundingblock/codet5p_770m/model.tar.gz'

(Optional) use **bash scripting** to upload compressed model file to the bucket

In [19]:
%bash
tar zcvf model.tar.gz *
aws s3 cp model.tar.gz s3://model-stash/codet5p_770m/model.tar.gz


SyntaxError: invalid syntax (645606951.py, line 2)

## Deploy endpoint

In [9]:
!python -m pip install sagemaker boto3 torch==1.13.1 transformers==4.26 protobuf==3.20.0

You should consider upgrading via the '/Users/weichaozhou/Workspace/virtualenv_legionai/bin/python -m pip install --upgrade pip' command.[0m[33m
[0m

In [14]:
import sagemaker, boto3

iam_client = boto3.client('iam')
role = iam_client.get_role(RoleName='AmazonSageMaker-ExecutionRole-20230622T164667')['Role']['Arn']
sess = sagemaker.Session()

In [25]:
from sagemaker.huggingface import HuggingFaceModel
import boto3
import os

os.environ["AWS_DEFAULT_REGION"]="us-east-2"
 
model_uri="s3://model-stash/codet5p_770m/model.tar.gz"
 
image_uri="763104351884.dkr.ecr.us-east-2.amazonaws.com/huggingface-pytorch-training:1.13.1-transformers4.26.0-gpu-py39-cu117-ubuntu20.04"
# create Hugging Face Model Class
huggingface_model = HuggingFaceModel(
    image_uri = image_uri,
    model_data=model_uri,
	#transformers_version='4.26',
	#pytorch_version='1.13.1',
	#py_version='py39',
	role=role, 
)


# deploy model to SageMaker Inference
predictor = huggingface_model.deploy(
	initial_instance_count=1, # number of instances
	instance_type='ml.m4.4xlarge'  # ec2 instance type
)


-----------------------------------------------------------*

UnexpectedStatusException: Error hosting endpoint huggingface-pytorch-training-2023-06-23-20-07-53-554: Failed. Reason: The primary container for production variant AllTraffic did not pass the ping health check. Please check CloudWatch logs for this endpoint..

In [None]:
from text2sql.evaluation_examples import examples
example = examples.examples[0]
db_id, create_table_sql, question, query = list(example.values())
inputs = [db_id, create_table_sql, question]


In [None]:
print(inputs)

In [None]:
predictor.predict({
	'inputs': inputs
})

parameterized request

In [23]:
predictor.predict({
	'inputs': "Can you please let us know more details about your ",
  "parameters" : {
    "min_length": 120,
    "temperature": 0.9,
  }
})

[{'generated_text': 'Can you please let us know more details about your \nissue?\n\nA:\n\nThe problem was caused by my lack of understanding on how web sockets \n  worked. Once I understood how they work; I was able to fix'}]

custom end of sequence token. 

In [None]:
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-j-6B")

end_sequence="."
temparature=40
max_generated_token_length=50
input="Can you please let us know more details about your "

predictor.predict({
	'inputs': input,
  "parameters" : {
    "min_length": int(len(input) + max_generated_token_length),
    "temperature":temparature,
    "eos_token_id": tokenizer.convert_tokens_to_ids(end_sequence)
  }
})

In [32]:
predictor.delete_endpoint()