# Download and Save the Model

To save this model so that you can use it from various locations, including other notebooks or the model server, upload it to S3-compatible storage.

## Install the required packages and define a function for the upload

## Download from Hugging Face

In [1]:
import os

# URL of the Hugging Face repository that the `git clone` cell below checks out.
git_repo = "https://huggingface.co/gpt2"


In [2]:
# IPython substitutes $git_repo with the value of the Python variable before running the shell command.
!git clone $git_repo

Cloning into 'gpt2'...
remote: Enumerating objects: 87, done.[K
remote: Counting objects: 100% (21/21), done.[K
remote: Compressing objects: 100% (7/7), done.[K
remote: Total 87 (delta 17), reused 13 (delta 13), pack-reused 66 (from 1)[K
Unpacking objects: 100% (87/87), 1.65 MiB | 7.49 MiB/s, done.


In [None]:
import os

# Use the last path component of the repository URL as the model name ("gpt2" for the URL above).
model_name = os.path.basename(git_repo)
model_name  # bare last expression so the notebook displays the value

In [4]:
pip install openvino-dev transformers torch onnx

Collecting onnx
  Downloading onnx-1.16.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.9/15.9 MB[0m [31m58.5 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting protobuf>=3.20.2
  Downloading protobuf-5.27.2-cp38-abi3-manylinux2014_x86_64.whl (309 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m309.3/309.3 kB[0m [31m184.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: protobuf, onnx
Successfully installed onnx-1.16.1 protobuf-5.27.2

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.2.2[0m[39;49m -> [0m[32;49m24.1.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


## Generate an ONNX version of GPT-2
Download the model from https://huggingface.co/openai-community/gpt2 and convert it to ONNX.

In [6]:
import os
from transformers import GPT2Model, GPT2Tokenizer
import torch

# Fetch the pretrained GPT-2 weights and the matching tokenizer from the Hugging Face Hub.
model_name = "openai-community/gpt2"
model = GPT2Model.from_pretrained(model_name)
tokenizer = GPT2Tokenizer.from_pretrained(model_name)

# Make sure the export directory exists before anything is written into it.
os.makedirs("gpt2-onnx", exist_ok=True)

# Random token ids in [0, 100) with shape (1, 10); only used to trace the graph.
dummy_input = torch.randint(100, (1, 10))

# Export to ONNX. Axes 0 and 1 of the named input and output are declared dynamic.
torch.onnx.export(
    model,
    dummy_input,
    "gpt2-onnx/gpt2.onnx",
    input_names=["input"],
    output_names=["output"],
    dynamic_axes={"input": [0, 1], "output": [0, 1]},
)

## Convert to OpenVINO IR format

In [10]:
import subprocess

# Command line for the OpenVINO Model Optimizer: read the exported ONNX file,
# fix the input shape to [1,10], and write the IR files into gpt2_ir/.
mo_command = [
    "mo",
    "--input_model", "gpt2-onnx/gpt2.onnx",
    "--input_shape", "[1,10]",
    "--output_dir", "gpt2_ir"
]

# Run the converter, capturing both output streams as text.
result = subprocess.run(mo_command, capture_output=True, text=True)

# Show what the tool reported before deciding whether the run succeeded.
print("stdout:", result.stdout)
print("stderr:", result.stderr)

# Fail this cell on a non-zero exit status instead of letting a broken
# conversion surface later as a confusing upload error.
result.check_returncode()

stdout: [ INFO ] MO command line tool is considered as the legacy conversion API as of OpenVINO 2023.2 release.
In 2025.0 MO command line tool and openvino.tools.mo.convert_model() will be removed. Please use OpenVINO Model Converter (OVC) or openvino.convert_model(). OVC represents a lightweight alternative of MO and provides simplified model conversion API. 
Find more information about transition from MO to OVC at https://docs.openvino.ai/2023.2/openvino_docs_OV_Converter_UG_prepare_model_convert_model_MO_OVC_transition.html
[ INFO ] Generated IR will be compressed to FP16. If you get lower accuracy, please consider disabling compression explicitly by adding argument --compress_to_fp16=False.
Find more information about compression to FP16 at https://docs.openvino.ai/2023.0/openvino_docs_MO_DG_FP16_Compression.html
[ SUCCESS ] Generated IR version 11 model.
[ SUCCESS ] XML file: /opt/app-root/src/gpt2_ir/gpt2.xml
[ SUCCESS ] BIN file: /opt/app-root/src/gpt2_ir/gpt2.bin

stderr: 


## Helper functions for upload

In [12]:
!pip install boto3 botocore

Collecting boto3
  Downloading boto3-1.34.145-py3-none-any.whl (139 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.2/139.2 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting botocore
  Downloading botocore-1.34.145-py3-none-any.whl (12.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.4/12.4 MB[0m [31m28.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting s3transfer<0.11.0,>=0.10.0
  Downloading s3transfer-0.10.2-py3-none-any.whl (82 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m82.7/82.7 kB[0m [31m136.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting jmespath<2.0.0,>=0.7.1
  Downloading jmespath-1.0.1-py3-none-any.whl (20 kB)
Collecting urllib3<1.27,>=1.25.4
  Downloading urllib3-1.26.19-py2.py3-none-any.whl (143 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.9/143.9 kB[0m [31m161.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: urllib3, jmespath, bot

In [15]:
import os
import boto3
import botocore

# Connection settings come from the environment; an unset variable yields None.
aws_access_key_id = os.getenv('AWS_ACCESS_KEY_ID')
aws_secret_access_key = os.getenv('AWS_SECRET_ACCESS_KEY')
endpoint_url = os.getenv('AWS_S3_ENDPOINT')
region_name = os.getenv('AWS_DEFAULT_REGION')
bucket_name = os.getenv('AWS_S3_BUCKET')

# Session carrying the explicit access key pair.
session = boto3.session.Session(
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
)

# S3 resource bound to the configured endpoint and region, using s3v4 request signing.
s3_resource = session.resource(
    's3',
    endpoint_url=endpoint_url,
    region_name=region_name,
    config=botocore.client.Config(signature_version='s3v4'),
)

# Handle for the bucket that the helper functions below read from and write to.
bucket = s3_resource.Bucket(bucket_name)

# Upload a model directory, leaving git metadata behind
def upload_directory_to_s3(local_directory, s3_prefix, remove_safetensors=True, target_bucket=None):
    """Recursively upload the files under ``local_directory`` to an S3 bucket.

    Files and directories whose own name starts with ``.git`` (for example
    ``.git/``, ``.gitattributes``, ``.gitignore``) are skipped. Other names
    that merely contain ".git" somewhere (such as ``model.github.bin``) are
    uploaded.

    Args:
        local_directory: Root directory to walk.
        s3_prefix: Key prefix under which the files are stored. Each key is
            the prefix joined with the file's path relative to
            ``local_directory``, always using "/" as separator.
        remove_safetensors: Accepted for backward compatibility. It currently
            has no effect; ``.safetensors`` files are uploaded like any other.
        target_bucket: Object exposing ``upload_file(path, key)``. Defaults to
            the module-level ``bucket``.
    """
    import posixpath  # local import: S3 keys use "/" regardless of the host OS

    destination = bucket if target_bucket is None else target_bucket

    for root, dirs, files in os.walk(local_directory):
        # Prune git directories in place so os.walk never descends into them.
        for dirname in [d for d in dirs if d.startswith(".git")]:
            skipped = os.path.relpath(os.path.join(root, dirname), local_directory)
            print(f"skipping {skipped}")
            dirs.remove(dirname)
        # Sort so the traversal (and therefore the upload order) is deterministic.
        dirs.sort()

        for filename in sorted(files):
            file_path = os.path.join(root, filename)
            relative_path = os.path.relpath(file_path, local_directory)
            if filename.startswith(".git"):
                print(f"skipping {relative_path}")
                continue
            s3_key = posixpath.join(s3_prefix, relative_path.replace(os.sep, "/"))
            print(f"{file_path} -> {s3_key}")
            destination.upload_file(file_path, s3_key)


def list_objects(prefix):
    """Print the key of every object in the bucket selected with ``Prefix=prefix``."""
    matching = bucket.objects.filter(Prefix=prefix)
    for entry in matching.all():
        print(entry.key)

## Check the Storage Bucket

In your S3 bucket, under the `models` upload prefix, run the `list_objects` function. As a best practice, to avoid mixing up model files, keep only one model and its required files in a given prefix or directory. This practice allows you to download and serve a directory with all the files that a model requires.

If this is the first time you run the code, this cell either produces no output or lists only the fraud model files from the predictive AI/ML exercise.


In [16]:
# Show what is stored under the "models" prefix before uploading anything.
list_objects("models")

## Upload and check again

Use the `upload_directory_to_s3` function to recursively upload each model directory under the `models` prefix:

In [17]:
# Upload the converted IR files from gpt2_ir/ to the models/gpt2_ir prefix.
model_name = "gpt2_ir"
upload_directory_to_s3(model_name, f"models/{model_name}")

gpt2_ir/gpt2.xml -> models/gpt2_ir/gpt2.xml
gpt2_ir/gpt2.bin -> models/gpt2_ir/gpt2.bin


In [22]:
# Upload the exported ONNX file from gpt2-onnx/ to the models/gpt2-onnx prefix.
model_name = "gpt2-onnx"
upload_directory_to_s3(model_name, f"models/{model_name}")

gpt2-onnx/gpt2.onnx -> models/gpt2-onnx/gpt2.onnx


To confirm this worked, run the `list_objects` function again:

This time, you should see files listed in the directory/prefix


In [20]:
# List the "models" prefix again to confirm the uploaded files are present.
list_objects("models")

models/gpt2_ir/gpt2.bin
models/gpt2_ir/gpt2.xml


### Next Step

Now that you've saved the model to S3 storage, you can refer to the model by using the same data connection to serve the model as an API.
