In [1]:
import torch
import torch_neuronx
# Report the installed framework versions as one two-line message so the
# runtime behind this notebook is recorded in the cell output.
print(
    "\n".join(
        (
            f"‚úÖ PyTorch: {torch.__version__}",
            f"‚úÖ Neuron SDK: {torch_neuronx.__version__}",
        )
    )
)

‚úÖ PyTorch: 2.9.0+cu128
‚úÖ Neuron SDK: 2.9.0.2.11.19912+e48cd891


In [2]:
import os

# The lifecycle script copies artifacts here so they are visible inside the Docker kernel
artifact_path = '/home/ec2-user/SageMaker/neuron-compiled-models'

if os.path.exists(artifact_path):
    print(f"\n‚úÖ Found compilation artifacts at: {artifact_path}")
    
    print("\nüìÅ Compilation output directory:")
    !ls -lh {artifact_path}/output/
    
    print("\nüìÑ Original model files:")
    !ls -lh {artifact_path}/model/
else:
    print(f"\n‚ö†Ô∏è Artifacts not found at {artifact_path}")
    print("If the lifecycle script is still running, wait a few minutes and try again.")


‚úÖ Found compilation artifacts at: /home/ec2-user/SageMaker/neuron-compiled-models

üìÅ Compilation output directory:
total 81M
-rw-r--r-- 1 ubuntu ubuntu 81M Jan 20 00:30 compiled_model.pt
-rw-r--r-- 1 ubuntu ubuntu 213 Jan 20 00:30 result.json

üìÑ Original model files:
total 88M
-rw-r--r-- 1 ubuntu ubuntu  978 Jan 20 00:30 config.json
-rw-r--r-- 1 ubuntu ubuntu  87M Jan 20 00:30 model.safetensors
-rw-r--r-- 1 ubuntu ubuntu  695 Jan 20 00:30 special_tokens_map.json
-rw-r--r-- 1 ubuntu ubuntu 695K Jan 20 00:30 tokenizer.json
-rw-r--r-- 1 ubuntu ubuntu 1.5K Jan 20 00:30 tokenizer_config.json
-rw-r--r-- 1 ubuntu ubuntu 227K Jan 20 00:30 vocab.txt


In [3]:
import json
import os

# Location of result.json inside the persistent artifact directory
result_path = '/home/ec2-user/SageMaker/neuron-compiled-models/output/result.json'

try:
    if not os.path.exists(result_path):
        # Nothing to display yet: report the missing file instead of raising.
        print(f"‚ö†Ô∏è Result file not found at: {result_path}")
        print("The lifecycle script may still be running, or compilation failed.")
    else:
        with open(result_path, 'r') as f:
            result = json.loads(f.read())

        # Echo the parsed document back, pretty-printed with a 2-space indent.
        print("üìä Compilation Result:")
        print(json.dumps(result, indent=2))

except Exception as e:
    # Any failure while checking, reading or parsing is reported, not propagated.
    print(f"‚ùå Error reading result: {e}")

üìä Compilation Result:
{
  "status": "COMPATIBLE",
  "message": "Model compiled and loaded successfully",
  "torch_neuronx_version": "2.9.0.2.11.19912+e48cd891",
  "pytorch_version": "2.9.0+cu128",
  "environment": "Docker Neuron DLC"
}


In [4]:
import json
import os

# Summarise the compilation outcome recorded in result.json.
#   - status == 'COMPATIBLE': print whichever optional metadata fields are
#     present, the notebook-visible path of the compiled model, and the message.
#   - any other status: print the status and the first available error text.
#   - file missing: ask the reader to wait and re-run the cell.
result_path = '/home/ec2-user/SageMaker/neuron-compiled-models/output/result.json'

try:
    if os.path.exists(result_path):
        with open(result_path, 'r') as f:
            result = json.load(f)

        if result.get('status') == 'COMPATIBLE':
            print("\n‚úÖ Your model compiled successfully!")

            # These keys are optional; each is printed only when present.
            if 'input_shape' in result:
                print(f"üìê Input Shape: {result['input_shape']}")
            if 'model_type' in result:
                print(f"ü§ñ Model Type: {result['model_type']}")
            if 'detected_architecture' in result:
                print(f"üèóÔ∏è  Architecture: {result['detected_architecture']}")

            # The output path in result.json might refer to the compile-time path
            # We print the path accessible to this notebook
            model_path = '/home/ec2-user/SageMaker/neuron-compiled-models/output/compiled_model.pt'
            print(f"üíæ Compiled Model: {model_path}")

            print(f"\n{result.get('message', 'Compilation completed')}")
        else:
            print(f"\n‚ùå Compilation status: {result.get('status')}")
            # Check various error fields; the first non-empty one wins,
            # falling back to 'message' and finally to 'N/A'.
            error = result.get('error') or result.get('error_message') or result.get('message', 'N/A')
            print(f"Error: {error}")
            print("\n‚ö†Ô∏è  Stop here - compilation failed.")
    else:
        print(f"‚ö†Ô∏è Result file not found at: {result_path}")
        print("Compilation may still be running. Wait a few minutes and re-run this cell.")

except Exception as e:
    # Surface the failure (unreadable file, malformed JSON, JSON that is not an
    # object, ...) so it is not silently swallowed. A bare `print` expression
    # here would only evaluate the function object and output nothing.
    print(f"‚ùå Error reading result: {e}")


‚úÖ Your model compiled successfully!
üíæ Compiled Model: /home/ec2-user/SageMaker/neuron-compiled-models/output/compiled_model.pt

Model compiled and loaded successfully


In [5]:
import torch
import torch_neuronx
from transformers import AutoTokenizer
import os

# NOTE(review): tqdm appears to read TQDM_* overrides when it is first
# imported; if so, setting this after the transformers import has no effect.
# Confirm, and if needed move this assignment above the imports.
os.environ["TQDM_DISABLE"] = "1"

# Path where artifacts are persisted
base_path = '/home/ec2-user/SageMaker/neuron-compiled-models'
model_path = f'{base_path}/output/compiled_model.pt'
tokenizer_path = f'{base_path}/model/'

# Reset both handles before loading. The readiness check at the end must
# reflect THIS execution of the cell: a value left in the kernel by an earlier
# run would otherwise make a failed load look successful, and on a fresh
# kernel a failed load would leave the name undefined.
compiled_model = None
tokenizer = None

print(f"Loading compiled model from {model_path}...")

# Load the Neuron-compiled model
try:
    compiled_model = torch.jit.load(model_path)
    print("‚úÖ Compiled model loaded successfully!")
except Exception as e:
    # Report but do not re-raise, so the tokenizer load below is still
    # attempted and both problems show up in a single run.
    print(f"‚ùå Failed to load compiled model: {e}")

# Load tokenizer from original model files
print(f"Loading tokenizer from {tokenizer_path}...")
try:
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
    print("‚úÖ Tokenizer loaded successfully!")
except Exception as e:
    print(f"‚ùå Failed to load tokenizer: {e}")

# Only announce readiness when both loads succeeded in this run.
if compiled_model is not None and tokenizer is not None:
    print("\n‚úÖ Model and Tokenizer ready for inference on Neuron!")

  from .autonotebook import tqdm as notebook_tqdm


Loading compiled model from /home/ec2-user/SageMaker/neuron-compiled-models/output/compiled_model.pt...
‚úÖ Compiled model loaded successfully!
Loading tokenizer from /home/ec2-user/SageMaker/neuron-compiled-models/model/...
‚úÖ Tokenizer loaded successfully!

‚úÖ Model and Tokenizer ready for inference on Neuron!
