In [None]:
!export OPENAI_API_KEY="openai-apikey" 

In [None]:
%%writefile inference.py

import os
import json
import logging
import traceback
from flask import Flask, request, jsonify
import autogen
from autogen import AssistantAgent, UserProxyAgent

# Configure logging - SageMaker expects logs on stdout/stderr
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(message)s',
    handlers=[logging.StreamHandler()]
)
logger = logging.getLogger(__name__)

# Initialize Flask application
app = Flask(__name__)

# Get API key and model settings from environment variables with defaults
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "openai-apikey")
MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-3.5-turbo")
MAX_TOKENS = int(os.environ.get("MAX_TOKENS", "1000"))
TEMPERATURE = float(os.environ.get("TEMPERATURE", "0.7"))

# Set environment variable to disable Docker usage
os.environ["AUTOGEN_USE_DOCKER"] = "0"

# ReAct prompt for legal document analysis
REACT_PROMPT = """
You are a legal expert tasked with helping users review and plan legal documents. 
You can analyze clauses, detect potential issues, and suggest improvements for legal soundness. 
Use the following format:

Question: the input question or request
Thought: you should always think about what to do
Action: the action to take (if any)
Action Input: the input to the action (e.g., search query)
Observation: the result of the action
... (this process can repeat multiple times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question or request

Begin!
Question: {input}
"""

def get_llm_config():
    """Create and return LLM configuration for AutoGen"""
    try:
        # Check if API key is set
        if not OPENAI_API_KEY:
            raise ValueError("OPENAI_API_KEY environment variable is not set")
            
        # Create config list directly without helper function
        config_list = [
            {
                "model": MODEL_NAME,
                "cache_seed": None,
                "api_key": OPENAI_API_KEY
            }
        ]
        
        # Create LLM config
        llm_config = {
            "config_list": config_list,
            "max_tokens": MAX_TOKENS,
            "temperature": TEMPERATURE
        }
        
        logger.info(f"LLM config created successfully using model: {MODEL_NAME}")
        return llm_config
    except Exception as e:
        logger.error(f"Error creating LLM config: {str(e)}")
        logger.error(traceback.format_exc())
        raise

def create_agents():
    """Create and configure AutoGen agents"""
    try:
        # Get LLM config
        llm_config = get_llm_config()
        
        # Configure code execution to not use Docker
        code_execution_config = {
            "use_docker": False,  # Explicitly disable Docker
            "work_dir": "/tmp/autogen"  # Use a writable directory in SageMaker
        }
        
        # Assistant configuration
        assistant = AssistantAgent(
            name="LegalPlannerAssistant",
            system_message="You are a legal planner tasked with analyzing legal documents and planning legal actions. Use the ReAct framework provided.",
            llm_config=llm_config
        )
        
        # User proxy configuration
        user_proxy = UserProxyAgent(
            name="User",
            human_input_mode="NEVER",
            max_consecutive_auto_reply=5,
            is_termination_msg=lambda x: x.get("content", "") and "TERMINATE" in x.get("content", ""),
            code_execution_config=code_execution_config  # Apply code execution config
        )
        
        logger.info("Agents created successfully with Docker disabled")
        return assistant, user_proxy
    except Exception as e:
        logger.error(f"Error creating agents: {str(e)}")
        logger.error(traceback.format_exc())
        raise

def generate_response(text_input):
    """Generate response using AutoGen agents with ReAct prompting"""
    try:
        logger.info("Generating response for input")
        assistant, user_proxy = create_agents()
        
        # Format the input with ReAct prompt
        formatted_input = REACT_PROMPT.format(input=text_input)
        
        # Initiate chat with the formatted input
        user_proxy.initiate_chat(
            assistant,
            message=formatted_input
        )
        
        # Extract response from the conversation
        conversation = user_proxy.chat_messages[assistant.name]
        response = conversation[-1]["content"] if conversation else "No response generated"
        
        logger.info("Response generated successfully")
        return response
    except Exception as e:
        error_msg = f"Error generating response: {str(e)}"
        logger.error(error_msg)
        logger.error(traceback.format_exc())
        return error_msg

@app.route("/ping", methods=["GET"])
def ping():
    """Health check endpoint required by SageMaker"""
    try:
        # Check if API key is available
        if not OPENAI_API_KEY:
            logger.warning("OPENAI_API_KEY is not set")
            return jsonify({"status": "unhealthy", "reason": "Missing OpenAI API key"}), 500
        
        # Validate model configuration
        if not MODEL_NAME:
            logger.warning("MODEL_NAME is not set")
            return jsonify({"status": "unhealthy", "reason": "Missing model name"}), 500
            
        logger.info("Health check passed")
        return jsonify({"status": "healthy"}), 200
    except Exception as e:
        logger.error(f"Health check failed: {str(e)}")
        return jsonify({"status": "unhealthy", "reason": str(e)}), 500

@app.route("/invocations", methods=["POST"])
def invoke():
    """Inference endpoint for SageMaker"""
    try:
        # Parse input data from request
        if request.content_type == 'application/json':
            data = request.get_json()
            if not data:
                return jsonify({"error": "Empty request body"}), 400
                
            text_input = data.get("input", "")
            if not text_input:
                return jsonify({"error": "Missing 'input' field in request"}), 400
                
        else:
            return jsonify({"error": "Unsupported content type. Use application/json"}), 415
        
        # Log the input (truncated for security/privacy)
        input_preview = text_input[:100] + "..." if len(text_input) > 100 else text_input
        logger.info(f"Received input: {input_preview}")
        
        # Generate response
        response = generate_response(text_input)
        
        # Return the response with proper Content-Type for SageMaker
        return jsonify({"response": response})
    except Exception as e:
        error_msg = f"Error during inference: {str(e)}"
        logger.error(error_msg)
        logger.error(traceback.format_exc())
        return jsonify({"error": error_msg}), 500

# SageMaker specific handling for production
if __name__ == "__main__":
    # Get port from environment variable or use default
    port = int(os.environ.get("PORT", 8080))
    
    # In production, SageMaker expects the app to listen on 0.0.0.0 and port 8080
    app.run(host="0.0.0.0", port=port, debug=False)

Writing inference.py


In [3]:
%%writefile requirements.txt

flask==2.3.3
gunicorn==21.2.0
pyautogen==0.7.5
openai==1.64.0
boto3==1.28.38
botocore==1.31.38

Writing requirements.txt


In [4]:
%%writefile Dockerfile

FROM python:3.9-slim

# Set working directory
WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy application files
COPY inference.py /app/
COPY requirements.txt /app/

# Create the model directory that SageMaker expects
RUN mkdir -p /opt/ml/model

# Create and set permissions for cache directories
RUN mkdir -p /.cache && chmod 777 /.cache

# Create a writable directory for AutoGen's code execution
RUN mkdir -p /tmp/autogen && chmod 777 /tmp/autogen

# Install dependencies with extra debugging
RUN pip install --no-cache-dir -r requirements.txt && \
    pip list

# Create logs directory
RUN mkdir -p /var/log/autogen && chmod 777 /var/log/autogen

# Create a serve script that SageMaker expects
RUN echo '#!/bin/bash\ncd /app && gunicorn --bind 0.0.0.0:8080 --timeout 300 --workers 1 --log-level debug inference:app' > /usr/local/bin/serve && \
    chmod +x /usr/local/bin/serve

# Set cache directory to a writable location
ENV TRANSFORMERS_CACHE="/tmp/cache"
ENV HF_HOME="/tmp/cache"

# Environment variables
ENV MODEL_NAME="gpt-3.5-turbo"
ENV MAX_TOKENS="1000"
ENV TEMPERATURE="0.7"
ENV PYTHONUNBUFFERED=1
ENV AUTOGEN_USE_DOCKER="0"

# Make sure serve is in PATH
ENV PATH="/usr/local/bin:${PATH}"

# Expose the port
EXPOSE 8080

# Set working directory as per SageMaker requirements
WORKDIR /app

# Command to run when container starts
CMD ["serve"]

Writing Dockerfile


In [None]:
# Build the Docker image
!docker build -t llm-autogen-check-claude:latest .

# Tag the image for ECR
!docker tag llm-autogen-check-claude:latest 311141549115.dkr.ecr.us-east-1.amazonaws.com/llm-autogen-check-claude:latest

# Push to ECR
!docker push 311141549115.dkr.ecr.us-east-1.amazonaws.com/llm-autogen-check-claude:latest

In [None]:
import boto3
import sagemaker
import json
from sagemaker.model import Model
from sagemaker.serverless import ServerlessInferenceConfig

def deploy_serverless_endpoint():
    """
    Deploy a SageMaker serverless endpoint and print endpoint details.
    """
    # Configurations
    ecr_uri = "311141549115.dkr.ecr.us-east-1.amazonaws.com/llm-autogen-check-claude:latest"
    endpoint_name = "27legal-autogen-endpoint-version3-claude"
    role_arn = "arn:aws:iam::311141549115:role/Autogen-Multiagent"  # Replace
    memory_size = 2048
    max_concurrency = 5
    region = "us-east-1"

    # Initialize SageMaker session
    boto_session = boto3.Session(region_name=region)
    sagemaker_session = sagemaker.Session(boto_session=boto_session)

    print(f"🔹 Using role ARN: {role_arn}")
    print(f"🔹 ECR URI: {ecr_uri}")

    # Environment Variables
    environment = {
        "MODEL_NAME": "gpt-3.5-turbo",
        "MAX_TOKENS": "1000",
        "TEMPERATURE": "0.7",
        "OPENAI_API_KEY": "openai-apikey"  # Replace with actual key
    }

    # Create Model
    model = Model(
        image_uri=ecr_uri,
        role=role_arn,
        env=environment,
        name=endpoint_name
    )

    # Serverless Config
    serverless_config = ServerlessInferenceConfig(
        memory_size_in_mb=memory_size,
        max_concurrency=max_concurrency
    )

    # Deploy Model
    predictor = model.deploy(
        serverless_inference_config=serverless_config,
        endpoint_name=endpoint_name
    )

    # Print Deployment Info
    print(f"✅ **SageMaker Serverless Endpoint Created!**")
    print(f"🔹 **Endpoint Name**: {endpoint_name}")
    print(f"🔹 **Status**: InService (Check SageMaker Console)")
    print(f"🔹 **ARN**: arn:aws:sagemaker:{region}:311141549115:endpoint/{endpoint_name}")
    print(f"🔹 **Invoke URL**: https://runtime.sagemaker.{region}.amazonaws.com/endpoints/{endpoint_name}/invocations")

    return predictor

# Run Deployment
predictor = deploy_serverless_endpoint()



sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


🔹 Using role ARN: arn:aws:iam::311141549115:role/Autogen-Multiagent
🔹 ECR URI: 311141549115.dkr.ecr.us-east-1.amazonaws.com/llm-autogen-check-claude:latest


---------------!✅ **SageMaker Serverless Endpoint Created!**
🔹 **Endpoint Name**: 27legal-autogen-endpoint-version3-claude
🔹 **Status**: InService (Check SageMaker Console)
🔹 **ARN**: arn:aws:sagemaker:us-east-1:311141549115:endpoint/27legal-autogen-endpoint-version3-claude
🔹 **Invoke URL**: https://runtime.sagemaker.us-east-1.amazonaws.com/endpoints/27legal-autogen-endpoint-version3-claude/invocations


In [7]:
import boto3
import json

def test_endpoint():
    """
    Test the deployed endpoint with a sample input.
    """
    endpoint_name = "27legal-autogen-endpoint-version3-claude"
    region = "us-east-1"

    sample_input = """
    Analyze the following legal document for potential issues:

    This Agreement is made effective as of March 1st, 2024, by and between Omega Software Corp. ("Party A") 
    and Horizon Data Solutions ("Party B").

    1. Termination: Either party may terminate this Agreement with 60 days' notice. 
       In the case of a breach of any material obligation by Party B, Party A can terminate with 7 days' notice.

    2. Confidentiality: Both parties agree to maintain confidentiality regarding any proprietary information 
       for the duration of the Agreement and 5 years following its termination.
    """

    # Initialize runtime client
    runtime = boto3.client('sagemaker-runtime', region_name=region)

    # Prepare test payload
    payload = json.dumps({"input": sample_input})

    print(f"🔹 Testing endpoint: {endpoint_name}...")

    # Invoke endpoint
    try:
        response = runtime.invoke_endpoint(
            EndpointName=endpoint_name,
            ContentType='application/json',
            Body=payload
        )

        # Parse response
        result = json.loads(response['Body'].read().decode())
        print("✅ **Endpoint Test Successful!**")
        print(f"🔹 **Response:**\n{json.dumps(result, indent=2)}")
        return result
    except Exception as e:
        print(f"❌ **Error testing endpoint:** {str(e)}")
        return None

# Run Test
test_result = test_endpoint()

🔹 Testing endpoint: 27legal-autogen-endpoint-version3-claude...
✅ **Endpoint Test Successful!**
🔹 **Response:**
{
  "response": "Error generating response: Error code: 401 - {'error': {'message': 'Incorrect API key provided: sk-proj-********************************************************************************************************************************************************lwEA. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}"
}
