In [1]:
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer

def _illustrate_model_tracing(model: "T5ForConditionalGeneration", tokenizer: "T5Tokenizer") -> None:
    """Attempt a TorchScript trace of ``model`` and print how it went.

    This is a deployment-oriented illustration only; it has no effect on the
    inference output. Any failure is reported on stdout instead of being raised.
    """
    print("\n--- Illustrating Model Tracing (Conceptual for Deployment) ---")
    # Create a dummy input for tracing.
    dummy_input = tokenizer("Hello world!", return_tensors="pt").input_ids
    try:
        # NOTE(review): only input_ids are supplied here. A seq2seq forward pass
        # usually also needs decoder inputs, so this attempt may well end up in
        # the except branch -- confirm before relying on the traced module.
        traced_model = torch.jit.trace(model, dummy_input)
    except Exception as e:
        # Best-effort demo: report the failure rather than aborting the caller.
        print(f"Could not trace model (this is normal if not specifically configured for tracing all ops): {e}")
        print("Tracing typically requires a more controlled environment or specific model configurations.")
    else:
        print(f"Model successfully traced with TorchScript. Type: {type(traced_model)}")
        # In a real scenario the traced module would be saved for deployment,
        # e.g. with traced_model.save(...).
        print("Note: The traced model is for deployment/optimization, not directly part of this live inference output.")


def run_encoder_decoder_inference(
    input_text: str,
    model_name: str = "t5-small",
    *,
    task_prefix: str = "summarize: ",
    max_length: int = 150,
    min_length: int = 40,
    num_beams: int = 4,
) -> None:
    """Run a pre-trained T5 encoder-decoder model on a text and print the result.

    Loads the tokenizer and model, prepends the task prefix, generates output
    with beam search, prints both the prefixed input and the decoded output,
    and finally attempts an illustrative TorchScript trace of the model.

    Args:
        input_text: The text to be processed by the model.
        model_name: The name of the pre-trained T5 model to use
            (e.g., "t5-small", "t5-base").
        task_prefix: Text prepended to ``input_text`` to select the task
            (T5 models often expect one, e.g. "summarize: ").
        max_length: Maximum length of the generated sequence.
        min_length: Minimum length of the generated sequence.
        num_beams: Number of beams for beam search (higher usually means
            better quality but slower generation).

    Raises:
        ImportError: If the SentencePiece library required by ``T5Tokenizer``
            is not installed in the environment.
    """
    print(f"Loading tokenizer and model: {model_name}...")
    tokenizer = T5Tokenizer.from_pretrained(model_name)
    model = T5ForConditionalGeneration.from_pretrained(model_name)
    print("Model loaded successfully!")

    # --- Step 1: Prepare Input ---
    full_input = task_prefix + input_text

    print("\n--- Input Text ---")
    print(full_input)

    # Encode to PyTorch tensors. Truncation keeps over-long text within the
    # tokenizer's configured maximum length; note that the text printed above
    # is the untruncated input.
    encoded = tokenizer(full_input, return_tensors="pt", truncation=True)

    # --- Step 2: Generate Output ---
    print(f"\n--- Generating Output (Max length: {max_length}, Min length: {min_length}) ---")
    # 'generate' drives the encoder-decoder pass and the beam search. The
    # attention mask produced by the tokenizer is passed along explicitly
    # instead of being discarded.
    output_ids = model.generate(
        encoded.input_ids,
        attention_mask=encoded.attention_mask,
        max_length=max_length,
        min_length=min_length,
        num_beams=num_beams,
        early_stopping=True,  # Stop once all beam hypotheses have finished
    )

    # --- Step 3: Decode Output ---
    # Only the first returned sequence is decoded back to human-readable text.
    generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    print("\n--- Generated Output ---")
    print(generated_text)

    # --- Optional: tracing illustration (for deployment, not part of inference) ---
    _illustrate_model_tracing(model, tokenizer)

# --- Main execution ---
if __name__ == "__main__":
    # Sample passages; each one is fed to the model in its own run.
    example_text = """
    Artificial intelligence (AI) is intelligence demonstrated by machines,
    as opposed to the natural intelligence displayed by humans and animals.
    Leading AI textbooks define the field as the study of "intelligent agents":
    any device that perceives its environment and takes actions that maximize
    its chance of successfully achieving its goals. Colloquially, the term "artificial intelligence"
    is often used to describe machines that mimic "cognitive" functions that humans associate with
    the human mind, such as "learning" and "problem-solving".
    """
    example_text_2 = """
    The quick brown fox jumps over the lazy dog. This sentence is famous for
    containing all letters of the English alphabet. It is often used for
    testing typewriters and computer keyboards, as well as in typography,
    and for demonstrating text-based fonts and other applications.
    """

    # One (passage, extra keyword arguments) pair per run, in execution order.
    demo_runs = (
        (example_text, {}),
        (example_text_2, {"model_name": "t5-small"}),
    )
    for run_index, (passage, extra_kwargs) in enumerate(demo_runs):
        if run_index > 0:
            # Visual divider between consecutive runs.
            print("\n" + "=" * 80 + "\n")
        run_encoder_decoder_inference(passage, **extra_kwargs)

Loading tokenizer and model: t5-small...


ImportError: 
T5Tokenizer requires the SentencePiece library but it was not found in your environment. Checkout the instructions on the
installation page of its repo: https://github.com/google/sentencepiece#installation and follow the ones
that match your environment. Please note that you may need to restart your runtime after installation.
