<a href="https://colab.research.google.com/github/wesslen/gpu-testing/blob/main/notebooks/transformers_baseline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import torch
from transformers.configuration_utils import PretrainedConfig
from transformers.modeling_utils import PreTrainedModel
from time import time
import numpy as np

class BenchmarkConfig(PretrainedConfig):
    """Configuration object for the synthetic benchmark model.

    Attributes:
        hidden_size: Feature width of every layer's input and output.
        num_hidden_layers: Number of feed-forward blocks the model stacks.
        num_attention_heads: Stored on the config; no code in this file
            reads it.
    """

    model_type = "benchmark"

    def __init__(
        self,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        **kwargs,
    ):
        # Record the benchmark-specific fields first, then forward every
        # remaining keyword argument to the base configuration class.
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        super().__init__(**kwargs)

class BenchmarkModel(PreTrainedModel):
    """Stack of residual feed-forward blocks used as a benchmark workload.

    Each block is ``Linear(hidden, 4 * hidden) -> GELU ->
    Linear(4 * hidden, hidden) -> LayerNorm(hidden)``, and the block's
    output is added to the block's input.
    """

    config_class = BenchmarkConfig

    def __init__(self, config):
        """Build ``config.num_hidden_layers`` blocks of width ``config.hidden_size``."""
        super().__init__(config)

        width = config.hidden_size
        expanded_width = config.hidden_size * 4

        blocks = []
        for _ in range(config.num_hidden_layers):
            # Submodule order is kept as-is so parameter names and the
            # order of random initialisation stay the same.
            blocks.append(
                torch.nn.Sequential(
                    torch.nn.Linear(width, expanded_width),
                    torch.nn.GELU(),
                    torch.nn.Linear(expanded_width, width),
                    torch.nn.LayerNorm(width),
                )
            )
        self.layers = torch.nn.ModuleList(blocks)

    def forward(self, x):
        """Apply every block in order, adding each block's input to its output."""
        hidden_states = x
        for block in self.layers:
            hidden_states = block(hidden_states) + hidden_states
        return hidden_states

def run_benchmark(batch_size=32, seq_length=512, num_warmup=10, num_iterations=50):
    """Time forward passes of ``BenchmarkModel`` and print latency statistics.

    The model is built from a default ``BenchmarkConfig``, moved to CUDA when
    available (otherwise CPU), and fed a random tensor of shape
    ``(batch_size, seq_length, hidden_size)``.

    Args:
        batch_size: First dimension of the random input tensor.
        seq_length: Second dimension of the random input tensor.
        num_warmup: Number of untimed forward passes run before measuring.
        num_iterations: Number of timed forward passes; must be at least 1.

    Raises:
        ValueError: If ``num_iterations`` is smaller than 1, since no
            statistics can be computed from zero samples.
    """
    # Local import: perf_counter is a monotonic, high-resolution clock, which
    # suits interval timing better than the wall-clock time().
    from time import perf_counter

    if num_iterations < 1:
        raise ValueError("num_iterations must be at least 1")

    # Configure device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    use_cuda = device.type == "cuda"
    print(f"Running on device: {device}")
    if use_cuda:
        print(f"GPU: {torch.cuda.get_device_name()}")
        print(f"CUDA Version: {torch.version.cuda}")

    def wait_for_device():
        # CUDA kernels are launched asynchronously, so the clock may only be
        # read after they have finished. On CPU there is nothing to wait for,
        # and calling torch.cuda.synchronize() without CUDA raises an error.
        if use_cuda:
            torch.cuda.synchronize()

    # Initialize model
    config = BenchmarkConfig()
    model = BenchmarkModel(config).to(device)
    model.eval()

    # Create dummy input
    dummy_input = torch.randn(batch_size, seq_length, config.hidden_size, device=device)

    # Warmup
    print("Warming up...")
    with torch.no_grad():
        for _ in range(num_warmup):
            _ = model(dummy_input)

    # Benchmark
    print(f"Running benchmark with {num_iterations} iterations...")
    times = []
    with torch.no_grad():
        # Drain any pending warmup work so it is not billed to iteration 1.
        wait_for_device()
        for i in range(num_iterations):
            start = perf_counter()
            _ = model(dummy_input)
            wait_for_device()
            end = perf_counter()
            times.append(end - start)
            if (i + 1) % 10 == 0:
                print(f"Iteration {i + 1}/{num_iterations}")

    # Calculate statistics
    times = np.array(times) * 1000  # Convert to milliseconds
    mean_ms = np.mean(times)
    print("\nResults:")
    print(f"Average latency: {mean_ms:.2f} ms")
    print(f"Median latency: {np.median(times):.2f} ms")
    print(f"90th percentile latency: {np.percentile(times, 90):.2f} ms")
    print(f"99th percentile latency: {np.percentile(times, 99):.2f} ms")
    # Each forward pass processes batch_size * seq_length positions.
    print(f"Throughput: {batch_size * seq_length / (mean_ms / 1000):.2f} tokens/second")

# Run the benchmark with its default arguments when this code is executed
# directly (the guard is false when the code is imported as a module).
if __name__ == "__main__":
    run_benchmark()

Running on device: cuda
GPU: NVIDIA A100-SXM4-40GB
CUDA Version: 12.1
Warming up...
Running benchmark with 50 iterations...
Iteration 10/50
Iteration 20/50
Iteration 30/50
Iteration 40/50
Iteration 50/50

Results:
Average latency: 106.64 ms
Median latency: 106.63 ms
90th percentile latency: 106.68 ms
99th percentile latency: 106.76 ms
Throughput: 153636.99 tokens/second


In [4]:
# Now check versions
import pkg_resources
import sys

def get_package_details(packages=None):
    """Print the Python version and the installed version of selected packages.

    Args:
        packages: Optional distribution name, or iterable of distribution
            names, to report on. When omitted, ``torch``, ``transformers``,
            ``numpy`` and ``sentencepiece`` are checked.

    Each package is printed on its own line with its version, or
    ``Not installed`` when no distribution of that name is found.
    """
    # importlib.metadata is the standard-library replacement for the
    # deprecated pkg_resources API; imported locally so this function does
    # not depend on setuptools being installed.
    from importlib import metadata

    if packages is None:
        packages_to_check = [
            'torch',
            'transformers',
            'numpy',
            'sentencepiece'  # Often used by transformers
        ]
    elif isinstance(packages, str):
        # A bare string would otherwise be iterated character by character.
        packages_to_check = [packages]
    else:
        packages_to_check = list(packages)

    print("Python version:", sys.version.split()[0])
    print("\nPackage versions:")
    print("-" * 50)

    for package in packages_to_check:
        try:
            version = metadata.version(package)
        except metadata.PackageNotFoundError:
            print(f"{package:<15} Not installed")
        else:
            print(f"{package:<15} {version}")

# Report whether PyTorch can reach a CUDA device and, if so, which one.
import torch

print("\nCUDA Status:", "-" * 50, sep="\n")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    for status_line in (
        f"CUDA version: {torch.version.cuda}",
        f"Current GPU: {torch.cuda.get_device_name()}",
    ):
        print(status_line)

# Finally, print the Python and package version table.
get_package_details()


CUDA Status:
--------------------------------------------------
CUDA available: True
CUDA version: 12.1
Current GPU: NVIDIA A100-SXM4-40GB
Python version: 3.10.12

Package versions:
--------------------------------------------------
torch           2.5.0+cu121
transformers    4.44.2
numpy           1.26.4
sentencepiece   0.2.0
