In [17]:
import torch
from transformers import BertModel, BertTokenizer
from torch.utils.tensorboard import SummaryWriter

# Load pre-trained BERT
model = BertModel.from_pretrained("bert-base-uncased")
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device).eval()

# Dummy input
inputs = tokenizer("The quick brown fox jumps over the lazy dog", return_tensors="pt").to(device)


In [22]:
writer = SummaryWriter(log_dir="./runs/bert_activation_debug")
activation_stats = {}

global_step = 0  # will be updated in loop

def make_hook(name):
    def hook(module, input, output):
        global global_step
        if isinstance(output, torch.Tensor):
            act_mean = output.mean().item()
            buffer_size = output.nelement() * output.element_size()  # in bytes
            writer.add_scalar(f"{name}/activation_mean", act_mean, global_step)
            writer.add_scalar(f"{name}/buffer_bytes", buffer_size, global_step)
    return hook

    


In [23]:
for name, module in model.named_modules():
    if isinstance(module, nn.Linear):
        module.register_forward_hook(make_hook(name))


def register_hooks(model):
    for name, module in model.named_modules():
        if isinstance(module, (torch.nn.Linear, torch.nn.Embedding, torch.nn.LayerNorm, torch.nn.Dropout, torch.nn.Conv1d)):
            module.register_forward_hook(activation_hook)

register_hooks(model)


In [24]:
with torch.no_grad():
    output = model(**inputs)


In [25]:
import pprint
pprint.pprint(activation_stats)


{5787163424: {'layer': 'Embedding',
              'output_memory_MB': 0.033792,
              'output_shape': [1, 11, 768]},
 5804061520: {'layer': 'Linear',
              'output_memory_MB': 0.033792,
              'output_shape': [1, 11, 768]},
 5825224128: {'layer': 'Linear',
              'output_memory_MB': 0.003072,
              'output_shape': [1, 768]},
 5825224944: {'layer': 'Linear',
              'output_memory_MB': 0.033792,
              'output_shape': [1, 11, 768]},
 5825225280: {'layer': 'LayerNorm',
              'output_memory_MB': 0.033792,
              'output_shape': [1, 11, 768]},
 5825225760: {'layer': 'Linear',
              'output_memory_MB': 0.033792,
              'output_shape': [1, 11, 768]},
 5825226144: {'layer': 'Linear',
              'output_memory_MB': 0.033792,
              'output_shape': [1, 11, 768]},
 5825226912: {'layer': 'Linear',
              'output_memory_MB': 0.033792,
              'output_shape': [1, 11, 768]},
 5825227392: {'layer':

In [10]:
import torch

# Check if you're on Metal backend
print(torch.backends.mps.is_available())  # True = running on M1/M2/M3 GPU

# After model/input is loaded
print(torch.mps.current_allocated_memory())



True
0


In [11]:
import torch
import torch.nn as nn
from torchvision import models
from torch.utils.tensorboard import SummaryWriter
import time

# Enable MPS backend
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(device)
print(f"Running on device: {device}")

# Use a simple model for demo
model = models.resnet18(pretrained=False).to(device).eval()

# Dummy input
input_tensor = torch.randn(1, 3, 224, 224).to(device)

# TensorBoard writer
writer = SummaryWriter(log_dir="./runs/mps_memory_demo")

# Hook to log activation memory
activation_stats = {}

def activation_hook(module, input, output):
    name = module.__class__.__name__
    layer_id = id(module)
    if hasattr(output, 'nelement'):
        mem_mb = output.element_size() * output.nelement() / 1e6
        activation_stats[layer_id] = {
            "layer": name,
            "output_shape": list(output.shape),
            "output_memory_MB": mem_mb
        }
        writer.add_scalar(f"LayerMemory/{name}_{layer_id}", mem_mb)

# Register hooks on linear & conv layers
def register_hooks(model):
    for name, module in model.named_modules():
        if isinstance(module, (nn.Conv2d, nn.Linear, nn.ReLU)):
            module.register_forward_hook(activation_hook)

register_hooks(model)

# Run a few passes and log MPS memory
for step in range(5):
    with torch.no_grad():
        output = model(input_tensor)

    # Log MPS GPU memory usage (in MB)
    alloc_mem = torch.mps.current_allocated_memory() / 1e6
    # reserved_mem = torch.mps.current_reserved_memory() / 1e6

    print(f"[Step {step}] Allocated: {alloc_mem:.2f} MB, Reserved:  MB")
    writer.add_scalar("MPS/AllocatedMemory_MB", alloc_mem, step)
    # writer.add_scalar("MPS/ReservedMemory_MB", reserved_mem, step)

    time.sleep(1)

writer.close()


mps
Running on device: mps




[Step 0] Allocated: 48.21 MB, Reserved:  MB
[Step 1] Allocated: 48.21 MB, Reserved:  MB
[Step 2] Allocated: 48.21 MB, Reserved:  MB
[Step 3] Allocated: 48.21 MB, Reserved:  MB
[Step 4] Allocated: 48.21 MB, Reserved:  MB


In [28]:
import torch
from transformers import AutoModel, AutoTokenizer
from torch.utils.tensorboard import SummaryWriter
import time

# ========== CONFIG ==========
model_name = "bert-base-uncased"  # You can swap with "gpt2", etc.
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
print(f"Using device: {device}")

# ========== LOAD MODEL ==========
model = AutoModel.from_pretrained(model_name).to(device)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# ========== TENSORBOARD WRITER ==========
writer = SummaryWriter(log_dir="./runs/llm_monitor")

# ========== HOOK FUNCTION ==========
global_step = 0

def make_hook(name):
    def hook(module, input, output):
        global global_step
        if isinstance(output, torch.Tensor):
            act_mean = output.mean().item()
            buffer_size = output.nelement() * output.element_size()
            writer.add_scalar(f"{name}/activation_mean", act_mean, global_step)
            writer.add_scalar(f"{name}/buffer_bytes", buffer_size, global_step)
    return hook

# ========== REGISTER HOOKS ==========
for name, module in model.named_modules():
    if "encoder.layer" in name and isinstance(module, torch.nn.Module):
        module.register_forward_hook(make_hook(name))

# ========== INFERENCE LOOP ==========
sentences = [
    "The quick brown fox jumps over the lazy dog.",
    "Large language models are powerful tools for AI.",
    "Monitoring memory usage helps optimize performance.",
    "PyTorch hooks allow introspection of hidden layers."
]

for i, sentence in enumerate(sentences):
    global_step = i

    inputs = tokenizer(sentence, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model(**inputs)

    # Log MPS memory stats
    if device.type == "mps":
        allocated = torch.mps.current_allocated_memory()
        writer.add_scalar("MPS/AllocatedMemory", allocated, global_step)

    time.sleep(0.1)

writer.close()
print("TensorBoard logging complete. Run `tensorboard --logdir=runs` to view.")

Using device: mps
TensorBoard logging complete. Run `tensorboard --logdir=runs` to view.


In [29]:
import torch
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
from datetime import datetime

# === 1. Dummy LLM or load your own model === #
class TinyTransformer(nn.Module):
    def __init__(self, vocab_size=1000, embed_dim=64):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=embed_dim, nhead=4), num_layers=2
        )
        self.fc = nn.Linear(embed_dim, vocab_size)

    def forward(self, x):
        x = self.embedding(x)
        x = self.transformer(x)
        return self.fc(x)

device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
model = TinyTransformer().to(device)

# === 2. TensorBoard Setup === #
logdir = f"runs/llm_monitor_{datetime.now().strftime('%Y%m%d-%H%M%S')}"
writer = SummaryWriter(logdir)

# === 3. Hook to capture activations === #
def activation_hook(name):
    def hook(module, input, output):
        if isinstance(output, torch.Tensor):
            writer.add_scalar(f"Activations/{name}_mean", output.mean().item(), global_step)
        elif isinstance(output, tuple):
            for i, out in enumerate(output):
                if isinstance(out, torch.Tensor):
                    writer.add_scalar(f"Activations/{name}_{i}_mean", out.mean().item(), global_step)
    return hook

# === 4. Register hooks === #
for name, module in model.named_modules():
    if not isinstance(module, nn.Sequential):
        module.register_forward_hook(activation_hook(name))

# === 5. Dummy input & loop === #
global_step = 0
for step in range(5):
    global_step = step
    x = torch.randint(0, 1000, (10, 32)).to(device)  # (sequence_length, batch_size)
    out = model(x)

    # === 6. Log MPS Memory === #
    try:
        alloc = torch.mps.current_allocated_memory()
        reserved = torch.mps.current_reserved_memory()
        if alloc is not None:
            writer.add_scalar("MPS/AllocatedMemory_MB", alloc / (1024 ** 2), global_step)
        if reserved is not None:
            writer.add_scalar("MPS/ReservedMemory_MB", reserved / (1024 ** 2), global_step)
    except Exception as e:
        print(f"[WARN] MPS memory logging failed: {e}")

writer.close()
print(f"TensorBoard log written to: {logdir}")




[WARN] MPS memory logging failed: module 'torch.mps' has no attribute 'current_reserved_memory'
[WARN] MPS memory logging failed: module 'torch.mps' has no attribute 'current_reserved_memory'
[WARN] MPS memory logging failed: module 'torch.mps' has no attribute 'current_reserved_memory'
[WARN] MPS memory logging failed: module 'torch.mps' has no attribute 'current_reserved_memory'
[WARN] MPS memory logging failed: module 'torch.mps' has no attribute 'current_reserved_memory'
TensorBoard log written to: runs/llm_monitor_20250611-144411
