# Run VAE experiments

In [1]:
import sys, subprocess, platform
# Show which interpreter backs this kernel, then ask pip (run through that same
# interpreter) for the metadata of the installed torch package.
print(f'Python executable: {sys.executable}')
print(f'Python version: {platform.python_version()}')
print('Verifying torch in current kernel...')
# Left as the final bare expression so the CompletedProcess is shown as the cell result.
subprocess.run([sys.executable] + ['-m', 'pip', 'show', 'torch'])

Python executable: /Users/zoe/Desktop/VAEs/beta-tcvae/.venv/bin/python
Python version: 3.11.7
Verifying torch in current kernel...
Name: torch
Version: 2.9.1
Summary: Tensors and Dynamic neural networks in Python with strong GPU acceleration
Home-page: https://pytorch.org
Author: 
Author-email: PyTorch Team <packages@pytorch.org>
License: BSD-3-Clause
Location: /Users/zoe/Desktop/VAEs/beta-tcvae/.venv/lib/python3.11/site-packages
Requires: filelock, fsspec, jinja2, networkx, sympy, typing-extensions
Required-by: torchaudio, torchvision


CompletedProcess(args=['/Users/zoe/Desktop/VAEs/beta-tcvae/.venv/bin/python', '-m', 'pip', 'show', 'torch'], returncode=0)

In [2]:
# Settings shared by every training run launched from this notebook
DATASET = 'shapes'
BETA = 6
TCVAE = True
LOG_FREQ = 50
BATCH_SIZES = [32, 64, 256, 1024, 2048]
SEEDS = list(range(5))

# Budget mode: hold the number of optimizer steps fixed ('steps') or the total
# number of examples processed ('examples').
# NOTE(review): no cell visible in this notebook reads BUDGET - confirm it is used elsewhere.
BUDGET = 'steps'
DATASET_SIZE = 737_280
TARGET_STEPS = 100_000

import math
def epochs_for_bs(n, bs, target_steps=None):
    """Return the smallest whole number of epochs that reaches a step budget.

    One epoch is counted as ``ceil(n / bs)`` optimizer steps, so a trailing
    partial batch is counted as a full step.
    NOTE(review): this presumes the training script does not drop the last
    partial batch - confirm against its data loader.

    Args:
        n: Number of examples in the dataset; must be positive.
        bs: Batch size; must be positive.
        target_steps: Optimizer-step budget. When omitted, the notebook-level
            ``TARGET_STEPS`` constant is used.

    Returns:
        ``ceil(target_steps / steps_per_epoch)`` as an int.

    Raises:
        ValueError: If ``n`` or ``bs`` is not positive.
    """
    # Reject sizes that would otherwise divide by zero or yield a negative epoch count.
    if n <= 0 or bs <= 0:
        raise ValueError(f'n and bs must be positive, got n={n!r}, bs={bs!r}')
    if target_steps is None:
        # Looked up at call time so a later edit to the config constant is honoured.
        target_steps = TARGET_STEPS
    steps_per_epoch = math.ceil(n / bs)
    return math.ceil(target_steps / steps_per_epoch)

# Epoch count for each batch size in the sweep, keyed by batch size
EPOCHS_PER_BS = dict(
    (size, epochs_for_bs(DATASET_SIZE, size)) for size in BATCH_SIZES
)
print(f'EPOCHS_PER_BS: {EPOCHS_PER_BS}')

# Weights & Biases settings; the run cells turn these into command-line flags
USE_WANDB = True
WANDB_PROJECT = 'beta-tcvae'
WANDB_ENTITY = None  # the run cells omit --wandb_entity while this is falsy
WANDB_MODE = 'online'

EPOCHS_PER_BS: {32: 5, 64: 9, 256: 35, 1024: 139, 2048: 278}


In [3]:
# single experiment (repeat across seeds)
import sys, subprocess, datetime

# Batch size for this single-configuration experiment; its epoch count comes
# from the EPOCHS_PER_BS table.
bs = 64
epochs = EPOCHS_PER_BS[bs]

failed_seeds = []  # seeds whose training process exited with a non-zero status
for seed in SEEDS:
    cmd = [sys.executable, 'vae_quant.py',
           '--dataset', DATASET,
           '--beta', str(BETA),
           '--batch-size', str(bs),
           '--num-epochs', str(epochs),
           '--log_freq', str(LOG_FREQ),
           '--seed', str(seed)]
    if TCVAE:
        cmd.append('--tcvae')
    if USE_WANDB:
        cmd += ['--wandb', '--wandb_project', WANDB_PROJECT, '--wandb_mode', WANDB_MODE]
        if WANDB_ENTITY:
            cmd += ['--wandb_entity', WANDB_ENTITY]
        cmd += ['--wandb_run_name', f'single_bs{bs}_seed{seed}']
    print('Running:', ' '.join(cmd))
    # check=False lets the remaining seeds run after a failure, so the exit
    # status is reported explicitly here instead of being silently dropped.
    result = subprocess.run(cmd, check=False)
    print(f'[exit code {result.returncode}] seed {seed}')
    if result.returncode != 0:
        failed_seeds.append(seed)

if failed_seeds:
    print('Failed seeds:', failed_seeds)

Running: /Users/zoe/Desktop/VAEs/beta-tcvae/.venv/bin/python vae_quant.py --dataset shapes --beta 6 --batch-size 64 --num-epochs 9 --log_freq 50 --seed 0 --tcvae --wandb --wandb_project beta-tcvae --wandb_mode online --wandb_run_name single_bs64_seed0


wandb: Currently logged in as: zoematr (vae-seminar) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin
wandb: setting up run mt4xpidj
wandb: Tracking run with wandb version 0.23.1
wandb: Run data is saved locally in /Users/zoe/Desktop/VAEs/beta-tcvae/wandb/run-20260106_090904-mt4xpidj
wandb: Run `wandb offline` to turn off syncing.
wandb: Syncing run single_bs64_seed0
wandb: ⭐️ View project at https://wandb.ai/vae-seminar/beta-tcvae
wandb: 🚀 View run at https://wandb.ai/vae-seminar/beta-tcvae/runs/mt4xpidj


[iteration 050] time: 0.07 	beta 6.00 	lambda 0.00 training ELBO: -542.3851 (-1124.5497)
Active units: 0,1,2,3,4,5,6,7,8,9
Number of active units: 10/10
[iteration 100] time: 0.07 	beta 6.00 	lambda 0.00 training ELBO: -468.0027 (-624.8453)
Active units: 0,1,2,3,4,6,7,8,9
Number of active units: 9/10
[iteration 150] time: 0.08 	beta 6.00 	lambda 0.00 training ELBO: -409.6158 (-475.2733)
Active units: 0,1,2,3,4,5,6,7,9
Number of active units: 9/10
[iteration 200] time: 0.07 	beta 6.00 	lambda 0.00 training ELBO: -369.2279 (-421.2196)
Active units: 0,1,4,7
Number of active units: 4/10
[iteration 250] time: 0.07 	beta 6.00 	lambda 0.00 training ELBO: -380.4002 (-390.7379)
Active units: 0,1,2,3,4,6,9
Number of active units: 7/10
[iteration 300] time: 0.07 	beta 6.00 	lambda 0.00 training ELBO: -431.7405 (-408.1615)
Active units: 0,1,2,3,4,5,6,7,8,9
Number of active units: 10/10
[iteration 350] time: 0.06 	beta 6.00 	lambda 0.00 training ELBO: -299.5880 (-367.7029)
Active units: 0,1,2,3,4,5

Traceback (most recent call last):
  File "/Users/zoe/Desktop/VAEs/beta-tcvae/vae_quant.py", line 542, in <module>
    main()
  File "/Users/zoe/Desktop/VAEs/beta-tcvae/vae_quant.py", line 516, in main
    eval('plot_vs_gt_' + args.dataset)(vae, train_loader.dataset,
  File "/Users/zoe/Desktop/VAEs/beta-tcvae/plot_latent_vs_true.py", line 33, in plot_vs_gt_shapes
    for xs in loader:
  File "/Users/zoe/Desktop/VAEs/beta-tcvae/.venv/lib/python3.11/site-packages/torch/utils/data/dataloader.py", line 732, in __next__
    data = self._next_data()
           ^^^^^^^^^^^^^^^^^
  File "/Users/zoe/Desktop/VAEs/beta-tcvae/.venv/lib/python3.11/site-packages/torch/utils/data/dataloader.py", line 788, in _next_data
    data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/Users/zoe/Desktop/VAEs/beta-tcvae/.venv/lib/python3.11/site-packages/torch/utils/data/_utils/fetch.py", line 55, in fetch
    return self.collate_fn(data)
   

KeyboardInterrupt: 

In [4]:
# Sweep over batch sizes and seeds; save logs
import os, sys, subprocess, datetime
# Every run of this sweep writes its combined stdout/stderr to runs/<name>.log;
# the shared timestamp keeps log files and W&B run names from different sweeps apart.
os.makedirs('runs', exist_ok=True)
ts = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')

failed_runs = []  # (batch size, seed, exit code) for runs that exited non-zero
for seed in SEEDS:
    for bs in BATCH_SIZES:
        cmd = [sys.executable, 'vae_quant.py',
               '--dataset', DATASET,
               '--beta', str(BETA),
               '--batch-size', str(bs),
               '--num-epochs', str(epochs_for_bs(DATASET_SIZE, bs)),
               '--log_freq', str(LOG_FREQ),
               '--seed', str(seed)]
        if TCVAE:
            cmd.append('--tcvae')
        if USE_WANDB:
            cmd += ['--wandb', '--wandb_project', WANDB_PROJECT, '--wandb_mode', WANDB_MODE]
            if WANDB_ENTITY:
                cmd += ['--wandb_entity', WANDB_ENTITY]
            cmd += ['--wandb_run_name', f'bs{bs}_seed{seed}_{ts}']
        print(f'\n=== Running batch-size {bs} | seed {seed} ===\n', ' '.join(cmd))
        log_path = f'runs/bs{bs}_seed{seed}_{ts}.log'
        with open(log_path, 'w') as f:
            proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
            try:
                # Tee the child's output to the notebook and the log file line by line.
                for line in proc.stdout:
                    print(line, end='')
                    f.write(line)
                proc.wait()
            except BaseException:
                # Interrupting the cell (or any error while streaming) must not
                # leave the training process running unattended: stop it, reap
                # it, then let the original exception propagate.
                proc.terminate()
                try:
                    proc.wait(timeout=10)
                except subprocess.TimeoutExpired:
                    proc.kill()
                    proc.wait()
                raise
            finally:
                proc.stdout.close()
        print(f'[exit code {proc.returncode}] log saved to {log_path}')
        if proc.returncode != 0:
            failed_runs.append((bs, seed, proc.returncode))

if failed_runs:
    print('Failed runs (batch size, seed, exit code):', failed_runs)


=== Running batch-size 32 | seed 0 ===
 /Users/zoe/Desktop/VAEs/beta-tcvae/.venv/bin/python vae_quant.py --dataset shapes --beta 6 --batch-size 32 --num-epochs 5 --log_freq 50 --seed 0 --tcvae --wandb --wandb_project beta-tcvae --wandb_mode online --wandb_run_name bs32_seed0_20260106_113325
wandb: Currently logged in as: zoematr (vae-seminar) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin
wandb: setting up run th2ojlb1
wandb: Tracking run with wandb version 0.23.1
wandb: Run data is saved locally in /Users/zoe/Desktop/VAEs/beta-tcvae/wandb/run-20260106_113329-th2ojlb1
wandb: Run `wandb offline` to turn off syncing.
wandb: Syncing run bs32_seed0_20260106_113325
wandb: ⭐️ View project at https://wandb.ai/vae-seminar/beta-tcvae
wandb: 🚀 View run at https://wandb.ai/vae-seminar/beta-tcvae/runs/th2ojlb1
[iteration 050] time: 0.05 	beta 6.00 	lambda 0.00 training ELBO: -616.7518 (-1146.8712)


KeyboardInterrupt: 