# ULTRATHINK: Colab Quickstart

Run a quick sanity check, then a small training run on real data, in Google Colab.

In [None]:
import torch, platform
!nvidia-smi || echo 'No NVIDIA GPU available'
print('Python:', platform.python_version())
print('CUDA available:', torch.cuda.is_available())

In [None]:
!git clone https://github.com/vediyappanm/UltraThinking-LLM-Training.git
%cd UltraThinking-LLM-Training/deep
!pip install --upgrade pip
# Install PyTorch (CUDA wheels for Colab)
!pip install torch --index-url https://download.pytorch.org/whl/cu121
# Core deps
!pip install "transformers>=4.41.0" datasets einops tqdm wandb accelerate
# Optional: FlashAttention (may fail on some Colab GPUs)
# !pip install flash-attn --no-build-isolation


In [None]:
import os

# Set both variables in the kernel's environment in one call; processes
# launched from later cells inherit whatever is in os.environ.
os.environ.update({
    # '1' switches TorchDynamo (the torch.compile front end) off.
    'TORCHDYNAMO_DISABLE': '1',
    # Option string read by PyTorch's CUDA caching allocator.
    'PYTORCH_CUDA_ALLOC_CONF': 'expandable_segments:True',
})
print('Env set.')

## 1) Sanity run on dummy data

In [None]:
# Smoke test: launches train_ultrathink.py with `--dataset dummy`
# (2000 train / 200 val samples), a small model (hidden 384, 4 layers,
# 6 heads, sequence length 256) and `--num_epochs 1`.
# batch_size 4 with 8 gradient-accumulation steps is presumably 32 sequences
# per optimizer step; confirm against the script's training loop.
# Flag meanings are inferred from their names; the script's argument parser
# is the authoritative reference. Output goes to ./outputs/sanity_dummy.
!python train_ultrathink.py \
  --dataset dummy --train_samples 2000 --val_samples 200 \
  --vocab_size 50257 --hidden_size 384 --num_layers 4 --num_heads 6 --num_kv_heads 6 \
  --intermediate_size 1536 --max_seq_length 256 \
  --batch_size 4 --gradient_accumulation_steps 8 \
  --learning_rate 5e-4 --use_amp --gradient_checkpointing \
  --num_epochs 1 \
  --output_dir ./outputs/sanity_dummy

## 2) Small real-data run (C4 streaming)

In [None]:
# Real-data run: launches train_ultrathink.py on C4 (`en` subset) with
# `--streaming` and the `gpt2` tokenizer, using the same model size as the
# sanity run but with sequence length 512.
# batch_size 1 with 64 gradient-accumulation steps is presumably 64 sequences
# per optimizer step; confirm against the script's training loop.
# NOTE(review): no `--num_epochs` is passed here, so the run length is whatever
# the script defaults to. The unit of `--eval_frequency 1` (steps vs. epochs)
# is not visible from this notebook; check the script before a long run.
# Output goes to ./outputs/ultrathink_c4_seq512_sdpa_warmup.
!python train_ultrathink.py \
  --dataset c4 --dataset_subset en --streaming \
  --enable_dre --dre_warmup_steps 500 \
  --amp_warmup_steps 200 \
  --tokenizer_name gpt2 \
  --vocab_size 50257 \
  --hidden_size 384 --num_layers 4 --num_heads 6 --num_kv_heads 6 \
  --intermediate_size 1536 --max_seq_length 512 \
  --batch_size 1 --gradient_accumulation_steps 64 \
  --learning_rate 5e-5 --weight_decay 0.1 \
  --warmup_steps 2000 \
  --use_amp --gradient_checkpointing \
  --eval_frequency 1 \
  --output_dir ./outputs/ultrathink_c4_seq512_sdpa_warmup

## 3) Switch datasets (examples)

- See `docs/datasets.md` for more.
- Add `--use_wandb --run_name ultrathink_colab` to either training command above to log the run to Weights & Biases (W&B).