<a href="https://colab.research.google.com/github/xgrayfoxss21/bitbybit-hybrid-orchestrator/blob/main/notebooks/bitbybit-hybrid-orchestrator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# @title BitNet-7B PoC — Colab Bootstrap
# @markdown Fill your settings then run the cell.

# User-editable settings for this cell. The bootstrapping code further down
# reads these names to clone the repo, optionally mount Google Drive, and
# write a `.env` file in the repo directory.

# ==== Repo ====
# REPO_URL is cloned into REPO_DIR; if the directory already exists the
# bootstrap pulls instead of cloning.
REPO_URL = "https://github.com/xgrayfoxss21/BitNet-7B-PoC-KD-Distillation-Mini-Training-7B-Dry-Run"
REPO_DIR = "/content/bitnet-7b-poc"  # folder to clone into

# ==== Google Drive ====
# AUTO_MOUNT_GDRIVE gates the drive.mount(GDRIVE_MOUNT_POINT) call below.
# DRIVE_ROOT is the base path used for the checkpoints/data/reports/logs and
# cache entries written to `.env`.
AUTO_MOUNT_GDRIVE = True  #@param {type:"boolean"}
GDRIVE_MOUNT_POINT = "/content/drive"  #@param {type:"string"}
DRIVE_ROOT = "/content/drive/MyDrive/bitnet_poc"  #@param {type:"string"}

# ==== Provider selection ====
# one of: openai | anthropic | groq | aimlapi | gemini
PROVIDER = "openai"  #@param ["openai","anthropic","groq","aimlapi","gemini"]

# ==== API keys (paste only what you use) ====
# NOTE: every key below is copied as plain text into the `.env` file that
# this cell writes.
OPENAI_API_KEY   = ""  #@param {type:"string"}
ANTHROPIC_API_KEY= ""  #@param {type:"string"}
GROQ_API_KEY     = ""  #@param {type:"string"}
AIMLAPI_API_KEY  = ""  #@param {type:"string"}
GEMINI_API_KEY   = ""  #@param {type:"string"}

# Optional OpenAI-compatible base URLs (leave blank if unsure)
# The Groq and AIMLAPI URLs carry no #@param annotation, so they are edited
# in code rather than (presumably) through the Colab form.
OPENAI_BASE_URL  = ""  #@param {type:"string"}
GROQ_BASE_URL    = "https://api.groq.com/openai/v1"  # good default
AIMLAPI_BASE_URL = "https://api.aimlapi.com/v1"      # good default

# ==== Training/runtime toggles ====
# Written to `.env` unchanged; the two numeric toggles use 1/0 values.
TORCH_DTYPE = "bf16"  #@param ["bf16","fp16","fp32"]
ENABLE_FLASH_ATTN = 1 #@param {type:"number"}
WANDB_DISABLED = 1    #@param {type:"number"}

# Tokenizer / template (keep defaults unless you know you need to change)
# TEMPLATE contains a literal `{prompt}` placeholder and newline characters;
# how it is filled in is decided by code outside this cell.
TOKENIZER_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  #@param {type:"string"}
TEMPLATE = "<|user|>\n{prompt}\n\n<|assistant|>\n"     #@param {type:"string"}

# ---- Bootstrapping logic (no edits below) ----
import os, sys, textwrap, subprocess, json, shutil, pathlib

def sh(cmd, check=True):
    """Echo a shell command, run it, and hand back the finished process.

    Args:
        cmd: Command line, passed to the shell as a single string.
        check: When true, a non-zero exit status raises
            ``subprocess.CalledProcessError``.

    Returns:
        The ``subprocess.CompletedProcess`` produced by ``subprocess.run``.
    """
    # Show the command first so the notebook output reads like a shell log.
    print(f"$ {cmd}")
    return subprocess.run(cmd, shell=True, text=True, check=check)

# 0) GPU info (nice to see)
# Best-effort: a missing GPU or driver must never abort the bootstrap, but it
# should be reported rather than silently ignored. With check=False the call
# does not raise on a non-zero exit, so the status is inspected explicitly.
try:
    _gpu_probe = sh("nvidia-smi", check=False)
except Exception as e:
    print("GPU check skipped:", e)
else:
    if _gpu_probe.returncode != 0:
        print(
            f"⚠️ nvidia-smi exited with status {_gpu_probe.returncode} — "
            "no GPU runtime detected?"
        )

# 1) Clone / Pull repo
# Only treat REPO_DIR as an existing checkout when it actually contains a
# .git directory. A leftover empty/non-git folder (e.g. from an aborted
# clone) would otherwise make `git pull` fail; cloning into an empty folder
# works, and git reports a clear error if the folder is non-empty.
if os.path.isdir(os.path.join(REPO_DIR, ".git")):
    print("Repo dir exists — pulling latest...")
    os.chdir(REPO_DIR)
    sh("git pull --ff-only")
else:
    os.chdir("/content")
    # Quote both arguments so a path or URL containing spaces stays one word.
    sh(f'git clone "{REPO_URL}" "{REPO_DIR}"')
    os.chdir(REPO_DIR)

# 2) Install Python deps
# Run pip through the interpreter executing this cell so that both the pip
# upgrade and the requirements land in the same environment (a bare `pip`
# on PATH is not guaranteed to belong to that interpreter).
_pip_install = f'"{sys.executable}" -m pip -q install'
sh(f"{_pip_install} --upgrade pip")
sh(f"{_pip_install} -r requirements.txt")

# 3) Mount Google Drive up front when requested
#    (per the original note, storage.py can also mount automatically)
if AUTO_MOUNT_GDRIVE:
    try:
        from google.colab import drive

        drive.mount(GDRIVE_MOUNT_POINT)
    except Exception as mount_err:
        # Best-effort: the same handler covers a failed import of
        # google.colab and a failed mount; the bootstrap continues either way.
        print("Drive mount skipped or failed:", mount_err)

# 4) Write .env from the form values (overwrites existing .env in this runtime)

# Resolve the Hugging Face cache root here. The previous `{{HF_HOME}}` inside
# the f-string rendered as the literal text "{HF_HOME}", so the two cache
# paths derived from it were never valid directories.
_hf_home = f"{DRIVE_ROOT}/.hf"

# TEMPLATE contains real newline characters; written verbatim it would spill
# over several lines and corrupt the file. Emit it as a double-quoted value
# with backslash escapes instead.
# NOTE(review): assumes the .env consumer decodes double-quoted values with
# escapes (python-dotenv does) — confirm against the repo's loader.
_template_env = json.dumps(TEMPLATE, ensure_ascii=False)

env = f"""
# ==== STORAGE / PATHS ====
AUTO_MOUNT_GDRIVE={1 if AUTO_MOUNT_GDRIVE else 0}
GDRIVE_MOUNT_POINT={GDRIVE_MOUNT_POINT}
DRIVE_ROOT={DRIVE_ROOT}
CHECKPOINTS_DIR={DRIVE_ROOT}/checkpoints
DATA_DIR={DRIVE_ROOT}/data
REPORTS_DIR={DRIVE_ROOT}/reports
LOGS_DIR={DRIVE_ROOT}/logs
HF_HOME={_hf_home}
TRANSFORMERS_CACHE={_hf_home}/transformers
HF_DATASETS_CACHE={_hf_home}/datasets
TORCH_HOME={DRIVE_ROOT}/.torch
TOKENIZERS_PARALLELISM=false
PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128

# ==== PROVIDERS ====
PROVIDER={PROVIDER}
OPENAI_API_KEY={OPENAI_API_KEY}
ANTHROPIC_API_KEY={ANTHROPIC_API_KEY}
GROQ_API_KEY={GROQ_API_KEY}
AIMLAPI_API_KEY={AIMLAPI_API_KEY}
GEMINI_API_KEY={GEMINI_API_KEY}
OPENAI_BASE_URL={OPENAI_BASE_URL}
GROQ_BASE_URL={GROQ_BASE_URL}
AIMLAPI_BASE_URL={AIMLAPI_BASE_URL}
OPENAI_MODEL=gpt-4o-mini
ANTHROPIC_MODEL=claude-3-5-sonnet-20241022
GROQ_MODEL=llama-3.1-70b-versatile
AIMLAPI_MODEL=gpt-4o-mini
GEMINI_MODEL=gemini-1.5-pro

# ==== TRAINING ====
TORCH_DTYPE={TORCH_DTYPE}
ENABLE_FLASH_ATTN={ENABLE_FLASH_ATTN}
WANDB_DISABLED={WANDB_DISABLED}
WANDB_PROJECT=bitnet-poc
WANDB_ENTITY=
CUDA_VISIBLE_DEVICES=
TOKENIZER_NAME={TOKENIZER_NAME}
TEMPLATE={_template_env}
HF_TOKEN=
""".strip() + "\n"

# Explicit encoding so non-ASCII form values are written the same way
# regardless of the runtime's locale.
with open(".env", "w", encoding="utf-8") as f:
    f.write(env)
print("📝 Wrote .env")

# 5) Prepare storage by running the repo's scripts.storage module
#    (per the original note: mounts if needed, creates folders)
sh("python -m scripts.storage")

# Closing summary, emitted in a single call with newline separators.
print(
    "\n✅ Bootstrap complete.",
    "Next steps:",
    "  • Run a target, e.g.:  !make teacher   or   !make collect",
    sep="\n",
)
