In [1]:
import os
from zipfile import ZipFile, ZIP_DEFLATED
from pathlib import Path
import datetime

# 1) Project root is two levels above the current working directory
#    (this assumes the notebook runs from <root>/notebooks/maintenance)
PROJECT_ROOT = Path.cwd().resolve().parents[1]

# Archives are collected in <root>/_backups; create the folder if missing
BACKUP_DIR = PROJECT_ROOT / "_backups"
BACKUP_DIR.mkdir(exist_ok=True)

# Second-resolution timestamp keeps archives from separate runs distinct
timestamp = format(datetime.datetime.now(), "%Y-%m-%d_%H-%M-%S")
zip_path = BACKUP_DIR.joinpath(f"qepc_light_backup_{timestamp}.zip")

print("Project root:", PROJECT_ROOT)
print("Backup file:", zip_path)

# 2) Filters applied while collecting files

# Directory names that are left out of the archive.
# "data" keeps the data folder out of the zip completely; remove that
# entry if you DO want data included.
EXCLUDE_DIR_NAMES = {
    ".git",
    "_backups",
    ".ipynb_checkpoints",
    "__pycache__",
    ".venv",
    "env",
    "data",
}

# File extensions (lower-case, with leading dot) that are never archived
EXCLUDE_FILE_EXT = {".zip", ".gz", ".lz4"}

# Safety cap per file: 500 MB expressed in bytes
MAX_SINGLE_FILE_BYTES = 500 * 1024 ** 2

def should_skip_path(path: Path) -> bool:
    """Return True when *path* must be left out of the backup archive.

    A path is skipped when any of the following holds:

    * any component of its location relative to ``PROJECT_ROOT`` is listed
      in ``EXCLUDE_DIR_NAMES`` (matched at every depth, not only directly
      under the project root);
    * it lies inside a ``_backups`` directory;
    * its suffix, compared case-insensitively, is in ``EXCLUDE_FILE_EXT``;
    * it is an existing regular file larger than ``MAX_SINGLE_FILE_BYTES``
      (a notice is printed in that case).

    Args:
        path: File or directory located at or below ``PROJECT_ROOT``.

    Returns:
        True if the path should be excluded, False otherwise.

    Raises:
        ValueError: If *path* is not located under ``PROJECT_ROOT``
            (raised by ``Path.relative_to``).
    """
    parts = path.relative_to(PROJECT_ROOT).parts

    # Check every component rather than only the first one: directories such
    # as __pycache__ or .ipynb_checkpoints normally live inside sub-packages
    # and sub-folders, not directly under the project root.
    if any(part in EXCLUDE_DIR_NAMES for part in parts):
        return True

    # Never archive previous backups, even if "_backups" has been removed
    # from EXCLUDE_DIR_NAMES.
    if "_backups" in parts:
        return True

    # Skip certain extensions (already-compressed archives)
    if path.suffix.lower() in EXCLUDE_FILE_EXT:
        return True

    # Skip very large individual files; is_file() guards the stat() call so
    # directories and missing paths are never sized.
    if path.is_file() and path.stat().st_size > MAX_SINGLE_FILE_BYTES:
        print(f"Skipping large file (>500MB): {path}")
        return True

    return False

# 3) Walk the project tree and archive every file that passes the filter
with ZipFile(zip_path, mode="w", compression=ZIP_DEFLATED) as archive:
    for current_dir, subdirs, filenames in os.walk(PROJECT_ROOT):
        current = Path(current_dir)

        # Prune in place: os.walk only descends into the names that are
        # still present in this list after the loop body runs.
        kept = []
        for name in subdirs:
            if should_skip_path(current / name):
                continue
            kept.append(name)
        subdirs[:] = kept

        for filename in filenames:
            candidate = current / filename
            if not should_skip_path(candidate):
                # Store entries relative to the project root so the archive
                # does not contain absolute paths.
                archive.write(
                    candidate,
                    arcname=str(candidate.relative_to(PROJECT_ROOT)),
                )

print("✅ Light backup complete!")
print("Backup file:", zip_path)


Project root: C:\Users\wdors\qepc_project
Backup file: C:\Users\wdors\qepc_project\_backups\qepc_light_backup_2025-12-04_14-19-05.zip
✅ Light backup complete!
Backup file: C:\Users\wdors\qepc_project\_backups\qepc_light_backup_2025-12-04_14-19-05.zip
