
# üßπ QEPC Project Backup ‚Äì Full Project Zip

This notebook creates a **timestamped ZIP backup** of your entire `qepc_project`,
including:

- Python packages and modules
- Notebooks
- Config files
- Data (including `data/raw/` by default ‚Äì configurable)
- Props engine, utils, etc.

It will:

1. Auto-detect the project root by looking for a folder with both `data/` and `notebooks/`.
2. Create a `backups/` folder in the project root if it doesn't exist.
3. Zip the project into `backups/qepc_backup_YYYYMMDD_HHMMSS.zip`.
4. Respect a configurable list of **exclude patterns** (e.g. `.git`, `__pycache__`, `.ipynb_checkpoints`).

You can safely run this notebook whenever you want a fresh snapshot of the whole project.


In [2]:

# üß± Cell 1 ‚Äì Detect project root and set backup config

from pathlib import Path
import datetime

# Try to detect the qepc_project root by walking up until we see both `data/` and `notebooks/`
here = Path.cwd().resolve()
project_root = None

for p in [here] + list(here.parents):
    if (p / "data").exists() and (p / "notebooks").exists():
        project_root = p
        break

if project_root is None:
    raise RuntimeError(
        "Could not find project root with both 'data/' and 'notebooks/' folders.\n"
        f"Started search from: {here}"
    )

print(f"‚úÖ Project root detected: {project_root}")

# Where to store backups
backups_dir = project_root / "backups"
backups_dir.mkdir(exist_ok=True)
print(f"üìÅ Backups directory: {backups_dir}")

# Backup filename with timestamp
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
backup_name = f"qepc_backup_{timestamp}.zip"
backup_path = backups_dir / backup_name

print(f"üóÇÔ∏è Backup file will be created as: {backup_path}")


‚úÖ Project root detected: C:\Users\wdors\qepc_project
üìÅ Backups directory: C:\Users\wdors\qepc_project\backups
üóÇÔ∏è Backup file will be created as: C:\Users\wdors\qepc_project\backups\qepc_backup_20251127_155441.zip


In [3]:
# ‚öôÔ∏è Cell 2 ‚Äì Backup configuration (edit as needed)

# Whether to include heavy raw data files under data/raw/
INCLUDE_RAW_DATA = False   # ‚¨ÖÔ∏è turn this OFF for lighter backups

# Maximum size (in MB) of individual files to include.
# Use None for no limit. Example: MAX_FILE_MB = 200 to skip >200MB files.
MAX_FILE_MB = None

# Directories (relative to project_root) to always exclude
EXCLUDE_DIRS = {
    ".git",
    ".venv",
    "venv",
    "__pycache__",
    ".ipynb_checkpoints",
    ".mypy_cache",
    ".pytest_cache",
    ".DS_Store",
}

# File name patterns (suffixes) to exclude
EXCLUDE_SUFFIXES = {
    ".pyc",
    ".pyo",
    ".pyd",
    ".log",
    ".tmp",
}

print("‚úÖ Backup configuration loaded:")
print(f"   INCLUDE_RAW_DATA: {INCLUDE_RAW_DATA}")
print(f"   MAX_FILE_MB:      {MAX_FILE_MB}")
print(f"   EXCLUDE_DIRS:     {sorted(EXCLUDE_DIRS)}")
print(f"   EXCLUDE_SUFFIXES: {sorted(EXCLUDE_SUFFIXES)}")


‚úÖ Backup configuration loaded:
   INCLUDE_RAW_DATA: False
   MAX_FILE_MB:      None
   EXCLUDE_DIRS:     ['.DS_Store', '.git', '.ipynb_checkpoints', '.mypy_cache', '.pytest_cache', '.venv', '__pycache__', 'venv']
   EXCLUDE_SUFFIXES: ['.log', '.pyc', '.pyd', '.pyo', '.tmp']


In [4]:

# üì¶ Cell 3 ‚Äì Create backup zip

import os
import zipfile

def should_exclude_path(path: Path) -> bool:
    """Return True if a given path (file or directory) should be excluded."""
    # Convert to relative path from project_root
    rel = path.relative_to(project_root)
    parts = rel.parts

    # Exclude any path that has a directory in EXCLUDE_DIRS
    for part in parts:
        if part in EXCLUDE_DIRS:
            return True

    # Exclude raw data if configured off
    if not INCLUDE_RAW_DATA and parts[:2] == ("data", "raw"):
        return True

    # Exclude by suffix for files
    if path.is_file():
        if any(str(path).endswith(sfx) for sfx in EXCLUDE_SUFFIXES):
            return True

        # Exclude by max file size if needed
        if MAX_FILE_MB is not None:
            try:
                size_mb = path.stat().st_size / (1024 * 1024)
                if size_mb > MAX_FILE_MB:
                    return True
            except OSError:
                # If we can't stat it, skip just to be safe
                return True

    return False


print(f"üì¶ Creating backup zip at: {backup_path}")

files_added = 0
total_bytes = 0

with zipfile.ZipFile(backup_path, "w", zipfile.ZIP_DEFLATED) as zf:
    for root, dirs, files in os.walk(project_root):
        root_path = Path(root)

        # Prune excluded directories from traversal
        dirs[:] = [d for d in dirs if (root_path / d).name not in EXCLUDE_DIRS]

        for fname in files:
            full_path = root_path / fname
            if full_path == backup_path:
                # Don't include the backup we are currently writing
                continue

            if should_exclude_path(full_path):
                continue

            rel_path = full_path.relative_to(project_root)
            try:
                zf.write(full_path, rel_path)
                files_added += 1
                total_bytes += full_path.stat().st_size
            except Exception as e:
                print(f"‚ö†Ô∏è Skipped {full_path} due to error: {e}")

# Summary
size_mb = total_bytes / (1024 * 1024)
print("\n‚úÖ Backup complete!")
print(f"   File:   {backup_path}")
print(f"   Files:  {files_added}")
print(f"   Size:   {size_mb:.2f} MB")


üì¶ Creating backup zip at: C:\Users\wdors\qepc_project\backups\qepc_backup_20251127_155441.zip

‚úÖ Backup complete!
   File:   C:\Users\wdors\qepc_project\backups\qepc_backup_20251127_155441.zip
   Files:  103
   Size:   1.33 MB


In [None]:

# üìÇ Cell 4 ‚Äì List recent backups

from datetime import datetime

backup_files = sorted(backups_dir.glob("qepc_backup_*.zip"))

if not backup_files:
    print("No backups found in", backups_dir)
else:
    print(f"üìö Found {len(backup_files)} backup(s) in {backups_dir}:\n")
    for bp in backup_files[-10:]:  # show last 10
        ts_str = bp.stem.replace("qepc_backup_", "")
        try:
            ts = datetime.strptime(ts_str, "%Y%m%d_%H%M%S")
            ts_human = ts.strftime("%Y-%m-%d %H:%M:%S")
        except ValueError:
            ts_human = "(unknown time)"
        size_mb = bp.stat().st_size / (1024 * 1024)
        print(f"  - {bp.name}  [{ts_human}]  ({size_mb:.2f} MB)")
