In [10]:
"""
Flowchart YOLO Project: Cleanup and Reset Notebook

This notebook provides a comprehensive set of utilities to clean up, reset, and prepare your working
environment for new training and inference runs in the Flowchart-to-Text project. It is designed 
to streamline disk space management, avoid residual artifacts between runs, and ensure reproducibility.

Included Cleanup Tasks:
-----------------------
1. **Remove previous training runs**:
   - Deletes the `runs/train_test/` directory (or your YOLO training output directory) together
     with everything inside it.
   - Ensures stale checkpoints, logs, and visualizations don't interfere with fresh experiments.

2. **Clean output visualization folders**:
   - Deletes the `test_visualizations/` folder, where test result images are stored, together
     with its contents.

3. **Delete temporary or system files**:
   - Removes unwanted `.DS_Store` files and `.ipynb_checkpoints/` folders across the Kaggle dataset directory.
   - Helps reduce clutter and potential errors when scanning directories.

4. **Clear the full YOLO `runs/` folder**:
   - Resets all YOLO experiment outputs, including default `runs/detect`, `runs/train`, and `runs/segment`.

5. **Delete all `.pt` YOLO weight files**:
   - Removes all YOLO model weight files (`*.pt`) from the current working directory.
   - Useful for reclaiming disk space and avoiding loading wrong models.

6. **Clear GPU cache (optional but recommended)**:
   - Frees up GPU memory using `torch.cuda.empty_cache()` after heavy training or inference.
   - Helps mitigate CUDA memory errors in long-running notebooks.

7. **Empty the `archive/` directory**:
   - Recursively removes all files, symbolic links, and subfolders inside the `archive` directory.
   - Preserves the folder itself so it can be reused.
   - Useful for managing historical backups or staging data for retraining.

8. **Delete the processing log**:
   - Removes `processing_log.txt` from the current working directory if it exists.

Usage Recommendations:
----------------------
- Run this notebook **before each training cycle** to start clean.
- Customize paths as needed depending on your dataset location or output preferences.
- Ensure proper permissions when working in shared environments or using mounted storage.

Dependencies:
-------------
- Python 3.6+
- `os`, `shutil`, `torch` (for GPU memory operations)

Warning:
--------
All deletion steps are **permanent**. Double-check directory names and file paths before running
to avoid accidental data loss.
"""




# Remove previous training runs
Ultralytics saves training results under `runs/train/` or under a custom path such as `runs/train_test/`.

In [16]:
import shutil
import os

# Wipe the YOLO training output directory (and everything under it) if it is present.
train_runs_dir = "/sfs/ceph/standard/sds_managed_sadewole/DS6050_SP25/group6/kaggle/runs/train_test"

if not os.path.exists(train_runs_dir):
    # Nothing left over from a previous run.
    print("Training runs folder does not exist.")
else:
    shutil.rmtree(train_runs_dir)
    print("Removed: " + train_runs_dir)


Training runs folder does not exist.


# Clean just the output visualizations

In [2]:
import shutil
import os

vis_dir = "test_visualizations"

# The path is relative, so it is resolved against the notebook's working directory.
if not os.path.exists(vis_dir):
    print("Visualization folder not found.")
else:
    # Drop the folder itself along with every image stored in it.
    shutil.rmtree(vis_dir)
    print("Removed: {}".format(vis_dir))

Removed: test_visualizations


# Delete .DS_Store, .ipynb_checkpoints, or other temp files

In [15]:
import os

def delete_temp_files(root_dir):
    """Recursively delete `.DS_Store` files and `.ipynb_checkpoints` folders.

    Walks ``root_dir`` top-down. Every file named ``.DS_Store`` is removed and
    every directory named ``.ipynb_checkpoints`` is removed together with its
    contents. Deletion is best-effort: a file that cannot be removed is
    reported and skipped, directory removal errors are ignored, and the walk
    always continues with the remaining entries.

    Args:
        root_dir: Path of the directory tree to clean.

    Returns:
        None.
    """
    # Imported locally so the function works even when no other cell has
    # imported shutil yet (this cell itself only imports os).
    import shutil

    for root, dirs, files in os.walk(root_dir):
        for file in files:
            if file == ".DS_Store":
                target = os.path.join(root, file)
                try:
                    os.remove(target)
                except OSError as e:
                    # Keep going: one locked file should not abort the sweep.
                    print(f"Failed to delete {target}. Reason: {e}")

        for d in dirs:
            if d == ".ipynb_checkpoints":
                shutil.rmtree(os.path.join(root, d), ignore_errors=True)

        # Prune in place so os.walk does not try to descend into the
        # checkpoint folders that were just removed.
        dirs[:] = [d for d in dirs if d != ".ipynb_checkpoints"]

# Point the cleanup at the Kaggle dataset folder.
kaggle_root = "/sfs/ceph/standard/sds_managed_sadewole/DS6050_SP25/group6/kaggle"
delete_temp_files(kaggle_root)
print("Temp files removed.")


Temp files removed.


# Reset the YOLO runs/ folder entirely

In [14]:
# Imported here so this cell runs on its own instead of relying on an earlier
# cell having imported these modules.
import os
import shutil

yolo_runs_dir = "/sfs/ceph/standard/sds_managed_sadewole/DS6050_SP25/group6/kaggle/runs"

# ignore_errors=True keeps the cell from raising when the tree is missing or
# partly undeletable, so check the result instead of reporting success blindly.
shutil.rmtree(yolo_runs_dir, ignore_errors=True)
if os.path.exists(yolo_runs_dir):
    print(f"Could not fully remove: {yolo_runs_dir}")
else:
    print("All YOLO runs cleared.")

All YOLO runs cleared.


# Delete all YOLO weight (.pt) files in the current directory

In [13]:
#  Delete all YOLO weight (.pt) files in the current directory

import os

# Scan the working directory and delete every entry whose name ends in ".pt".
# Each removal is attempted independently so one failure is only reported.
for file in os.listdir("."):
    if not file.endswith(".pt"):
        continue
    try:
        os.remove(file)
    except Exception as e:
        print(f"Error deleting {file}: {e}")
    else:
        print(f"Deleted: {file}")


Deleted: yolov9s.pt
Deleted: yolo11n.pt


# Clear out GPU Cache

In [17]:

# === Clear GPU cache (optional but helpful after training or large inference runs) ===
import torch

# Only touch the CUDA allocator when a CUDA device is actually usable.
if not torch.cuda.is_available():
    print("No GPU available to clear.")
else:
    torch.cuda.empty_cache()
    print("GPU cache cleared.")


GPU cache cleared.


# Clean Out Archive

In [18]:
import os
import shutil

def empty_directory(directory):
    """Delete everything inside ``directory`` while keeping the directory itself.

    Real sub-directories are removed recursively. Every other entry (regular
    file, symlink, including a symlink that points at a directory, FIFO,
    socket, ...) is unlinked, so a symlink's target is never touched. Deletion
    is best-effort: a failure on one entry is printed and the remaining
    entries are still processed.

    Args:
        directory: Path of an existing directory to empty.

    Returns:
        int: Number of entries that were successfully removed.

    Raises:
        OSError: If ``directory`` cannot be listed (e.g. it does not exist).
    """
    removed = 0
    for entry in os.listdir(directory):
        file_path = os.path.join(directory, entry)
        try:
            if os.path.isdir(file_path) and not os.path.islink(file_path):
                shutil.rmtree(file_path)  # Real directory: remove it and its contents
            else:
                os.unlink(file_path)  # File, symlink or special file
        except OSError as e:
            print(f"Failed to delete {file_path}. Reason: {e}")
        else:
            # Only report entries that were actually deleted.
            print(f"Removed: {file_path}")
            removed += 1
    return removed


archive_dir = "/sfs/ceph/standard/sds_managed_sadewole/DS6050_SP25/group6/kaggle/archive"

# isdir (rather than exists) also guards against the path being a plain file,
# which would make os.listdir raise.
if os.path.isdir(archive_dir):
    empty_directory(archive_dir)
else:
    print("Archive directory does not exist.")


# Clean up the processing_log

In [19]:
import os

log_file_path = 'processing_log.txt'

# Remove the log file when it is present and report which case applied.
if not os.path.exists(log_file_path):
    print("{} does not exist.".format(log_file_path))
else:
    os.remove(log_file_path)
    print("{} has been deleted.".format(log_file_path))


processing_log.txt has been deleted.
