In [3]:
# cleanup_intermediate_files.py
# Standalone cleanup script to remove intermediate/temp files
# generated during SVM training. 
# Safe: leaves original raw dataset CSVs intact.

import os
import glob
import tempfile
import shutil

# ------------------------------
# Config
# ------------------------------
# Root directory that the feature folders live under.
base_path = "./"

# Feature level -> name of the sub-folder (relative to base_path).
folders = dict(
    packet="packet_features",
    uniflow="uniflow_features",
    biflow="biflow_features",
)

# Glob patterns for files considered "intermediate".
INTERMEDIATE_PATTERNS = [
    # scratch files
    "*.tmp",
    "*.temp",
    "tmp*",
    # partial downloads, caches, backups
    "*.part",
    "*.cache",
    "*.bak",
    # serialized NumPy arrays
    "*.npy",
    "*.npz",
]

# Raw dataset files that must never be deleted: one CSV per capture,
# at each feature level (packet-level files carry no prefix).
_LEVEL_PREFIXES = ("", "uniflow_", "biflow_")
_CAPTURE_NAMES = ("normal", "sparta", "scan_A", "mqtt_bruteforce", "scan_sU")
KEEP_FILES = {
    f"{prefix}{capture}.csv"
    for prefix in _LEVEL_PREFIXES
    for capture in _CAPTURE_NAMES
}

# ------------------------------
# Helpers
# ------------------------------
def safe_remove(path):
    """Delete a single file, reporting the outcome instead of raising.

    Args:
        path: Path of the file to delete.

    Returns:
        True if the file was deleted; False if it was already absent or
        could not be deleted (the failure is printed in that case).
    """
    try:
        # Attempt the removal directly rather than checking os.path.exists()
        # first: exists() is False for dangling symlinks (so they were never
        # cleaned up) and a pre-check leaves a check-then-delete race.
        os.remove(path)
    except FileNotFoundError:
        # Already gone -- nothing to do and nothing worth reporting.
        return False
    except OSError as e:
        # File locked by another process, permission denied, path is a
        # directory, etc. Cleanup is best-effort, so report and carry on.
        print(f"[CLEANUP] Could not remove {path}: {e}")
        return False
    print(f"[CLEANUP] Removed: {path}")
    return True

def cleanup_feature_folders():
    """Delete intermediate files from every configured feature folder.

    Each folder named in ``folders`` is resolved against ``base_path``;
    folders that do not exist are skipped. Inside an existing folder, every
    file matching one of ``INTERMEDIATE_PATTERNS`` is passed to
    ``safe_remove``, except files whose basename is listed in ``KEEP_FILES``.
    """
    print("\n[STEP] Cleaning feature folders...")
    for folder in folders.values():
        fpath = os.path.join(base_path, folder)
        if not os.path.isdir(fpath):
            continue
        # The patterns overlap (e.g. "tmp1.tmp" matches both "*.tmp" and
        # "tmp*"). Collect all matches first; dict.fromkeys de-duplicates
        # while preserving discovery order, so each path is attempted -- and
        # any failure reported -- exactly once.
        candidates = dict.fromkeys(
            match
            for pat in INTERMEDIATE_PATTERNS
            for match in glob.glob(os.path.join(fpath, pat))
        )
        for file in candidates:
            if os.path.basename(file) in KEEP_FILES:
                continue  # skip raw dataset files
            if os.path.isdir(file):
                # "tmp*" also matches sub-directories, which os.remove()
                # cannot delete; skip them instead of logging a failure.
                continue
            safe_remove(file)

def cleanup_tempdir():
    """Delete files matching ``INTERMEDIATE_PATTERNS`` from the system temp dir.

    The directory is the one returned by ``tempfile.gettempdir()``. Each
    matching file is passed to ``safe_remove``; directories are left alone.
    """
    # NOTE(review): the patterns are applied to the whole shared temp
    # directory, so files created by other programs match as well. Consider
    # narrowing this to a training-specific sub-directory or file prefix.
    print("\n[STEP] Cleaning system temp directory...")
    tmpdir = tempfile.gettempdir()
    # The patterns overlap (e.g. "tmp1.tmp" matches both "*.tmp" and "tmp*").
    # Collect all matches first; dict.fromkeys de-duplicates while preserving
    # discovery order, so a file that cannot be removed is reported once.
    candidates = dict.fromkeys(
        match
        for pat in INTERMEDIATE_PATTERNS
        for match in glob.glob(os.path.join(tmpdir, pat))
    )
    for file in candidates:
        if os.path.isdir(file):
            # "tmp*" also matches directories, which os.remove() cannot
            # delete; skip them instead of logging a failure for each one.
            continue
        safe_remove(file)

def cleanup_misc():
    """Remove the ``__pycache__`` directory under ``base_path``, if present.

    A failure to delete the directory is printed rather than raised.
    """
    print("\n[STEP] Cleaning misc training artifacts...")
    pycache = os.path.join(base_path, "__pycache__")
    if not os.path.isdir(pycache):
        # Nothing to clean up.
        return
    try:
        shutil.rmtree(pycache)
        print(f"[CLEANUP] Removed directory: {pycache}")
    except Exception as err:
        print(f"[CLEANUP] Could not remove {pycache}: {err}")

# ------------------------------
# Main
# ------------------------------
if __name__ == "__main__":
    print("[START CLEANUP SCRIPT]")
    # Run the cleanup stages in order: feature folders, system temp
    # directory, then miscellaneous artifacts.
    for run_step in (cleanup_feature_folders, cleanup_tempdir, cleanup_misc):
        run_step()
    print("\n[CLEANUP] Completed successfully.")

[START CLEANUP SCRIPT]

[STEP] Cleaning feature folders...

[STEP] Cleaning system temp directory...
[CLEANUP] Could not remove C:\Users\VALMIK~1\AppData\Local\Temp\25e150fc-1947-4a6f-a4d7-e44453d6a15b.tmp: [WinError 32] The process cannot access the file because it is being used by another process: 'C:\\Users\\VALMIK~1\\AppData\\Local\\Temp\\25e150fc-1947-4a6f-a4d7-e44453d6a15b.tmp'
[CLEANUP] Could not remove C:\Users\VALMIK~1\AppData\Local\Temp\2a07e942-aff7-4960-9e6e-0343e024160d.tmp: [WinError 32] The process cannot access the file because it is being used by another process: 'C:\\Users\\VALMIK~1\\AppData\\Local\\Temp\\2a07e942-aff7-4960-9e6e-0343e024160d.tmp'
[CLEANUP] Could not remove C:\Users\VALMIK~1\AppData\Local\Temp\4d0981fe-0d47-479e-ab7c-d9f9258e1623.tmp: [WinError 32] The process cannot access the file because it is being used by another process: 'C:\\Users\\VALMIK~1\\AppData\\Local\\Temp\\4d0981fe-0d47-479e-ab7c-d9f9258e1623.tmp'
[CLEANUP] Could not remove C:\Users\VALMI