In [1]:
import os
import pickle
import shutil

def load_multiple_pickles(file_path):
    """ Load all objects from a pickle file where multiple objects are stored sequentially. """
    objects = []
    with open(file_path, 'rb') as file:
        while True:
            try:
                objects.append(pickle.load(file))
            except EOFError:
                break
    return objects

def save_multiple_pickles(objects, file_path):
    """ Save multiple objects to a pickle file. """
    with open(file_path, 'wb') as file:
        for obj in objects:
            pickle.dump(obj, file)

def merge_pickle_folders(source_folder1, source_folder2, destination_folder):
    """ Merge pickle files from two source folders into a destination folder. """
    # Create destination folder if it doesn't exist
    os.makedirs(destination_folder, exist_ok=True)

    # Get all pickle files from both folders
    files1 = set(os.listdir(source_folder1))
    files2 = set(os.listdir(source_folder2))

    # Merge common files
    common_files = files1.intersection(files2)
    for file_name in common_files:
        file_path1 = os.path.join(source_folder1, file_name)
        file_path2 = os.path.join(source_folder2, file_name)
        objects1 = load_multiple_pickles(file_path1)
        objects2 = load_multiple_pickles(file_path2)

        # Combine lists of objects from both files
        combined_objects = objects1 + objects2

        save_multiple_pickles(combined_objects, os.path.join(destination_folder, file_name))

    # Copy unique files from folder1
    unique_files1 = files1 - files2
    for file_name in unique_files1:
        shutil.copy2(os.path.join(source_folder1, file_name), os.path.join(destination_folder, file_name))

    # Copy unique files from folder2
    unique_files2 = files2 - files1
    for file_name in unique_files2:
        shutil.copy2(os.path.join(source_folder2, file_name), os.path.join(destination_folder, file_name))

# Example usage
merge_pickle_folders('/home/mcwave/code/automath/atp/datasets/provability/rag_20240613', 
                     '/home/mcwave/code/automath/atp/datasets/provability/rag_20240614', 
                     '/home/mcwave/code/automath/atp/datasets/provability/rag_merged')


  from .autonotebook import tqdm as notebook_tqdm
2024-06-14 21:46:57,629	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


IsADirectoryError: [Errno 21] Is a directory: '/home/mcwave/code/automath/atp/datasets/provability/rag_20240614/.ipynb_checkpoints'

In [4]:
import os
import glob
from datetime import datetime, timedelta

def folder_last_modified(folder_path):
    try:
        latest_time = 0
        for root, dirs, files in os.walk(folder_path):
            for name in files + dirs:
                full_path = os.path.join(root, name)
                mtime = os.path.getmtime(full_path)
                if mtime > latest_time:
                    latest_time = mtime
        
        if latest_time == 0:
            return None

        return datetime.fromtimestamp(latest_time)
    
    except FileNotFoundError:
        return None
    except Exception as e:
        print(f"An error occurred while checking {folder_path}: {str(e)}")
        return None

def remove_old_tmp_folders(age_limit_hours=1):
    # Get all folders matching the pattern /tmp/tmp*
    tmp_folders = glob.glob("/tmp/tmp*")
    
    current_time = datetime.now()
    age_limit = timedelta(hours=age_limit_hours)

    for folder in tmp_folders:
        if not os.path.isdir(folder):
            continue

        last_modified = folder_last_modified(folder)
        
        if last_modified is None:
            continue

        age = current_time - last_modified
        print(f"Age of {folder}: {age}")

        if age >= age_limit:
            try:
                os.system(f"rm -rf {folder}")
                print(f"Removed folder: {folder} (last modified {age.total_seconds() / 3600:.2f} hours ago)")
            except Exception as e:
                print(f"Failed to remove folder {folder}: {str(e)}")
                
        time.sleep(5)

# Call the function to remove old tmp folders
remove_old_tmp_folders()

Age of /tmp/tmp4o5x1ip1: 5:17:18.888225
Removed folder: /tmp/tmp4o5x1ip1 (last modified 5.29 hours ago)
Age of /tmp/tmp5ri947h7: 11:42:13.559940
Removed folder: /tmp/tmp5ri947h7 (last modified 11.70 hours ago)
Age of /tmp/tmpdi4tf9gr: 4:57:58.405244
Removed folder: /tmp/tmpdi4tf9gr (last modified 4.97 hours ago)
Age of /tmp/tmpowm0o1sc: 0:32:00.586002
Age of /tmp/tmpse3vrx7x: 1:01:00.289702
Removed folder: /tmp/tmpse3vrx7x (last modified 1.02 hours ago)
Age of /tmp/tmpcgmkl55p: 5:15:35.161869
Removed folder: /tmp/tmpcgmkl55p (last modified 5.26 hours ago)
Age of /tmp/tmptmz7lvbx: 1:35:25.091636
Removed folder: /tmp/tmptmz7lvbx (last modified 1.59 hours ago)
Age of /tmp/tmp6oze0xrd: 0:08:01.542896
Age of /tmp/tmpid0zg570: 0:03:31.002765
Age of /tmp/tmpqui7ubgu: 0:44:08.200645
Age of /tmp/tmpml4uemun: 0:02:05.877778
Age of /tmp/tmp67i5ls9y: 1:42:41.516338
Removed folder: /tmp/tmp67i5ls9y (last modified 1.71 hours ago)
Age of /tmp/tmp3liinico: 11:48:19.834757
Removed folder: /tmp/tmp3liin

Age of /tmp/tmpwgevg_el: 5:20:52.025044
Removed folder: /tmp/tmpwgevg_el (last modified 5.35 hours ago)
Age of /tmp/tmpw23yzk0i: 0:23:07.154941
Age of /tmp/tmpyo1vhy_1: 5:14:35.668515
Removed folder: /tmp/tmpyo1vhy_1 (last modified 5.24 hours ago)
Age of /tmp/tmpxmxapub9: 1:20:50.664010
Removed folder: /tmp/tmpxmxapub9 (last modified 1.35 hours ago)
Age of /tmp/tmpg6l53iqz: 1:38:27.019420
Removed folder: /tmp/tmpg6l53iqz (last modified 1.64 hours ago)
Age of /tmp/tmpusudhxy3: 5:22:19.451009
Removed folder: /tmp/tmpusudhxy3 (last modified 5.37 hours ago)
Age of /tmp/tmplxczni5_: 0:31:30.578004
Age of /tmp/tmply550aqz: 5:28:06.078701
Removed folder: /tmp/tmply550aqz (last modified 5.47 hours ago)
Age of /tmp/tmp0936bgcx: -1 day, 23:51:49.303900
Age of /tmp/tmp9mo6h0tv: 0:07:33.114467
Age of /tmp/tmpgyaye084: 4:59:56.003187
Removed folder: /tmp/tmpgyaye084 (last modified 5.00 hours ago)
Age of /tmp/tmpyt1foap0: -1 day, 23:49:17.654010
Age of /tmp/tmpa6hujmu6: 7:51:12.475913
Removed folder

In [None]:
import os
import psutil
import time

# def kill_long_running_lean_processes():
#     for proc in psutil.process_iter(['name', 'cpu_times']):
#         try:
#             # Check if the process name is "lean"
#             if proc.info['name'] == "lean":
#                 # Get CPU times for the process
#                 cpu_times = proc.info['cpu_times']
                
#                 # Calculate total CPU time in minutes
#                 total_cpu_time = (cpu_times.user + cpu_times.system) / 60
                
#                 # If the process has used more than 30 minutes of CPU time, terminate it
#                 if total_cpu_time > 30:
#                     proc.terminate()
#                     print(f"Terminated process {proc.pid} (CPU time: {total_cpu_time:.2f} minutes)")
        
#         except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
#             pass


def kill_long_running_lean_processes():
    # Get the current time
    current_time = time.time()
    
    # Iterate through all running processes
    for proc in psutil.process_iter(['name', 'create_time']):
        try:
            # Check if the process name is "lean"
            if proc.info['name'] == "lean":
                # Calculate the process running time in minutes
                runtime_minutes = (current_time - proc.info['create_time']) / 60
                
                # If the process has been running for more than 30 minutes, terminate it
                if runtime_minutes > 30:
                    proc.terminate()
                    print(f"Terminated process {proc.pid} (runtime: {runtime_minutes:.2f} minutes)")
        
        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            pass

# Call the function to kill long-running lean processes
kill_long_running_lean_processes()

def kill_high_memory_processes(memory_threshold_gb=32):
    memory_threshold_bytes = memory_threshold_gb * 1024 * 1024 * 1024
    
    def terminate_process_tree(pid):
        try:
            parent = psutil.Process(pid)
            children = parent.children(recursive=True)
            for child in children:
                child.terminate()
            parent.terminate()
        except psutil.NoSuchProcess:
            pass
    
    for proc in psutil.process_iter(['name', 'memory_info']):
        try:
            mem_info = proc.info['memory_info']
            
            if mem_info.rss > memory_threshold_bytes:
                proc_name = proc.info['name']
                mem_usage_gb = mem_info.rss / (1024 * 1024 * 1024)
                
                terminate_process_tree(proc.pid)
                print(f"Terminated process tree of {proc.pid} ({proc_name}) using {mem_usage_gb:.2f} GB of memory")
        
        except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
            pass

def monitor_and_kill_bad_processes():
    print(f"Monitoring processes...")
    num_seconds = 0
    try:
        while True:
            kill_long_running_lean_processes()
            kill_high_memory_processes()
            time.sleep(1)
            if num_seconds > 0 and num_seconds % 100 == 0:
                print(num_seconds, "passed")
            num_seconds += 1
    except KeyboardInterrupt:
        print("\nMonitoring stopped.")
        
monitor_and_kill_bad_processes()

Monitoring processes...
100 passed
200 passed
300 passed
Terminated process 163828 (runtime: 30.01 minutes)
400 passed
500 passed
Terminated process 164429 (runtime: 30.02 minutes)
600 passed
Terminated process 164509 (runtime: 30.01 minutes)
700 passed
800 passed
900 passed
1000 passed
1100 passed
1200 passed
1300 passed
1400 passed
1500 passed
1600 passed
1700 passed
1800 passed
1900 passed
Terminated process 168342 (runtime: 30.00 minutes)
2000 passed
Terminated process 168776 (runtime: 30.00 minutes)
2100 passed
2200 passed
2300 passed
2400 passed
2500 passed
2600 passed
2700 passed
2800 passed
2900 passed
3000 passed
3100 passed
3200 passed
Terminated process 171454 (runtime: 30.02 minutes)
3300 passed
Terminated process 171632 (runtime: 30.01 minutes)
3400 passed
3500 passed
3600 passed
Terminated process 172099 (runtime: 30.01 minutes)
3700 passed
3800 passed
3900 passed
4000 passed
4100 passed
4200 passed
4300 passed
4400 passed
Terminated process 173978 (runtime: 30.02 minutes