In [None]:
#! /usr/bin/env python3
import multiCMD
import TSVZ
import inotify_simple
import Tee_Logger
import argparse
import xxhash

from collections import deque
import time
from collections import namedtuple
from collections import OrderedDict
import os
import threading
import sys
import signal
import resource
import datetime
import unicodedata
import re
from shutil import copystat

# For the library wrapping inotify, I tried
# wtr-watcher, watchdog and watchfiles.
# None of them seemed to work well with the most basic tests.
# I need it to:
# 1. notice changes recursively
# 2. notice changes in the folders
# 3. not incorrectly group move events
# 4. recognize mkdir -p properly
# 5. notice umount events and notify properly
# 6. notice mount events and notify properly
# 7. be compatible with symlink events

# Number of trailing bytes hash_file() hashes when full_hash is False (64 KiB).
# A value <= 0 makes hash_file() skip hashing and return ''.
HASH_SIZE = 1<<16
# NOTE(review): stored as a float rather than a version string - confirm this is intended.
__version__ = 0.01

## WIP

# @dataclass
# class ChangeEvent:
# 	monotonic_time:float
# 	# type can be file or dir.
# 	type:str
# 	# if type is dir, event can be create
# 	#  ( need to recursively copy everything as inotifywait does not capture mkdir -p properly for all sub folders),
# 	#  ,delete, attrib, move
# 	# if type is file, event can be delete, change, attrib, move
# 	# move detection will be handled by the monitor if moved_from / moved_to events were captured.
# 	# it will use a TSV storing every path's inode number and hash to detect moves.
# 	event:str
# 	path:str
# 	moved_from:str = None


# If is_dir is set, event can be create
#  (need to recursively copy everything as inotifywait does not capture mkdir -p properly for all sub folders),
#  delete, attrib, move.
# If is_dir is not set, event can be delete, change, attrib, move.
# Move detection will be handled by the monitor if moved_from / moved_to events were captured.
# It will use a TSV storing every path's inode number and hash to detect moves.
# Field names of the ChangeEvent namedtuple.
CHANGED_EVENT_HEADER = ['monotonic_time', 'is_dir', 'event', 'path','moved_from']
# Field names of the BackupEntryValues namedtuple (the values of the backup-entries dict, keyed by path).
BACKUP_ENTRY_VALUES_HEADER = ['iso_time','event','source_path']
# Same fields as BACKUP_ENTRY_VALUES_HEADER with the key column 'at_path' in front.
# presumably the column layout of the on-disk backup journal - TODO confirm
BACKUP_JOURNAL_HEADER = ['at_path','iso_time','event','source_path']
# Human-readable column titles; presumably the header row of the rule TSV file,
# listed in the same order as REVERB_RULE_HEADER - TODO confirm
REVERB_RULE_TSV_HEADER = ['Job Name (Unique Key)', 'Path Monitoring', 'Monitoring File System Signature', 
		'Minium snapshot time', 'Vault Path', 'Keep 1 Compelete Backup', 
		'Only Sync Attributes (permissions)', 'Keep N versions', 'Backup Size Limit', 
		'Vault File System Signature']
# Field names of the ReverbRule namedtuple.
REVERB_RULE_HEADER = ['job_name', 'mon_path', 'mon_fs_signature', 
		'min_shapshot_time', 'vault_path', 'keep_one_complete_backup', 
		'only_sync_attributes', 'keep_n_versions', 'backup_size_limit',
		'vault_fs_signature']
# Field names of the VaultEntry namedtuple (one per version folder, see get_vault_info()).
VAULT_ENTRY_HEADER = ['version_number','path','timestamp','size','inode']
# Field names of the VaultInfo namedtuple; same order as the tuple returned by get_vault_info().
VAULT_INFO_HEADER = ['vault_info_dict', 'vault_size' , 'vault_inodes']
# strftime / strptime format of the timestamp embedded in version folder names.
VAULT_TIMESTAMP_FORMAT = '%Y-%m-%d_%H-%M-%S_%z'
# Field names of the TrackingFilesFolders namedtuple.
TRACKING_FILES_FOLDERS_HEADER = ['files','folders']

ChangeEvent = namedtuple('ChangeEvent', CHANGED_EVENT_HEADER)
BackupEntryValues = namedtuple('BackupEntryValues', BACKUP_ENTRY_VALUES_HEADER)
ReverbRule = namedtuple('ReverbRule', REVERB_RULE_HEADER)
VaultEntry = namedtuple('VaultEntry', VAULT_ENTRY_HEADER)
VaultInfo = namedtuple('VaultInfo', VAULT_INFO_HEADER)
TrackingFilesFolders = namedtuple('TrackingFilesFolders', TRACKING_FILES_FOLDERS_HEADER)


# By default, set the max backup threads to 2 * the number of cores.
# NOTE(review): os.cpu_count() may return None, in which case this multiplication raises - confirm acceptable.
BACKUP_SEMAPHORE = threading.Semaphore(2*os.cpu_count())

# presumably the fallback values for the matching ReverbRule fields - TODO confirm against the rule loader
DEFAULT_SNAPSHOT_DELAY = 60
DEFAULT_KEEP_ONE_COMPLETE_BACKUP = False
DEFAULT_ONLY_SYNC_ATTRIBUTES = False
DEFAULT_KEEP_N_VERSIONS = 30
DEFAULT_BACKUP_SIZE_LIMIT = '5%'

# ARGUMENT_LIMIT caps how many paths get_backup_size_inode() hands to a single du call.
# presumably it budgets about 2048 bytes of command line per path - TODO confirm
ARG_MAX = os.sysconf('SC_ARG_MAX')
ARGUMENT_LIMIT = (ARG_MAX - 4096) // 2048

# presumably bounds a dict of pending inotify move cookies - not used in the visible part of the file
COOKIE_DICT_MAX_SIZE = 16384
COOKIE_VALUE = namedtuple('COOKIE_VALUE',['wd','path','isDir'])

# Event created in the set state; its consumers are outside the visible part of the file.
GREEN_LIGHT = threading.Event()
GREEN_LIGHT.set()

# Enables the timing prints and verbose tl.teeprint / tl.teeerror output throughout this file.
DEBUG = True

# Shared logger used as tl.teeprint / tl.teeerror by the functions below.
tl = Tee_Logger.teeLogger(systemLogFileDir='/dev/null', programName='reverberator', compressLogAfterMonths=0, deleteLogAfterYears=0, suppressPrintout=False,noLog=True)


In [21]:
def get_path_size(*path:str):
	"""
	Return the combined disk usage, in bytes, of one or more paths.

	Runs ``du --block-size=1 -csP -- <path>...`` through multiCMD and reads the
	number in front of the first tab on the last output line (the grand total
	printed because of ``-c``).

	Args:
		*path (str): Paths to measure.

	Returns:
		int: Total size in bytes. 0 when no path is given or when the last
			output line does not start with a number.
	"""
	if not path:
		# du without an operand measures the current directory, which is never what a caller wants here
		return 0
	if DEBUG:
		startTime = time.perf_counter()
	# '--' stops option parsing so a path that starts with '-' is not read as a flag
	rtn = multiCMD.run_command(['du','--block-size=1','-csP','--',*path],quiet=True)
	if DEBUG:
		print(f'get_path_size took {time.perf_counter()-startTime} seconds')
	if rtn and rtn[-1]:
		total_str = rtn[-1].partition('\t')[0]
		if total_str.isdigit():
			return int(total_str)
	return 0

In [22]:
def format_bytes(size, use_1024_bytes=None, to_int=False, to_str=False,str_format='.2f'):
	"""
	Format the size in bytes to a human-readable format or vice versa.
	From hpcp: https://github.com/yufei-pan/hpcp

	A str input (or to_int=True) is converted to an integer number of bytes;
	an int / float input is converted to a string such as '1.46 Ki'.

	Args:
		size (int, float or str): The size in bytes or a string representation of the size.
		use_1024_bytes (bool, optional): Whether to use 1024 bytes as the base for conversion.
			If None, parsing uses 1000 unless the unit contains an 'i' (KiB, MiB, ...),
			and formatting uses 1024. Default is None.
		to_int (bool, optional): Whether to convert the size to an integer. Default is False.
		to_str (bool, optional): Whether to convert the size to a string representation. Default is False.
		str_format (str, optional): The format string to use when converting the size to a string. Default is '.2f'.

	Returns:
		int or str: The formatted size based on the provided arguments.
			0 is returned when a string cannot be parsed or the value cannot be converted.

	Examples:
		>>> format_bytes(1500)
		'1.46 Ki'
		>>> format_bytes(1500, use_1024_bytes=False)
		'1.50 K'
		>>> format_bytes('1.5 GiB', to_int=True)
		1610612736
	"""
	if to_int or isinstance(size, str):
		if isinstance(size, int):
			return size
		if isinstance(size, str):
			# Use regular expression to split the numeric part from the unit, handling optional whitespace
			match = re.match(r"(\d+(\.\d+)?)\s*([a-zA-Z]*)", size)
			if not match:
				print("Invalid size format. Expected format: 'number [unit]', e.g., '1.5 GiB' or '1.5GiB'")
				print(f"Got: {size}")
				return 0
			number, _, unit = match.groups()
			number = float(number)
			unit  = unit.strip().lower().rstrip('b')
			if unit.endswith('i'):
				# this means we treat the unit as 1024 bytes if it ends with 'i'
				use_1024_bytes = True
			elif use_1024_bytes is None:
				use_1024_bytes = False
			unit  = unit.rstrip('i')
			power = 2**10 if use_1024_bytes else 10**3
			unit_labels = {'': 0, 'k': 1, 'm': 2, 'g': 3, 't': 4, 'p': 5}
			if unit not in unit_labels:
				print(f"Invalid unit '{unit}'. Expected one of {list(unit_labels.keys())}")
				return 0
			# Calculate the bytes
			return int(number * (power ** unit_labels[unit]))
		try:
			return int(size)
		except Exception:
			return 0
	elif to_str or isinstance(size, (int, float)):
		# size is in bytes
		if use_1024_bytes or use_1024_bytes is None:
			power = 2**10
			power_labels = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi')
		else:
			power = 10**3
			power_labels = ('', 'K', 'M', 'G', 'T', 'P')
		n = 0
		# '>=' so that exactly one unit (e.g. 1024) is scaled; stop at the largest
		# known label so huge values stay in 'P'/'Pi' instead of raising KeyError
		while size >= power and n < len(power_labels) - 1:
			size /= power
			n += 1
		return f"{size:{str_format}}{' '}{power_labels[n]}"
	else:
		try:
			return format_bytes(float(size), use_1024_bytes)
		except Exception as e:
			import traceback
			print(f"Error: {e}")
			print(traceback.format_exc())
			print(f"Invalid size: {size}")
		return 0


In [23]:
def get_path_inodes(*path:str):
	"""
	Return the combined inode count of one or more paths.

	Runs ``du --inodes -cs -- <path>...`` through multiCMD and reads the number
	in front of the first tab on the last output line (the grand total printed
	because of ``-c``).

	Args:
		*path (str): Paths to count.

	Returns:
		int: Total number of inodes. 0 when no path is given or when the last
			output line does not start with a number.
	"""
	if not path:
		# du without an operand counts the current directory, which is never what a caller wants here
		return 0
	if DEBUG:
		startTime = time.perf_counter()
	# '--' stops option parsing so a path that starts with '-' is not read as a flag
	rtn = multiCMD.run_command(['du','--inodes','-cs','--',*path],quiet=True)
	if DEBUG:
		print(f'get_path_inodes of {[*path]} took {time.perf_counter()-startTime} seconds')
	if rtn and rtn[-1]:
		total_str = rtn[-1].partition('\t')[0]
		if total_str.isdigit():
			return int(total_str)
	return 0


In [24]:
def get_vault_info(job_vault_path:str,recalculate:bool=False) -> tuple:
	"""
	Scan a job vault and describe every version folder found in it.

	Version folders are expected to be named
	V{version}--{timestamp}--{size of this backup}-{inodes of this backup}_ino,
	ex. V0--2021-01-01_00-00-00_-0800--1.3_GiB-4.2K_ino, with the timestamp in
	VAULT_TIMESTAMP_FORMAT. Directories that do not start with 'V', contain no
	'--' or have a non-numeric version are ignored.

	Args:
		job_vault_path (str): Directory holding the version folders of one job.
		recalculate (bool): When False, size and inode counts are parsed from the
			folder names. When True, they are measured with get_path_size() and
			get_path_inodes(), and a folder whose name does not match the measured
			values is renamed.

	Returns:
		tuple: (vault_info_dict, vault_size, vault_inodes). vault_info_dict is an
			OrderedDict of version number -> VaultEntry sorted by version number
			(a plain empty dict when job_vault_path does not exist); the other two
			are the summed sizes and inode counts.
	"""
	global DEBUG
	global tl
	global VAULT_TIMESTAMP_FORMAT
	if not os.path.exists(job_vault_path):
		tl.teeerror(f'Job vault path {job_vault_path} does not exist')
		return {}, 0, 0
	vault_info_dict = {}
	vault_size = 0
	vault_inodes = 0
	try:
		# Snapshot the listing before processing: recalculate mode renames folders,
		# and renaming inside a directory that is still being iterated may return
		# an entry twice or skip one. The with-block also closes the directory handle.
		with os.scandir(job_vault_path) as scan:
			entries = list(scan)
		for entry in entries:
			if not (entry.is_dir() and entry.name.startswith('V') and '--' in entry.name):
				continue
			version_number_str = entry.name.lstrip('V').partition('--')[0]
			if not version_number_str.isdigit():
				continue
			version_number = int(version_number_str)
			# the timestamp sits between the first and the last '--'
			timestamp_str = entry.name.partition('--')[2].rpartition('--')[0]
			try:
				if not recalculate:
					entry_size_inode_str = entry.name.rpartition('--')[2]
					entry_size_str = entry_size_inode_str.partition('-')[0].replace('_','')
					entry_size = format_bytes(entry_size_str,to_int=True)
					entry_inode_str = entry_size_inode_str.rpartition('-')[2].replace('_ino','')
					entry_inode = format_bytes(entry_inode_str,to_int=True)
					vault_size += entry_size
					vault_inodes += entry_inode
					entry_timestamp = datetime.datetime.strptime(timestamp_str,VAULT_TIMESTAMP_FORMAT).timestamp()
					#VAULT_ENTRY_HEADER = ['version_number','path','timestamp','size','inode']
					vault_info_dict[version_number] = VaultEntry(version_number,entry.path,entry_timestamp,entry_size,entry_inode)
				else:
					# we will recalculate the size and inodes for the folders and rename the folders accordingly
					entry_size = get_path_size(entry.path)
					entry_inode = get_path_inodes(entry.path)
					vault_size += entry_size
					vault_inodes += entry_inode
					backup_size_str = format_bytes(entry_size,use_1024_bytes=True,to_str=True).replace(' ','_')
					backup_inode_str = format_bytes(entry_inode,use_1024_bytes=False,to_str=True).replace(' ','')
					entry_timestamp = datetime.datetime.strptime(timestamp_str,VAULT_TIMESTAMP_FORMAT).timestamp()
					new_entry_name = f'V{version_number}--{datetime.datetime.fromtimestamp(entry_timestamp).astimezone().strftime(VAULT_TIMESTAMP_FORMAT)}--{backup_size_str}B-{backup_inode_str}_ino'
					if entry.name != new_entry_name:
						tl.teeprint(f'Renaming {entry.name} to {new_entry_name}')
						try:
							new_entry_path = os.path.join(job_vault_path,new_entry_name)
							os.rename(entry.path,new_entry_path)
							vault_info_dict[version_number] = VaultEntry(version_number,new_entry_path,entry_timestamp,entry_size,entry_inode)
						except Exception as e:
							tl.teeerror(f'Error renaming {entry.name} to {new_entry_name}: {e}')
							continue
					else:
						vault_info_dict[version_number] = VaultEntry(version_number,entry.path,entry_timestamp,entry_size,entry_inode)
			except Exception as e:
				# a malformed folder must not abort the scan, but it should not vanish silently either
				if DEBUG:
					import traceback
					tl.teeerror(f'Error processing {entry.name}: {traceback.format_exc()}')
				else:
					tl.teeerror(f'Error processing {entry.name}: {e}')
	except PermissionError:
		tl.teeerror(f'Permission error while scanning {job_vault_path}')
	except OSError as e:
		# e.g. the path is not a directory or disappeared after the existence check
		tl.teeerror(f'Error while scanning {job_vault_path}: {e}')
	return OrderedDict(sorted(vault_info_dict.items())), vault_size, vault_inodes


In [25]:
# Scratch cell: scan the test job vault and unpack the per-version entries plus the size and inode totals.
vault_info_dict, vault_size , vault_inodes= get_vault_info('test_vault/test')

In [26]:
vault_info_dict

OrderedDict([(0,
              VaultEntry(version_number=0, path='test_vault/test/V0--2025-04-01_20-46-14_-0700--20.00_KiB-9.00_ino', timestamp=1743565574.0, size=20480, inode=9)),
             (1,
              VaultEntry(version_number=1, path='test_vault/test/V1--2025-04-01_20-46-52_-0700--24.00_KiB-9.00_ino', timestamp=1743565612.0, size=24576, inode=9)),
             (2,
              VaultEntry(version_number=2, path='test_vault/test/V2--2025-04-01_20-48-34_-0700--20.00_KiB-8.00_ino', timestamp=1743565714.0, size=20480, inode=8)),
             (3,
              VaultEntry(version_number=3, path='test_vault/test/V3--2025-04-01_20-48-57_-0700--20.00_KiB-8.00_ino', timestamp=1743565737.0, size=20480, inode=8))])

In [27]:
# Scratch cell: the dict is sorted by version number, so the last key is the newest version.
vault_info_dict[next(reversed(vault_info_dict))]

VaultEntry(version_number=3, path='test_vault/test/V3--2025-04-01_20-48-57_-0700--20.00_KiB-8.00_ino', timestamp=1743565737.0, size=20480, inode=8)

In [28]:
def cp_af_copy_path(source_path:str,dest_path:str,mcae:multiCMD.AsyncExecutor = ...):
	"""
	Copy source_path to dest_path with ``cp -af --reflink=auto --sparse=always``.

	Args:
		source_path (str): Path to copy from.
		dest_path (str): Path to copy to.
		mcae (multiCMD.AsyncExecutor, optional): Executor to submit the copy to.
			When left at its default (...), the copy is run directly through
			multiCMD.run_command with wait_for_return=True.

	Returns:
		Whatever the chosen run_command call returns (requested as return code
		only in the direct case).
	"""
	global DEBUG
	copy_command = ['cp','-af','--reflink=auto','--sparse=always',source_path,dest_path]
	if mcae is not ...:
		# an executor was supplied: hand the command over to it
		return mcae.run_command(copy_command)
	return multiCMD.run_command(copy_command,quiet=not DEBUG,return_code_only=True,wait_for_return=True)


In [29]:
def get_all_files_and_folders(path):
	"""
	Recursively list everything below path.

	Symlinks are not followed, so a symlink to a directory is reported as a file.
	A directory that raises PermissionError contributes whatever was collected
	before the error and is otherwise skipped.

	Args:
		path (str): Directory to walk.

	Returns:
		tuple: (files, folders); folder paths carry a trailing '/'.
	"""
	collected_files = []
	collected_folders = []

	def _walk(directory):
		# depth-first walk appending into the shared result lists
		try:
			with os.scandir(directory) as listing:
				for item in listing:
					if not item.is_dir(follow_symlinks=False):
						collected_files.append(item.path)
						continue
					collected_folders.append(item.path+'/')
					_walk(item.path)
		except PermissionError:
			pass

	_walk(path)
	return collected_files, collected_folders


In [30]:
def get_backup_size_inode(backup_entries:dict,only_sync_attributes:bool): 
	'''
	Estimate how much size and how many inodes a set of backup entries accounts for.

	Paths are measured in batches through get_path_size() / get_path_inodes();
	a batch is flushed as soon as it holds more than ARGUMENT_LIMIT paths.

	Parameters:
		backup_entries (dict): path -> BackupEntryValues; only the event field is read.
			Events are create, modify, attrib, move, delete.
		only_sync_attributes (bool): When True, attrib events do not count towards size.

	Returns:
		tuple: (total_size, total_inode_change) compared to the previous backup
	'''
	size_total = 0
	inode_delta = 0
	size_batch = []
	inode_add_batch = []
	inode_del_batch = []
	for entry_path in backup_entries:
		event = backup_entries[entry_path].event
		if event == 'create':
			size_batch.append(entry_path)
			inode_add_batch.append(entry_path)
		elif event == 'modify' or (event == 'attrib' and not only_sync_attributes):
			# attrib / modify / move events do not change the inode count
			size_batch.append(entry_path)
		elif event == 'delete':
			# size will not decrease as we are not backing up the full content, but inodes will decrease as we are using symlinks
			inode_del_batch.append(entry_path)
		# flush any batch that has grown past the command line budget
		if len(size_batch) > ARGUMENT_LIMIT:
			size_total += get_path_size(*size_batch)
			size_batch = []
		if len(inode_add_batch) > ARGUMENT_LIMIT:
			inode_delta += get_path_inodes(*inode_add_batch)
			inode_add_batch = []
		if len(inode_del_batch) > ARGUMENT_LIMIT:
			inode_delta -= get_path_inodes(*inode_del_batch)
			inode_del_batch = []
	# measure whatever is left over in each batch
	if size_batch:
		size_total += get_path_size(*size_batch)
	if inode_add_batch:
		inode_delta += get_path_inodes(*inode_add_batch)
	if inode_del_batch:
		inode_delta -= get_path_inodes(*inode_del_batch)
	return size_total, inode_delta


In [31]:
def get_backup_limits_from_str(backup_size_limit:str,vault_fs_size:int,vault_fs_inode:int) -> tuple:
	'''
	This function gets the backup limits from a string.
	Use 0 for infinity, % numbers refer to disk usage %, use leading i to represent inodes, use comma to separate multiple rules
	Bigger ones take precedence. Whitespace around each rule is ignored and
	rules that cannot be parsed are skipped.

	Parameters:
		backup_size_limit (str): The backup size limit string, ex. '10%,i20%,2000'
		vault_fs_size (int): Total size of the vault file system, the base for size % rules
		vault_fs_inode (int): Total inodes of the vault file system, the base for inode % rules

	Returns:
		tuple: The backup size and inode limits. (0, 0) for an empty string or '0'.
			A limit stays -1 when no rule of that kind was given or parsed.
	'''
	# inputs that make a rule unparsable; such rules are skipped on purpose
	parse_errors = (ValueError, TypeError, OverflowError)
	backup_size_limit = backup_size_limit.strip().lower()
	if not backup_size_limit or backup_size_limit == '0':
		return 0, 0
	rtn_size_limit = -1
	rtn_inode_limit = -1
	for limit in backup_size_limit.split(','):
		# tolerate 'a, b' style lists: without this ' i20%' is not recognised as an inode rule
		limit = limit.strip()
		size_limit = -1
		inode_limit = -1
		if limit.endswith('%'):
			limit = limit.rstrip('%')
			if limit.startswith('i'):
				try:
					inode_limit = int(float(limit.lstrip('i')) * vault_fs_inode // 100)
					if vault_fs_inode and inode_limit == 0:
						# if vault has inodes but we cannot use any, set inode limit to 1
						inode_limit = 1
				except parse_errors:
					pass
			else:
				try:
					size_limit = int(float(limit) * vault_fs_size // 100)
					if vault_fs_size and size_limit == 0:
						# same rounding guard as for inodes
						size_limit = 1
				except parse_errors:
					pass
		elif limit.startswith('i'):
			try:
				inode_limit = int(limit.lstrip('i'))
			except parse_errors:
				pass
		else:
			try:
				size_limit = int(limit)
			except parse_errors:
				pass
		if size_limit > rtn_size_limit:
			rtn_size_limit = size_limit
		if inode_limit > rtn_inode_limit:
			rtn_inode_limit = inode_limit
	return rtn_size_limit, rtn_inode_limit
			


In [32]:
# Scratch cell: 10% of 10000 is 1000, so the literal 2000 wins as size limit; 20% of 100 inodes is 20.
get_backup_limits_from_str('10%,i20%,2000',10000,100)

(2000, 20)

In [33]:
def get_path_fs_info(path:str):
	"""
	Return the total size and total inode count of the file system holding path.

	Runs ``df --no-sync --block-size=1 --output=size,itotal -- <path>`` through
	multiCMD and parses the last output line. The block size is pinned to 1 so
	the size is in bytes, the unit get_path_size() uses; without it df reports
	blocks whose size depends on the environment (1 KiB by default).

	Args:
		path (str): Any path on the file system of interest.

	Returns:
		tuple: (size_in_bytes, total_inodes). A column that cannot be parsed is
			reported as 0; (0, 0) when there is no usable output.
	"""
	# '--' stops option parsing so a path that starts with '-' is not read as a flag
	rtn = multiCMD.run_command(['df','--no-sync','--block-size=1','--output=size,itotal','--',path],quiet=True)
	if rtn and rtn[-1]:
		try:
			size_str, inodes_str = rtn[-1].split(maxsplit=1)
			size = int(size_str)
		except Exception:
			return 0,0
		try:
			inodes = int(inodes_str)
		except ValueError:
			# a non-numeric inode column should not throw away a valid size
			inodes = 0
		return size, inodes
	return 0,0


In [34]:
def hash_file(path,size = ...,full_hash=False):
	"""
	Return the xxh64 hex digest of a file, or of its tail only.
	From hpcp: https://github.com/yufei-pan/hpcp

	Args:
		path (str): File to hash.
		size (int, optional): Size of the file in bytes; looked up with
			os.path.getsize when left at its default (...).
		full_hash (bool, optional): Hash the whole file instead of only the last
			min(HASH_SIZE, size) bytes. Default is False.

	Returns:
		str: Hex digest, or '' when HASH_SIZE <= 0 (hashing disabled).
	"""
	global HASH_SIZE
	if HASH_SIZE <= 0:
		# Do not hash
		return ''
	if size == ...:
		size = os.path.getsize(path)
	digest = xxhash.xxh64()
	with open(path, 'rb') as stream:
		if not full_hash:
			# position the stream so that only the tail of the file is read
			tail_length = min(HASH_SIZE,size)
			stream.seek(-tail_length, os.SEEK_END)
		while True:
			block = stream.read(4096)
			if block == b'':
				break
			digest.update(block)
	return digest.hexdigest()


In [None]:
def check_duplicate(backupEntries:dict,latest_version_path:str,latest_version_files_folders:set,rel_entry:str,monitor_entry:str,isDir:bool):
	"""
	Compare one monitored file/folder with its counterpart in the latest vault version
	and record the detected change, if any, in backupEntries.

	Parameters:
		backupEntries (dict): Updated in place; monitor_entry -> BackupEntryValues(now, event, None)
			with event 'create', 'modify' or 'attrib'. Nothing is added when no change is detected.
		latest_version_path (str): Root folder of the latest vault version.
		latest_version_files_folders (set): Relative paths present in the latest version.
			rel_entry is removed from it when found, so what is left after all calls
			are the entries that no longer exist in the monitored path.
		rel_entry (str): Path of the entry relative to the monitored root.
		monitor_entry (str): Path of the entry in the monitored tree.
		isDir (bool): Whether the entry is a folder.

	Returns:
		None
	"""
	global DEBUG
	global tl
	if rel_entry in latest_version_files_folders:
		# this is a common file/folder
		latest_version_files_folders.remove(rel_entry)
		# check if the file is the same
		latest_version_entry = os.path.join(latest_version_path,rel_entry)
		if DEBUG:
			tl.teeprint(f'Checking {monitor_entry} against {latest_version_entry}')
		# os.stat follows symlinks, so a symlinked vault entry is compared through its target.
		# NOTE(review): both stat calls sit outside the try blocks below, so a vanished
		# entry or a broken symlink raises out of this function - confirm callers expect that.
		monitor_entry_stat = os.stat(monitor_entry)
		latest_version_entry_stat = os.stat(latest_version_entry)
		# do dev and inode based skip first
		try:
			if monitor_entry_stat.st_dev == latest_version_entry_stat.st_dev and monitor_entry_stat.st_ino == latest_version_entry_stat.st_ino:
				# same file
				if DEBUG:
					tl.teeprint(f'Same file according to dev and ino number {monitor_entry}')
				return
		except:
			if DEBUG:
				import traceback
				tl.teeerror(f'Error comparing dev and ino {monitor_entry}')
				tl.teeerror(traceback.format_exc())
		# The branches below are checked in order; the first one that matches decides the event.
		try:
			if not isDir and monitor_entry_stat.st_size != latest_version_entry_stat.st_size:
				# size is different
				if DEBUG:
					tl.teeprint(f'Size different {monitor_entry}')
				backupEntries[monitor_entry] = BackupEntryValues(datetime.datetime.now().isoformat(),'modify',None)
			elif monitor_entry_stat.st_mtime_ns == latest_version_entry_stat.st_mtime_ns:
				# size and mtime the same, we assume as if the file content is the same
				# NOTE(review): presumably the ctime of a vault copy never equals the source's
				# ctime, which would flag every unchanged entry as attrib - confirm.
				if monitor_entry_stat.st_ctime_ns != latest_version_entry_stat.st_ctime_ns:
					# attrib changed
					if DEBUG:
						tl.teeprint(f'Size Same, mtime same, ctime different {monitor_entry}')
					backupEntries[monitor_entry] = BackupEntryValues(datetime.datetime.now().isoformat(),'attrib',None)
				elif DEBUG:
					tl.teeprint(f'Size Same, mtime same, ctime same {monitor_entry}')
			elif not isDir and hash_file(monitor_entry,monitor_entry_stat.st_size) != hash_file(latest_version_entry,latest_version_entry_stat.st_size):
				# size is same, but mtime is different, and hash is different
				if DEBUG:
					tl.teeprint(f'Size Same, mtime different, hash different {monitor_entry}')
				backupEntries[monitor_entry] = BackupEntryValues(datetime.datetime.now().isoformat(),'modify',None)
			elif isDir:
				# folder whose mtime differs: only mode, uid and gid decide whether it is recorded
				# check for mode, uid, gid
				if monitor_entry_stat.st_mode != latest_version_entry_stat.st_mode or monitor_entry_stat.st_uid != latest_version_entry_stat.st_uid or monitor_entry_stat.st_gid != latest_version_entry_stat.st_gid:
					# attrib changed
					if DEBUG:
						tl.teeprint(f'Dir attrib changed {monitor_entry}')
					backupEntries[monitor_entry] = BackupEntryValues(datetime.datetime.now().isoformat(),'attrib',None)
				elif DEBUG:
					tl.teeprint(f'Dir attrib same {monitor_entry}')
			else:
				# size is same, but mtime is different, however hash is the same
				if DEBUG:
					tl.teeprint(f'Size Same, mtime different, hash same{monitor_entry}')
				backupEntries[monitor_entry] = BackupEntryValues(datetime.datetime.now().isoformat(),'attrib',None)
		except:
			# any failure while comparing falls back to recording a 'modify' event
			if DEBUG:
				import traceback
				tl.teeerror(f'Error comparing size, mtime, hash {monitor_entry}')
				tl.teeerror(traceback.format_exc())
				tl.teeprint(f'Adding as modify {monitor_entry}')
			backupEntries[monitor_entry] = BackupEntryValues(datetime.datetime.now().isoformat(),'modify',None)
	else:
		# this is a new file/folder
		if DEBUG:
			tl.teeprint(f'New file/folder {monitor_entry}')
		backupEntries[monitor_entry] = BackupEntryValues(datetime.datetime.now().isoformat(),'create',None)


In [36]:
def delta_generate_backup_entries(backupEntries:dict,latest_version_info:VaultEntry,monitor_path:str):
	"""
	Fill backupEntries by comparing the monitored tree with the latest vault version.

	Every monitored file, then every monitored folder, is passed to check_duplicate(),
	which records create / modify / attrib events. Whatever the latest version holds
	that was not matched is recorded as a 'delete' event.

	Parameters:
		backupEntries (dict): Updated in place with path -> BackupEntryValues.
		latest_version_info (VaultEntry): The latest vault version; only its path is used.
		monitor_path (str): Root of the monitored tree.

	Returns:
		TrackingFilesFolders: The files and folders of the latest version, relative to
			its root (folders with a trailing '/').
	"""
	global DEBUG
	global tl
	vault_root = latest_version_info.path
	monitor_files, monitor_folders = get_all_files_and_folders(monitor_path)
	vault_files, vault_folders = get_all_files_and_folders(vault_root)
	latest_version_files = [os.path.relpath(vault_file,vault_root) for vault_file in vault_files]
	# relpath drops the trailing '/', so it is put back for folders
	latest_version_folders = [os.path.relpath(vault_folder,vault_root) + '/' for vault_folder in vault_folders]
	# check_duplicate() removes every entry it matches from this set
	unmatched_entries = set(latest_version_files + latest_version_folders)
	for monitor_entries, isDir, suffix in ((monitor_files, False, ''), (monitor_folders, True, '/')):
		for monitor_entry in monitor_entries:
			rel_entry = os.path.relpath(monitor_entry,monitor_path) + suffix
			check_duplicate(backupEntries,vault_root,unmatched_entries,rel_entry,monitor_entry,isDir)
	for deleted_entry in unmatched_entries:
		# present in the latest version but gone from the monitored path
		if DEBUG:
			tl.teeprint(f'Deleted file/folder {deleted_entry}')
		backupEntries[os.path.join(monitor_path,deleted_entry)] = BackupEntryValues(datetime.datetime.now().isoformat(),'delete',None)
	if DEBUG:
		tl.teeprint('latest_version_folders:')
		tl.teeprint(latest_version_folders)
		tl.teeprint('latest_version_files')
		tl.teeprint(latest_version_files)
	return TrackingFilesFolders(latest_version_files,latest_version_folders)


In [37]:
# Scratch cell: empty, insertion-ordered container for the backup entries generated below.
backup_entries = OrderedDict()

In [38]:
# Scratch cell: pick the newest version (last key of the version-sorted dict).
latest_version_info = vault_info_dict[next(reversed(vault_info_dict))]

In [41]:
# Scratch cell: diff /tmp/test against the newest vault version, then display the collected entries.
delta_generate_backup_entries(backup_entries,latest_version_info,'/tmp/test')
backup_entries

[0mChecking /tmp/test/hi against test_vault/test/V3--2025-04-01_20-48-57_-0700--20.00_KiB-8.00_ino/hi[0m
[0mSize Same, mtime different, hash same/tmp/test/hi[0m
[0mChecking /tmp/test/alsa/alsactl.conf against test_vault/test/V3--2025-04-01_20-48-57_-0700--20.00_KiB-8.00_ino/alsa/alsactl.conf[0m
[0mSize Same, mtime same, ctime different /tmp/test/alsa/alsactl.conf[0m
[0mChecking /tmp/test/alsa/conf.d/99-pipewire-default.conf against test_vault/test/V3--2025-04-01_20-48-57_-0700--20.00_KiB-8.00_ino/alsa/conf.d/99-pipewire-default.conf[0m
[0mSize Same, mtime same, ctime different /tmp/test/alsa/conf.d/99-pipewire-default.conf[0m
[0mChecking /tmp/test/alsa/conf.d/50-pipewire.conf against test_vault/test/V3--2025-04-01_20-48-57_-0700--20.00_KiB-8.00_ino/alsa/conf.d/50-pipewire.conf[0m
[0mSize Same, mtime same, ctime different /tmp/test/alsa/conf.d/50-pipewire.conf[0m
[0mChecking /tmp/test/hosts against test_vault/test/V3--2025-04-01_20-48-57_-0700--20.00_KiB-8.00_ino/hosts

OrderedDict([('/tmp/test/hi',
              BackupEntryValues(iso_time='2025-04-01T23:03:34.181553', event='attrib', source_path=None)),
             ('/tmp/test/alsa/alsactl.conf',
              BackupEntryValues(iso_time='2025-04-01T23:03:34.181633', event='attrib', source_path=None)),
             ('/tmp/test/alsa/conf.d/99-pipewire-default.conf',
              BackupEntryValues(iso_time='2025-04-01T23:03:34.181691', event='attrib', source_path=None)),
             ('/tmp/test/alsa/conf.d/50-pipewire.conf',
              BackupEntryValues(iso_time='2025-04-01T23:03:34.181760', event='attrib', source_path=None)),
             ('/tmp/test/hosts',
              BackupEntryValues(iso_time='2025-04-01T23:03:34.181811', event='attrib', source_path=None)),
             ('/tmp/test/alsa/',
              BackupEntryValues(iso_time='2025-04-01T23:03:34.181853', event='modify', source_path=None)),
             ('/tmp/test/alsa/conf.d/',
              BackupEntryValues(iso_time='2025-04-01T23:0

In [161]:
def get_all_folders(path):
	"""Return only the folder list produced by get_all_files_and_folders(path)."""
	return get_all_files_and_folders(path)[1]

def get_all_files(path):
	"""Return only the file list produced by get_all_files_and_folders(path)."""
	return get_all_files_and_folders(path)[0]

def get_all_files_and_folders(path):
	"""
	Recursively list everything below path.

	Symlinks are not followed, so a symlink to a directory is reported as a file.
	If scanning a directory raises, that directory contributes nothing at all
	(empty lists are returned for it) and the error is logged when DEBUG is set.

	Args:
		path (str): Directory to walk.

	Returns:
		tuple: (files, folders); folder paths carry a trailing '/'.
	"""
	global DEBUG
	global tl
	found_files, found_folders = [], []
	try:
		with os.scandir(path) as listing:
			for item in listing:
				if not item.is_dir(follow_symlinks=False):
					found_files.append(item.path)
					continue
				found_folders.append(item.path+'/')
				sub_files, sub_folders = get_all_files_and_folders(item.path)
				found_files += sub_files
				found_folders += sub_folders
	except Exception as scan_error:
		if DEBUG:
			tl.teeerror(f'Error scanning {path}: {scan_error}')
		return [], []
	return found_files, found_folders


In [162]:
vault_info_dict

OrderedDict([(4,
              VaultEntry(version_number=4, path='test_vault/V4--2021-12-31_16-00-00_-0800--176.00_KiB-133.00_ino', timestamp=1640995200.0, size=180224, inode=133)),
             (5,
              VaultEntry(version_number=5, path='test_vault/V5--2021-12-31_16-00-00_-0800--408.00_KiB-167.00_ino', timestamp=1640995200.0, size=417792, inode=167)),
             (6,
              VaultEntry(version_number=6, path='test_vault/V6--2021-12-31_16-00-00_-0800--2.15_MiB-482.00_ino', timestamp=1640995200.0, size=2254438, inode=482))])

In [163]:
# Scratch cell: the first key of the version-sorted dict is the oldest version number.
next(iter(vault_info_dict))

4

In [164]:
vault_info_dict

OrderedDict([(4,
              VaultEntry(version_number=4, path='test_vault/V4--2021-12-31_16-00-00_-0800--176.00_KiB-133.00_ino', timestamp=1640995200.0, size=180224, inode=133)),
             (5,
              VaultEntry(version_number=5, path='test_vault/V5--2021-12-31_16-00-00_-0800--408.00_KiB-167.00_ino', timestamp=1640995200.0, size=417792, inode=167)),
             (6,
              VaultEntry(version_number=6, path='test_vault/V6--2021-12-31_16-00-00_-0800--2.15_MiB-482.00_ino', timestamp=1640995200.0, size=2254438, inode=482))])

In [165]:
#referenceVersionNumber, referenceVaultEntry = vault_info_dict.popitem(last=False)

In [166]:
# referenceVaultPath = os.path.abspath(referenceVaultEntry.path)
# referenceVaultPath

In [167]:
def decrement_stepper(vault_info_dict:OrderedDict) -> tuple:
	"""
	Remove the oldest reverb (version) from the vault.

	The oldest entry is popped from vault_info_dict. The remaining versions are
	then walked oldest first: a symlink that resolves into the removed version is
	replaced by the real file (moved out of the removed version), and symlinks in
	later versions that pointed at the same file are re-targeted to its new home.
	A version that gained files is renamed so the size in its folder name stays
	correct, and vault_info_dict is updated with the new path. Finally the removed
	version is deleted with rm -rf.

	Parameters:
		vault_info_dict (OrderedDict): version number -> VaultEntry, oldest first.
			Modified in place. The oldest entry is popped before the root-path
			check, so it is dropped from the dict even when removal is skipped.

	Returns:
		tuple: (size, inodes) of the removed version, measured right before it is
			deleted. (0, 0) when fewer than two versions exist or the path
			resolves to '/'.
	"""
	if len(vault_info_dict) < 2:
		# we cannot step as there is less than 2 available reverbs
		return 0 , 0
	referenceVersionNumber, referenceVaultEntry = vault_info_dict.popitem(last=False)
	referenceVaultPath = os.path.abspath(referenceVaultEntry.path)
	if referenceVaultPath == '/':
		print('Attempting to remove root, skipping')
		print(referenceVaultEntry)
		return 0 , 0
	if DEBUG:
		tl.teeprint(f'Removing {referenceVersionNumber}: {referenceVaultPath}')
	# Link targets below come from os.path.realpath(), so compare them against the
	# resolved reference path. The trailing separator keeps a sibling folder whose
	# name merely starts with the reference name from matching.
	referencePrefix = os.path.join(os.path.realpath(referenceVaultPath),'')
	movedPath = {}
	for applyingVersionNumber in vault_info_dict:
		applyingVaultEntry = vault_info_dict[applyingVersionNumber]
		# now we need to go through all files in the applying vault entry
		# find all symlinks that reference a file in the reference vault entry
		# if it is in movedPath, we apply the new path to the symlink
		# if it is not in movedPath, this means it is the first reference, thus
		#   we remove the symlink in the applying vault, move the file to the new location, and add the pair to movedPath
		path_files = get_all_files(applyingVaultEntry.path)
		if DEBUG:
			tl.teeprint(f"Dealing with {applyingVersionNumber}: {applyingVaultEntry.path} ({len(path_files)} files)")
		new_size = applyingVaultEntry.size
		pendingMovedPath = {}
		for file in path_files:
			if os.path.islink(file):
				target = os.path.realpath(file)
				if target in movedPath:
					# we just apply the new dest to the link
					# a relative symlink is resolved from the directory that contains the link,
					# not from the root of the version folder
					new_target = os.path.relpath(movedPath[target],os.path.dirname(file))
					if DEBUG:
						tl.teeprint(f'Applying new target {new_target} to {file}')
					os.remove(file)
					os.symlink(new_target,file)
					continue
				# NOTE(review): two links in the same version that resolve to the same
				# reference file are not handled; the second rename would fail - confirm
				# such links cannot be produced.
				if target.startswith(referencePrefix):
					# this means we need to move the file to the new location
					if DEBUG:
						tl.teeprint(f'Moving {target} to {file}')
					os.remove(file)
					os.rename(target,file)
					new_size += os.path.getsize(file)
					pendingMovedPath[target] = os.path.relpath(file,applyingVaultEntry.path)
				elif DEBUG:
					tl.teeprint(f'Not moving {target} to {file}')
					tl.teeprint(f'{os.path.abspath(target)} not in {referenceVaultPath}')
		#V0--2021-01-01_00-00-00_-0800--1.3_GiB-4.2K_ino
		if new_size != applyingVaultEntry.size:
			backup_size_str = format_bytes(new_size,use_1024_bytes=True,to_str=True).replace(' ','_')
			# keep everything before the last '--' and the inode part after the last '-', swap in the new size
			newVaultPath = applyingVaultEntry.path.rpartition('--')[0] + f'--{backup_size_str}B-' + applyingVaultEntry.path.rpartition('-')[2]
			if newVaultPath != applyingVaultEntry.path:
				if DEBUG:
					tl.teeprint(f'Renaming {applyingVaultEntry.path} to {newVaultPath}')
				os.rename(applyingVaultEntry.path,newVaultPath)
				vault_info_dict[applyingVersionNumber] = applyingVaultEntry._replace(path=newVaultPath)
		else:
			newVaultPath = applyingVaultEntry.path
		# publish the moves only now, so the recorded locations use the final folder name
		for target in pendingMovedPath:
			movedPath[target] = os.path.join(newVaultPath,pendingMovedPath[target])
	# remove the reference vault entry
	if DEBUG:
		tl.teeprint(f'Removing {referenceVaultPath}')
	removingSize = get_path_size(referenceVaultPath)
	removingInodes = get_path_inodes(referenceVaultPath)
	multiCMD.run_command(['rm','-rf',referenceVaultPath],quiet=True,return_code_only=True)

	return removingSize, removingInodes

In [168]:
# Scratch cell: rescan the test vault, then drop its oldest version.
vault_info_dict, vault_size , vault_inodes= get_vault_info('test_vault')
decrement_stepper(vault_info_dict)

[0mRemoving 4: /var/home/kes/Documents/reverberator/test_vault/V4--2021-12-31_16-00-00_-0800--176.00_KiB-133.00_ino[0m
[0mDealing with 5: test_vault/V5--2021-12-31_16-00-00_-0800--408.00_KiB-167.00_ino (102 files)[0m
[0mNot moving /var/home/kes/Documents/reverberator/test/.X11-unix/X1 to test_vault/V5--2021-12-31_16-00-00_-0800--408.00_KiB-167.00_ino/.X11-unix/X1[0m
[0m/var/home/kes/Documents/reverberator/test/.X11-unix/X1 not in /var/home/kes/Documents/reverberator/test_vault/V4--2021-12-31_16-00-00_-0800--176.00_KiB-133.00_ino[0m
[0mNot moving /var/home/kes/Documents/reverberator/test/.X11-unix/X1024 to test_vault/V5--2021-12-31_16-00-00_-0800--408.00_KiB-167.00_ino/.X11-unix/X1024[0m
[0m/var/home/kes/Documents/reverberator/test/.X11-unix/X1024 not in /var/home/kes/Documents/reverberator/test_vault/V4--2021-12-31_16-00-00_-0800--176.00_KiB-133.00_ino[0m
[0mNot moving /var/home/kes/Documents/reverberator/test/.X11-unix/X0 to test_vault/V5--2021-12-31_16-00-00_-0800--408.0

(180224, 133)

In [169]:
vault_info_dict

OrderedDict([(5,
              VaultEntry(version_number=5, path='test_vault/V5--2021-12-31_16-00-00_-0800--408.00_KiB-167.00_ino', timestamp=1640995200.0, size=417792, inode=167)),
             (6,
              VaultEntry(version_number=6, path='test_vault/V6--2021-12-31_16-00-00_-0800--2.15_MiB-482.00_ino', timestamp=1640995200.0, size=2254438, inode=482))])

In [170]:
backup_entries

OrderedDict([('test/.ICE-unix/1539',
              BackupEntryValues(iso_time='2025-04-01T16:13:42.202171', event='attrib', source_path=None)),
             ('test/.ICE-unix/2538',
              BackupEntryValues(iso_time='2025-04-01T16:13:42.202205', event='attrib', source_path=None)),
             ('test/pyright-13187-pB55FDl9C2qD/select-13187-z2dXS3RoPowv-.py',
              BackupEntryValues(iso_time='2025-04-01T16:13:42.202294', event='attrib', source_path=None)),
             ('test/pyright-13187-pB55FDl9C2qD/builtins-13187-SO7bGpFXxpPk-.py',
              BackupEntryValues(iso_time='2025-04-01T16:13:42.202313', event='attrib', source_path=None)),
             ('test/inotify_test/test/testrecursivefolder/nestedFolder/nestedFile.txt',
              BackupEntryValues(iso_time='2025-04-01T16:13:42.202335', event='attrib', source_path=None)),
             ('test/inotify_test/dirMoveTest/before_move.txt',
              BackupEntryValues(iso_time='2025-04-01T16:13:42.202353', event='at

In [171]:
class bidict(dict):
	"""dict that also keeps a reverse index from each value to the keys holding it.

	`self.inverse` maps value -> list of keys currently mapped to that value.
	A value only appears in `inverse` while at least one key maps to it; the
	bucket is dropped as soon as its last key is reassigned or deleted.

	Values must be hashable because they are used as keys of `inverse`.

	Only construction, `d[key] = value` and `del d[key]` maintain the index;
	other inherited dict mutators ( update, pop, setdefault, clear, ... ) are
	not overridden here and will leave `inverse` stale.
	"""
	# Credit: https://stackoverflow.com/users/1422096/basj
	# https://stackoverflow.com/questions/3318625/how-to-implement-an-efficient-bidirectional-hash-table
	def __init__(self, *args, **kwargs):
		super(bidict, self).__init__(*args, **kwargs)
		self.inverse = {}
		for key, value in self.items():
			self.inverse.setdefault(value, []).append(key)

	def _unlink(self, key):
		"""Remove `key` from the reverse index, dropping its bucket once empty.

		Raises KeyError if `key` is not in the dict.
		"""
		value = self[key]
		keys = self.inverse.get(value)
		if keys is None:
			# index is already missing this value ( e.g. after a bypassing mutator )
			return
		if key in keys:
			keys.remove(key)
		if not keys:
			del self.inverse[value]

	def __setitem__(self, key, value):
		if key in self:
			# detach the key from its previous value so no empty bucket lingers
			self._unlink(key)
		super(bidict, self).__setitem__(key, value)
		self.inverse.setdefault(value, []).append(key)

	def __delitem__(self, key):
		self._unlink(key)
		super(bidict, self).__delitem__(key)


In [172]:
# Check the repr / field names of an empty TrackingFilesFolders record.
TrackingFilesFolders([],[])

TrackingFilesFolders(files=[], folders=[])

In [173]:
# Check that a VaultInfo built from an empty dict exposes a falsy vault_info_dict.
not VaultInfo({}, 0, 0).vault_info_dict

True

In [174]:
# Peek at the first ( path, values ) pair of backup_entries without consuming it.
next(iter(backup_entries.items()))

('test/.ICE-unix/1539',
 BackupEntryValues(iso_time='2025-04-01T16:13:42.202171', event='attrib', source_path=None))

In [175]:
def do_referenced_copy(source_path:str,backup_folder:str,trackingFilesFolders:TrackingFilesFolders=None,relative = False):
	"""Mirror source_path into backup_folder as a tree of symlinks.

	Folders are created for real ( with their metadata copied over ), while
	every file becomes a symlink pointing back at the file under source_path.

	source_path: root of the tree to reference
	backup_folder: root of the tree to create
	trackingFilesFolders: known files / folders relative to source_path;
		when not given, source_path is scanned with get_all_files_and_folders
	relative: create relative symlinks ( ln -r ) instead of absolute ones

	Returns a TrackingFilesFolders with the relative files and folders handled.
	"""
	global DEBUG
	global tl
	global BACKUP_SEMAPHORE
	if trackingFilesFolders:
		files, folders = trackingFilesFolders.files, trackingFilesFolders.folders
	else:
		found_files, found_folders = get_all_files_and_folders(source_path)
		files = [os.path.relpath(entry,source_path) for entry in found_files]
		# folders are tracked with a trailing '/'
		folders = [os.path.relpath(entry,source_path) + '/' for entry in found_folders]
	# folders first, so every link below has an existing parent
	for folder in folders:
		origin_dir = os.path.join(source_path,folder)
		destination_dir = os.path.join(backup_folder,folder)
		os.makedirs(destination_dir,exist_ok=True)
		copy_file_meta(origin_dir,destination_dir)
	# ln --symbolic --logical --force --no-target-directory ( plus --relative on request )
	ln_flags = '-rfsLT' if relative else '-fsLT'
	executor = multiCMD.AsyncExecutor(semaphore=BACKUP_SEMAPHORE)
	for file in files:
		link_target = os.path.abspath(os.path.join(source_path,file))
		link_name = os.path.join(backup_folder,file)
		executor.run_command(['ln',ln_flags,link_target,link_name])
	# block until every queued ln has finished
	executor.join()
	return TrackingFilesFolders(files, folders)

def copy_file_meta(source_file:str,backup_file_path:str):
	"""Copy metadata ( mode / times via copystat, owner, atime / mtime ) between two paths.

	source_file: path to read the metadata from
	backup_file_path: existing path to apply the metadata to

	Best effort: any failure is logged through tl.teeerror and reported as
	False instead of being raised. Returns True when every step succeeded.
	"""
	global tl
	global DEBUG
	try:
		# copy the file metadata
		copystat(source_file,backup_file_path)
		st = os.stat(source_file)
		if os.name == 'posix':
			os.chown(backup_file_path, st.st_uid, st.st_gid)
		# re-apply the timestamps after chown
		os.utime(backup_file_path, (st.st_atime, st.st_mtime))
		if DEBUG:
			tl.teeprint(f'Copied metadata of {source_file} to {backup_file_path}')
		return True
	except Exception:
		# Exception ( not a bare except ) so Ctrl-C / SystemExit still propagate
		tl.teeerror(f'Failed to copy metadata of {source_file}')
		if DEBUG:
			import traceback
			tl.teeerror(traceback.format_exc())
		return False



In [176]:
def do_reverb_backup(backup_entries:dict,backup_folder:str,latest_version_info:VaultEntry,
					 only_sync_attributes:bool,trackingFilesFolders:TrackingFilesFolders,monitor_path:str):
	"""Create a new vault version by replaying recorded changes on top of the previous one.

	reverb backup flow:
	  do referenced copy of the last version to the current backup folder
	  replay the changes in iteration order ( to respect moves )

	backup_entries: maps a path under monitor_path ( folders end with '/' ) to a
		record with an `event` ( create, modify, attrib, delete, move ) and a
		`source_path`; for a move the key is the destination and source_path
		is where it came from
	backup_folder: root of the version being created
	latest_version_info: entry of the previous version, its `path` is the reference
	only_sync_attributes: on a file attrib event copy only the metadata instead of the file
	trackingFilesFolders: files / folders of the previous version, passed to do_referenced_copy
	monitor_path: root of the monitored tree the entry paths live under

	Returns a TrackingFilesFolders with the sets of files and folders
	( relative to the version root, folders with a trailing '/' ) of the new version.
	"""
	global DEBUG
	global tl
	global BACKUP_SEMAPHORE
	vaultFiles, vaultFolders  = do_referenced_copy(source_path=latest_version_info.path,backup_folder=backup_folder,trackingFilesFolders=trackingFilesFolders,relative=True)
	vaultFiles = set(vaultFiles)
	vaultFolders = set(vaultFolders)
	mcae = multiCMD.AsyncExecutor(semaphore=BACKUP_SEMAPHORE)
	for path, values in backup_entries.items():
		relative_path = os.path.relpath(path=path,start=monitor_path)
		vault_path = os.path.join(backup_folder,relative_path)
		isDir = path.endswith('/')
		# relpath drops the trailing '/', but folders are tracked with one;
		# it also keeps 'foo/' from matching a sibling such as 'foobar/x' in prefix tests
		tracked_path = relative_path + '/' if isDir else relative_path
		# create, modify, attrib, move, delete
		if values.event in {'create','modify'}:
			# we just over write the vault file with the source file
			if isDir:
				vaultFolders.add(tracked_path)
				newFiles, newFolders = get_all_files_and_folders(path)
				vaultFiles.update(os.path.relpath(file,monitor_path) for file in newFiles)
				vaultFolders.update(os.path.relpath(folder,monitor_path)+'/' for folder in newFolders)
			else:
				vaultFiles.add(tracked_path)
			# we just copy the entire folder / file ( for recursive create purposes )
			cp_af_copy_path(source_path=path,dest_path=vault_path,mcae=mcae)
		elif values.event == 'attrib':
			if isDir:
				# we just copy the folder metadata
				os.makedirs(vault_path,exist_ok=True)
				copy_file_meta(path,vault_path)
				vaultFolders.add(tracked_path)
			else:
				if only_sync_attributes:
					# we just copy the file metadata
					copy_file_meta(path,vault_path)
				else:
					# we copy the file
					cp_af_copy_path(source_path=path,dest_path=vault_path,mcae=mcae)
				vaultFiles.add(tracked_path)
		elif values.event == 'delete':
			if isDir:
				# we just remove the folder
				if os.path.abspath(vault_path) == '/':
					# we cannot remove root
					tl.teeerror(f'Attempting to remove root, skipping')
				else:
					mcae.run_command(['rm','-rf',vault_path])
				# forget the folder and everything tracked below it;
				# tracked entries are relative, so match on the relative prefix
				vaultFolders.difference_update({folder for folder in vaultFolders if folder.startswith(tracked_path)})
				vaultFiles.difference_update({file for file in vaultFiles if file.startswith(tracked_path)})
			else:
				# we just remove the file
				mcae.run_command(['rm','-f',vault_path])
				vaultFiles.discard(tracked_path)
		elif values.event == 'move':
			# the entry key is the destination, the record only carries the origin
			source_relative_path = os.path.relpath(values.source_path,monitor_path)
			if isDir:
				if os.path.abspath(vault_path) == '/':
					# we cannot remove root
					tl.teeerror(f'Attempting to move root, skipping')
					continue
				# re-root the folder and everything tracked below it onto the destination
				source_prefix = source_relative_path + '/'
				for tracked in (vaultFolders,vaultFiles):
					moved = {entry for entry in tracked if entry.startswith(source_prefix)}
					tracked.difference_update(moved)
					tracked.update(tracked_path + entry[len(source_prefix):] for entry in moved)
				vaultFolders.add(tracked_path)
			else:
				vaultFiles.discard(source_relative_path)
				vaultFiles.add(tracked_path)
			# we need to wait for cp threads to finish to allow rename
			mcae.wait()
			source_vault_path = os.path.join(backup_folder,source_relative_path)
			os.rename(source_vault_path,vault_path)
			if DEBUG:
				tl.teeprint(f'Moved {source_vault_path} to {vault_path}')
	mcae.join()
	return TrackingFilesFolders(vaultFiles,vaultFolders)


In [177]:
# Manual run: no tracking list is given, so test/ is scanned and mirrored into the V5 folder as relative symlinks.
do_referenced_copy('test','test_vault/V5--2022-01-01_00-00-00_+0000--1.1_MiB-4.0_ino',relative=True)

[0mCopied metadata of test/.X11-unix/ to test_vault/V5--2022-01-01_00-00-00_+0000--1.1_MiB-4.0_ino/.X11-unix/[0m
[0mCopied metadata of test/.ICE-unix/ to test_vault/V5--2022-01-01_00-00-00_+0000--1.1_MiB-4.0_ino/.ICE-unix/[0m
[0mCopied metadata of test/.XIM-unix/ to test_vault/V5--2022-01-01_00-00-00_+0000--1.1_MiB-4.0_ino/.XIM-unix/[0m
[0mCopied metadata of test/.font-unix/ to test_vault/V5--2022-01-01_00-00-00_+0000--1.1_MiB-4.0_ino/.font-unix/[0m
[0mCopied metadata of test/systemd-private-bd185dc0a28746399d5a6073f8ae80c6-systemd-oomd.service-LMIZfD/ to test_vault/V5--2022-01-01_00-00-00_+0000--1.1_MiB-4.0_ino/systemd-private-bd185dc0a28746399d5a6073f8ae80c6-systemd-oomd.service-LMIZfD/[0m
[0mCopied metadata of test/systemd-private-bd185dc0a28746399d5a6073f8ae80c6-systemd-resolved.service-kMHxw4/ to test_vault/V5--2022-01-01_00-00-00_+0000--1.1_MiB-4.0_ino/systemd-private-bd185dc0a28746399d5a6073f8ae80c6-systemd-resolved.service-kMHxw4/[0m
[0mCopied metadata of test/syst

TrackingFilesFolders(files=['.X11-unix/X1024', '.X11-unix/X1025', '.X11-unix/X0', '.X11-unix/X1', '.ICE-unix/1539', '.ICE-unix/2538', '.X1024-lock', '.X1025-lock', '.X0-lock', '.X1-lock', 'pyright-13187-pB55FDl9C2qD/select-13187-z2dXS3RoPowv-.py', 'pyright-13187-pB55FDl9C2qD/builtins-13187-SO7bGpFXxpPk-.py', 'inotify_test/test/testrecursivefolder/nestedFolder/nestedFile.txt', 'inotify_test/dirMoveTest/before_move.txt', 'inotify_test/dirMoveTest/subfolder.txt', 'inotify_test/dirMoveTest/move_into_sub_folder.txt', 'inotify_test/dirMoveTest/test_temp.txt', 'inotify_test/move_test_trick.txt', 'inotify_test/same_file_move_out_in.txt', 'outside.txt', 'uv-2b31b12fc3a070b5.lock', 'tmp/.X11-unix/X1024', 'tmp/.X11-unix/X1025', 'tmp/.X11-unix/X0', 'tmp/.X11-unix/X1', 'tmp/.ICE-unix/1539', 'tmp/.ICE-unix/2538', 'tmp/pyright-13187-pB55FDl9C2qD/select-13187-z2dXS3RoPowv-.py', 'tmp/pyright-13187-pB55FDl9C2qD/builtins-13187-SO7bGpFXxpPk-.py', 'tmp/pyright-13187-pB55FDl9C2qD/builtins-13187-iO7iSL4kHl2g

In [178]:
# Display the pending backup entries again before replaying them.
backup_entries

OrderedDict([('test/.ICE-unix/1539',
              BackupEntryValues(iso_time='2025-04-01T16:13:42.202171', event='attrib', source_path=None)),
             ('test/.ICE-unix/2538',
              BackupEntryValues(iso_time='2025-04-01T16:13:42.202205', event='attrib', source_path=None)),
             ('test/pyright-13187-pB55FDl9C2qD/select-13187-z2dXS3RoPowv-.py',
              BackupEntryValues(iso_time='2025-04-01T16:13:42.202294', event='attrib', source_path=None)),
             ('test/pyright-13187-pB55FDl9C2qD/builtins-13187-SO7bGpFXxpPk-.py',
              BackupEntryValues(iso_time='2025-04-01T16:13:42.202313', event='attrib', source_path=None)),
             ('test/inotify_test/test/testrecursivefolder/nestedFolder/nestedFile.txt',
              BackupEntryValues(iso_time='2025-04-01T16:13:42.202335', event='attrib', source_path=None)),
             ('test/inotify_test/dirMoveTest/before_move.txt',
              BackupEntryValues(iso_time='2025-04-01T16:13:42.202353', event='at

In [179]:
def do_reverb_backup(backup_entries:dict,backup_folder:str,latest_version_info:VaultEntry,
					 only_sync_attributes:bool,trackingFilesFolders:TrackingFilesFolders,monitor_path:str):
	"""Create a new vault version by replaying recorded changes on top of the previous one.

	reverb backup flow:
	  do referenced copy of the last version to the current backup folder
	  replay the changes in iteration order ( to respect moves )

	backup_entries: maps a path under monitor_path ( folders end with '/' ) to a
		record with an `event` ( create, modify, attrib, delete, move ) and a
		`source_path`; for a move the key is the destination and source_path
		is where it came from
	backup_folder: root of the version being created
	latest_version_info: entry of the previous version, its `path` is the reference
	only_sync_attributes: on a file attrib event copy only the metadata instead of the file
	trackingFilesFolders: files / folders of the previous version, passed to do_referenced_copy
	monitor_path: root of the monitored tree the entry paths live under

	Returns a TrackingFilesFolders with the sets of files and folders
	( relative to the version root, folders with a trailing '/' ) of the new version.
	"""
	global DEBUG
	global tl
	global BACKUP_SEMAPHORE
	def folder_key(relative_path):
		# relpath drops the trailing '/', but folders are tracked with one;
		# it also keeps 'foo/' from matching a sibling such as 'foobar/x' in prefix tests
		return relative_path + '/'
	def copy_path(isDir,path,relative_path,vaultFolders,vaultFiles,vault_path,mcae):
		# track and copy a file, or a folder with everything currently inside it
		if isDir:
			vaultFolders.add(folder_key(relative_path))
			newFiles, newFolders = get_all_files_and_folders(path)
			for file in newFiles:
				vaultFiles.add(os.path.relpath(file,monitor_path))
			for folder in newFolders:
				vaultFolders.add(os.path.relpath(folder,monitor_path)+'/')
		else:
			vaultFiles.add(relative_path)
		# we just copy the entire folder / file ( for recursive create purposes )
		cp_af_copy_path(source_path=path,dest_path=vault_path,mcae=mcae)
	def delete_path(isDir,vault_path,relative_path,mcae,vaultFolders,vaultFiles):
		# remove a file / folder from the vault and from the tracking sets
		if isDir:
			# we just remove the folder
			if os.path.abspath(vault_path) == '/':
				# we cannot remove root
				tl.teeerror(f'Attempting to remove root, skipping')
			else:
				mcae.run_command(['rm','-rf',vault_path])
			# forget the folder and everything tracked below it.
			# the sets are mutated in place ( rebinding the parameter would not
			# reach the caller ) and matched on the relative prefix, since
			# tracked entries are relative to the version root
			prefix = folder_key(relative_path)
			vaultFolders.difference_update({folder for folder in vaultFolders if folder.startswith(prefix)})
			vaultFiles.difference_update({file for file in vaultFiles if file.startswith(prefix)})
		else:
			# we just remove the file
			mcae.run_command(['rm','-f',vault_path])
			vaultFiles.discard(relative_path)
	def retarget(entries,old_prefix,new_prefix):
		# re-root every tracked entry below old_prefix onto new_prefix, in place
		moved = {entry for entry in entries if entry.startswith(old_prefix)}
		entries.difference_update(moved)
		entries.update(new_prefix + entry[len(old_prefix):] for entry in moved)
	vaultFiles, vaultFolders  = do_referenced_copy(source_path=latest_version_info.path,backup_folder=backup_folder,trackingFilesFolders=trackingFilesFolders,relative=True)
	vaultFiles = set(vaultFiles)
	vaultFolders = set(vaultFolders)
	mcae = multiCMD.AsyncExecutor(semaphore=BACKUP_SEMAPHORE)
	for path, values in backup_entries.items():
		if DEBUG:
			tl.teeprint(f'Processing {path} {values}')
		relative_path = os.path.relpath(path=path,start=monitor_path)
		vault_path = os.path.join(backup_folder,relative_path)
		isDir = path.endswith('/')
		# create, modify, attrib, move, delete
		if values.event in {'create','modify'}:
			# we just over write the vault file with the source file
			copy_path(isDir,path,relative_path,vaultFolders,vaultFiles,vault_path,mcae)
		elif values.event == 'attrib':
			if isDir:
				# we just copy the folder metadata
				os.makedirs(vault_path,exist_ok=True)
				copy_file_meta(path,vault_path)
				vaultFolders.add(folder_key(relative_path))
			else:
				if only_sync_attributes:
					# we just copy the file metadata
					copy_file_meta(path,vault_path)
				else:
					# we copy the file
					cp_af_copy_path(source_path=path,dest_path=vault_path,mcae=mcae)
				vaultFiles.add(relative_path)
		elif values.event == 'delete':
			delete_path(isDir,vault_path,relative_path,mcae,vaultFolders,vaultFiles)
		elif values.event == 'move':
			# the entry key is the destination, the record carries the origin
			source_relative_path = os.path.relpath(values.source_path,monitor_path)
			target_relative_path = relative_path
			if isDir:
				if os.path.abspath(vault_path) == '/':
					# we cannot remove root
					tl.teeerror(f'Attempting to move root, skipping')
					continue
				# also need to move everything tracked inside the folder
				source_prefix = folder_key(source_relative_path)
				target_prefix = folder_key(target_relative_path)
				retarget(vaultFolders,source_prefix,target_prefix)
				retarget(vaultFiles,source_prefix,target_prefix)
				vaultFolders.add(target_prefix)
			else:
				vaultFiles.discard(source_relative_path)
				vaultFiles.add(target_relative_path)
			# we need to wait for cp threads to finish to allow rename
			mcae.wait()
			source_path = os.path.join(backup_folder,source_relative_path)
			target_path = os.path.join(backup_folder,target_relative_path)
			try:
				os.rename(source_path,target_path)
				if DEBUG:
					tl.teeprint(f'Moved {source_path} to {target_path}')
			except OSError:
				tl.teeerror(f'Failed to move {source_path} to {target_path}')
				if DEBUG:
					import traceback
					tl.teeerror(traceback.format_exc())
				# if we fail to move, we just copy the file.
				# the tracking sets already describe the moved layout, so only
				# the vault contents are touched here
				cp_af_copy_path(source_path=source_path,dest_path=target_path,mcae=mcae)
				# the copy must be done before its source is removed
				mcae.wait()
				delete_path(isDir,source_path,source_relative_path,mcae,vaultFolders,vaultFiles)
	mcae.join()
	return TrackingFilesFolders(vaultFiles,vaultFolders)


In [180]:
# Manual run: replay backup_entries on top of latest_version_info into a V6 folder.
# trackingFilesFolders=None makes do_referenced_copy rescan the previous version.
do_reverb_backup(backup_entries,'test_vault/V6--2022-01-01_00-00-00_+0000--1.1_MiB-4.0_ino',
				 latest_version_info=latest_version_info,only_sync_attributes=False,trackingFilesFolders=None,monitor_path='test')

[0mCopied metadata of test_vault/V6--2021-12-31_16-00-00_-0800--2.15_MiB-482.00_ino/.X11-unix/ to test_vault/V6--2022-01-01_00-00-00_+0000--1.1_MiB-4.0_ino/.X11-unix/[0m
[0mCopied metadata of test_vault/V6--2021-12-31_16-00-00_-0800--2.15_MiB-482.00_ino/.ICE-unix/ to test_vault/V6--2022-01-01_00-00-00_+0000--1.1_MiB-4.0_ino/.ICE-unix/[0m
[0mCopied metadata of test_vault/V6--2021-12-31_16-00-00_-0800--2.15_MiB-482.00_ino/.XIM-unix/ to test_vault/V6--2022-01-01_00-00-00_+0000--1.1_MiB-4.0_ino/.XIM-unix/[0m
[0mCopied metadata of test_vault/V6--2021-12-31_16-00-00_-0800--2.15_MiB-482.00_ino/.font-unix/ to test_vault/V6--2022-01-01_00-00-00_+0000--1.1_MiB-4.0_ino/.font-unix/[0m
[0mCopied metadata of test_vault/V6--2021-12-31_16-00-00_-0800--2.15_MiB-482.00_ino/tmp/ to test_vault/V6--2022-01-01_00-00-00_+0000--1.1_MiB-4.0_ino/tmp/[0m
[0mCopied metadata of test_vault/V6--2021-12-31_16-00-00_-0800--2.15_MiB-482.00_ino/tmp/.X11-unix/ to test_vault/V6--2022-01-01_00-00-00_+0000--1.1_M

TrackingFilesFolders(files={'tmp/inotify_test/move_test_trick.txt', 'tmp/uv-2b31b12fc3a070b5.lock', 'tmp/ipykernel_111090/2868533165.py', 'tmp/ipykernel_111090/1563564754.py', 'tmp/ipykernel_111090/3464533286.py', 'tmp/ipykernel_111090/452571613.py', 'tmp/ipykernel_111090/2178759634.py', 'tmp/ipykernel_111090/2204375110.py', 'tmp/ipykernel_111090/1142987880.py', '.X11-unix/X1024', 'tmp/ipykernel_111090/1910759577.py', 'tmp/.X11-unix/X0', 'tmp/ipykernel_111090/1190707344.py', 'tmp/inotify_test/dirMoveTest/move_into_sub_folder.txt', 'outside.txt', 'tmp/ipykernel_111090/2630999843.py', 'tmp/ipykernel_111090/1072161412.py', '.ICE-unix/2538', 'tmp/ipykernel_111090/619144293.py', 'tmp/inotify_test/dirMoveTest/test_temp.txt', 'tmp/ipykernel_111090/62415265.py', 'tmp/inotify_test/same_file_move_out_in.txt', 'tmp/inotify_test/dirMoveTest/subfolder.txt', 'tmp/ipykernel_111090/3550475754.py', 'tmp/inotify_test/test/testrecursivefolder/nestedFolder/nestedFile.txt', 'tmp/ipykernel_111090/2212892568