In [0]:
# Importing concurrent.futures to use threadpool and logging for displaying the file deletion status
import concurrent.futures
import logging
# Module-level logger, looked up under the name "error".
# DirDelete.remove reports every deletion through it at WARNING level.
logger = logging.getLogger("error")

class DirDelete:
    """Delete the contents of a directory through ``dbutils.fs``, using threads.

    Every method is static.  The class relies on two names it does not define
    itself: ``dbutils`` (expected to be supplied by the notebook runtime) and
    the module-level ``logger``.
    """

    @staticmethod
    def remove(file_path):
        """Remove ``file_path`` recursively and log that it was deleted.

        @param file_path: path of the file or directory to delete.
        """
        dbutils.fs.rm(file_path, True)
        logger.warning(f"'{file_path}' deleted successfully !!!")

    @staticmethod
    def get_files_delete(file):
        """Delete every entry that listing ``file.path`` returns.

        Each listed entry is handled on its *own* path:

        * a non-empty directory (path ending in "/") is handed to
          ``delete_files_and_folders`` so that its contents are deleted
          entry by entry;
        * anything else (a file, or an empty directory) is removed directly.

        The directory behind ``file.path`` itself is left in place; the
        clean-up pass in ``delete_files_and_folders`` removes it afterwards.

        @param file: listing entry exposing a ``path`` attribute
            (presumably one item of a ``dbutils.fs.ls`` result -- confirm).
        """
        for entry in dbutils.fs.ls(file.path):
            entry_path = entry.path
            # Act on the entry that was inspected.  Using ``file.path`` here
            # would wipe the whole parent as soon as one file is seen and
            # would never descend into the sub-directory.
            if entry_path.endswith("/") and dbutils.fs.ls(entry_path):
                DirDelete.delete_files_and_folders(entry_path)
            else:
                DirDelete.remove(entry_path)

    @staticmethod
    def thread_pool(fun_name, responses, max_workers=32):
        """Call ``fun_name(item)`` for every item of ``responses`` on a thread pool.

        Blocks until all calls have finished.  A call that raises does not
        abort the others and nothing is re-raised, but each failure is logged
        at ERROR level instead of being dropped silently.

        @param fun_name: callable taking one argument.
        @param responses: iterable of arguments, one call per item.
        @param max_workers: maximum number of worker threads. Defaults to 32.
        """
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            tasks = [
                (executor.submit(fun_name, response), response)
                for response in responses
            ]
            concurrent.futures.wait([future for future, _ in tasks])

        # All futures are done here, so exception() returns immediately.
        for future, response in tasks:
            error = future.exception()
            if error is not None:
                logger.error(
                    "%r failed for %r: %s", fun_name, response, error, exc_info=error
                )

    @staticmethod
    def delete_files_and_folders(directory):
        """Delete everything listed under ``directory``.

        1. List ``directory`` and process every entry concurrently with
           ``get_files_delete``.
        2. List ``directory`` again and remove whatever is still there
           (the directories that step 1 emptied).

        ``directory`` itself is not removed.  Note that each nested call
        reached through ``get_files_delete`` opens its own thread pool.

        @param directory: path of the directory whose contents are deleted.
        """
        DirDelete.thread_pool(DirDelete.get_files_delete, dbutils.fs.ls(directory))
        for leftover in dbutils.fs.ls(directory):
            DirDelete.remove(leftover.path)
# Directory whose listed contents are to be deleted
root_directory = "dbfs:/mnt/poc/"
# Start the deletion for the directory configured above
DirDelete.delete_files_and_folders(directory=root_directory)