# File Cleaner

In [None]:
# 50% of the grade
def clean_file(src_file, dest_file, good_text="", bad_text=""):
    """Copy the lines of ``src_file`` that pass a text filter into ``dest_file``.

    A line is kept when it contains ``good_text`` and does not contain
    ``bad_text``. An empty string disables the corresponding filter, so
    calling with both defaults copies every line unchanged.

    Args:
        src_file: Path of the text file to read.
        dest_file: Path of the file to write; it is overwritten if it exists.
        good_text: Substring a line must contain to be kept.
        bad_text: Substring whose presence drops a line.
    """
    # read every line of src_file (line endings are preserved)
    with open(src_file) as source_file:
        lines = source_file.readlines()

    # keep only lines with good_text BUT not with bad_text.
    # "" is a substring of every string, so an unguarded
    # `bad_text not in line` would reject every line when bad_text is
    # left at its default; an empty bad_text means "exclude nothing".
    filtered_lines = [
        line for line in lines
        if good_text in line and (not bad_text or bad_text not in line)
    ]

    # save onto dest_file, overwriting any existing file there
    with open(dest_file, 'w') as destination_file:
        destination_file.writelines(filtered_lines)

    print(f"Cleaned file {src_file} and saved onto {dest_file}")

In [None]:
# 80% of the grade
def clean_file_many(src_file, dest_file, good_texts=(), bad_texts=()):
    """Copy the lines of ``src_file`` that pass a multi-text filter.

    A line is kept when it contains at least one entry of ``good_texts``
    and none of the entries of ``bad_texts``. An empty collection disables
    the corresponding filter, so calling with both defaults copies every
    line unchanged.

    Args:
        src_file: Path of the text file to read.
        dest_file: Path of the file to write; it is overwritten if it exists.
        good_texts: Iterable of substrings; a kept line contains any of them.
        bad_texts: Iterable of substrings; a kept line contains none of them.
    """
    # Materialize once so a one-shot iterator is not exhausted by the
    # first line and so the emptiness checks below are reliable.
    good_texts = tuple(good_texts)
    bad_texts = tuple(bad_texts)

    # read every line of src_file (line endings are preserved)
    with open(src_file) as source_file:
        lines = source_file.readlines()

    # keep only lines with ANY good text BUT NOT with ANY bad text.
    # any() over an empty collection is False, so without the guard the
    # default good_texts=() would discard every line.
    filtered_lines = [
        line for line in lines
        if (not good_texts or any(good in line for good in good_texts))
        and not any(bad in line for bad in bad_texts)
    ]

    # save onto dest_file, overwriting any existing file there
    with open(dest_file, 'w') as destination_file:
        destination_file.writelines(filtered_lines)

    print(f"Cleaned file {src_file} and saved onto {dest_file}")

In [None]:
# 100% of the grade

import os

def clean_folder_many_texts(src_folder, dest_folder, src_postfix=".txt", dest_postfix="_cleaned.txt", good_texts=(), bad_texts=()):
    """Filter every matching file of ``src_folder`` into ``dest_folder``.

    Each regular file in ``src_folder`` whose name ends with ``src_postfix``
    is read, filtered line by line, and written to ``dest_folder`` under the
    same name with the trailing ``src_postfix`` swapped for ``dest_postfix``.
    A line is kept when it contains at least one entry of ``good_texts`` and
    none of the entries of ``bad_texts``; an empty collection disables the
    corresponding filter.

    Args:
        src_folder: Directory whose direct entries are scanned (no recursion).
        dest_folder: Output directory; created if it does not exist.
        src_postfix: Name ending that selects which files are processed.
        dest_postfix: Ending that replaces ``src_postfix`` in output names.
        good_texts: Iterable of substrings; a kept line contains any of them.
        bad_texts: Iterable of substrings; a kept line contains none of them.
    """
    # Materialize once: the filters are reused for every line of every file,
    # which would exhaust a one-shot iterator.
    good_texts = tuple(good_texts)
    bad_texts = tuple(bad_texts)

    # create dest_folder if it does not exist!
    os.makedirs(dest_folder, exist_ok=True)

    for filename in os.listdir(src_folder):
        src_file = os.path.join(src_folder, filename)

        # os.listdir also returns sub-directories; skip anything that is
        # not a regular file so a folder named "x.txt" cannot crash open().
        if not filename.endswith(src_postfix) or not os.path.isfile(src_file):
            continue

        # Swap only the trailing postfix. str.replace would also rewrite
        # earlier occurrences inside the name (e.g. "a.txt.old.txt").
        stem = filename[:len(filename) - len(src_postfix)]
        dest_file = os.path.join(dest_folder, stem + dest_postfix)

        with open(src_file) as source_file:
            lines = source_file.readlines()

        # keep only lines with any good text BUT not with any bad text;
        # empty good_texts / bad_texts mean "no filter" on that side
        filtered_lines = [
            line for line in lines
            if (not good_texts or any(good in line for good in good_texts))
            and not any(bad in line for bad in bad_texts)
        ]

        # save into dest_folder, overwriting any existing file of that name
        with open(dest_file, 'w') as destination_file:
            destination_file.writelines(filtered_lines)

        print(f"Cleaned file {src_file} and saved onto {dest_file}")

    print(f"Cleaned files in {src_folder} and saved onto {dest_folder}")