From a11e91a7ea5ecad859b16df10b2f1c341ec33354 Mon Sep 17 00:00:00 2001
From: Stonebanks-js
Date: Sat, 12 Oct 2024 23:54:17 +0530
Subject: [PATCH] feat: Add file type filtering and report generation features
 to duplicate finder

---
 Duplicate Finder/Readme.md           | 15 ++++++++++++++-
 Duplicate Finder/duplicate-finder.py | 25 +++++++++++++++++++++----
 2 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/Duplicate Finder/Readme.md b/Duplicate Finder/Readme.md
index 14055ffb..aae927f7 100644
--- a/Duplicate Finder/Readme.md
+++ b/Duplicate Finder/Readme.md
@@ -33,4 +33,17 @@ Always backup your data before using scripts that modify files. The author is no
 
 
 
-
\ No newline at end of file
+
+
+
+# KEY MODIFICATIONS
+
+File Type Filtering:
+
+Added an input prompt to specify file extensions for filtering.
+Modified the find_duplicates function to only consider files with the specified extensions.
+
+Generate Report:
+
+Added a new generate_report function that creates a JSON report of duplicate files.
+Added the option for the user to choose to generate a report instead of deleting or moving files.
\ No newline at end of file
diff --git a/Duplicate Finder/duplicate-finder.py b/Duplicate Finder/duplicate-finder.py
index 47d7bb7e..f36fa390 100644
--- a/Duplicate Finder/duplicate-finder.py
+++ b/Duplicate Finder/duplicate-finder.py
@@ -1,5 +1,6 @@
 import os
 import hashlib
+import json # Import for generating reports
 
 def get_file_hash(filepath):
     """Return the MD5 hash of a file."""
@@ -9,13 +10,16 @@
         hasher.update(buf)
     return hasher.hexdigest()
 
-def find_duplicates(directory, min_size=0):
-    """Find duplicate files in a directory."""
+def find_duplicates(directory, min_size=0, file_extensions=None):
+    """Find duplicate files in a directory, with optional file type filtering."""
     hashes = {}
     duplicates = {}
 
     for dirpath, dirnames, filenames in os.walk(directory):
         for filename in filenames:
+            if file_extensions and not filename.lower().endswith(tuple(file_extensions)):
+                continue # Skip files that don't match the extensions
+
             filepath = os.path.join(dirpath, filename)
             if os.path.getsize(filepath) >= min_size:
                 file_hash = get_file_hash(filepath)
@@ -29,11 +33,20 @@
     return {k: v for k, v in duplicates.items() if len(v) > 1}
 
+def generate_report(duplicates, report_path):
+    """Generate a report of duplicate files in JSON format."""
+    with open(report_path, 'w') as report_file:
+        json.dump(duplicates, report_file, indent=4)
+    print(f"Report generated: {report_path}")
+
 def main():
     directory = input("Enter the directory to scan for duplicates: ")
     min_size = int(input("Enter the minimum file size to consider (in bytes, default is 0): ") or "0")
 
-    duplicates = find_duplicates(directory, min_size)
+    file_type_input = input("Enter the file extensions to check (comma-separated, e.g. .jpg,.png), or press Enter to check all: ")
+    file_extensions = [ext.strip().lower() for ext in file_type_input.split(",")] if file_type_input else None
+
+    duplicates = find_duplicates(directory, min_size, file_extensions)
 
     if not duplicates:
         print("No duplicates found.")
 
@@ -45,7 +58,7 @@
             print(path)
         print("------")
 
-    action = input("\nChoose an action: (D)elete, (M)ove, (N)o action: ").lower()
+    action = input("\nChoose an action: (D)elete, (M)ove, (R)eport, (N)o action: ").lower()
 
     if action == "d":
         for _, paths in duplicates.items():
@@ -64,6 +77,10 @@
                 os.rename(path, target_path)
                 print(f"Moved {path} to {target_path}")
 
+    elif action == "r":
+        report_path = input("Enter the path to save the report (e.g., duplicates_report.json): ")
+        generate_report(duplicates, report_path)
+
     else:
         print("No action taken.")