Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion Duplicate Finder/Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,17 @@ Always backup your data before using scripts that modify files. The author is no


<!-- Updated README links and corrected typos -->
<!-- Updated README links and corrected typos -->
<!-- Updated README links and corrected typos -->


# KEY MODIFICATIONS

File Type Filtering:

- Added an input prompt to specify file extensions for filtering.
- Modified the `find_duplicates` function to only consider files with the specified extensions.

Generate Report:

- Added a new `generate_report` function that creates a JSON report of duplicate files.
- Added an option for the user to generate a report instead of deleting or moving files.
25 changes: 21 additions & 4 deletions Duplicate Finder/duplicate-finder.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import hashlib
import json # Import for generating reports

def get_file_hash(filepath):
"""Return the MD5 hash of a file."""
Expand All @@ -9,13 +10,16 @@ def get_file_hash(filepath):
hasher.update(buf)
return hasher.hexdigest()

def find_duplicates(directory, min_size=0):
"""Find duplicate files in a directory."""
def find_duplicates(directory, min_size=0, file_extensions=None):
"""Find duplicate files in a directory, with optional file type filtering."""
hashes = {}
duplicates = {}

for dirpath, dirnames, filenames in os.walk(directory):
for filename in filenames:
if file_extensions and not filename.lower().endswith(tuple(file_extensions)):
continue # Skip files that don't match the extensions

filepath = os.path.join(dirpath, filename)
if os.path.getsize(filepath) >= min_size:
file_hash = get_file_hash(filepath)
Expand All @@ -29,11 +33,20 @@ def find_duplicates(directory, min_size=0):

return {k: v for k, v in duplicates.items() if len(v) > 1}

def generate_report(duplicates, report_path):
    """Write the duplicate-file mapping to a JSON report.

    Args:
        duplicates: JSON-serializable mapping to dump; the caller passes
            the result of ``find_duplicates``.
        report_path: Path of the file to create or overwrite.

    Raises:
        OSError: If ``report_path`` cannot be opened for writing.
        TypeError: If ``duplicates`` contains values ``json`` cannot serialize.
    """
    # Pin the encoding so the report does not depend on the platform's
    # default locale encoding.
    with open(report_path, 'w', encoding='utf-8') as report_file:
        json.dump(duplicates, report_file, indent=4)
    # Announce success only after the file has been flushed and closed.
    print(f"Report generated: {report_path}")

def main():
directory = input("Enter the directory to scan for duplicates: ")
min_size = int(input("Enter the minimum file size to consider (in bytes, default is 0): ") or "0")

duplicates = find_duplicates(directory, min_size)
file_type_input = input("Enter the file extensions to check (comma-separated, e.g. .jpg,.png), or press Enter to check all: ")
file_extensions = [ext.strip().lower() for ext in file_type_input.split(",")] if file_type_input else None

duplicates = find_duplicates(directory, min_size, file_extensions)

if not duplicates:
print("No duplicates found.")
Expand All @@ -45,7 +58,7 @@ def main():
print(path)
print("------")

action = input("\nChoose an action: (D)elete, (M)ove, (N)o action: ").lower()
action = input("\nChoose an action: (D)elete, (M)ove, (R)eport, (N)o action: ").lower()

if action == "d":
for _, paths in duplicates.items():
Expand All @@ -64,6 +77,10 @@ def main():
os.rename(path, target_path)
print(f"Moved {path} to {target_path}")

elif action == "r":
report_path = input("Enter the path to save the report (e.g., duplicates_report.json): ")
generate_report(duplicates, report_path)

else:
print("No action taken.")

Expand Down