From a11e91a7ea5ecad859b16df10b2f1c341ec33354 Mon Sep 17 00:00:00 2001
From: Stonebanks-js
Date: Sat, 12 Oct 2024 23:54:17 +0530
Subject: [PATCH] feat: Add file type filtering and report generation features
 to duplicate finder

---
 Duplicate Finder/Readme.md           | 15 ++++++++++++++-
 Duplicate Finder/duplicate-finder.py | 25 +++++++++++++++++++++----
 2 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/Duplicate Finder/Readme.md b/Duplicate Finder/Readme.md
index 14055ffb..aae927f7 100644
--- a/Duplicate Finder/Readme.md
+++ b/Duplicate Finder/Readme.md
@@ -33,4 +33,17 @@ Always backup your data before using scripts that modify files. The author is no
 
 
 
-
\ No newline at end of file
+
+
+
+# KEY MODIFICATIONS
+
+File Type Filtering:
+
+Added an input prompt to specify file extensions for filtering.
+Modified the find_duplicates function to only consider files with the specified extensions.
+
+Generate Report:
+
+Added a new generate_report function that creates a JSON report of duplicate files.
+Added the option for the user to choose to generate a report instead of deleting or moving files.
\ No newline at end of file
diff --git a/Duplicate Finder/duplicate-finder.py b/Duplicate Finder/duplicate-finder.py
index 47d7bb7e..f36fa390 100644
--- a/Duplicate Finder/duplicate-finder.py
+++ b/Duplicate Finder/duplicate-finder.py
@@ -1,5 +1,6 @@
 import os
 import hashlib
+import json # Import for generating reports
 
 def get_file_hash(filepath):
     """Return the MD5 hash of a file."""
@@ -9,13 +10,16 @@
         hasher.update(buf)
     return hasher.hexdigest()
 
-def find_duplicates(directory, min_size=0):
-    """Find duplicate files in a directory."""
+def find_duplicates(directory, min_size=0, file_extensions=None):
+    """Find duplicate files in a directory, with optional file type filtering."""
     hashes = {}
     duplicates = {}
 
     for dirpath, dirnames, filenames in os.walk(directory):
         for filename in filenames:
+            if file_extensions and not filename.lower().endswith(tuple(file_extensions)):
+                continue # Skip files that don't match the extensions
+
             filepath = os.path.join(dirpath, filename)
             if os.path.getsize(filepath) >= min_size:
                 file_hash = get_file_hash(filepath)
@@ -29,11 +33,20 @@
     return {k: v for k, v in duplicates.items() if len(v) > 1}
 
+def generate_report(duplicates, report_path):
+    """Generate a report of duplicate files in JSON format."""
+    with open(report_path, 'w') as report_file:
+        json.dump(duplicates, report_file, indent=4)
+    print(f"Report generated: {report_path}")
+
 def main():
     directory = input("Enter the directory to scan for duplicates: ")
     min_size = int(input("Enter the minimum file size to consider (in bytes, default is 0): ") or "0")
 
-    duplicates = find_duplicates(directory, min_size)
+    file_type_input = input("Enter the file extensions to check (comma-separated, e.g. .jpg,.png), or press Enter to check all: ")
+    file_extensions = [ext.strip().lower() for ext in file_type_input.split(",")] if file_type_input else None
+
+    duplicates = find_duplicates(directory, min_size, file_extensions)
 
     if not duplicates:
         print("No duplicates found.")
 
@@ -45,7 +58,7 @@
             print(path)
         print("------")
 
-    action = input("\nChoose an action: (D)elete, (M)ove, (N)o action: ").lower()
+    action = input("\nChoose an action: (D)elete, (M)ove, (R)eport, (N)o action: ").lower()
 
     if action == "d":
         for _, paths in duplicates.items():
@@ -64,6 +77,10 @@
                 os.rename(path, target_path)
                 print(f"Moved {path} to {target_path}")
 
+    elif action == "r":
+        report_path = input("Enter the path to save the report (e.g., duplicates_report.json): ")
+        generate_report(duplicates, report_path)
+
     else:
         print("No action taken.")