In [15]:
import os
import shutil
import multiprocessing

# Root directory to search in
root_folder = "/mnt/c/Users/Zy565/Downloads/EXHD/dataNEOUROlift/BOOKS"
# Destination folder for SQL-related PDFs
sql_folder = os.path.join(root_folder, "SQL")

# Ensure the destination folder exists
os.makedirs(sql_folder, exist_ok=True)

# Function to process a single PDF file
def process_pdf(file_info):
    file_path, filename = file_info  # Unpack tuple

    # Check if "sql" (case insensitive) is in the filename
    if "nosql" in filename.lower():
        destination_path = os.path.join(sql_folder, filename)
        try:
            shutil.move(file_path, destination_path)
            print(f"Moved: {filename} → {sql_folder}")
        except Exception as e:
            print(f"Error moving {filename}: {e}")

if __name__ == "__main__":
    # Collect all PDF files recursively
    pdf_files = []
    for folder, _, files in os.walk(root_folder):
        for file in files:
            if file.lower().endswith(".pdf"):  # Ensure it's a PDF
                pdf_files.append((os.path.join(folder, file), file))  # Store (full_path, filename)
    
    # Use multiprocessing to speed up the process
    num_workers = min(multiprocessing.cpu_count(), len(pdf_files))  # Use optimal number of workers
    with multiprocessing.Pool(processes=num_workers) as pool:
        pool.map(process_pdf, pdf_files)


Moved: Java_Persistence_with_NoSQL.pdf → /mnt/c/Users/Zy565/Downloads/EXHD/dataNEOUROlift/BOOKS/SQL
Moved: NoSQL.pdf → /mnt/c/Users/Zy565/Downloads/EXHD/dataNEOUROlift/BOOKS/SQL
Moved: NoSQL_and_SQL_Data_Modeling.pdf → /mnt/c/Users/Zy565/Downloads/EXHD/dataNEOUROlift/BOOKS/SQL
Moved: NoSQL_Databases.pdf → /mnt/c/Users/Zy565/Downloads/EXHD/dataNEOUROlift/BOOKS/SQL
Moved: SQL_and_NoSQL_Interview_Questions__2023_.pdf → /mnt/c/Users/Zy565/Downloads/EXHD/dataNEOUROlift/BOOKS/SQL


In [16]:
import os
import shutil
import multiprocessing

# Root directory to search in
root_folder = "/mnt/c/Users/Zy565/Downloads/EXHD/dataNEOUROlift/BOOKS"

# Keywords and destination folders
categories = {
    "Computer_Science": ["computer science", "cs", "algorithms", "data structures"],
    "Java": ["java"],
    "Python": ["python"],
    "C++": ["c++"],
    "Network_Security": ["network", "security", "cybersecurity"],
}

# Ensure all destination folders exist
for folder in categories.keys():
    os.makedirs(os.path.join(root_folder, folder), exist_ok=True)

# Function to process a single PDF file
def process_pdf(file_info):
    file_path, filename = file_info  # Unpack tuple
    lower_filename = filename.lower()  # Convert filename to lowercase for case-insensitive matching

    # Check which category the file belongs to
    for category, keywords in categories.items():
        if any(keyword in lower_filename for keyword in keywords):  # Match any keyword
            destination_path = os.path.join(root_folder, category, filename)
            try:
                shutil.move(file_path, destination_path)
                print(f"Moved: {filename} → {category}/")
            except Exception as e:
                print(f"Error moving {filename}: {e}")
            return  # Stop after first match

if __name__ == "__main__":
    # Collect all PDF files recursively
    pdf_files = []
    for folder, _, files in os.walk(root_folder):
        for file in files:
            if file.lower().endswith(".pdf"):  # Ensure it's a PDF
                pdf_files.append((os.path.join(folder, file), file))  # Store (full_path, filename)
    
    # Use multiprocessing to speed up the process
    num_workers = min(multiprocessing.cpu_count(), len(pdf_files))  # Use optimal number of workers
    with multiprocessing.Pool(processes=num_workers) as pool:
        pool.map(process_pdf, pdf_files)


Moved: 07_Partioning_and_collecting_algorithms.pdf → Computer_Science/
Moved: 117_Probability_Statistics_Final_Exam_So.pdf → Computer_Science/
Moved: Cheat_Sheets_for_AI__Neural_Networks__Ma.pdf → Network_Security/
Moved: 001_Course_notes_inferential_statistics.pdf → Computer_Science/
Moved: CS3304_9_LanguageSyntax_2.pdf → Computer_Science/
Moved: intermediate_python_ch1_slides.pdf → Python/
Moved: A_guide_to_Face_Detection_in_Python___To.pdf → Python/
Moved: Build_an_Email_Spider_with_Python.pdf → Python/
Moved: Face_Detection_with_Python.pdf → Python/
Moved: Learning_Python__From_Zero_to_Hero___fre.pdf → Python/
Moved: Epic_Python_Coding.pdf → Python/
Moved: 25_most_popular_Python_scripts_for_netwo.pdf → Python/Moved: 099_Test_statistics_for_one_and_two_tail.pdf → Computer_Science/

Moved: 140_python_exercises.pdf → Python/
Moved: Comprehensive_Python_Cheatsheet.pdf → Python/
Moved: 002_Course_notes_inferential_statistics.pdf → Computer_Science/
Moved: CS3352.pdf → Computer_Science/
