In [13]:
import os
import shutil
import multiprocessing

# Root that is scanned for files, and root under which copies are written
source_folder = "/mnt/n/New folder"
target_folder = "/mnt/c/Users/Zy565/Downloads/EXHD/dataNEOUROlift/random"

# Lower-case extensions (leading dot included) that qualify a file for copying.
# NOTE(review): ".txt" also matches ordinary text files, not only captions.
cc_extensions = {
    ".ass",
    ".dfxp",
    ".sbv",
    ".srt",
    ".sub",
    ".txt",
    ".vtt",
}

# Function to process and copy a file
def process_file(file_info):
    """Copy one file into ``target_folder``, recreating its relative path.

    Args:
        file_info: ``(file_path, relative_path)`` tuple - the source path and
            the path relative to the scan root that is re-created under
            ``target_folder``.

    Returns:
        None. The outcome is reported with ``print``; filesystem errors are
        logged rather than raised.
    """
    file_path, relative_path = file_info  # Unpack tuple
    target_path = os.path.join(target_folder, relative_path)  # Recreate original structure

    try:
        # Directory creation lives inside the try as well: an error raised here
        # would otherwise propagate out of the worker and abort the whole
        # pool.map() batch, while copy errors are only logged.
        os.makedirs(os.path.dirname(target_path), exist_ok=True)
        shutil.copy2(file_path, target_path)  # Copy file (copy2 also copies metadata)
        print(f"Copied: {file_path} → {target_path}")
    except Exception as e:
        print(f"Error copying {file_path}: {e}")

if __name__ == "__main__":
    # Collect (full_path, relative_path) pairs for every file under
    # source_folder whose lower-cased extension is in cc_extensions.
    files_to_copy = []
    for root, _, files in os.walk(source_folder):
        for file in files:
            if os.path.splitext(file)[1].lower() in cc_extensions:  # Check file extension
                full_path = os.path.join(root, file)
                relative_path = os.path.relpath(full_path, source_folder)  # Preserve structure
                files_to_copy.append((full_path, relative_path))

    if not files_to_copy:
        # With nothing to copy the worker count would be 0, and
        # multiprocessing.Pool(processes=0) raises ValueError. os.walk() also
        # yields nothing when source_folder does not exist, so report it.
        print(f"No matching files found under {source_folder}")
    else:
        # Never start more workers than there are files to copy
        num_workers = min(multiprocessing.cpu_count(), len(files_to_copy))
        with multiprocessing.Pool(processes=num_workers) as pool:
            pool.map(process_file, files_to_copy)


Copied: /mnt/n/New folder/Assignment Question_1.txt → /mnt/c/Users/Zy565/Downloads/EXHD/dataNEOUROlift/random/Assignment Question_1.txtCopied: /mnt/n/New folder/BigDataMardeen2.txt → /mnt/c/Users/Zy565/Downloads/EXHD/dataNEOUROlift/random/BigDataMardeen2.txt

Copied: /mnt/n/New folder/sol.txt → /mnt/c/Users/Zy565/Downloads/EXHD/dataNEOUROlift/random/sol.txt
Copied: /mnt/n/New folder/json Flattener.txt → /mnt/c/Users/Zy565/Downloads/EXHD/dataNEOUROlift/random/json Flattener.txt
Copied: /mnt/n/New folder/spark-log.txt → /mnt/c/Users/Zy565/Downloads/EXHD/dataNEOUROlift/random/spark-log.txtCopied: /mnt/n/New folder/NIFTY_F1.txt → /mnt/c/Users/Zy565/Downloads/EXHD/dataNEOUROlift/random/NIFTY_F1.txt

Copied: /mnt/n/New folder/README.txt → /mnt/c/Users/Zy565/Downloads/EXHD/dataNEOUROlift/random/README.txt
Copied: /mnt/n/New folder/0. Introduction/01. Leverage generative AI for analytics and insights.srt → /mnt/c/Users/Zy565/Downloads/EXHD/dataNEOUROlift/random/0. Introduction/01. Leverage gen

In [12]:
import os
import shutil
import concurrent.futures

# Root directory that is scanned recursively
source_folder = "/mnt/n/New folder"

# Destination roots: subtitles keep their folder layout under target_subtitles,
# PDFs go into the "pdf" directory located inside it
target_subtitles = "/mnt/c/Users/Zy565/Downloads/EXHD/dataNEOUROlift/random"
target_pdfs = f"{target_subtitles}/pdf"

# Create both destination roots up front (no error if they already exist)
for _target_dir in (target_subtitles, target_pdfs):
    os.makedirs(_target_dir, exist_ok=True)

# Lower-case extensions (leading dot included) treated as subtitle files.
# NOTE(review): ".txt" also matches ordinary text files, not only subtitles.
subtitle_extensions = {
    ".ass",
    ".srt",
    ".ssa",
    ".sub",
    ".txt",
    ".vtt",
}

# Extension routed to the PDF destination
pdf_extension = ".pdf"

def copy_file(file_path, dest_folder, parent_structure=False):
    """Copy ``file_path`` into ``dest_folder`` and print the outcome.

    Args:
        file_path: Path of the file to copy.
        dest_folder: Destination directory.
        parent_structure: When true, the file's parent directory (taken
            relative to ``source_folder``) is re-created below ``dest_folder``
            and the file is copied there; otherwise the file is copied
            directly into ``dest_folder``.

    Returns:
        None. Any exception is caught and reported with ``print``.
    """
    try:
        destination = dest_folder
        if parent_structure:
            # Mirror the source sub-directory underneath the destination root
            source_subdir = os.path.relpath(os.path.dirname(file_path), source_folder)
            destination = os.path.join(dest_folder, source_subdir)
            os.makedirs(destination, exist_ok=True)

        # NOTE(review): in flat mode, files with the same name land on the same
        # destination path, so a later copy replaces an earlier one.
        shutil.copy(file_path, destination)
        print(f"Copied: {file_path} -> {destination}")
    except Exception as exc:
        print(f"Error copying {file_path}: {exc}")

def process_file(file_path):
    """Route one file to its destination based on its extension.

    Files whose lower-cased extension is in ``subtitle_extensions`` are copied
    to ``target_subtitles`` with their parent folder structure preserved; files
    whose extension equals ``pdf_extension`` are copied flat into
    ``target_pdfs``. Every other file is ignored.
    """
    suffix = os.path.splitext(file_path)[1].lower()

    if suffix in subtitle_extensions:
        copy_file(file_path, target_subtitles, parent_structure=True)
        return

    if suffix == pdf_extension:
        copy_file(file_path, target_pdfs, parent_structure=False)

def scan_and_copy():
    """Walk ``source_folder`` and hand every file to ``process_file`` on a thread pool.

    One task is submitted per file to a pool of up to 20 worker threads.
    Leaving the ``with`` block waits for all submitted tasks to finish.
    The returned futures are not inspected.
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=20) as pool:
        for dirpath, _subdirs, filenames in os.walk(source_folder):
            for name in filenames:
                pool.submit(process_file, os.path.join(dirpath, name))

# Entry point: start the scan-and-copy run when this code executes as the main module
if __name__ == "__main__":
    scan_and_copy()


Copied: /mnt/n/New folder/Assignment Question_1.txt -> /mnt/c/Users/Zy565/Downloads/EXHD/dataNEOUROlift/random/.
Copied: /mnt/n/New folder/BigDataMardeen2.txt -> /mnt/c/Users/Zy565/Downloads/EXHD/dataNEOUROlift/random/.
Copied: /mnt/n/New folder/README.txt -> /mnt/c/Users/Zy565/Downloads/EXHD/dataNEOUROlift/random/.
Copied: /mnt/n/New folder/sol.txt -> /mnt/c/Users/Zy565/Downloads/EXHD/dataNEOUROlift/random/.
Copied: /mnt/n/New folder/NIFTY_F1.txt -> /mnt/c/Users/Zy565/Downloads/EXHD/dataNEOUROlift/random/.
Copied: /mnt/n/New folder/json Flattener.txt -> /mnt/c/Users/Zy565/Downloads/EXHD/dataNEOUROlift/random/.
Copied: /mnt/n/New folder/spark-log.txt -> /mnt/c/Users/Zy565/Downloads/EXHD/dataNEOUROlift/random/.
Copied: /mnt/n/New folder/0. Introduction/01. Leverage generative AI for analytics and insights.srt -> /mnt/c/Users/Zy565/Downloads/EXHD/dataNEOUROlift/random/0. Introduction
Copied: /mnt/n/New folder/0. Introduction/03. How to use the challenge exercise files.srt -> /mnt/c/Users