# In [None]:
import os
import subprocess
from tqdm import tqdm

def convert_file_to_pptx(file_path, output_directory, libreoffice_path=None):
    """Convert a presentation file to .pptx format using LibreOffice.

    Runs ``soffice --headless --convert-to pptx --outdir <output_directory>
    <file_path>`` and reports whether the process exited successfully.

    Args:
        file_path: Path of the presentation file to convert.
        output_directory: Directory handed to LibreOffice's ``--outdir``.
        libreoffice_path: Path to the ``soffice`` executable. When omitted,
            the default macOS install location is used.

    Returns:
        True if LibreOffice exited with status 0, False otherwise.

    Raises:
        OSError: If the ``soffice`` executable cannot be started (for example
            ``FileNotFoundError`` when LibreOffice is not installed there).
    """
    if libreoffice_path is None:
        libreoffice_path = "/Applications/LibreOffice.app/Contents/MacOS/soffice"

    # Pass the arguments as a list (no shell) so paths containing spaces or
    # shell metacharacters are handed to LibreOffice verbatim.
    command = [
        libreoffice_path,
        "--headless",
        "--convert-to",
        "pptx",
        "--outdir",
        output_directory,
        file_path,
    ]
    result = subprocess.run(command, check=False)

    # Surface failures instead of silently ignoring them, but do not raise:
    # callers converting a batch should be able to continue with other files.
    if result.returncode != 0:
        print(
            f"LibreOffice exited with status {result.returncode} "
            f"while converting '{file_path}'."
        )
        return False
    return True

def convert_all_presentations_in_directory(input_directory, output_directory):
    """Convert all supported presentation files in a directory to .pptx format.

    Regular files directly inside ``input_directory`` whose extension matches
    one of the supported extensions (compared case-insensitively) are passed
    to ``convert_file_to_pptx``. A file is skipped when a ``.pptx`` with the
    same base name is already present in ``output_directory``.

    Args:
        input_directory: Directory scanned (non-recursively) for presentations.
        output_directory: Directory that receives the converted files; it is
            created if it does not exist.
    """
    # Define the list of file extensions to convert
    supported_extensions = (".ppt", ".pps", ".ppsx", ".pptm", ".pot")

    # Ensure the output directory exists
    os.makedirs(output_directory, exist_ok=True)

    # Get a list of files already converted in the output directory
    existing_files = set(os.listdir(output_directory))

    # Collect the presentation files in the input directory. The extension is
    # lower-cased so "DECK.PPT" is matched too, directories that merely look
    # like presentations are ignored, and sorting makes the order stable.
    files_to_convert = sorted(
        filename
        for filename in os.listdir(input_directory)
        if filename.lower().endswith(supported_extensions)
        and os.path.isfile(os.path.join(input_directory, filename))
    )

    # Iterate through the files with a progress bar. Messages go through
    # tqdm.write so they do not break the progress bar rendering.
    for filename in tqdm(files_to_convert, desc="Converting presentations"):
        file_path = os.path.join(input_directory, filename)
        output_file = f"{os.path.splitext(filename)[0]}.pptx"

        if output_file in existing_files:
            tqdm.write(f"'{output_file}' already exists in the output directory. Skipping conversion.")
            continue

        tqdm.write(f"Converting '{file_path}' to .pptx...")
        convert_file_to_pptx(file_path, output_directory)

        # Remember freshly produced outputs so a later input with the same
        # base name (e.g. "a.ppt" and "a.pps") does not overwrite the result.
        if os.path.exists(os.path.join(output_directory, output_file)):
            existing_files.add(output_file)

# Replace these paths with your actual directories
input_directory = "/Users/tyrionhuu/Downloads/ppt"
output_directory = "/Users/tyrionhuu/Downloads/pptx_dataset"

# Run the batch conversion only when this file is executed directly (as a
# script or notebook cell), not as a side effect of importing it.
if __name__ == "__main__":
    convert_all_presentations_in_directory(input_directory, output_directory)