# Import Libraries

In [None]:
import os
import pdfkit
import img2pdf
from PyPDF2 import PdfMerger, PdfReader, PdfWriter
from natsort import natsorted


from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4
from reportlab.lib.units import mm

# Combining Images from a Folder into an A4-sized PDF

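This tool processes every subfolder of a base folder: text/HTML files and images are converted to A4-sized PDF pages, existing PDFs are included as they are, and everything found in a subfolder is merged (in natural sort order, with the image pages appended last) into a single PDF named after that subfolder.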

In [None]:
# Function to convert text or HTML files to PDF with A4 page size
def convert_text_to_pdf(file_path, output_pdf):
    """Render a text or HTML file to a PDF using pdfkit.

    The page size is A4 and all four margins are 0.75in.

    Args:
        file_path: Path of the source .txt/.html file.
        output_pdf: Path of the PDF file to create.
    """
    margin = '0.75in'
    options = {'page-size': 'A4'}
    # Same margin on every side; insertion order matches top/right/bottom/left.
    for side in ('top', 'right', 'bottom', 'left'):
        options[f'margin-{side}'] = margin
    pdfkit.from_file(file_path, output_pdf, options=options)

# Function to convert image files to PDF with A4 page size
def convert_images_to_pdf(image_files, output_pdf):
    """Convert image files into a single PDF whose pages are A4-sized.

    Args:
        image_files: Image path(s) passed straight to ``img2pdf.convert``.
        output_pdf: Path of the PDF file to create (overwritten if present).

    Raises:
        Whatever ``img2pdf.convert`` raises for unreadable or unsupported
        input; in that case ``output_pdf`` is not created or modified.
    """
    # A4 is 210 x 297 mm; img2pdf layouts are expressed in points.
    a4_size = (img2pdf.mm_to_pt(210), img2pdf.mm_to_pt(297))
    layout_fun = img2pdf.get_layout_fun(a4_size)

    # Convert BEFORE opening the destination: opening first would leave an
    # empty, invalid PDF on disk whenever the conversion fails.
    pdf_bytes = img2pdf.convert(image_files, layout_fun=layout_fun)

    with open(output_pdf, "wb") as f:
        f.write(pdf_bytes)

# Function to merge a list of PDF files, in the given order, into one PDF
def merge_pdfs_in_folder(pdf_files, output_pdf):
    """Concatenate PDFs, in the order given, into one output file.

    Args:
        pdf_files: Iterable of paths of the PDFs to append.
        output_pdf: Path of the merged PDF to write.
    """
    merger = PdfMerger()
    try:
        for pdf in pdf_files:
            merger.append(pdf)
        merger.write(output_pdf)
    finally:
        # Close the merger even when an input cannot be appended or the
        # output cannot be written, so its open streams are released.
        merger.close()

# Main function to convert all folders
def convert_folders_to_pdfs(base_folder):
    """Build one merged PDF per subfolder of ``base_folder``.

    For every immediate subfolder (visited in natural sort order):

    * ``.txt`` / ``.html`` files are converted to a PDF written next to the
      source file (same name, ``.pdf`` extension);
    * ``.png`` / ``.jpg`` / ``.jpeg`` files are combined into
      ``images_output.pdf`` inside the subfolder;
    * existing ``.pdf`` files are used as they are.

    The resulting PDFs are merged into ``<base_folder>/<subfolder>.pdf``.
    Text-derived and existing PDFs appear in natural file-name order; the
    image PDF is always appended last. Extensions are matched
    case-insensitively.

    Args:
        base_folder: Directory whose subfolders are processed.
    """
    text_exts = ('.txt', '.html')
    image_exts = ('.png', '.jpg', '.jpeg')

    for folder_name in natsorted(os.listdir(base_folder)):  # Natural sorting of folder names
        folder_path = os.path.join(base_folder, folder_name)
        if not os.path.isdir(folder_path):  # Only folders are processed
            continue

        print(f"Processing folder: {folder_name}")
        output_pdf = os.path.join(base_folder, f"{folder_name}.pdf")
        image_pdf = os.path.join(folder_path, "images_output.pdf")

        # Collecting files within the folder
        pdf_files = []
        image_files = []
        for file_name in natsorted(os.listdir(folder_path)):  # Natural sorting of file names
            file_path = os.path.join(folder_path, file_name)
            lowered = file_name.lower()  # so ".PNG", ".PDF", ... are recognised too

            if lowered.endswith(text_exts):
                # splitext strips only the final extension, so a name such as
                # "report.v2.txt" becomes "report.v2.pdf" rather than "report.pdf".
                text_pdf = os.path.splitext(file_path)[0] + '.pdf'
                convert_text_to_pdf(file_path, text_pdf)
                # The target may already be queued as an "existing" PDF (left
                # over from an earlier run); never merge the same path twice.
                if text_pdf not in pdf_files:
                    pdf_files.append(text_pdf)
            elif lowered.endswith(image_exts):
                # Collect image files for conversion
                image_files.append(file_path)
            elif lowered.endswith('.pdf'):
                # Add existing PDFs to merge, unless a text conversion
                # already queued this exact path.
                if file_path not in pdf_files:
                    pdf_files.append(file_path)

        # If there are image files, convert them to a PDF
        if image_files:
            convert_images_to_pdf(image_files, image_pdf)
            # A stale images_output.pdf from a previous run was picked up as
            # an existing PDF above; drop it so the fresh one is merged once.
            if image_pdf in pdf_files:
                pdf_files.remove(image_pdf)
            pdf_files.append(image_pdf)

        # Merge all PDFs in the folder into one
        if pdf_files:
            merge_pdfs_in_folder(pdf_files, output_pdf)
        else:
            print(f"No convertible files found in {folder_name}")

# Base folder whose immediate subfolders are each converted into one PDF.
# NOTE(review): 'path' looks like a placeholder - set it to the real directory before running.
base_folder = 'path'
convert_folders_to_pdfs(base_folder)


# Save Resized PDFs in the Same Folder:

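This tool resizes every PDF in a folder that is not already A4 to A4 size and saves the result in the same folder with a `resized_` prefix; PDFs that are already A4 are skipped.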

In [None]:
# Function to check if a PDF is A4 size
def is_a4_size(pdf_path):
    """Return True when every page of the PDF has an A4-sized media box.

    Each page's media box width and height are compared with reportlab's
    ``A4`` (width, height) in points, allowing a 2-point tolerance. All
    pages are checked, so a document whose first page is A4 but whose
    later pages are not is reported as not A4. A PDF without pages yields
    True (there is nothing to resize).

    Args:
        pdf_path: Path of the PDF to inspect.
    """
    reader = PdfReader(pdf_path)

    # A4 size in points (1 point = 1/72 inch)
    a4_width, a4_height = A4

    # Allow a small margin of error
    margin = 2  # points

    return all(
        # Box dimensions are converted to float before comparing
        abs(float(page.mediabox.width) - a4_width) <= margin
        and abs(float(page.mediabox.height) - a4_height) <= margin
        for page in reader.pages
    )

# Function to resize a PDF to A4 size and preserve content without cropping
def resize_to_a4(pdf_path, output_path):
    """Write a copy of the PDF in which every page is exactly A4.

    Each page is scaled uniformly (aspect ratio preserved) by the largest
    factor that still fits inside A4, centred on the sheet, and all of its
    page boxes are set to the A4 rectangle with origin (0, 0).

    Args:
        pdf_path: Path of the source PDF.
        output_path: Path of the resized PDF to write.
    """
    reader = PdfReader(pdf_path)
    writer = PdfWriter()

    a4_width, a4_height = A4  # A4 dimensions in points

    for page in reader.pages:
        # Original media box geometry; the lower-left corner need not be (0, 0)
        source_box = page.mediabox
        orig_left = float(source_box.left)
        orig_bottom = float(source_box.bottom)
        orig_width = float(source_box.width)
        orig_height = float(source_box.height)

        # Use the smaller scale to fit the page while preserving the aspect ratio
        scale = min(a4_width / orig_width, a4_height / orig_height)

        # Move the scaled content so that it is centred on the A4 sheet,
        # compensating for a media box whose origin is not (0, 0).
        shift_x = (a4_width - orig_width * scale) / 2 - orig_left * scale
        shift_y = (a4_height - orig_height * scale) / 2 - orig_bottom * scale
        page.add_transformation((scale, 0, 0, scale, shift_x, shift_y))

        # Set every page box to A4. Moving only the media box's upper-right
        # corner would keep a non-zero origin, and a crop box of a different
        # size would leave the page not A4 when displayed.
        for page_box in (
            page.mediabox,
            page.cropbox,
            page.trimbox,
            page.bleedbox,
            page.artbox,
        ):
            page_box.lower_left = (0, 0)
            page_box.upper_right = (a4_width, a4_height)

        # Add the resized page to the writer
        writer.add_page(page)

    # Write the output to the new PDF
    with open(output_path, "wb") as out_file:
        writer.write(out_file)

# Main function to check and resize PDFs in a folder
def process_pdfs_in_folder(folder_path):
    """Resize every non-A4 PDF in ``folder_path`` to A4.

    Files whose name ends in ``.pdf`` (case-insensitive) are checked with
    ``is_a4_size``. Those that are not A4 are passed to ``resize_to_a4``
    and written to the same folder as ``resized_<original name>``; the
    others are reported and skipped.

    Args:
        folder_path: Directory containing the PDFs to check.
    """
    for file_name in os.listdir(folder_path):
        # Compare lower-cased so ".PDF" files are not silently ignored
        if not file_name.lower().endswith('.pdf'):
            continue

        pdf_path = os.path.join(folder_path, file_name)

        if is_a4_size(pdf_path):
            print(f"{file_name} is already A4 size. Skipping.")
            continue

        print(f"{file_name} is not A4 size. Resizing.")
        output_path = os.path.join(folder_path, f"resized_{file_name}")
        resize_to_a4(pdf_path, output_path)

# Folder containing the PDFs to check; resized copies are written back into this same folder.
# NOTE(review): 'input_folder' looks like a placeholder - set it to the real directory before running.
folder_path = 'input_folder'
process_pdfs_in_folder(folder_path)


# Save Resized PDFs in a Different Folder:

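This tool resizes every PDF in the input folder that is not already A4 to A4 size and saves the result under the same filename in another folder; PDFs that are already A4 are skipped, and unreadable PDFs are reported instead of stopping the run.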

In [None]:
import os
from PyPDF2 import PdfReader, PdfWriter
from reportlab.lib.pagesizes import A4
from PyPDF2.errors import PdfReadError  # Import PdfReadError for handling corrupt PDFs

# Function to check if a PDF is A4 size
def is_a4_size(pdf_path):
    """Return True when every page of the PDF has an A4-sized media box.

    Each page's media box width and height are compared with reportlab's
    ``A4`` (width, height) in points, allowing a 2-point tolerance. All
    pages are checked, so a document whose first page is A4 but whose
    later pages are not is reported as not A4. A PDF without pages yields
    True (there is nothing to resize).

    Args:
        pdf_path: Path of the PDF to inspect.

    Returns:
        True if all pages are A4 within the tolerance; False otherwise, and
        also False (after printing a message) when PyPDF2 raises
        ``PdfReadError`` while reading the file.
    """
    try:
        reader = PdfReader(pdf_path)

        # A4 size in points (1 point = 1/72 inch)
        a4_width, a4_height = A4

        # Allow a small margin of error
        margin = 2  # points

        return all(
            # Box dimensions are converted to float before comparing
            abs(float(page.mediabox.width) - a4_width) <= margin
            and abs(float(page.mediabox.height) - a4_height) <= margin
            for page in reader.pages
        )
    except PdfReadError:
        print(f"Error reading {pdf_path}: File may be corrupted or incomplete.")
        return False

# Function to resize a PDF to A4 size and preserve content without cropping
def resize_to_a4(pdf_path, output_path):
    """Write a copy of the PDF in which every page is exactly A4.

    Each page is scaled uniformly (aspect ratio preserved) by the largest
    factor that still fits inside A4, centred on the sheet, and all of its
    page boxes are set to the A4 rectangle with origin (0, 0).

    If PyPDF2 raises ``PdfReadError`` the file is skipped: a message is
    printed and the function returns without raising.

    Args:
        pdf_path: Path of the source PDF.
        output_path: Path of the resized PDF to write.
    """
    try:
        reader = PdfReader(pdf_path)
        writer = PdfWriter()

        a4_width, a4_height = A4  # A4 dimensions in points

        for page in reader.pages:
            # Original media box geometry; the lower-left corner need not be (0, 0)
            source_box = page.mediabox
            orig_left = float(source_box.left)
            orig_bottom = float(source_box.bottom)
            orig_width = float(source_box.width)
            orig_height = float(source_box.height)

            # Use the smaller scale to fit the page while preserving the aspect ratio
            scale = min(a4_width / orig_width, a4_height / orig_height)

            # Move the scaled content so that it is centred on the A4 sheet,
            # compensating for a media box whose origin is not (0, 0).
            shift_x = (a4_width - orig_width * scale) / 2 - orig_left * scale
            shift_y = (a4_height - orig_height * scale) / 2 - orig_bottom * scale
            page.add_transformation((scale, 0, 0, scale, shift_x, shift_y))

            # Set every page box to A4. Moving only the media box's
            # upper-right corner would keep a non-zero origin, and a crop box
            # of a different size would leave the page not A4 when displayed.
            for page_box in (
                page.mediabox,
                page.cropbox,
                page.trimbox,
                page.bleedbox,
                page.artbox,
            ):
                page_box.lower_left = (0, 0)
                page_box.upper_right = (a4_width, a4_height)

            # Add the resized page to the writer
            writer.add_page(page)

        # Write the output to the new PDF
        with open(output_path, "wb") as out_file:
            writer.write(out_file)

    except PdfReadError:
        print(f"Error reading {pdf_path}: Skipping file due to corruption or incomplete data.")

# Main function to check and resize PDFs in a folder, saving them to a different folder
def process_pdfs_in_folder(input_folder, output_folder):
    """Resize every non-A4 PDF in ``input_folder`` into ``output_folder``.

    Files whose name ends in ``.pdf`` (case-insensitive) are checked with
    ``is_a4_size``. Those that are not A4 are passed to ``resize_to_a4``
    and written to ``output_folder`` under the same filename; the others
    are reported and skipped (they are not copied).

    Args:
        input_folder: Directory containing the PDFs to check.
        output_folder: Directory for the resized PDFs; created, including
            missing parent directories, when it does not exist.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test
    os.makedirs(output_folder, exist_ok=True)

    for file_name in os.listdir(input_folder):
        # Compare lower-cased so ".PDF" files are not silently ignored
        if not file_name.lower().endswith('.pdf'):
            continue

        pdf_path = os.path.join(input_folder, file_name)

        if is_a4_size(pdf_path):
            print(f"{file_name} is already A4 size. Skipping.")
            continue

        print(f"{file_name} is not A4 size. Resizing.")
        # Save with the same filename in the new folder
        output_path = os.path.join(output_folder, file_name)
        resize_to_a4(pdf_path, output_path)

# Folder containing the PDFs to check, and the separate folder that receives the resized copies.
# NOTE(review): both values look like placeholders - set them to real directories before running.
input_folder = 'input_folder'
output_folder = 'output_folder'
process_pdfs_in_folder(input_folder, output_folder)
