### Installing Required Libraries

In [8]:
!pip install PyPDF2 pdfplumber pandas openpyxl



### Importing Required Libraries

In [9]:
import pdfplumber
import pandas as pd
import openpyxl
from openpyxl.styles import Font

### Define Functions

In [10]:
# Function to extract text from the PDF
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF as a single string.

    Args:
        pdf_path: Location of the PDF file, handed to ``pdfplumber.open``.

    Returns:
        str: The text of each page that produced any text, in page order,
        with a newline character between consecutive pages. An empty string
        is returned when no page produced text.
    """
    page_texts = []
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            page_text = page.extract_text()
            # A page may produce no text (None or ""); appending None to a
            # string would raise TypeError, so such pages are skipped.
            if page_text:
                page_texts.append(page_text)
    # Separate pages with a newline so the last line of one page is not
    # fused with the first line of the next page.
    return "\n".join(page_texts)

# Function to parse the extracted text into a DataFrame
def parse_pdf_text(text):
    """Split extracted text into a table of whitespace-separated tokens.

    Each non-blank line of ``text`` becomes one row and each
    whitespace-separated token on that line becomes one cell. Rows with
    fewer tokens than the widest row are padded with missing values by
    pandas.

    Args:
        text: The text to parse, with rows separated by newline characters.

    Returns:
        pandas.DataFrame: One row per non-blank line, with default integer
        column labels. An empty frame when ``text`` has no tokens.
    """
    tokenized_lines = (line.split() for line in text.split('\n'))
    # Blank or whitespace-only lines split into an empty list; keeping them
    # would add all-empty rows to the table, so they are dropped.
    rows = [tokens for tokens in tokenized_lines if tokens]
    return pd.DataFrame(rows)

# Function to write the DataFrame to an Excel file
def write_to_excel(df, output_path):
    """Save ``df`` to ``output_path`` via ``DataFrame.to_excel``.

    The frame's index is not written (``index=False``).

    Args:
        df: The frame to save.
        output_path: Destination passed to ``to_excel``.
    """
    excel_options = {"index": False}
    df.to_excel(output_path, **excel_options)

# Function to customize the Excel file
def customize_excel(output_path):
    """Bold the first row of the active sheet and save the workbook in place.

    Args:
        output_path: Path of the workbook; it is loaded from and saved back
            to this same path.
    """
    workbook = openpyxl.load_workbook(output_path)
    sheet = workbook.active

    # Row 1 is treated as the header row: give each of its cells a bold font.
    bold_font = Font(bold=True)
    for header_cell in sheet[1]:
        header_cell.font = bold_font

    workbook.save(output_path)

### Process the PDF and Generate the Excel File

In [11]:
# Specify the paths
# Both are relative paths, so they resolve against the kernel's working directory.
pdf_path = "Voter List.pdf" 
output_path = "Output.xlsx"  

# Extract, parse, and save the data
# Pipeline order: PDF -> text -> DataFrame -> Excel file -> bold first row.
pdf_text = extract_text_from_pdf(pdf_path)
parsed_data = parse_pdf_text(pdf_text)
write_to_excel(parsed_data, output_path)
# Re-opens the file just written and saves it back to the same path.
customize_excel(output_path)

# Confirmation message; reached only if every step above ran without raising.
print(f"Data extracted and saved to {output_path}")


Data extracted and saved to Output.xlsx
