In [13]:
import pdfplumber
import pandas as pd
import re

# Open the PDF
with pdfplumber.open('carifta_2025_results.pdf') as pdf:
    text = ''
    for page in pdf.pages:
        text += page.extract_text() + '\n'

# Split into event sections
event_blocks = re.split(r'(?=Event \d+)', text)

event_dfs = []

for block in event_blocks:
    # Find the event header
    match = re.search(r'Event (\d+)\s+(.+)', block)
    if not match:
        continue
    event_number, event_title = match.groups()
    
    # Find swimmer lines
    swimmer_lines = re.findall(r'([A-Za-z &\.\'\-]+)\s+(\d+)([A-Za-z \-\&]+)\s+([\d:\.]+)\s+([\d:\.]+)', block)
    
    # Build a DataFrame
    df = pd.DataFrame(swimmer_lines, columns=['Team', 'Age', 'Name', 'Finals Time', 'Seed Time'])
    df['EVENT'] = event_title
    df['EVENT_NUM'] = event_number
    event_text = event_title.split(' ')
    df["GENDER"] = event_text[0]
    df["AGE_GROUP"] = event_text[1]
    df["DISTANCE"] = event_text[2]
    df["STROKE"] = event_text[5]
    df['DISTANCEXSTROKE'] = event_text[2] + event_text[5]
    df["AGE_GROUPXGENGER"] = event_text[1] + event_text[0]
    event_dfs.append(df)

results_df = pd.concat(event_dfs)
print(results_df)

results_df.to_csv('carifta_2025_results.csv')

# # Example: see the first event
# for event, df in event_dfs.items():
#     print(f"\n--- {event} ---")
#     print(df.head())
#     break


                                Team Age                         Name  \
0                             Reagan  13      Virgin Islands Swimming   
1                             Marena  13            Trinidad & Tobago   
2                               Zara  13            Trinidad & Tobago   
3                             Dndn F  14                      Bahamas   
4                             Lailah  13           Barbados Swim Team   
..                               ...  ..                          ...   
0                      Preliminaries   1                    Jamaica A   
0   Team Relay Seed Time Prelim Time  10          Cayman Islands-ZZ A   
0                              Riley  16   Virgin Islands Swimming NT   
0                              Riley  16      Virgin Islands Swimming   
1                              Malia  15                   Martinique   

   Finals Time Seed Time                                       EVENT  \
0      9:38.89   9:22.27          Girls 13-14 800 L