In [1]:
import re

# Regex fragments shared by the date patterns below. Each is a self-contained
# non-capturing group so it can be concatenated and quantified safely.
_WEEKDAY = r'(?:Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday)'
_MONTH = (
    r'(?:January|February|March|April|May|June|July|August|September|October|November|December|'
    r'Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Sept|Oct|Nov|Dec)'
)
_DAY = r'(?:0?[1-9]|[12][0-9]|3[01])'
_ORDINAL = r'(?:st|nd|rd|th)'
# Only years greater than or equal to 1000
_YEAR = r'(?:[1-9][0-9]{3})'
# Hours 1-12. A character class such as [1-12] would only match the single
# characters "1" or "2", so the range is spelled out as an alternation.
_HOUR = r'(?:1[0-2]|0?[1-9])'

# "<Month>[.] <day>[ordinal][,] <year>" -- the tail of a fully written date.
_FULL_DATE_TAIL = _MONTH + r'\.? ' + _DAY + _ORDINAL + r'?,? ' + _YEAR + r'\b'

# Compiled once at import time; the order here is the order of the results.
_DATE_PATTERNS = tuple(
    re.compile(pattern, re.IGNORECASE)
    for pattern in (
        # 1. Fixed Dates
        r'\b' + _WEEKDAY + r'?,?' + _FULL_DATE_TAIL,
        r'\b' + _WEEKDAY + r',? ' + _FULL_DATE_TAIL,

        # MM/DD/YYYY
        r'\b\d{1,2}/\d{1,2}/' + _YEAR + r'\b',

        # Bare four-digit year
        r'\b' + _YEAR + r'\b',

        # "the 13th of May, 2014"
        r'\bthe ' + _DAY + _ORDINAL + r'? of ' + _MONTH + r'(?:, ' + _YEAR + r')?\b',

        # "May 1st" (or just the month name)
        r'\b' + _MONTH + r'(?: ' + _DAY + _ORDINAL + r')?\b',

        # weekdays with ordinal dates (Monday the 23rd)
        r'\b' + _WEEKDAY + r',?\s*(?:the\s*' + _DAY + _ORDINAL + r'?)\b',

        # weekdays with a 12-hour clock time (Monday, 2pm)
        r'\b' + _WEEKDAY + r',?\s*(?:' + _HOUR + r'(?:am|pm))\b',

        # weekdays with general time of day (Monday afternoon)
        r'\b' + _WEEKDAY + r'\s*(?:morning|afternoon|evening|night)\b',

        # Only the day of the week
        r'\b' + _WEEKDAY + r'\b',

        # 2. Holidays (US Federal Holidays)
        r'\b(New Year\'s Day|Inauguration Day|Martin Luther King\, Jr\. Day|George Washington\'s Birthday|Memorial Day|Juneteenth|Independence Day|Labor Day|Columbus Day|Veterans Day|Thanksgiving Day|Christmas Day)\b',
    )
)


def extract_dates(text):
    """Return every date-like expression found in *text*.

    Each pattern in ``_DATE_PATTERNS`` is applied to the whole of *text*
    independently and case-insensitively. Results are therefore grouped by
    pattern (in the order the patterns are listed), not by position in the
    text, and a longer match such as "March 3rd, 2015" is reported alongside
    the shorter matches it contains ("2015", "March 3rd").

    Args:
        text: The string to scan.

    Returns:
        A list of the matched substrings; empty if nothing matched.
    """
    matches = []
    for pattern in _DATE_PATTERNS:
        matches.extend(pattern.findall(text))

    return matches

# def extract_longest_valid_dates(text):
#     matches = extract_dates(text)
    
#     if matches:
#         # Find the longest match length
#         max_len = max(len(match) for match in matches)
#         # Return all matches that have the same longest length
#         return [match for match in matches if len(match) == max_len]
#     return []

# Load the whole input file up front, keeping each line's trailing newline
with open("../Data/Input/input.txt", "r", encoding="utf-8") as src:
    lines = list(src)

# Run the extractor over each line in turn and flatten the per-line results
all_dates = [found for row in lines for found in extract_dates(row)]

# Emit one extracted date per output line
with open("../Data/Output/output.txt", "w", encoding="utf-8") as dst:
    dst.writelines(found + "\n" for found in all_dates)

print("Program ran. Results saved in output.txt.")

Program ran. Results saved in output.txt.
