In [1]:
import pandas as pd
from pandas.tseries.offsets import DateOffset


def _monday_on_or_after(moment: pd.Timestamp) -> pd.Timestamp:
    """Return `moment` unchanged if it is a Monday, else the next Monday after it."""
    # weekday mapping: Monday=0, Tuesday=1, ..., Sunday=6
    return moment + pd.Timedelta(days=(-moment.weekday()) % 7)


def split_range_by_mondays(
    context: tuple[str, str], duration: str
) -> list[tuple[str, str]]:
    """
    Splits a date range into contiguous, non-overlapping Monday-to-Monday tuples.

    The usable window runs from the first Monday on or after the start date to
    the last Monday on or before the end date. Each split starts on a Monday and
    ends on the first Monday that is at least `duration` later; the end of one
    split is the start of the next. The final split is capped at the last Monday
    of the window, so it may be shorter than `duration`.

    Args:
        context: A tuple containing the start and end date strings in "YYYYMMDD" format.
        duration: The minimum duration for each split: a positive integer followed
            by 'd' (days) or 'm' (calendar months), e.g., '1d', '30d', '1m', '2m'.

    Returns:
        A list of ('YYYYMMDD', 'YYYYMMDD') tuples representing the splits, in
        chronological order. The list is empty when the first usable Monday is
        not strictly before the last usable Monday.

    Raises:
        ValueError: If the numeric part of `duration` is not an integer, the unit
            is not 'd' or 'm', or the amount is zero or negative.
    """
    date_format = "%Y%m%d"

    # 1. Parse all inputs into pandas objects
    start_str, end_str = context
    start_date = pd.to_datetime(start_str, format=date_format)
    end_date = pd.to_datetime(end_str, format=date_format)

    num = int(duration[:-1])
    unit = duration[-1]

    if unit == "d":
        min_duration = pd.Timedelta(days=num)
    elif unit == "m":
        # Use DateOffset for month calculations to handle varying month lengths
        min_duration = DateOffset(months=num)
    else:
        raise ValueError("Invalid duration unit. Use 'd' for days or 'm' for months.")

    # A zero or negative duration would make every split end on the Monday it
    # starts on, so the loop below would never advance. Reject it up front.
    if num <= 0:
        raise ValueError("Invalid duration amount. Use a positive whole number.")

    # 2. Determine the effective start and end Mondays for the entire period
    current_monday = _monday_on_or_after(start_date)
    # Last Monday on or before end_date: step back by the weekday index.
    final_monday = end_date - pd.Timedelta(days=end_date.weekday())

    # 3. Iteratively generate the splits. The loop body is skipped entirely when
    # the first Monday is not strictly before the final Monday.
    splits = []
    while current_monday < final_monday:
        # Earliest date at which this split satisfies the minimum duration
        earliest_end = current_monday + min_duration

        # Snap to a Monday, but never past the overall final Monday
        split_end = min(_monday_on_or_after(earliest_end), final_monday)

        splits.append((current_monday, split_end))

        # The start of the next split is the end of the current one
        current_monday = split_end

    # 4. Format the output tuples into "YYYYMMDD" strings
    return [
        (start.strftime(date_format), end.strftime(date_format))
        for start, end in splits
    ]

In [2]:
# --- Example 1: minimum duration expressed in months -------------------------
# Input range: Wednesday 2025-01-15 through Sunday 2025-06-15, splits of >= 2 months.
context1, duration1 = ("20250115", "20250615"), "2m"
splits1 = split_range_by_mondays(context1, duration1)
print(f"Example 1 Splits:\n{splits1}\n")
# Walkthrough (usable window: Monday 2025-01-20 .. Monday 2025-06-09):
#   1. 2025-01-20 + 2 months = 2025-03-20 (Thu) -> rolls to Monday 2025-03-24.
#   2. 2025-03-24 + 2 months = 2025-05-24 (Sat) -> rolls to Monday 2025-05-26.
#   3. 2025-05-26 + 2 months = 2025-07-26 -> Monday 2025-07-28, which is past
#      the window, so the split is capped at the final Monday 2025-06-09.
# Expected printout:
# >> Example 1 Splits:
# >> [('20250120', '20250324'), ('20250324', '20250526'), ('20250526', '20250609')]


# --- Example 2: minimum duration in days, with a short trailing split --------
# Input range: Monday 2025-09-01 through Tuesday 2025-10-21, splits of >= 30 days.
context2, duration2 = ("20250901", "20251021"), "30d"
splits2 = split_range_by_mondays(context2, duration2)
print(f"Example 2 Splits:\n{splits2}\n")
# Walkthrough (usable window: Monday 2025-09-01 .. Monday 2025-10-20):
#   1. 2025-09-01 + 30 days = 2025-10-01 (Wed) -> rolls to Monday 2025-10-06.
#   2. 2025-10-06 + 30 days = 2025-11-05 (Wed) -> Monday 2025-11-10, which is
#      past the window, so the split is capped at the final Monday 2025-10-20.
# Expected printout:
# >> Example 2 Splits:
# >> [('20250901', '20251006'), ('20251006', '20251020')]


# --- Example 3: range that contains no Monday at all -------------------------
# Input range: Tuesday 2025-09-02 through Friday 2025-09-05.
context3, duration3 = ("20250902", "20250905"), "1d"
splits3 = split_range_by_mondays(context3, duration3)
print(f"Example 3 Splits:\n{splits3}\n")
# The first Monday on or after the start (2025-09-08) falls after the last
# Monday on or before the end (2025-09-01), so nothing can be produced.
# Expected printout:
# >> Example 3 Splits:
# >> []

Example 1 Splits:
[('20250120', '20250324'), ('20250324', '20250526'), ('20250526', '20250609')]

Example 2 Splits:
[('20250901', '20251006'), ('20251006', '20251020')]

Example 3 Splits:
[]



In [None]:
# Regression checks for the worked examples of split_range_by_mondays.
# Each case pairs the inputs with the splits predicted by walking the algorithm
# by hand; the assert fails loudly if the function ever stops producing them,
# instead of relying on a reader to compare printed output against comments.
example_cases = [
    # Example 1: month-based split. Wed 2025-01-15 .. Sun 2025-06-15 gives a
    # usable window of Monday 2025-01-20 .. Monday 2025-06-09. The third split
    # would end on 2025-07-28 and is capped at the final Monday.
    (
        ("20250115", "20250615"),
        "2m",
        [("20250120", "20250324"), ("20250324", "20250526"), ("20250526", "20250609")],
    ),
    # Example 2: day-based split where the last period is short.
    # Mon 2025-09-01 .. Tue 2025-10-21 gives a window ending Monday 2025-10-20.
    # The second split would end on 2025-11-10 and is capped at the final Monday.
    (
        ("20250901", "20251021"),
        "30d",
        [("20250901", "20251006"), ("20251006", "20251020")],
    ),
    # Example 3: no valid splits. Tue 2025-09-02 .. Fri 2025-09-05 contains no
    # Monday (first Monday 2025-09-08 is after final Monday 2025-09-01).
    (
        ("20250902", "20250905"),
        "1d",
        [],
    ),
]

for example_no, (context, duration, expected_splits) in enumerate(example_cases, start=1):
    actual_splits = split_range_by_mondays(context, duration)
    print(f"Example {example_no} Splits:\n{actual_splits}\n")
    assert actual_splits == expected_splits, (
        f"Example {example_no}: expected {expected_splits}, got {actual_splits}"
    )