In [None]:
# Imports
import sys
import os
from datetime import datetime
import pandas as pd

# Make the sibling ``utils`` directory (one level above the current working
# directory) importable. It is prepended only when absent, so re-running this
# cell does not stack duplicate entries on sys.path.
utils_path = os.path.abspath(os.path.join(os.getcwd(), '..', 'utils'))
if sys.path.count(utils_path) == 0:
    sys.path.insert(0, utils_path)

# Import the functions we want to test
from post_processing_utils import standardize_date
from date_extractor_utils import extract_relative_dates

Test Cases for Handler Functions

In [None]:
# Test Harness
def run_test(test_case):
    """
    Executes a single test case against the standardize_date function.

    Args:
        test_case (dict): Must provide ``'name'`` (label used in the printed
            report), ``'input'`` (the relative-date text), ``'doc_date'``
            (the document timestamp to resolve against) and
            ``'expected_date'`` (a value accepted by ``pd.to_datetime``, or a
            falsy value such as ``None`` when no date is expected).

    Returns:
        bool: ``True`` if the case passed, ``False`` otherwise. A one-line
        status is printed as a side effect.

    An exception raised by ``standardize_date`` is reported as a failure of
    this case instead of being propagated, so one broken input cannot abort
    the remaining cases or the final summary.
    """
    # standardize_date is called with a pandas Series (like a DataFrame row)
    row = pd.Series({
        'date': test_case['input'],
        'date_type': 'relative',
        'document_timestamp': test_case['doc_date']
    })

    # A falsy expectation means "no date should be produced" and maps to NaT
    expected_date = pd.to_datetime(test_case['expected_date']) if test_case['expected_date'] else pd.NaT

    try:
        # Get the actual result from the function
        actual_date = standardize_date(row)
    except Exception as exc:
        # Surface the error in the report rather than crashing the whole run
        result_status = f"FAIL (Expected: {expected_date}, Raised: {type(exc).__name__}: {exc})"
    else:
        # Missing values never compare equal to each other, so two "no date"
        # results have to be matched explicitly before the equality check.
        if pd.isna(actual_date) and pd.isna(expected_date):
            result_status = "PASS"
        elif actual_date == expected_date:
            result_status = "PASS"
        else:
            result_status = f"FAIL (Expected: {expected_date}, Got: {actual_date})"

    print(f"Test '{test_case['name']}': {result_status}")
    return result_status == "PASS"

In [None]:
# Fixed reference document date, so relative expressions resolve to the same calendar days on every run
doc_date = datetime(year=2023, month=3, day=31)

In [None]:
# Test Case Definitions
# Each entry supplies the keys run_test reads: a display name, the relative-date
# text, the document date it is resolved against, and the expected calendar
# date (None when no date is expected).
test_cases = [
    # --- Basic Time Units ---
    dict(name='time_unit: last week', input='last week', doc_date=doc_date, expected_date='2023-03-24'),
    dict(name='common_no_today: yesterday', input='yesterday', doc_date=doc_date, expected_date='2023-03-30'),

    # --- Numeric Relative Patterns ---
    dict(name='numeric_relative: 3 days ago', input='3 days ago', doc_date=doc_date, expected_date='2023-03-28'),
    dict(name='numeric_relative: 1 month ago (from Mar 31)', input='1 month ago', doc_date=doc_date, expected_date='2023-02-28'),
    dict(name='numeric_relative: 1 year ago (from leap day)', input='1 year ago', doc_date=datetime(2024, 2, 29), expected_date='2023-02-28'),
    dict(name='numeric_relative: number-word', input='two weeks ago', doc_date=doc_date, expected_date='2023-03-17'),
    dict(name='numeric_relative: decimal', input='1.5 years ago', doc_date=datetime(2023, 12, 31), expected_date='2022-06-30'),
    dict(name='numeric_relative: qualitative (few)', input='a few months ago', doc_date=doc_date, expected_date='2023-01-31'),

    # --- Clinical & Event-Based Patterns ---
    dict(name='history_period: 10 year history', input='10 year history', doc_date=doc_date, expected_date='2013-03-31'),
    dict(name='prior_to_event: prior to admission', input='prior to admission', doc_date=doc_date, expected_date='2023-03-31'),
    dict(name='preceding_period: preceding days', input='preceding days', doc_date=doc_date, expected_date='2023-03-30'),

    # --- Year, Month, and Day Part Patterns ---
    dict(name='since_year: since 2020', input='since 2020', doc_date=doc_date, expected_date='2020-01-01'),
    dict(name='since_month: since January 2023', input='since January 2023', doc_date=doc_date, expected_date='2023-01-01'),
    dict(name='month_last_year: September last year', input='September last year', doc_date=doc_date, expected_date='2022-09-01'),
    dict(name='part_of_day: last evening', input='last evening', doc_date=doc_date, expected_date='2023-03-30'),

    # --- Range Patterns ---
    dict(name='numeric_range_modified: 2-3 years ago', input='2-3 years ago', doc_date=doc_date, expected_date='2020-03-31'),

    # --- Edge Cases ---
    dict(name='Edge Case: unsupported pattern', input='around noon', doc_date=doc_date, expected_date=None),
]

In [None]:
# Test Runner
# Push every case through the harness (each call prints its own status line),
# then print the aggregate pass count between two divider lines.
print("--- Running Post-Processing Tests ---")
results = list(map(run_test, test_cases))
print(
    "\n" + "=" * 30,
    "--- SUMMARY ---",
    f"{sum(results)} / {len(results)} tests passed.",
    "=" * 30,
    sep="\n",
)

Visual Sanity Checks

In [None]:
# Reference document date for the visual checks. Note that this rebinds
# ``doc_date``; the earlier ``test_cases`` list captured the previous value
# when it was built, so re-run that cell's prerequisites before rebuilding it.
doc_date = datetime(year=2023, month=10, day=27)

In [None]:
# Define Test Cases
# Free-text sample sentences for the visual checks; each entry pairs a display
# name with the sentence to run extraction on.
visual_test_cases = [
    dict(name="Simple Past Date", text="Patient reported symptoms started two weeks ago."),
    dict(name="Month and Year", text="Last follow-up was in September last year."),
    dict(name="Clinical History", text="There is a 10 year history of smoking."),
    dict(name="Event-Based", text="Admitted for observation prior to surgery."),
    dict(name="Multiple Dates", text="Patient was seen yesterday and a follow-up was scheduled for next week."),
]

In [None]:
# Run and Display Tests
# For each sample sentence: echo the inputs, extract the relative-date
# expressions, and print each one next to its standardized form.
print("Running Visual Sanity Checks\n")

for case in visual_test_cases:
    print(f"Test Case: '{case['name']}'")
    print(f"Input Text: \"{case['text']}\"")
    print(f"Document Date: {doc_date.strftime('%Y-%m-%d')}")

    # Pull the relative-date expressions out of the sentence
    extracted_dates = extract_relative_dates(case['text'])

    if extracted_dates:
        for date_info in extracted_dates:
            original_value = date_info['value']

            # Wrap the extracted text in the row shape passed to standardize_date
            row = pd.Series({
                'date': original_value,
                'date_type': 'relative',
                'document_timestamp': doc_date
            })
            standardized_date = standardize_date(row)

            # Missing results are rendered as "N/A" instead of being formatted
            if pd.notna(standardized_date):
                std_date_str = standardized_date.strftime('%Y-%m-%d')
            else:
                std_date_str = "N/A"
            print(f"  -> Extracted: '{original_value}'  =>  Standardized: {std_date_str}")
    else:
        print("  -> No relative dates extracted.")

    # Blank separator between cases
    print("\n")