In [1]:
#Imports
import pandas as pd
import sys
import os
import json
import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification
from tqdm import tqdm

# Add utils to path
utils_path = os.path.abspath(os.path.join(os.getcwd(), '..', 'utils'))
if utils_path not in sys.path:
    sys.path.insert(0, utils_path)

from date_extractor_utils import clean_value, extract_absolute_dates, normalise_relative, extract_relative_dates, add_relative_dates
from general_utils import load_data
from naive_extractor_utils import naive_extraction
from bert_relative_date_utils import predict_relative_dates

Test Absolute Dates

In [2]:
# Test on simple example
test_text = "Patient was seen on 15/06/2025 for follow-up. Next appointment scheduled for January 4th, 2026."
absolute_dates = extract_absolute_dates(test_text)
print("\nSimple test:")
print(f"Text: {test_text}")
print("Results:")
for date in absolute_dates:
    print(f"  '{date['value']}' -> (start: {date['start']}, end: {date['end']})")


Simple test:
Text: Patient was seen on 15/06/2025 for follow-up. Next appointment scheduled for January 4th, 2026.
Results:
  '15/06/2025' -> (start: 20, end: 30)


In [3]:
# Test on comprehensive examples
test_text = """
Various date formats:
1. Standard formats:
   - 15/06/2025
   - 2025-06-15
   - 15-06-2025
   
2. Month name formats:
   - June 15, 2025
   - 15 June 2025
   - Jun 15, 2025
   
3. Mixed in text:
   The patient was seen on 15/06/2025 and had a follow-up on June 15, 2025.
   Next appointment scheduled for January 4th, 2026.
"""

print("\nComprehensive test:")
print(f"Text: {test_text}")
absolute_dates = extract_absolute_dates(test_text)
print("\nResults:")
for date in absolute_dates:
    print(f"  '{date['value']}' -> (start: {date['start']}, end: {date['end']})")


Comprehensive test:
Text: 
Various date formats:
1. Standard formats:
   - 15/06/2025
   - 2025-06-15
   - 15-06-2025
   
2. Month name formats:
   - June 15, 2025
   - 15 June 2025
   - Jun 15, 2025
   
3. Mixed in text:
   The patient was seen on 15/06/2025 and had a follow-up on June 15, 2025.
   Next appointment scheduled for January 4th, 2026.


Results:
  '15/06/2025' -> (start: 49, end: 59)
  '2025-06' -> (start: 65, end: 72)
  '15-06-2025' -> (start: 81, end: 91)
  'June 15' -> (start: 124, end: 131)
  'June 2025' -> (start: 146, end: 155)
  'Jun 15' -> (start: 161, end: 167)
  '15/06/2025' -> (start: 223, end: 233)
  'June 15' -> (start: 257, end: 264)


In [4]:
# Test edge cases
edge_cases = [
    "",  # Empty string
    "No dates here",  # No dates
    "Invalid dates: 35/13/2025, 00/00/0000",  # Invalid dates
    "Partial dates: June 2025, 2025",  # Partial dates
]

print("\nTesting edge cases:")
for text in edge_cases:
    dates = extract_absolute_dates(text)
    print(f"\nText: '{text}'")
    print(f"Found {len(dates)} dates:")
    for date in dates:
        print(f"  '{date['value']}' -> (start: {date['start']}, end: {date['end']})")


Testing edge cases:

Text: ''
Found 0 dates:

Text: 'No dates here'
Found 0 dates:

Text: 'Invalid dates: 35/13/2025, 00/00/0000'
Found 0 dates:

Text: 'Partial dates: June 2025, 2025'
Found 1 dates:
  'June 2025' -> (start: 15, end: 24)


In [5]:
# Test on actual dataset sample
print("\nTesting on dataset sample:")
df = pd.read_csv("../data/data.csv")
sample_text = df.iloc[0]['note_text']
print(f"Text: {sample_text[:200]}...")
dates = extract_absolute_dates(sample_text)
print(f"\nFound {len(dates)} dates:")
for date in dates:
    print(f"  '{date['value']}' -> (start: {date['start']}, end: {date['end']})")


Testing on dataset sample:
Text: Ultrasound (30nd Jun 2024): no significant findings.imp: asthma

She denies any nausea, vomiting, or diarrhea.
C Patient reports compliance with current medication regimen. Basic metabolic panel withi...

Found 5 dates:
  'Jun 2024' -> (start: 17, end: 25)
  'Aug 2024' -> (start: 317, end: 325)
  'Sep 2024' -> (start: 368, end: 376)
  'Oct 2024' -> (start: 593, end: 601)
  'Nov 2024' -> (start: 1210, end: 1218)


Test Relative Dates

In [6]:
#Set relative date method - can be bert or regex
relative_date_method = 'bert'

In [7]:
# Model load path
if relative_date_method == 'bert':
    model_load_path = '../models/bert_model_relative_dates/'
else:
    model_load_path = None

In [8]:
# Load fine-tuned relative date extractor (if using BERT)
if relative_date_method == 'bert':
    tokenizer_rel = AutoTokenizer.from_pretrained(model_load_path)
    model_rel = AutoModelForTokenClassification.from_pretrained(model_load_path)
    model_rel.eval()
    print("Relative date model loaded successfully!")
else:
    print("Using regex-based relative date extraction.")

Relative date model loaded successfully!


In [9]:
# Test on one example
test_text = "Patient was seen last week for follow-up. Next appointment scheduled for tomorrow. Symptoms started 3 days ago."
relative_dates = extract_relative_dates(test_text)
relative_dates

[{'id': 'rel_1',
  'value': 'tomorrow',
  'start': 73,
  'end': 81,
  'pattern_type': 'common'},
 {'id': 'rel_2',
  'value': 'last week',
  'start': 17,
  'end': 26,
  'pattern_type': 'time_unit'},
 {'id': 'rel_3',
  'value': '3 days ago',
  'start': 100,
  'end': 110,
  'pattern_type': 'numeric_relative'},
 {'id': 'rel_4',
  'value': '3 days',
  'start': 100,
  'end': 106,
  'pattern_type': 'numeric_simple'}]

In [10]:
# Test on comprehensive examples to verify all pattern types
test_text = """
Patient was seen last week for follow-up. 
Next appointment scheduled for tomorrow. 
Symptoms started 3 days ago.
Last visit was on Monday.
Previous checkup was 2 weeks earlier.
Past few days have been difficult.
Several months ago the condition worsened.
Earlier this week the patient improved.
Last visit was productive.
Next few days will be critical.
"""

print("Testing comprehensive relative date extraction:")
print(f"Text: {test_text.strip()}")
print("\nResults:")

results = extract_relative_dates(test_text)
for result in results:
    print(f"  '{result['value']}' -> (pattern: {result['pattern_type']})")

print(f"\nTotal patterns found: {len(results)}")

Testing comprehensive relative date extraction:
Text: Patient was seen last week for follow-up. 
Next appointment scheduled for tomorrow. 
Symptoms started 3 days ago.
Last visit was on Monday.
Previous checkup was 2 weeks earlier.
Past few days have been difficult.
Several months ago the condition worsened.
Earlier this week the patient improved.
Last visit was productive.
Next few days will be critical.

Results:
  'tomorrow' -> (pattern: common)
  'last week' -> (pattern: time_unit)
  'this week' -> (pattern: time_unit)
  '3 days ago' -> (pattern: numeric_relative)
  '2 weeks earlier' -> (pattern: numeric_relative)
  'Several months ago' -> (pattern: numeric_relative)
  '3 days' -> (pattern: numeric_simple)
  '2 weeks' -> (pattern: numeric_simple)
  'few days' -> (pattern: numeric_simple)
  'Several months' -> (pattern: numeric_simple)
  'Past few days' -> (pattern: past_future_range)

Total patterns found: 11


In [11]:
# Load main dataset using existing load_data function
df = load_data("../data/training_dataset_synthetic2.csv")

print(f"Main dataset: {df.shape}")

Main dataset: (50, 6)


In [12]:
# Add relative dates using the simplified function
df = add_relative_dates(df)

print(f"Added relative_dates_json column")
print(f"Final dataset shape: {df.shape}")

Added relative_dates_json column
Final dataset shape: (50, 6)


In [13]:
# Count how many rows have relative dates
has_relative_dates = df['relative_dates_json'].apply(lambda x: x != '[]')
print(f"Rows with relative dates: {has_relative_dates.sum()}")
print(f"Total relative dates found: {sum(len(json.loads(rd)) for rd in df['relative_dates_json'])}")

Rows with relative dates: 50
Total relative dates found: 82


In [14]:
# Get rows that have relative dates
rows_with_relative_dates = df[has_relative_dates]

print(f"\nExamining {len(rows_with_relative_dates)} rows with relative dates:")

# Show detailed results for each row with relative dates
for i, (idx, row) in enumerate(rows_with_relative_dates.iterrows()):
    print(f"\n--- Row {i+1} (Index {idx}) ---")
    print(f"Text: {row['note_text'][:200]}...")
    
    # Parse and display relative dates
    relative_dates = json.loads(row['relative_dates_json'])
    print(f"Found {len(relative_dates)} relative dates:")
    for rd in relative_dates:
        print(f"  '{rd['value']}' -> (pattern: {rd['pattern_type']})")
    
    # Only show first 5 rows to avoid too much output
    if i >= 4:
        remaining = len(rows_with_relative_dates) - 5
        if remaining > 0:
            print(f"\n... and {remaining} more rows with relative dates")
        break


Examining 50 rows with relative dates:

--- Row 1 (Index 0) ---
Text: Ultrasound (30nd Jun 2024): no significant findings.imp: asthma

CT (02nd Aug 2024): reveals asthma.imp: asthma

X-ray (12nd Sep 2024): shows 3.1cm mass in brain.imp: pituitary_adenoma

CLINIC VISIT (...
Found 1 relative dates:
  'last year' -> (pattern: time_unit)

--- Row 2 (Index 1) ---
Text: Labs (27th Sep 2024): anemia. resolving multiple_sclerosis

Phone note((22/11/24)): relapse. taper medication

URGENT REVIEW (26/12/24): weight loss. likely bronchitis

URGENT REVIEW (26 Jan 2025): ch...
Found 2 relative dates:
  '2 weeks ago' -> (pattern: numeric_relative)
  '2 weeks' -> (pattern: numeric_simple)

--- Row 3 (Index 2) ---
Text: URGENT REVIEW (2024-10-04): cough. suspect osteoarthritis

New patient((10nd Nov 2024)): pt presents w/ cough. rule out bronchitis

PET scan (21-11-2024): evidence of hemorrhage. imp: GERD

Visit((12n...
Found 1 relative dates:
  'yesterday' -> (pattern: common)

--- Row 4 (Index 3) -

In [15]:
# Get the row with relative dates
row_with_relative = df[has_relative_dates].iloc[0]
print("Testing naive extractor with relative dates:")
print(f"Text: {row_with_relative['note_text'][:200]}...")

# Get entities and dates
entities = row_with_relative['entities_json']
absolute_dates = row_with_relative['dates_json']
relative_dates = json.loads(row_with_relative['relative_dates_json'])

print(f"\nEntities: {len(entities)}")
for i, entity in enumerate(entities[:3]):  # Show first 3
    print(f"  {i+1}. {entity['value']} (pos: {entity['start']})")

print(f"\nAbsolute dates: {len(absolute_dates)}")
for i, date in enumerate(absolute_dates[:3]):  # Show first 3
    print(f"  {i+1}. {date['value']} (pos: {date['start']})")

print(f"\nRelative dates: {len(relative_dates)}")
for i, date in enumerate(relative_dates):
    print(f"  {i+1}. {date['value']} (pos: {date['start']})")

Testing naive extractor with relative dates:
Text: Ultrasound (30nd Jun 2024): no significant findings.imp: asthma

CT (02nd Aug 2024): reveals asthma.imp: asthma

X-ray (12nd Sep 2024): shows 3.1cm mass in brain.imp: pituitary_adenoma

CLINIC VISIT (...

Entities: 15
  1. Ultrasound (pos: 0)
  2. asthma (pos: 57)
  3. CT (pos: 65)

Absolute dates: 7
  1. 30nd Jun 2024 (pos: 12)
  2. 02nd Aug 2024 (pos: 69)
  3. 12nd Sep 2024 (pos: 120)

Relative dates: 1
  1. last year (pos: 508)


In [16]:
# Test naive extraction with absolute dates only
print("=== NAIVE EXTRACTION WITH ABSOLUTE DATES ONLY ===")
relationships_absolute = naive_extraction(entities, absolute_dates, max_distance=400)
print(f"Found {len(relationships_absolute)} relationships:")
for rel in relationships_absolute:
    print(f"  {rel['entity_label']} -> {rel['date']} (distance: {rel['distance']})")

=== NAIVE EXTRACTION WITH ABSOLUTE DATES ONLY ===
Found 15 relationships:
  Ultrasound -> 30nd Jun 2024 (distance: 12)
  asthma -> 02nd Aug 2024 (distance: 12)
  CT -> 02nd Aug 2024 (distance: 4)
  asthma -> 12nd Sep 2024 (distance: 15)
  X-ray -> 12nd Sep 2024 (distance: 7)
  pituitary_adenoma -> 16 Sep'24 (distance: 33)
  nausea/vomiting -> 16 Sep'24 (distance: 12)
  nausea -> 16 Sep'24 (distance: 12)
  vomiting -> 16 Sep'24 (distance: 19)
  rheumatoid_arthritis -> 16 Sep'24 (distance: 48)
  aspirin -> 23rd Oct 2024 (distance: 24)
  headache -> 23rd Oct 2024 (distance: 16)
  pneumonia -> 16st Nov 2024 (distance: 16)
  pituitary_adenoma -> 16st Nov 2024 (distance: 34)
  gerd -> 17.12.24 (distance: 29)


In [17]:
# Test naive extraction with relative dates only
print("=== NAIVE EXTRACTION WITH RELATIVE DATES ONLY ===")
relationships_relative = naive_extraction(entities, relative_dates, max_distance=400)
print(f"Found {len(relationships_relative)} relationships:")
for rel in relationships_relative:
    print(f"  {rel['entity_label']} -> {rel['date']} (distance: {rel['distance']})")

=== NAIVE EXTRACTION WITH RELATIVE DATES ONLY ===
Found 11 relationships:
  X-ray -> last year (distance: 395)
  pituitary_adenoma -> last year (distance: 341)
  nausea/vomiting -> last year (distance: 296)
  nausea -> last year (distance: 296)
  vomiting -> last year (distance: 289)
  rheumatoid_arthritis -> last year (distance: 260)
  aspirin -> last year (distance: 229)
  headache -> last year (distance: 189)
  pneumonia -> last year (distance: 163)
  pituitary_adenoma -> last year (distance: 113)
  gerd -> last year (distance: 37)


In [18]:
# Test naive extraction with combined dates
print("=== NAIVE EXTRACTION WITH COMBINED DATES ===")
all_dates = absolute_dates + relative_dates
print(f"Total dates: {len(all_dates)} (absolute: {len(absolute_dates)}, relative: {len(relative_dates)})")

relationships_combined = naive_extraction(entities, all_dates, max_distance=400)
print(f"Found {len(relationships_combined)} relationships:")
for rel in relationships_combined:
    print(f"  {rel['entity_label']} -> {rel['date']} (distance: {rel['distance']})")

=== NAIVE EXTRACTION WITH COMBINED DATES ===
Total dates: 8 (absolute: 7, relative: 1)
Found 15 relationships:
  Ultrasound -> 30nd Jun 2024 (distance: 12)
  asthma -> 02nd Aug 2024 (distance: 12)
  CT -> 02nd Aug 2024 (distance: 4)
  asthma -> 12nd Sep 2024 (distance: 15)
  X-ray -> 12nd Sep 2024 (distance: 7)
  pituitary_adenoma -> 16 Sep'24 (distance: 33)
  nausea/vomiting -> 16 Sep'24 (distance: 12)
  nausea -> 16 Sep'24 (distance: 12)
  vomiting -> 16 Sep'24 (distance: 19)
  rheumatoid_arthritis -> 16 Sep'24 (distance: 48)
  aspirin -> 23rd Oct 2024 (distance: 24)
  headache -> 23rd Oct 2024 (distance: 16)
  pneumonia -> 16st Nov 2024 (distance: 16)
  pituitary_adenoma -> 16st Nov 2024 (distance: 34)
  gerd -> 17.12.24 (distance: 29)


In [19]:
# Debug - show all entity-date distances
print("=== ENTITY-DATE DISTANCE ANALYSIS ===")
for entity in entities:
    print(f"\nEntity: {entity['value']} (pos: {entity['start']})")
    print("Distances to dates:")
    
    for date in all_dates:
        distance = abs(entity['start'] - date['start'])
        date_type = "absolute" if date in absolute_dates else "relative"
        print(f"  {date['value']} ({date_type}): distance = {distance}")

=== ENTITY-DATE DISTANCE ANALYSIS ===

Entity: Ultrasound (pos: 0)
Distances to dates:
  30nd Jun 2024 (absolute): distance = 12
  02nd Aug 2024 (absolute): distance = 69
  12nd Sep 2024 (absolute): distance = 120
  16 Sep'24 (absolute): distance = 200
  23rd Oct 2024 (absolute): distance = 303
  16st Nov 2024 (absolute): distance = 361
  17.12.24 (absolute): distance = 442
  last year (relative): distance = 508

Entity: asthma (pos: 57)
Distances to dates:
  30nd Jun 2024 (absolute): distance = 45
  02nd Aug 2024 (absolute): distance = 12
  12nd Sep 2024 (absolute): distance = 63
  16 Sep'24 (absolute): distance = 143
  23rd Oct 2024 (absolute): distance = 246
  16st Nov 2024 (absolute): distance = 304
  17.12.24 (absolute): distance = 385
  last year (relative): distance = 451

Entity: CT (pos: 65)
Distances to dates:
  30nd Jun 2024 (absolute): distance = 53
  02nd Aug 2024 (absolute): distance = 4
  12nd Sep 2024 (absolute): distance = 55
  16 Sep'24 (absolute): distance = 135
  23

Test Against Labelled Training Data

In [20]:
# Load training dataset using existing load_data function
df = load_data("../data/training_dataset.csv")

print(f"Main dataset: {df.shape}")

Main dataset: (119, 6)


In [21]:
# --- Initialize counters ---
abs_results = []
rel_results = []

# --- Loop through each note ---
for _, row in df.iterrows():
    text = row['note_text']

    validated_abs = row.get('dates_json', []) or []
    validated_rel = row.get('relative_dates_json', []) or []

    if isinstance(validated_rel, str):
        try:
            validated_rel = json.loads(validated_rel)
        except json.JSONDecodeError:
            validated_rel = []

    # --- Normalise gold values ---
    gold_abs_values = {clean_value(d['value']) for d in validated_abs if isinstance(d, dict) and d.get('value')}
    gold_rel_values = {normalise_relative(d['value']) for d in validated_rel if isinstance(d, dict) and d.get('value')}

    # --- Generate predictions ---
    pred_abs = extract_absolute_dates(text)

    if relative_date_method == 'bert':
        pred_rel = predict_relative_dates(text, model_rel, tokenizer_rel)
    elif relative_date_method == 'regex':
        pred_rel = extract_relative_dates(text)
    else:
        raise ValueError(f"Invalid method: {relative_date_method}. Must be either 'bert' or 'regex'.")

    pred_rel = extract_relative_dates(text)

    # --- Normalise predicted values ---
    pred_abs_values = {clean_value(p['value']) for p in pred_abs if isinstance(p, dict) and p.get('value')}
    pred_rel_values = {normalise_relative(p['value']) for p in pred_rel if isinstance(p, dict) and p.get('value')}

    # --- Compare sets using same normalisation ---
    tp_abs = len(pred_abs_values & gold_abs_values)
    fp_abs = len(pred_abs_values - gold_abs_values)
    fn_abs = len(gold_abs_values - pred_abs_values)

    tp_rel = len(pred_rel_values & gold_rel_values)
    fp_rel = len(pred_rel_values - gold_rel_values)
    fn_rel = len(gold_rel_values - pred_rel_values)

    abs_results.append((tp_abs, fp_abs, fn_abs))
    rel_results.append((tp_rel, fp_rel, fn_rel))

In [22]:
# --- Compute metrics ---
def compute_metrics(results):
    tp = sum(r[0] for r in results)
    fp = sum(r[1] for r in results)
    fn = sum(r[2] for r in results)
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0
    return precision, recall, f1, tp, fp, fn

In [23]:
# --- Print results ---
abs_precision, abs_recall, abs_f1, tp_abs, fp_abs, fn_abs = compute_metrics(abs_results)
rel_precision, rel_recall, rel_f1, tp_rel, fp_rel, fn_rel = compute_metrics(rel_results)

print("=== Absolute Dates ===")
print(f"TP={tp_abs}, FP={fp_abs}, FN={fn_abs}")
print(f"Precision={abs_precision:.3f}, Recall={abs_recall:.3f}, F1={abs_f1:.3f}")

print("\n=== Relative Dates ===")
print(f"TP={tp_rel}, FP={fp_rel}, FN={fn_rel}")
print(f"Precision={rel_precision:.3f}, Recall={rel_recall:.3f}, F1={rel_f1:.3f}")

=== Absolute Dates ===
TP=292, FP=19, FN=174
Precision=0.939, Recall=0.627, F1=0.752

=== Relative Dates ===
TP=179, FP=182, FN=116
Precision=0.496, Recall=0.607, F1=0.546


In [24]:
# --- Debug absolute dates (first 10 rows) ---
for _, row in df.head(10).iterrows():
    text = row['note_text']

    abs_data = row.get('dates_json')
    if isinstance(abs_data, str):
        try:
            abs_data = json.loads(abs_data)
        except json.JSONDecodeError:
            abs_data = []

    # Extract validated and predicted values
    gold_abs = {d['value'].strip() for d in abs_data if isinstance(d, dict) and 'value' in d}
    pred_abs = {p['value'].strip() for p in extract_absolute_dates(text) if isinstance(p, dict) and 'value' in p}

    # Only show rows where something exists
    if gold_abs or pred_abs:
        print(f"\nDoc {row.get('doc_id', 'N/A')}")
        print("Validated absolute:", gold_abs)
        print("Predicted absolute:", pred_abs)
        print("Overlap:", gold_abs & pred_abs)
        print("-" * 80)


Doc 26342
Validated absolute: {'10/09/19', 'Dec 2018', '10/09/2019', '11/9/2019'}
Predicted absolute: {'11/9/2019', '10/09/19', 'Nov 2018', '5/6/19', '10/09/2019', 'Dec 2018'}
Overlap: {'10/09/19', 'Dec 2018', '10/09/2019', '11/9/2019'}
--------------------------------------------------------------------------------

Doc 26343
Validated absolute: {'7/5/2019', '05/09/19', '11/5/2019', '11/05/19', '05/11/19', '05/10/19', '10/05/19'}
Predicted absolute: {'7/5/2019', '05/09/19', '11/5/2019', '11/05/19', '05/11/19', '05/10/19', '10/05/19'}
Overlap: {'7/5/2019', '05/09/19', '11/5/2019', '11/05/19', '05/11/19', '05/10/19', '10/05/19'}
--------------------------------------------------------------------------------

Doc 26344
Validated absolute: {'20/05/2020', '1/1/2011', '15/05/2020', '2012'}
Predicted absolute: {'July 2012', '20/05/2020', '1/1/2011', '15/05/2020'}
Overlap: {'1/1/2011', '20/05/2020', '15/05/2020'}
------------------------------------------------------------------------------

In [25]:
# --- Debug relative date extraction ---
for _, row in df.head(10).iterrows():
    text = row['note_text']

    # Load gold relative dates safely
    rel_data = row.get('relative_dates_json')
    if isinstance(rel_data, str):
        try:
            rel_data = json.loads(rel_data)
        except json.JSONDecodeError:
            rel_data = []
    elif not isinstance(rel_data, list):
        rel_data = []

    # --- Raw and normalised sets ---
    gold_raw = {d['value'] for d in rel_data if isinstance(d, dict) and 'value' in d}
    pred_raw = {p['value'] for p in extract_relative_dates(text)}

    gold_norm = {normalise_relative(v) for v in gold_raw}
    pred_norm = {normalise_relative(v) for v in pred_raw}

    # --- Only print docs that have something to inspect ---
    if gold_raw or pred_raw:
        print(f"\nDoc {row.get('doc_id', 'N/A')}")
        print("Validated relative (raw):", gold_raw)
        print("Predicted relative (raw):", pred_raw)
        print("Overlap (raw):", gold_raw & pred_raw)
        print("Overlap (normalised):", gold_norm & pred_norm)


Doc 26342
Validated relative (raw): {'today', ' start of 2018', '2/7', ' last 3 months', 'last few month'}
Predicted relative (raw): {'today', 'tomorrow', 'start of 2018', '3 months', 'currently', 'few month'}
Overlap (raw): {'today'}
Overlap (normalised): {'today', 'start 2018'}

Doc 26343
Validated relative (raw): {'Today', 'last night'}
Predicted relative (raw): {'Last 24 hours', 'Today', '4 Days', 'last night'}
Overlap (raw): {'Today', 'last night'}
Overlap (normalised): {'today', 'past night'}

Doc 26344
Validated relative (raw): {'July 2012'}
Predicted relative (raw): set()
Overlap (raw): set()
Overlap (normalised): set()

Doc 26345
Validated relative (raw): {'today', '6 years prior', 'last night', ' last 24 hours', 'Today', ' 8 years', 'last 7 days', '10 year history'}
Predicted relative (raw): {'6 years prior', 'last night', '7 days', 'Today', 'Last 24 hours', '6 years', '8 years', '10 year history'}
Overlap (raw): {'6 years prior', 'Today', 'last night', '10 year history'}
Ov