In [None]:
import data_functions
from viz_handler import plot_metric_comparison, plot_histogram, plot_comparison_with_difference
import os
from natsort import natsorted
from IPython.display import display, Markdown # type: ignore
import ipywidgets as widgets
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd # type: ignore
import json
# Optional: Reload data_functions if making changes during development
# import importlib
# importlib.reload(data_functions) # Use this in code cell to reload the module

# --- Display / plotting defaults ---
# Passing None removes the cap on how many columns pandas renders.
pd.set_option('display.max_columns', None)

# Optional cosmetic choice: seaborn's white-grid theme for every figure.
sns.set_theme(style="whitegrid")

# --- Configuration: Define Project Paths ---
# Change BASE_PROJECT_DIR if the data/results folders do not sit next to the
# notebook (or a linked directory); everything below is derived from it.
BASE_PROJECT_DIR = '.'

# The three working folders are fixed sub-directories of the base.
SEQS_DIR, SUMMARY_DIR, RESULTS_DIR = (
    os.path.join(BASE_PROJECT_DIR, subdir)
    for subdir in ('seqs', 'summary', 'results')
)

# Only the results folder is created here; exist_ok makes re-running the cell harmless.
os.makedirs(RESULTS_DIR, exist_ok=True)

# Echo the resolved absolute locations so a wrong working directory is obvious.
print(
    f"Using Sequences Directory: {os.path.abspath(SEQS_DIR)}",
    f"Using Summary Directory:   {os.path.abspath(SUMMARY_DIR)}",
    f"Using Results Directory:   {os.path.abspath(RESULTS_DIR)}",
    sep="\n",
)


## Test loading sequences

In [None]:
# Smoke test for data_functions.load_sequences.
# The checks below treat the return value as either None or a mapping of
# sample ID -> list of per-sequence dicts (that is how the original cell used it).
test_run_id = 'OMDL1' # Replace with a valid run ID from your data
test_basecaller = 'dorado' # Replace 'dorado' or 'guppy' as available

loaded_sequences = data_functions.load_sequences(test_run_id, test_basecaller, SEQS_DIR)

if loaded_sequences is None:
    print(f"Failed to load data for {test_run_id} {test_basecaller}. Check file path and format.")
elif len(loaded_sequences) == 0:
    # An empty mapping is not None, so it used to reach the indexing below and raise IndexError.
    print(f"Loaded data for {test_run_id} {test_basecaller}, but it contains no sample IDs.")
else:
    print(f"Successfully loaded data for {test_run_id} {test_basecaller}.")
    print(f"Found data for {len(loaded_sequences)} unique sample IDs.")

    # Inspect the first sample ID found (swap in a specific ID if you want a particular one).
    example_sample_id = next(iter(loaded_sequences))
    sample_records = loaded_sequences[example_sample_id]
    print(f"\nData for sample ID '{example_sample_id}':")
    # default=str lets json.dumps render any non-JSON value (e.g. a stored sequence object).
    print(json.dumps(sample_records, indent=2, default=str))

    if sample_records:
        # Verify structure of one sequence entry.
        first_seq_data = sample_records[0]
        # `or ''` keeps the 50-char preview from raising if a key is absent or None.
        header_preview = (first_seq_data.get('header') or '')[:50]
        sequence_preview = (first_seq_data.get('sequence') or '')[:50]
        print("\nStructure of one sequence entry:")
        print(f"  Header: {header_preview}...")
        print(f"  Length: {first_seq_data.get('length')}")
        print(f"  RiC: {first_seq_data.get('ric')}")
        print(f"  Sequence snippet: {sequence_preview}...")
    else:
        print(f"\nSample ID '{example_sample_id}' has no sequence entries; skipping structure check.")

## Test K-mer matching

In [None]:
# Fixture sequences for the k-mer similarity checks.
seq_a = "ATGCGATGCGATGCG"
seq_b = "ATGCGATGCGATGCG" # Identical
seq_c = "ATGCGATTCGATGCG" # One mismatch
seq_d = "AAAAAAAAAAAAAAA" # Different
seq_e = "ATGCG"             # Too short for k=7
seq_f = ""                # Empty

# Letter -> sequence, so each comparison can be described by its two labels.
_kmer_inputs = {'A': seq_a, 'B': seq_b, 'C': seq_c, 'D': seq_d, 'E': seq_e, 'F': seq_f}

# Values noted in the original cell: A vs C 22.22%; A vs D, A vs E and A vs F 0.00%.
# B vs A repeats A vs B with the arguments swapped to probe symmetry.
k_val = 7
for _left, _right in (('A', 'B'), ('A', 'C'), ('B', 'A'), ('A', 'D'), ('A', 'E'), ('A', 'F')):
    _similarity = data_functions.calculate_kmer_similarity(_kmer_inputs[_left], _kmer_inputs[_right], k=k_val)
    print(f"Similarity {_left} vs {_right} (k={k_val}): {_similarity:.2f}%")

# Smaller k: the original notes 76.92% for A vs C, and the 5-base E is now long enough.
k_val = 3
for _lead, _left, _right in (("\n", 'A', 'C'), ("", 'A', 'E')):
    _similarity = data_functions.calculate_kmer_similarity(_kmer_inputs[_left], _kmer_inputs[_right], k=k_val)
    print(f"{_lead}Similarity {_left} vs {_right} (k={k_val}): {_similarity:.2f}%")

## Test Global Alignments

In [None]:
# Fixture sequences for the global-alignment checks.
seq_a = "ATGCGATGCGATGCG"
seq_b = "ATGCGATGCGATGCG" # Identical
seq_c = "ATGCGATTCGATGCG" # One mismatch
seq_d = "AAAAAAAAAAAAAAA" # Different
seq_indel = "ATGCGATG---ATGCG" # Example with deletion relative to A

# Run every alignment against A before printing anything, in the same order as
# before, so output emitted by align_sequences itself keeps its position.
align_ab, align_ac, align_ad, align_a_indel = (
    data_functions.align_sequences(seq_a, _other)
    for _other in (seq_b, seq_c, seq_d, seq_indel)
)

# default=str lets json.dumps render any non-JSON value inside a result.
for _heading, _alignment in (
    ("Alignment A vs B:", align_ab),
    ("\nAlignment A vs C:", align_ac),
    ("\nAlignment A vs D:", align_ad),
    ("\nAlignment A vs Indel:", align_a_indel),
):
    print(_heading)
    print(json.dumps(_alignment, indent=2, default=str))

# Edge case: empty second sequence; printed raw rather than through json.dumps.
align_a_empty = data_functions.align_sequences(seq_a, "")
print("\nAlignment A vs Empty:", align_a_empty, sep="\n")

## Sequence Matching Logic

In [None]:
# --- Define Test Sequences ---
# 15-base fixtures (unless noted) used to build the mock Dorado/Guppy records below.
seq_a = "ATGCGATGCGATGCG"     # Base sequence
seq_b = "ATGCGATGCGATGCG"     # Identical to A
seq_c = "ATGCGATTCGATGCG"     # One mismatch vs A (8th base G -> T)
seq_d = "AAAAAAAAAAAAAAA"     # Very different from A (all A)
# seq_indel, seq_e and seq_f are defined here but not used by the mock records in this cell.
seq_indel = "ATGCGATG---ATGCG"  # 16 chars with three literal '-' (Note: align_sequences takes raw seqs, not pre-aligned)
seq_e = "ATGCG"                 # Short sequence (5 bases)
seq_f = ""                    # Empty sequence
# Create sequences similar to D and C for testing many:many and ambiguous cases
seq_d_like = "AAAAAAAAAAAAAAC" # Similar to D (only the last base differs)
seq_c_prime = "ATGCGATTCAATGCG" # Similar to C (one mismatch vs C, two mismatches vs A)

# --- Helper Function to Create Sequence Records ---
# Mimics the structure produced by load_sequences
def create_record(seq_id: str, sequence: str, ric: int, source: str, sample: str, rep_num: int = 1):
    """Build one mock sequence record for the matching tests.

    Args:
        seq_id: Identifier embedded in the header.
        sequence: Sequence string stored verbatim; its length is recorded as well.
        ric: Value stored under 'ric' and echoed in the header as ``ric=<value>``.
        source: Accepted so call sites can label the basecaller; not used in the record.
        sample: Sample ID embedded in the header.
        rep_num: Suffix appended directly after ``iNat0000`` in the header.

    Returns:
        A dict with keys 'header', 'sequence', 'length', 'ric' and 'seq_object',
        in that order; 'seq_object' is always None.
    """
    field_names = ('header', 'sequence', 'length', 'ric', 'seq_object')
    # Header layout: >ONT01.01-<sample>-<seq_id>-iNat0000<rep_num> ric=<ric>
    header_line = f">ONT01.01-{sample}-{seq_id}-iNat0000{rep_num} ric={ric}"
    # seq_object is a placeholder only; the matching-logic tests do not need it.
    field_values = (header_line, sequence, len(sequence), ric, None)
    return dict(zip(field_names, field_values))

# --- Create Mock Dorado Sequences Dictionary ---
# Maps sample ID -> list of records built by create_record.
# Samples S1-S3 and S6-S8 also appear in mock_guppy_seqs; S4 exists only here.
# Every Dorado record uses seq_a except the second S7 record, which uses seq_d.
mock_dorado_seqs = {
    # Scenario S1: Simple 1:1 High Identity
    'S1': [create_record('D_S1_1', seq_a, 100, 'dorado', 'S1')],
    # Scenario S2: Simple 1:1 Lower Identity
    'S2': [create_record('D_S2_1', seq_a, 90, 'dorado', 'S2')],
    # Scenario S3: Unmatched Pair
    'S3': [create_record('D_S3_1', seq_a, 80, 'dorado', 'S3')],
    # Scenario S4: Dorado Only Sample
    'S4': [create_record('D_S4_1', seq_a, 70, 'dorado', 'S4')],
    # Scenario S6: 1 Dorado, 2 Guppy
    'S6': [create_record('D_S6_1', seq_a, 110, 'dorado', 'S6')],
    # Scenario S7: 2 Dorado, 2 Guppy (Clear matches)
    'S7': [
        create_record('D_S7_1', seq_a, 120, 'dorado', 'S7', rep_num=1),
        create_record('D_S7_2', seq_d, 50, 'dorado', 'S7', rep_num=2)
    ],
    # Scenario S8: 1 Dorado, 2 Guppy (Ambiguous matches)
    'S8': [create_record('D_S8_1', seq_a, 130, 'dorado', 'S8')],
}

# --- Create Mock Guppy Sequences Dictionary ---
# Same sample-ID -> record-list shape as above; S5 exists only here.
# The inline "Should match ..." notes are the intended outcomes, not verified results.
mock_guppy_seqs = {
    # Scenario S1: Simple 1:1 High Identity
    'S1': [create_record('G_S1_1', seq_b, 95, 'guppy', 'S1')],
    # Scenario S2: Simple 1:1 Lower Identity
    'S2': [create_record('G_S2_1', seq_c, 85, 'guppy', 'S2')],
    # Scenario S3: Unmatched Pair
    'S3': [create_record('G_S3_1', seq_d, 75, 'guppy', 'S3')],
    # Scenario S5: Guppy Only Sample
    'S5': [create_record('G_S5_1', seq_a, 65, 'guppy', 'S5')],
    # Scenario S6: 1 Dorado, 2 Guppy
    'S6': [
        create_record('G_S6_1', seq_b, 105, 'guppy', 'S6', rep_num=1), # Should match D_S6_1 well
        create_record('G_S6_2', seq_c, 45, 'guppy', 'S6', rep_num=2)  # Should match D_S6_1 less well
    ],
    # Scenario S7: 2 Dorado, 2 Guppy (Clear matches)
    'S7': [
        create_record('G_S7_1', seq_b, 115, 'guppy', 'S7', rep_num=1), # Should match D_S7_1 (A)
        create_record('G_S7_2', seq_d_like, 55, 'guppy', 'S7', rep_num=2) # Should match D_S7_2 (D)
    ],
    # Scenario S8: 1 Dorado, 2 Guppy (Ambiguous matches)
    'S8': [
        create_record('G_S8_1', seq_c, 125, 'guppy', 'S8', rep_num=1),       # Similar match to D_S8_1 (A)
        create_record('G_S8_2', seq_c_prime, 110, 'guppy', 'S8', rep_num=2) # Also similar match to D_S8_1 (A)
    ],
}

print("Mock data dictionaries created: mock_dorado_seqs, mock_guppy_seqs")

# Show one sample's entry from each mock dictionary to verify the record structure.
example_sample_id = 'S7'
for _dict_name, _mock_seqs in (
    ("mock_dorado_seqs", mock_dorado_seqs),
    ("mock_guppy_seqs", mock_guppy_seqs),
):
    print(f"\nExample entry for {example_sample_id} in {_dict_name}:")
    print(json.dumps(_mock_seqs.get(example_sample_id, 'Not Found'), indent=2))

# Run the matcher on the mock data; it returns three lists:
# matched pairs, Dorado-only leftovers and Guppy-only leftovers.
matched, dorado_unmatched, guppy_unmatched = data_functions.match_sequences(mock_dorado_seqs, mock_guppy_seqs)

# Summary counts first, details afterwards.
for _count_label, _group in (
    ("Matched pairs", matched),
    ("Dorado-only", dorado_unmatched),
    ("Guppy-only", guppy_unmatched),
):
    print(f"{_count_label}: {len(_group)}")

print("\n--- Matched Pairs ---")
for pair in matched:
    # One comma-separated line per pair; field order is kept as-is.
    _pair_fields = (
        f"Sample: {pair['sample_id']}",
        f"D_Header: {pair['dorado'].get('header','N/A')}",
        f"G_Header: {pair['guppy'].get('header','N/A')}",
        f"Identity: {pair['alignment']['identity']:.2f}%",
        f"Multiple: {pair['multiple_matches']}",
        f"Confidence: {pair['match_confidence']}",
    )
    print("  " + ", ".join(_pair_fields))

# Both leftover lists share one layout: each item has 'sample_id' and a 'record' dict.
for _section_title, _leftovers in (
    ("\n--- Dorado Only ---", dorado_unmatched),
    ("\n--- Guppy Only ---", guppy_unmatched),
):
    print(_section_title)
    for item in _leftovers:
        print(f"  Sample: {item['sample_id']}, Header: {item['record'].get('header','N/A')}")

## Testing GC Content

In [None]:
# GC-content fixtures. Expected values are the original author's notes, not asserted here.
seq1 = "ATGCATGC" # Expected GC: 0.5
seq2 = "AAAAATTTTT" # Expected GC: 0.0
seq3 = "GCGCGCGC" # Expected GC: 1.0
seq4 = "ATGCNNNNATGC" # Expected GC: 0.5 (original note: gc_fraction ignores 'N')
seq5 = "" # Expected: None (or handle as 0.0 if preferred)
seq6 = "ATGC-ATGC" # Expected GC: 0.5 (original note: gc_fraction handles gaps)

# Same report line for every string fixture, in definition order.
for _gc_input in (seq1, seq2, seq3, seq4, seq5, seq6):
    print(f"Sequence: '{_gc_input}', GC Content: {data_functions.calculate_gc_content(_gc_input)}")

# Test with invalid input: a non-string argument.
_invalid_gc_input = 123
print(f"Sequence: {_invalid_gc_input}, GC Content: {data_functions.calculate_gc_content(_invalid_gc_input)}")

## Testing Homopolymers

In [None]:

# Homopolymer fixtures. Run lengths in the comments are the original author's notes.
seq_hp1 = "AAAGGGGGTTTTTTCXXXAAAAA" # A: 3, 5; G: 5; T: 6; C: 1
seq_hp2 = "ACGTACGT" # No homopolymers >= 3
seq_hp3 = "AAAAAAAAAAAAAAAAAAAA" # A: 20
seq_hp4 = "acgtgggggaaaaa" # g: 5, a: 5 (test case insensitivity)
seq_hp5 = "" # Empty sequence

# (sequence, min_len values to try). Expected results recorded in the original cell:
#   seq_hp1, min_len=5  -> {'A': [5], 'C': [], 'G': [5], 'T': [6], 'total_count': 3, 'max_len': 6}
#   seq_hp1, min_len=3  -> {'A': [3, 5], 'C': [], 'G': [5], 'T': [6], 'total_count': 4, 'max_len': 6}
#   seq_hp2, min_len=3  -> {'A': [], 'C': [], 'G': [], 'T': [], 'total_count': 0, 'max_len': 0}
#   seq_hp3, min_len=10 -> {'A': [20], 'C': [], 'G': [], 'T': [], 'total_count': 1, 'max_len': 20}
#   seq_hp4, min_len=4  -> {'A': [5], 'C': [], 'G': [5], 'T': [], 'total_count': 2, 'max_len': 5}
#   seq_hp5, min_len=5  -> None
_homopolymer_cases = (
    (seq_hp1, (5, 3)),
    (seq_hp2, (3,)),
    (seq_hp3, (10,)),
    (seq_hp4, (4,)),
    (seq_hp5, (5,)),
)

for _case_index, (_hp_seq, _thresholds) in enumerate(_homopolymer_cases):
    # Every banner after the first is preceded by a blank line.
    _lead = "" if _case_index == 0 else "\n"
    print(f"{_lead}--- Testing sequence: '{_hp_seq}' ---")
    for _min_len in _thresholds:
        _hp_report = data_functions.analyze_homopolymers(_hp_seq, min_len=_min_len)
        print(f"min_len={_min_len}: {json.dumps(_hp_report, indent=2)}")

## Testing Ambiguity Codes

In [None]:
# Ambiguity-code fixtures. Counts in the comments are the original author's notes.
seq_amb1 = "ATGCYATGR" # Y=1, R=1, total=2
seq_amb2 = "ACGTACGT" # No ambiguity
seq_amb3 = "NNNNNNNNNN" # N=10, total=10
seq_amb4 = "ATGCnATGCy" # n=1, y=1, total=2 (test case insensitivity)
seq_amb5 = "" # Empty sequence

# Expected results, in order:
#   seq_amb1 -> {'total_count': 2, 'frequency': ~0.2222, 'counts_per_code': {'Y': 1, 'R': 1}}
#               NOTE(review): the original note said 0.25, but the sequence has 9 bases, so
#               2/9 is expected if frequency = total_count / len(sequence), as the other
#               cases (2/10 = 0.2, 10/10 = 1.0) imply - confirm against analyze_ambiguity.
#   seq_amb2 -> {'total_count': 0, 'frequency': 0.0, 'counts_per_code': {}}
#   seq_amb3 -> {'total_count': 10, 'frequency': 1.0, 'counts_per_code': {'N': 10}}
#   seq_amb4 -> {'total_count': 2, 'frequency': 0.2, 'counts_per_code': {'N': 1, 'Y': 1}}
#   seq_amb5 -> None
for _case_index, _amb_seq in enumerate((seq_amb1, seq_amb2, seq_amb3, seq_amb4, seq_amb5)):
    # Every banner after the first is preceded by a blank line.
    _lead = "" if _case_index == 0 else "\n"
    print(f"{_lead}--- Testing sequence: '{_amb_seq}' ---")
    print(json.dumps(data_functions.analyze_ambiguity(_amb_seq), indent=2))

## Consolidate Match Metrics

In [None]:
# Build the per-run comparison DataFrame from the matched pairs.
# 'matched' normally comes from the "Sequence Matching Logic" cell (Step 2.3 testing).
# If that cell has not been run, re-run it or supply a small sample list shaped like:
# [{'sample_id': 'S1', 'dorado': {...}, 'guppy': {...}, 'alignment': {...}, ...}, ...]

# Look the name up instead of referencing it directly, so a fresh kernel takes the
# fallback branch; no try/except NameError is needed because nothing here can raise it.
_previous_matched = globals().get('matched')
if isinstance(_previous_matched, list):
    print(f"Using 'matched' list with {len(_previous_matched)} pairs.")
    input_matched_list = _previous_matched
else:
    print("Warning: 'matched' list not found. Testing with empty list.")
    input_matched_list = []

run_comparison_df = data_functions.generate_comparison_dataframe(input_matched_list)

print("\nDataFrame Info:")
run_comparison_df.info()

print("\nDataFrame Head:")
display(run_comparison_df.head())

print("\nDataFrame Description (Numeric columns):")
if run_comparison_df.shape[1] == 0:
    # describe() raises ValueError on a frame without columns, which is what the
    # empty-list fallback above can produce.
    print("DataFrame has no columns; nothing to describe.")
else:
    display(run_comparison_df.describe())

## Test Statistical Functions

In [None]:
# --- Test Statistical Wrapper ---
print("--- Testing perform_paired_nonparametric_test ---")

# Paired samples compared against list1.
list1 = [10, 12, 15, 11, 14, 16]
list2 = [8, 11, 13, 10, 12, 13] # Generally lower
list3 = [10, 12, 15, 11, 14, 16] # Identical to list1
list4 = [20, 22, 25, 21, 24, 26] # Generally higher

# All three tests run before anything is reported.
result12, result13, result14 = (
    data_functions.perform_paired_nonparametric_test(list1, _other)
    for _other in (list2, list3, list4)
)

# Original expectations: list2 and list4 potentially significant; the identical
# list3 exercises zero differences (p=1.0 or a warning).
for _pair_label, _outcome in (
    ("List1 vs List2", result12),
    ("List1 vs List3 (Identical)", result13),
    ("List1 vs List4", result14),
):
    # A falsy result (e.g. None) is shown as N/A; otherwise items 0 and 1 are stat and p.
    _stat = _outcome[0] if _outcome else 'N/A'
    _p_value = _outcome[1] if _outcome else 'N/A'
    print(f"Test {_pair_label}: Stat={_stat}, p={_p_value}")

# Test edge cases: a single pair, no data, and mismatched lengths.
result_short = data_functions.perform_paired_nonparametric_test([1], [2])
result_empty = data_functions.perform_paired_nonparametric_test([], [])
result_mismatch = data_functions.perform_paired_nonparametric_test([1, 2], [3])
for _edge_label, _edge_result in (
    ("Short", result_short),
    ("Empty", result_empty),
    ("Mismatch Length", result_mismatch),
):
    print(f"Test {_edge_label}: {_edge_result}")

In [None]:
# --- Test Run-Specific Statistics ---
print("\n--- Testing calculate_run_statistics ---")

# 'run_comparison_df' is expected from the "Consolidate Match Metrics" cell (Step 3.4).
_stats_frame = globals().get('run_comparison_df')
_stats_frame_usable = isinstance(_stats_frame, pd.DataFrame) and not _stats_frame.empty

if not _stats_frame_usable:
    print("Skipping test: 'run_comparison_df' not available or empty.")
    # To exercise the function anyway, build a small mock DataFrame, e.g.:
    # mock_df = pd.DataFrame({
    #      'Dorado_RiC': [10, 12, 15, 11, 14, 16, np.nan],
    #      'Guppy_RiC': [8, 11, 13, 10, 12, 13, 9],
    #      'Dorado_Length': [100, 102, 105, 101, 104, 106, 100],
    #      'Guppy_Length': [98, 101, 103, 100, 102, 103, 99],
    #      # Add other columns as needed... ensure they match metric_pairs
    # })
    # mock_results = data_functions.calculate_run_statistics(mock_df)
    # print("Mock Results:")
    # print(json.dumps(mock_results, indent=2, default=str))
else:
    run_stats_results = data_functions.calculate_run_statistics(_stats_frame)

    if not run_stats_results:
        print("Failed to calculate run statistics.")
    else:
        print("Successfully calculated run statistics:")
        # default=str covers values json cannot serialise natively (e.g. numpy types).
        print(json.dumps(run_stats_results, indent=2, default=str))

        # Example: pull the RiC entry out when it is present.
        if 'RiC' in run_stats_results:
            _ric_stats = run_stats_results['RiC']
            print(f"\nRiC Median Difference: {_ric_stats.get('median_diff')}")
            print(f"RiC p-value: {_ric_stats.get('p_value')}")

## Generate Summary TSVs

In [None]:
# --- Test Run-Specific Output ---
print("\n--- Testing save_run_comparison ---")

# Inputs: 'run_comparison_df' from the "Consolidate Match Metrics" cell (Step 3.4)
# and RESULTS_DIR from the configuration cell.
test_run_id = 'OMDL_Test' # Use a mock ID or the selected_run_id

# Fetch the frame by name: a direct reference would raise NameError on a fresh kernel
# before the guard below could run, and the old "'mock_df_for_saving' in globals()"
# check was always true once that assignment had succeeded.
mock_df_for_saving = globals().get('run_comparison_df') # Or create a small mock DataFrame if needed

if isinstance(mock_df_for_saving, pd.DataFrame) and not mock_df_for_saving.empty:
    _saved_paths = {}
    # The same save-and-verify check runs once per output format.
    for _fmt in ('tsv', 'csv'):
        _saved_path = data_functions.save_run_comparison(
            mock_df_for_saving,
            test_run_id,
            RESULTS_DIR,
            format=_fmt
        )
        _saved_paths[_fmt] = _saved_path
        if _saved_path and os.path.exists(_saved_path):
            print(f"{_fmt.upper()} file check successful: {_saved_path}")
            # Optional: Read back file to verify content
            # check_df = pd.read_csv(_saved_path, sep='\t' if _fmt == 'tsv' else ',')
            # print(f"Read back {len(check_df)} rows from {_fmt.upper()}.")
        else:
            # Report failures explicitly instead of passing silently.
            print(f"{_fmt.upper()} file check failed: no file found for returned path {_saved_path!r}")

    # Keep the per-format names the original cell exposed.
    saved_tsv_path = _saved_paths['tsv']
    saved_csv_path = _saved_paths['csv']
else:
    print("Skipping test: DataFrame for saving is not available or empty.")

In [None]:
# --- Test Overall Summary Output ---
print("\n--- Testing generate_overall_summary ---")

# In the real workflow 'all_runs_analysis_results' is filled by the notebook's main loop
# and looks like: {'OMDL1': {'stats': {...}, 'counts': {...}}, 'OMDL2': {...}, ...}

# Mock results for two runs, expanded from one row of numbers per run:
# (run, RiC median diff, RiC p, Length median diff, Length p, matched, dorado-only, guppy-only).
# n_pairs equals the matched count for both metrics in this mock data.
mock_all_runs_results = {
    _run: {
        'stats': {
            'RiC': {'median_diff': _ric_diff, 'p_value': _ric_p, 'n_pairs': _n_matched},
            'Length': {'median_diff': _len_diff, 'p_value': _len_p, 'n_pairs': _n_matched},
            # Add other metrics...
        },
        'counts': {'matched': _n_matched, 'dorado_only': _n_dorado, 'guppy_only': _n_guppy},
    }
    for _run, _ric_diff, _ric_p, _len_diff, _len_p, _n_matched, _n_dorado, _n_guppy in (
        ('OMDL1', 5.0, 0.21, -1.0, 0.04, 150, 5, 10),
        ('OMDL2', 10.0, 0.001, 0.0, 0.95, 200, 2, 3),
    )
}

# Save as TSV using the mock data (swap in the real all_runs_analysis_results when available).
saved_summary_path = data_functions.generate_overall_summary(
    mock_all_runs_results,
    RESULTS_DIR,
    format='tsv'
)

# The check passes only if a path came back and that file is on disk.
_summary_written = bool(saved_summary_path) and os.path.exists(saved_summary_path)
if not _summary_written:
    print("Failed to save overall summary file.")
else:
    print(f"Overall summary file check successful: {saved_summary_path}")
    # Optional: Read back file to verify content
    # check_summary_df = pd.read_csv(saved_summary_path, sep='\t')
    # print("Overall Summary DataFrame Head:")
    # display(check_summary_df.head())

## Test visualizations

In [None]:
# Plot the RiC comparison and the identity histogram for one run.
# This snippet mirrors the body of the function that displays a run's analysis
# (called by the dropdown observer). 'selected_run_id' and 'all_runs_analysis_results'
# are not defined anywhere in this test notebook, so both are fetched by name:
# on a fresh kernel the cell prints the "no data" message instead of raising NameError.
run_id = globals().get('selected_run_id')
run_data = (globals().get('all_runs_analysis_results') or {}).get(run_id)
if run_data and 'comparison_df' in run_data:
    run_df = run_data['comparison_df']
    if not run_df.empty:
        # Example call for RiC comparison
        fig_ric, _ = plot_comparison_with_difference(
            run_df,
            dorado_col='Dorado_RiC',
            guppy_col='Guppy_RiC',
            diff_col='RiC_Difference',
            figure_title=f'{run_id} - RiC Comparison'
        )
        # plt.show() takes only the keyword-only 'block' argument; passing the figure
        # positionally is not part of its signature.
        plt.show() # Display the plot

        # Example call for Identity Distribution
        fig_identity, _ = plot_histogram(
             run_df,
             metric_col='Identity_Percent',
             title=f'{run_id} - Sequence Identity Distribution',
             xlabel='Sequence Identity (%)'
        )
        plt.show() # Display the plot
    else:
        print("Comparison DataFrame is empty.")
else:
    print("No comparison data available for plotting.")