# Data Comparison Visualization

This notebook compares a reference dataset (`ref_file`) against a new dataset (`new_file`) for each configured variable across all timesteps, and reports where the two differ.

In [None]:
# Parameters (injected by papermill)
# The values below are placeholders for a manual run; when the notebook is
# executed through papermill they are overridden by the injected values.
# Keep these as plain literal assignments so they remain easy to override.
ref_file = ""  # Reference dataset file, passed to load_datasets_and_shapefile
new_file = ""  # New dataset file that is compared against the reference
variables = ["ffmc", "dmc", "dc", "isi", "bui", "fwi"]  # Variable names to compare
shapefile_path = "../shp/ecos.shp"  # Shapefile passed to load_datasets_and_shapefile
qc_utils_path = ""  # Directory containing qc_utils.py; prepended to sys.path when non-empty

In [None]:
import sys
import warnings

# Make qc_utils importable: when a directory was supplied and is not already
# on the import path, put it at the front so it takes precedence.
if qc_utils_path:
    if qc_utils_path not in sys.path:
        sys.path.insert(0, qc_utils_path)

from qc_utils import (
    load_datasets_and_shapefile,
    run_comparison_analysis,
    display_summary_tables,
    plot_max_value_differences,
    plot_max_nan_differences,
    generate_qc_summary,
)

# Silence all warnings from this point on. This runs after the qc_utils import,
# so warnings raised while importing qc_utils are not affected.
warnings.filterwarnings("ignore")

# Echo the run configuration so the executed notebook records its inputs.
print(
    f"Reference file: {ref_file}",
    f"New file: {new_file}",
    f"Variables to compare: {variables}",
    sep="\n",
)

In [None]:
# Load the reference dataset, the new dataset and the shapefile in one helper
# call. `ds_ref` / `ds_new` feed the comparison and plotting cells and are
# closed in the final cell; `gdf` and `bounds` are only used by the plotting
# cells.
# NOTE(review): presumably `gdf` is a GeoDataFrame and `bounds` a map extent --
# confirm against qc_utils.
ds_ref, ds_new, gdf, bounds = load_datasets_and_shapefile(
    ref_file, new_file, shapefile_path
)

## Timestep-by-Timestep Comparison

Comparing all timesteps to identify where differences occur.

In [None]:
# Compare the two datasets for the names listed in `variables`. The result is
# reused by the summary-table, plotting and QC-summary cells below, so this
# cell must run before any of them.
comparison_results = run_comparison_analysis(ds_ref, ds_new, variables)

## Summary Statistics for Differing Timesteps

Tables showing summary statistics for each timestep where differences were detected.

In [None]:
# Show the summary tables derived from the comparison results.
# NOTE(review): this call is the last expression in the cell, so Jupyter will
# also echo any non-None return value -- confirm the helper returns None.
display_summary_tables(comparison_results, variables)

## Visualization: Maximum Value Difference

Plotting the timestep with the greatest maximum absolute difference in values.

In [None]:
# Plot the value differences using both datasets, the comparison results and
# the shapefile-derived `gdf` / `bounds` from the loading cell.
# NOTE(review): the timestep selection described in the heading happens inside
# the helper; it is not visible in this notebook.
plot_max_value_differences(ds_ref, ds_new, comparison_results, variables, gdf, bounds)

## Visualization: Maximum NaN Pattern Difference

Plotting the timestep with the greatest number of NaN pattern changes.

In [None]:
# Plot the NaN-pattern differences; takes the same inputs as the
# value-difference plot above.
# NOTE(review): the timestep selection described in the heading happens inside
# the helper; it is not visible in this notebook.
plot_max_nan_differences(ds_ref, ds_new, comparison_results, variables, gdf, bounds)

In [None]:
# Build the QC summary from the comparison results, then write it out as plain
# text via print.
qc_summary = generate_qc_summary(comparison_results, variables)
print(qc_summary)

In [None]:
# Clean up: release both datasets.
# try/finally guarantees ds_new is still closed when closing ds_ref raises;
# the original error then propagates and the completion message is skipped,
# so the message is only printed when both closes succeeded.
try:
    ds_ref.close()
finally:
    ds_new.close()
print("\nDatasets closed. Analysis complete!")