## Cell 1: Import Packages and Initialisation

In [5]:
# Import packages, initialisation 
import h5py
import numpy as np 
import netCDF4
import pandas as pd
from datetime import datetime
import os
import glob

## Cell 2: Load CryoSat-2 Data for Weddell Sea Analysis (2021-2024)

In [6]:
#%% CryoSat-2 Data for Weddell Sea Analysis
#
# Discover every CryoSat-2 file stored in per-year subfolders of the
# Weddell Sea data directory, build one master list of paths, and print
# a per-year summary of how many files were found.

# Base directory for CS2 data in Weddell Sea region
cs2_base_path = r"D:\phd\data\chap2\cs2_l2_sar_basel_e_weddell_winter"

# Years to process (each one is looked up as a subfolder of the base path)
years = ["2021", "2022", "2023", "2024"]

# File extension for CryoSat-2 data
cs2_file_extension = ".nc"

# Master list of file paths and the number of files found per year
cs2_files_weddell = []
cs2_file_count_by_year = {}

# Process each year subfolder
for year in years:
    year_path = os.path.join(cs2_base_path, year)

    # Skip years that have no data folder. isdir (rather than exists) also
    # rejects a regular file that merely happens to be named like a year.
    if not os.path.isdir(year_path):
        print(f"Warning: Directory for year {year} not found at {year_path}")
        continue

    # glob returns matches in a filesystem-dependent order; sorting makes the
    # master list (and the sample printed below) reproducible across machines.
    year_files = sorted(glob.glob(os.path.join(year_path, f"*{cs2_file_extension}")))

    # Years that were skipped above get no entry in the count dictionary
    cs2_file_count_by_year[year] = len(year_files)
    cs2_files_weddell.extend(year_files)

# Store in variable for later use (this is the same list object, not a copy)
files_check_CS2 = cs2_files_weddell

# Print summary of found files
print(f"Found {len(cs2_files_weddell)} total CS2 files for Weddell Sea analysis")
print("Files by year:")
for year, count in cs2_file_count_by_year.items():
    print(f"  {year}: {count} files")

# Optional: show the first few basenames so the naming pattern can be checked
sample_files = [os.path.basename(f) for f in cs2_files_weddell[:3]]
if cs2_files_weddell:
    print(f"\nSample filenames: {', '.join(sample_files)}")

Found 5370 total CS2 files for Weddell Sea analysis
Files by year:
  2021: 1285 files
  2022: 1280 files
  2023: 1403 files
  2024: 1402 files

Sample filenames: CS_LTA__SIR_SAR_2__20210501T004324_20210501T004940_E001_segment_640.nc, CS_LTA__SIR_SAR_2__20210501T084902_20210501T084944_E001_segment_641.nc, CS_LTA__SIR_SAR_2__20210501T102753_20210501T102933_E001_segment_642.nc


## Cell 3: Load ICESat-2 ATL10 Version 6 Data for Weddell Sea Analysis (2021-2024)

In [7]:
# Discover every ICESat-2 ATL10v6 file stored in per-year subfolders of the
# Weddell Sea data directory, build one master list of paths, and print a
# per-year summary of how many files were found.

# Base directory for IS2 ATL10v6 data in Weddell Sea region
is2_base_path = r"D:\phd\data\chap2\is2_atl10v6_weddell_winter"

# Years to process (each one is looked up as a subfolder of the base path)
years = ["2021", "2022", "2023", "2024"]

# File extension for ICESat-2 data
is2_file_extension = ".h5"

# Master list of file paths and the number of files found per year
is2_files_weddell = []
is2_file_count_by_year = {}

# Process each year subfolder
for year in years:
    year_path = os.path.join(is2_base_path, year)

    # Skip years that have no data folder. isdir (rather than exists) also
    # rejects a regular file that merely happens to be named like a year.
    if not os.path.isdir(year_path):
        print(f"Warning: Directory for year {year} not found at {year_path}")
        continue

    # glob returns matches in a filesystem-dependent order; sorting makes the
    # master list (and the sample printed below) reproducible across machines.
    year_files = sorted(glob.glob(os.path.join(year_path, f"*{is2_file_extension}")))

    # Years that were skipped above get no entry in the count dictionary
    is2_file_count_by_year[year] = len(year_files)
    is2_files_weddell.extend(year_files)

# Store all IS2 files from Weddell Sea (same list object, not a copy)
files_total_IS2 = is2_files_weddell

# Print summary of found files
print(f"Found {len(is2_files_weddell)} total IS2 ATL10v6 files for Weddell Sea analysis")
print("Files by year:")
for year, count in is2_file_count_by_year.items():
    print(f"  {year}: {count} files")

# Optional: show the first few basenames so the naming pattern can be checked
sample_files = [os.path.basename(f) for f in is2_files_weddell[:3]]
if is2_files_weddell:
    print(f"\nSample filenames: {', '.join(sample_files)}")

Found 4538 total IS2 ATL10v6 files for Weddell Sea analysis
Files by year:
  2021: 1270 files
  2022: 1319 files
  2023: 1010 files
  2024: 939 files

Sample filenames: ATL10-02_20210501044315_05711101_006_01.h5, ATL10-02_20210501061732_05721101_006_01.h5, ATL10-02_20210501075149_05731101_006_01.h5


## Cell 4: Load CryoSat-2 Data for Ross Sea Analysis by Hemisphere (2021-2024)

In [8]:
#%% CryoSat-2 Data for Ross Sea Analysis
#
# Discover every CryoSat-2 file stored under <base>/<hemisphere>/<year>/,
# build one master list of paths, and print a summary broken down by
# hemisphere and year.

# Base directory for CS2 data in Ross Sea region
cs2_ross_base_path = r"D:\phd\data\chap2\cs2_l2_sar_basel_e_ross_winter"

# Hemispheres to process (Eastern: 160E to 180E, Western: 180W to 140W)
hemispheres = ["EH", "WH"]

# Years to process (each one is looked up as a subfolder of a hemisphere folder)
years = ["2021", "2022", "2023", "2024"]

# File extension for CryoSat-2 data
cs2_file_extension = ".nc"

# Master list of file paths and nested counts: {hemisphere: {year: n_files}}
cs2_files_ross = []
cs2_file_count_by_hemisphere = {}

# Process each hemisphere
for hemisphere in hemispheres:
    hemisphere_path = os.path.join(cs2_ross_base_path, hemisphere)

    # Skip hemispheres that have no data folder. isdir (rather than exists)
    # also rejects a regular file that happens to carry the folder's name.
    if not os.path.isdir(hemisphere_path):
        print(f"Warning: Directory for hemisphere {hemisphere} not found at {hemisphere_path}")
        continue

    # A hemisphere that exists always gets an entry, even if every year is missing
    cs2_file_count_by_hemisphere[hemisphere] = {}
    hemisphere_files = []

    # Process each year within this hemisphere
    for year in years:
        year_path = os.path.join(hemisphere_path, year)

        # Skip years that have no data folder for this hemisphere
        if not os.path.isdir(year_path):
            print(f"Warning: Directory for year {year} in {hemisphere} not found at {year_path}")
            continue

        # glob returns matches in a filesystem-dependent order; sorting makes
        # the master list (and the sample printed below) reproducible.
        year_files = sorted(glob.glob(os.path.join(year_path, f"*{cs2_file_extension}")))

        cs2_file_count_by_hemisphere[hemisphere][year] = len(year_files)
        hemisphere_files.extend(year_files)

    # Add this hemisphere's files to the master list
    cs2_files_ross.extend(hemisphere_files)
    print(f"Found {len(hemisphere_files)} CS2 files for {hemisphere} hemisphere in Ross Sea")

# Store in variable for later use (this is the same list object, not a copy)
files_check_CS2_ross = cs2_files_ross

# Print detailed summary of found files
print(f"\nFound {len(cs2_files_ross)} total CS2 files for Ross Sea analysis")
print("\nFiles by hemisphere and year:")
for hemisphere, years_dict in cs2_file_count_by_hemisphere.items():
    hemisphere_total = sum(years_dict.values())
    print(f"  {hemisphere} hemisphere: {hemisphere_total} files")
    for year, count in years_dict.items():
        print(f"    {year}: {count} files")

# Optional: show the first few basenames so the naming pattern can be checked
if cs2_files_ross:
    sample_files = [os.path.basename(f) for f in cs2_files_ross[:3]]
    print(f"\nSample filenames: {', '.join(sample_files)}")

Found 1456 CS2 files for EH hemisphere in Ross Sea
Found 2623 CS2 files for WH hemisphere in Ross Sea

Found 4079 total CS2 files for Ross Sea analysis

Files by hemisphere and year:
  EH hemisphere: 1456 files
    2021: 364 files
    2022: 360 files
    2023: 367 files
    2024: 365 files
  WH hemisphere: 2623 files
    2021: 658 files
    2022: 651 files
    2023: 653 files
    2024: 661 files

Sample filenames: CS_LTA__SIR_SAR_2__20210501T103911_20210501T103930_E001_segment_184.nc, CS_LTA__SIR_SAR_2__20210501T104020_20210501T104211_E001_segment_185.nc, CS_LTA__SIR_SAR_2__20210501T220233_20210501T220350_E001_segment_186.nc


## Cell 5: Load ICESat-2 ATL10 Version 6 Data for Ross Sea Analysis (2021-2024)

In [9]:
#%% ICESat-2 ATL10 Version 6 Data for Ross Sea Analysis
#
# Discover every ICESat-2 ATL10v6 file stored in per-year subfolders of the
# Ross Sea data directory, build one master list of paths, and print a
# per-year summary of how many files were found.

# Base directory for IS2 ATL10v6 data in Ross Sea region
is2_ross_base_path = r"D:\phd\data\chap2\is2_atl10v6_ross_winter"

# Years to process (each one is looked up as a subfolder of the base path)
years = ["2021", "2022", "2023", "2024"]

# File extension for ICESat-2 data
is2_file_extension = ".h5"

# Master list of file paths and the number of files found per year.
# Note: is2_file_count_by_year is (re)bound to a fresh dict here, so any
# counts a previously executed cell stored under that name are replaced.
is2_files_ross = []
is2_file_count_by_year = {}

# Process each year subfolder
for year in years:
    year_path = os.path.join(is2_ross_base_path, year)

    # Skip years that have no data folder. isdir (rather than exists) also
    # rejects a regular file that merely happens to be named like a year.
    if not os.path.isdir(year_path):
        print(f"Warning: Directory for year {year} not found at {year_path}")
        continue

    # glob returns matches in a filesystem-dependent order; sorting makes the
    # master list (and the sample printed below) reproducible across machines.
    year_files = sorted(glob.glob(os.path.join(year_path, f"*{is2_file_extension}")))

    # Years that were skipped above get no entry in the count dictionary
    is2_file_count_by_year[year] = len(year_files)
    is2_files_ross.extend(year_files)

# Store all IS2 files from Ross Sea (same list object, not a copy)
files_total_IS2_ross = is2_files_ross

# Print summary of found files
print(f"Found {len(is2_files_ross)} total IS2 ATL10 v6 files for Ross Sea analysis")
print("Files by year:")
for year, count in is2_file_count_by_year.items():
    print(f"  {year}: {count} files")

# Optional: show the first few basenames so the naming pattern can be checked
sample_files = [os.path.basename(f) for f in is2_files_ross[:3]]
if is2_files_ross:
    print(f"\nSample filenames: {', '.join(sample_files)}")

Found 3612 total IS2 ATL10 v6 files for Ross Sea analysis
Files by year:
  2021: 972 files
  2022: 987 files
  2023: 928 files
  2024: 725 files

Sample filenames: ATL10-02_20210430160856_05631101_006_02.h5, ATL10-02_20210430174313_05641101_006_02.h5, ATL10-02_20210501044315_05711101_006_01.h5


## Cell 6: Combined Dataset Analysis Setup

In [None]:
#%% Prepare Combined Dataset for Analysis
#
# Gather the four discovered file lists into one lookup structure, choose
# which region(s) and sensor(s) to analyse, and publish the choice through
# the variables files_check_CS2 and files_total_IS2.

# Create a dictionary to store all available datasets.
# 'count' is a snapshot taken when this cell runs; it is not updated if the
# underlying lists change afterwards.
available_datasets = {
    'weddell_sea': {
        'cs2': {
            'files': cs2_files_weddell,
            'count': len(cs2_files_weddell),
            'description': 'CryoSat-2 data for Weddell Sea'
        },
        'is2': {
            'files': is2_files_weddell,
            'count': len(is2_files_weddell),
            'description': 'ICESat-2 ATL10 Version 6 data for Weddell Sea'
        }
    },
    'ross_sea': {
        'cs2': {
            'files': cs2_files_ross,
            'count': len(cs2_files_ross),
            'description': 'CryoSat-2 data for Ross Sea'
        },
        'is2': {
            'files': is2_files_ross,
            'count': len(is2_files_ross),
            'description': 'ICESat-2 ATL10 Version 6 data for Ross Sea'
        }
    }
}

# Configuration for current analysis
# Set these variables to choose which datasets to use
region = 'weddell_sea'  # Options: 'weddell_sea', 'ross_sea', 'both'
include_cs2 = True
include_is2 = True

# Each accepted value of `region` maps to the dataset keys it covers and the
# confirmation message printed once the selection is made.
region_options = {
    'weddell_sea': (['weddell_sea'], "Selected Weddell Sea region for analysis"),
    'ross_sea': (['ross_sea'], "Selected Ross Sea region for analysis"),
    'both': (['weddell_sea', 'ross_sea'],
             "Selected both Weddell and Ross Sea regions for analysis"),
}

# Fail loudly on a mistyped region instead of silently continuing with two
# empty file lists and a misleading "Ready" message.
if region not in region_options:
    raise ValueError(
        f"Unknown region {region!r}; expected one of {sorted(region_options)}"
    )

region_keys, region_message = region_options[region]

# Build fresh lists for every configuration so that downstream code mutating
# the selection can never modify the per-region master lists.
# NOTE(review): for 'both' the lists are concatenated without de-duplication;
# confirm that the two regional folders cannot contribute the same granule.
selected_cs2_files = []
selected_is2_files = []
for region_key in region_keys:
    if include_cs2:
        selected_cs2_files.extend(available_datasets[region_key]['cs2']['files'])
    if include_is2:
        selected_is2_files.extend(available_datasets[region_key]['is2']['files'])
print(region_message)

# Store in standard variables for downstream processing.
# These assignments replace whatever the two names were bound to before.
files_check_CS2 = selected_cs2_files
files_total_IS2 = selected_is2_files

# Print summary of dataset to be processed
print(f"Processing {len(files_total_IS2)} ICESat-2 files and {len(files_check_CS2)} CryoSat-2 files")

# Display counts by region if both regions are selected
if region == 'both':
    print("\nBreakdown by region:")
    print(f"  Weddell Sea: {available_datasets['weddell_sea']['cs2']['count']} CS2 files, "
          f"{available_datasets['weddell_sea']['is2']['count']} IS2 files")
    print(f"  Ross Sea: {available_datasets['ross_sea']['cs2']['count']} CS2 files, "
          f"{available_datasets['ross_sea']['is2']['count']} IS2 files")

print("\nAnalysis configuration complete. Ready for CRYO2ICE track extraction.")