<font size="5">**Extract and Print Radiance Layer Data**</font>

In [3]:
import os
import xarray as xr
import numpy as np

# Define base path and data directory
base_path = 'D:/CloudDetection/'
data_dir = os.path.join(base_path, 'images')

# Get all scene subdirectories. os.listdir() returns entries in arbitrary
# order, so sort them: every later loop over `subdirs` (and therefore the row
# order of the exported CSV) is then reproducible across runs and machines.
subdirs = sorted(
    d for d in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir, d))
)

# Extract Radiance layer data
for subdir in subdirs:
    subdir_path = os.path.join(data_dir, subdir)

    # Load Radiance dataset. The context manager closes the underlying NetCDF
    # file handles once the arrays are in memory, so handles do not pile up
    # (one set per scene) for the lifetime of the kernel.
    with xr.open_mfdataset(f'{subdir_path}/S*_radiance_in.nc', combine='by_coords') as radiance_ds:
        # Stack all Radiance layers along a new trailing axis; `.values`
        # materialises each variable as a NumPy array before the files close.
        radiance_data = np.stack(
            [radiance_ds[var].values for var in radiance_ds.data_vars], axis=-1
        )

    # Print Radiance data shape
    print(f"Radiance data shape for {subdir}: {radiance_data.shape}")


Radiance data shape for 162: (1200, 1500, 6)
Radiance data shape for 164: (1200, 1500, 6)
Radiance data shape for 171: (1200, 1500, 6)
Radiance data shape for 181: (1200, 1500, 6)
Radiance data shape for 183: (1200, 1500, 6)


<font size="5">**Extract and Print BT Layer Data**</font>

In [4]:
# Extract BT layer data
for subdir in subdirs:
    subdir_path = os.path.join(data_dir, subdir)

    # Load BT dataset. Opening it in a `with` block guarantees the NetCDF
    # files are closed again as soon as the arrays have been read.
    with xr.open_mfdataset(f'{subdir_path}/S*_BT_in.nc', combine='by_coords') as bt_ds:
        # Stack the three BT variables, in this fixed order, along a new
        # trailing axis. `.values` reads the data while the files are open.
        bt_data = np.stack(
            [bt_ds[var].values for var in ['S7_BT_in', 'S8_BT_in', 'S9_BT_in']],
            axis=-1,
        )

    # Print BT data shape
    print(f"BT data shape for {subdir}: {bt_data.shape}")

BT data shape for 162: (1200, 1500, 3)
BT data shape for 164: (1200, 1500, 3)
BT data shape for 171: (1200, 1500, 3)
BT data shape for 181: (1200, 1500, 3)
BT data shape for 183: (1200, 1500, 3)


<font size="5">**Merge Radiance and BT Data and Print Combined Shape**</font>

In [6]:
def _stack_layers(file_pattern, var_names=None):
    """Load the NetCDF files matching a glob pattern and stack their variables.

    Parameters
    ----------
    file_pattern : str
        Glob pattern passed to ``xr.open_mfdataset``.
    var_names : sequence of str, optional
        Variables to stack, in the given order. When omitted, every data
        variable of the opened dataset is used, in the dataset's own order.

    Returns
    -------
    numpy.ndarray
        The selected variables stacked along a new last axis.
    """
    # The context manager closes the files after the data has been read, so
    # no NetCDF handles are left open between scenes.
    with xr.open_mfdataset(file_pattern, combine='by_coords') as ds:
        names = list(ds.data_vars) if var_names is None else var_names
        return np.stack([ds[name].values for name in names], axis=-1)


# Merge Radiance and BT data
combined_data_dict = {}

for subdir in subdirs:
    subdir_path = os.path.join(data_dir, subdir)

    # Radiance: every variable found in the radiance files
    radiance_data = _stack_layers(f'{subdir_path}/S*_radiance_in.nc')

    # BT: the three brightness-temperature variables, in a fixed order
    bt_data = _stack_layers(
        f'{subdir_path}/S*_BT_in.nc', ['S7_BT_in', 'S8_BT_in', 'S9_BT_in']
    )

    # Merge Radiance and BT data into one array (Radiance layers first)
    combined_data = np.concatenate([radiance_data, bt_data], axis=-1)

    # Store combined data for later use, keyed by subdirectory name
    combined_data_dict[subdir] = combined_data

    # Print combined data shape
    print(f"Combined data shape for {subdir}: {combined_data.shape}")


Combined data shape for 162: (1200, 1500, 9)
Combined data shape for 164: (1200, 1500, 9)
Combined data shape for 171: (1200, 1500, 9)
Combined data shape for 181: (1200, 1500, 9)
Combined data shape for 183: (1200, 1500, 9)


<font size="5">**Extract and Process Features and Labels**</font>

In [8]:
import pandas as pd

# Initialize a list to store all rows from all subdirectories.
# Each row is [label_code, feature_1, ..., feature_n].
all_rows = []

# Label encoding: Clear -> 0, Ice -> 1, Cloud -> 2
label_encoding = {'Clear': 0, 'Ice': 1, 'Cloud': 2}

# Label file for each class. The variable inside each file carries the class
# name. Insertion order (Clear, Ice, Cloud) fixes the order rows are emitted.
label_files = {
    'Clear': 'clear_labels.nc',
    'Ice': 'ice_labels.nc',
    'Cloud': 'cloud_labels.nc',
}

# Extract and process features and labels for each subdirectory
for subdir in subdirs:
    subdir_path = os.path.join(data_dir, subdir)

    # Get the combined feature data (Radiance + BT)
    combined_data = combined_data_dict[subdir]

    for label_name, file_name in label_files.items():
        # Read the label layer and close the file straight away; only the
        # boolean mask of positively labelled pixels is kept.
        with xr.open_dataset(os.path.join(subdir_path, file_name)) as label_ds:
            label_mask = (label_ds[label_name] > 0).values

        # Boolean indexing picks the feature vectors of the labelled pixels.
        label_features = combined_data[label_mask]

        # One bulk tolist() instead of one call per pixel. Rows stay plain
        # Python lists with an integer label in front, so the DataFrame built
        # from them keeps an integer `label` column.
        label_code = label_encoding[label_name]
        all_rows.extend([label_code] + row for row in label_features.tolist())


<font size="5">**Merge Data and Save to CSV**</font>

In [9]:
# Column layout: the class label first, then one numbered column per stacked
# layer (feature1 ... featureN, N being the size of the last array axis)
n_features = combined_data.shape[-1]
column_names = ['label'] + [f'feature{idx}' for idx in range(1, n_features + 1)]

# Build the table from the collected rows and order it by class label
df = pd.DataFrame(all_rows, columns=column_names).sort_values(by='label')

# Destination of the merged CSV file; create its folder when it is missing
output_csv_file = os.path.join(base_path, 'NODE/data/csv', 'merged_features.csv')
os.makedirs(os.path.dirname(output_csv_file), exist_ok=True)

# Write everything to a single CSV file, without the index column
df.to_csv(output_csv_file, index=False)

print(f"Saved merged CSV: {df.shape[0]} rows, saved to {output_csv_file}")


Saved merged CSV: 638937 rows, saved to D:/CloudDetection/NODE/data/csv\merged_features.csv


In [5]:
import pandas as pd

# Load the merged dataset from NODE/data/csv, the folder the
# "Merge Data and Save to CSV" step writes merged_features.csv to.
csv_path = r'D:\CloudDetection\NODE\data\csv\merged_features.csv'
data = pd.read_csv(csv_path)

# Split data based on label values
class_0 = data[data['label'] == 0]
class_1 = data[data['label'] == 1]
class_2 = data[data['label'] == 2]

# Undersample each class to the same size: 70,000 samples, or the size of the
# smallest class when one of them holds fewer rows than that. Without the cap,
# DataFrame.sample() raises as soon as a class is smaller than the request.
samples_per_class = min(70000, len(class_0), len(class_1), len(class_2))
if samples_per_class == 0:
    raise ValueError(
        f"Cannot build a balanced dataset from {csv_path}: "
        "at least one of the classes 0, 1, 2 has no rows"
    )

class_0_under = class_0.sample(n=samples_per_class, random_state=42)
class_1_under = class_1.sample(n=samples_per_class, random_state=42)
class_2_under = class_2.sample(n=samples_per_class, random_state=42)

# Concatenate the undersampled data
balanced_data = pd.concat([class_0_under, class_1_under, class_2_under])

# Shuffle the data (fixed seed for reproducibility) and renumber the rows
balanced_data = balanced_data.sample(frac=1, random_state=42).reset_index(drop=True)

# Save the balanced data to a new CSV file
balanced_data.to_csv(r'D:\CloudDetection\NODE\data\balanced_merged_features.csv', index=False)
