In [None]:
import os
import numpy as np
import pydicom
from pathlib import Path
from collections import defaultdict
from tqdm import tqdm

def should_rescale_ct(ds, pixel_array):
    """Decide whether a CT slice's pixel data should be rescaled.

    Args:
        ds: Dataset-like object supporting ``ds.get(name, default)`` and
            attribute lookup for ``RescaleSlope`` / ``RescaleIntercept``.
        pixel_array: Array-like exposing ``.min()``.

    Returns:
        ``True`` only when the modality is ``'CT'``, both rescale tags are
        present, and the minimum pixel value is either ``>= -100`` or exactly
        ``-2000``; ``False`` in every other case.
    """
    # Non-CT data is never rescaled.
    is_ct = ds.get('Modality', '') == 'CT'
    if not is_ct:
        return False

    # Both rescale tags must be present on the dataset.
    required_tags = ('RescaleSlope', 'RescaleIntercept')
    if not all(hasattr(ds, tag) for tag in required_tags):
        return False

    # NOTE(review): -2000 looks like a padding sentinel and -100 like a
    # "values are still raw / unshifted" heuristic -- confirm with the author.
    lowest = pixel_array.min()
    return bool(lowest >= -100 or lowest == -2000)

def validate_series_rescale_consistency(series_dir):
    """Find CT series whose slices disagree on the ``should_rescale_ct`` decision.

    Every immediate sub-directory of ``series_dir`` is treated as one series.
    A series is examined only if it contains ``*.dcm`` files and the first of
    them (in sorted order) reports ``Modality == 'CT'``. Files that cannot be
    read or decoded are reported with ``print`` and skipped; they do not take
    part in the consistency check.

    Args:
        series_dir: Directory whose sub-directories each hold one series.

    Returns:
        A list with one dict per inconsistent series. Each dict has the keys
        ``series_uid`` (``None`` when the first file has no
        ``SeriesInstanceUID``), ``series_folder``, ``total_files`` and
        ``file_details`` (one record per successfully processed file with its
        rescale decision, pixel min/max and rescale tag values).
    """
    series_folders = [f for f in Path(series_dir).iterdir() if f.is_dir()]

    inconsistent_series = []

    print("Validating CT series for rescale consistency...")

    for series_folder in tqdm(series_folders):
        # Sorted so that the "first file" and the reported examples are
        # reproducible; glob order depends on the file system.
        dcm_files = sorted(series_folder.glob("*.dcm"))

        if not dcm_files:
            continue

        # Probe the first file to see if the series is CT. Only header tags
        # are needed here, so skip the pixel data: the file is read in full
        # again in the loop below.
        try:
            first_ds = pydicom.dcmread(dcm_files[0], stop_before_pixels=True)
            if first_ds.get('Modality', '') != 'CT':
                continue
            # Fetched here with a default so a missing tag cannot abort the
            # whole scan once an inconsistency is being recorded.
            series_uid = first_ds.get('SeriesInstanceUID', None)
        except Exception as e:
            print(f"\nError reading {dcm_files[0]}: {e}")
            continue

        # Check all files in the series
        rescale_results = []
        file_info = []

        for dcm_file in dcm_files:
            # Best-effort: one unreadable slice must not stop the scan.
            try:
                ds = pydicom.dcmread(dcm_file)
                pixel_array = ds.pixel_array
                should_rescale = should_rescale_ct(ds, pixel_array)

                rescale_results.append(should_rescale)
                file_info.append({
                    'file': dcm_file.name,
                    'should_rescale': should_rescale,
                    'min_pixel': pixel_array.min(),
                    'max_pixel': pixel_array.max(),
                    'has_rescale_slope': hasattr(ds, 'RescaleSlope'),
                    'has_rescale_intercept': hasattr(ds, 'RescaleIntercept'),
                    'rescale_slope': ds.RescaleSlope if hasattr(ds, 'RescaleSlope') else None,
                    'rescale_intercept': ds.RescaleIntercept if hasattr(ds, 'RescaleIntercept') else None
                })
            except Exception as e:
                print(f"\nError processing {dcm_file}: {e}")
                continue

        # More than one distinct decision means the slices disagree.
        if len(set(rescale_results)) > 1:
            inconsistent_series.append({
                'series_uid': series_uid,
                'series_folder': series_folder.name,
                'total_files': len(dcm_files),
                'file_details': file_info
            })

    return inconsistent_series

# Main execution
def _print_example(entry):
    """Print one file record: its name, pixel range and rescale tag values."""
    print(f"  Example: {entry['file']}")
    for label, key in (
        ("min_pixel", "min_pixel"),
        ("max_pixel", "max_pixel"),
        ("RescaleSlope", "rescale_slope"),
        ("RescaleIntercept", "rescale_intercept"),
    ):
        print(f"    {label}: {entry[key]}")


series_dir = Path(r'E:\data_old\series')

inconsistent = validate_series_rescale_consistency(series_dir)

banner = "=" * 80
print("\n" + banner)
print("VALIDATION RESULTS")
print(banner)

if not inconsistent:
    print("✓ All CT series have consistent should_rescale results across all slices")
else:
    print(f"✗ Found {len(inconsistent)} CT series with INCONSISTENT should_rescale results:\n")

    for series_info in inconsistent:
        print(f"\nSeries: {series_info['series_folder']}")
        print(f"Series UID: {series_info['series_uid']}")
        print(f"Total files: {series_info['total_files']}")
        print("-" * 80)

        # Split the per-file records by rescale decision in a single pass.
        rescale_true, rescale_false = [], []
        for detail in series_info['file_details']:
            if detail['should_rescale']:
                rescale_true.append(detail)
            else:
                rescale_false.append(detail)

        # Show the count for each group plus its first record as an example.
        print(f"Files with should_rescale=True: {len(rescale_true)}")
        if rescale_true:
            _print_example(rescale_true[0])

        print(f"\nFiles with should_rescale=False: {len(rescale_false)}")
        if rescale_false:
            _print_example(rescale_false[0])

        print(banner)

print(f"\nTotal inconsistent series: {len(inconsistent)}")

Validating CT series for rescale consistency...


 65%|██████▍   | 2826/4348 [3:58:05<11:57:52, 28.30s/it]